def choose_actions(self, subject, actioni):
    """Return all actions that optimize the theta-weighted trade-off between
    the stored information-gain and success-probability values for this
    subject/action index, avoiding a repeat of the previous action when an
    alternative exists."""
    action_values = {}
    for a in world.possible_actions():
        IG = self.alldata[subject][actioni]['ActionValues'][a][0]
        PG = self.alldata[subject][actioni]['ActionValues'][a][1]
        #PGv=1-PG  #invert, IG is final entropy, minimized! PG=0 -> PGv=1; PG=1 -> PGv=0
        #action_values[a]=self.theta*IG+(1-self.theta)*PGv
        action_values[a] = -self.theta * IG - (1 - self.theta) * PG
    #min_value=action_values[min(action_values, key=lambda x: x[1])]  #take the min value --WRONG!!!
    min_value = min(action_values.itervalues())
    #find ALL actions that achieve the minimum
    min_actions = [a for a, v in action_values.iteritems() if v == min_value]
    #discard repeated action choice
    if actioni > 0:
        prev_action = self.alldata[subject][actioni - 1]['SubjectAction']
    else:
        prev_action = ()
    if len(set(min_actions) - set([prev_action])) == 0:
        #the previous action is the only minimizer: drop it and re-minimize
        del action_values[prev_action]
        min_value = min(action_values.itervalues())
        min_actions = [a for a, v in action_values.iteritems() if v == min_value]
    else:
        min_actions = set(min_actions) - set([prev_action])
    return list(min_actions)
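#A minimal standalone sketch (hypothetical helper, not part of the class above)
#of the scoring rule used in choose_actions: each action's stored pair is
#combined as -theta*first - (1-theta)*second and the minimum is taken, so the
#theta-weighted sum of the two stored quantities is maximized and ties are
#returned as a set.
def blended_argmin(action_values, theta):
    """action_values: dict mapping action -> (v1, v2); return all minimizers
    of -theta*v1 - (1-theta)*v2."""
    scores = {}
    for a, (v1, v2) in action_values.items():
        scores[a] = -theta * v1 - (1 - theta) * v2
    best = min(scores.values())
    return [a for a, s in scores.items() if s == best]

#made-up numbers: with theta=1 only the first component matters, with theta=0
#only the second
example = {'act_A': (0.8, 0.2), 'act_B': (0.3, 0.9)}
print blended_argmin(example, 1.0)  #-> ['act_A']
print blended_argmin(example, 0.0)  #-> ['act_B']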
def choose_action(self, prev_data=None):
    mingain = 1000
    astars = []
    for a in world.possible_actions():
        this_gain = self.entropy_gain(a, prev_data)
        #print a, this_gain
        if this_gain < mingain:
            #strictly better: restart the set of best actions
            astars = [a]
            mingain = this_gain
        elif this_gain == mingain:
            #tied with the current minimum
            astars.append(a)
    #break ties uniformly at random
    return random.choice(astars)
def choose_actions(self, prev_data=[]): """ Same as choose actions, but return all equivalents """ mingain = 1000 astars = [] for a in world.possible_actions(): if len(prev_data) > 0: if a == prev_data[-1].action: continue this_gain = self.expected_final_entropy(a, prev_data) if this_gain < mingain: astars = [a] mingain = this_gain elif this_gain == mingain: astars.append(a) return astars, mingain
def analyze(self):
    """For every subject and action index, record the subject's action and,
    for each possible action, the negative expected information gain and the
    success probability under the two learner models, then save."""
    data = self.load_data()
    subjects = data.get_kids()[:self.N]
    for subject in subjects:
        print 'Analyzing subject {0}...'.format(subject)
        self.alldata[subject] = {}  #defaultdict(dict)
        max_action = min(data.get_kid_nactions(subject), self.A)
        subject_sequence = data.data[subject][:max_action]
        for actioni in range(max_action):
            #print 'Action {0} of {1}'.format(actioni, max_action)
            self.alldata[subject][actioni] = {}
            subject_action = data.data[subject][actioni].get_action()
            self.alldata[subject][actioni]['SubjectAction'] = subject_action
            theory_model = learners.TheoryLearner()
            pg_model = learners.ActivePlayer()
            #model_actions, model_gain = pg_model.choose_actions(subject_sequence[:actioni])
            self.alldata[subject][actioni]['ActionValues'] = {}
            for a in world.possible_actions():
                #EIG = theory_model.expected_final_entropy(a, subject_sequence[:actioni])
                EIG = -1 * theory_model.expected_information_gain(a, subject_sequence[:actioni])
                PG = pg_model.success_probability(a, subject_sequence[:actioni])
                self.alldata[subject][actioni]['ActionValues'][a] = (EIG, PG)
            #earlier per-model summaries, kept for reference:
            #theory_model = learners.TheoryLearner()
            #model_actions, model_gain = theory_model.choose_actions(subject_sequence[:actioni])
            #self.alldata[subject][actioni]['TMA'] = model_actions
            #self.alldata[subject][actioni]['SEIG'] = \
            #    entropy_gains.theory_expected_final_entropy(subject_action, subject_sequence[:actioni])
            #self.alldata[subject][actioni]['TMEIG'] = model_gain
            #pg_model = learners.ActivePlayer()
            #model_actions, model_gain = pg_model.choose_actions(subject_sequence[:actioni])
            #self.alldata[subject][actioni]['PMA'] = model_actions
            #self.alldata[subject][actioni]['PMSP'] = model_gain
    self.save()
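#Sketch (hypothetical names) of how the structure built by analyze() is read
#back: alldata[subject][actioni] holds the subject's actual action plus an
#(EIG, PG) pair per possible action, which choose_actions(subject, actioni)
#turns into a set of model-preferred actions.  "analysis" stands in for an
#instance of this class after analyze() has run, with theta already set.
for subject in analysis.alldata:
    for actioni in analysis.alldata[subject]:
        subject_action = analysis.alldata[subject][actioni]['SubjectAction']
        model_actions = analysis.choose_actions(subject, actioni)
        #e.g. check whether the subject picked one of the model's preferred actions
        print subject, actioni, subject_action in model_actions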
def choose_actions(self, prev_data=[]): """ Same as choose actions, but return all equivalents """ mingain=1000 astars=[] for a in world.possible_actions(): if len(prev_data)>0: if a==prev_data[-1].action: continue this_gain=self.expected_final_entropy(a, prev_data) if this_gain < mingain: astars=[a] mingain=this_gain elif this_gain == mingain: astars.append(a) return astars, mingain
def choose_actions(self, prev_data=[]): """ Same as choose actions, but return all equivalents """ maxprob = 0 astars = [] for a in world.possible_actions(): if len(prev_data) > 0: if a == prev_data[-1].action: continue this_prob = self.success_probability(a, prev_data) if this_prob > maxprob: astars = [a] maxprob = this_prob elif this_prob == maxprob: astars.append(a) #choice=random.choice(astars) return astars, maxprob
def choose_actions(self, prev_data=[]): """ Same as choose actions, but return all equivalents """ maxprob=0 astars=[] for a in world.possible_actions(): if len(prev_data)>0: if a==prev_data[-1].action: continue this_prob=self.success_probability(a, prev_data) if this_prob > maxprob: astars=[a] maxprob=this_prob elif this_prob == maxprob: astars.append(a) #choice=random.choice(astars) return astars, maxprob
def choose_action(self, prev_data=[]):
    #mingain=1000
    maxprob = 0
    astars = []
    for a in world.possible_actions():
        if len(prev_data) > 0:
            if a == prev_data[-1].action:
                continue
        #this_gain=self.expected_final_entropy(a, prev_data)
        this_prob = self.success_probability(a, prev_data)
        if this_prob > maxprob:
            astars = [a]
            maxprob = this_prob
            #mingain=this_gain
        #elif this_gain == mingain:
        elif this_prob == maxprob:
            astars.append(a)
    choice = random.choice(astars)
    #self.experience.append(world.make_action(choice))
    return choice
def choose_action(self, prev_data=[]):  #None):
    mingain = 1000
    astars = []
    for a in world.possible_actions():
        if len(prev_data) > 0:
            if a == prev_data[-1].action:
                continue
        this_gain = self.expected_final_entropy(a, prev_data)
        if this_gain < mingain:
            astars = [a]
            mingain = this_gain
        elif this_gain == mingain:
            astars.append(a)
    #while True:
    #    print astars, prev_data[-1].action
    choice = random.choice(astars)
    #    if len(prev_data)==0:
    #        break
    #    elif choice!=prev_data[-1].action:
    #        break
    #self.experience.append(world.make_action(choice))
    return choice
#script fragment: compare low/high model data probabilities and theory
#posteriors.  Imports added here; dcol and dsha are assumed to be defined
#earlier in the original script, and high_model is assumed to be a module
#analogous to low_model.
import time

import Data
import world
import low_model
import high_model

lposc = [0] * 12
hposc = [0] * 12
lposs = [0] * 12
hposs = [0] * 12
lposi = [0] * 12
hposi = [0] * 12

data = Data.Data()
data.read(astext=False)
dind = data.data[data.get_kids()[3]]
[d.display() for d in dind]
#dind[-1].active=False

print low_model.p_data_action(dind[0], world.possible_actions()[3])
print high_model.p_data_action(dind[0], world.possible_actions()[3])
dind[0].active = True
print low_model.p_data_action(dind[0], world.possible_actions()[3])
print high_model.p_data_action(dind[0], world.possible_actions()[3])

norm = True
lostart = time.clock()
for t in range(12):
    lposc[t] = low_model.p_theory_data(t, dcol, normalized=norm)
    lposs[t] = low_model.p_theory_data(t, dsha, normalized=norm)
    lposi[t] = low_model.p_theory_data(t, dind, normalized=norm)
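#Sketch continuing the fragment above (only the low-model lists are filled in
#the shown code): report the elapsed time and the highest-posterior theory for
#each of the three datasets.
print 'low-model posteriors computed in', time.clock() - lostart, 's'
for label, post in [('dcol', lposc), ('dsha', lposs), ('dind', lposi)]:
    print label, 'best theory:', max(range(12), key=lambda t: post[t])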
#script fragment: tabulate action/condition properties.  The first block comes
#from inside a loop in the original file (acts1, acts2, condition, table, row
#and i are defined there), and the fragment is cut off mid-statement at the end.
if acts2[i].active:
    col = 0
    if acts2[i].machine[0] == acts2[i].toy[0]:
        condition[i] = 'color'
    else:
        condition[i] = 'shape'
else:
    if acts2[i].machine[0] != acts2[i].toy[0] and acts2[i].machine[1] != acts2[i].toy[1]:
        col = 2
    else:
        col = 1
table[row, col] += 1

import world
import numpy as np

wacts = world.possible_actions()
kacts = np.zeros((6, 3), dtype=int)
for i in range(30):
    toy1 = acts1[i].toy
    mac1 = acts1[i].machine
    toy2 = acts2[i].toy
    mac2 = acts2[i].machine
    if acts1[i].active:
        col = 0
        if toy1[0] == mac1[0]:
            prop1 = 'col_p'
        elif toy1[1] == mac1[1]:
            prop1 = 'sha_p'
    else:
        if toy1[0] == mac1[0]:
#script fragment: compare per-run entropy differences between the jointfull and
#hypfull models (output_directory is assumed to be defined earlier).
import numpy as np

today = '141119/'
#batch='ep-0.05/'
batch = 'varyep/'
data_directory = output_directory + today  #+batch

n_act = 2
model = 'jointfull'
filename = model + '-' + str(n_act) + '_tru-20' + '_rreal.txt'
datajoi = np.loadtxt(data_directory + filename)
print data_directory + filename

model = 'hypfull'
filename = model + '-' + str(n_act) + '_tru-20' + '_rreal.txt'
datahyp = np.loadtxt(data_directory + filename)
print data_directory + filename

for i in range(len(datahyp)):
    print datahyp[i, 2] - datahyp[i, 0], datajoi[i, 2] - datajoi[i, 0]

import entropy_gains as eg
import world

pa = world.possible_actions()
print eg.hypotheses_expected_final_entropy(pa[0], []) - eg.hypotheses_expected_final_entropy(pa[1], [])
#note: the next line compares the joint model on pa[0] against the hypotheses model on pa[1]
print eg.joint_expected_final_entropy(pa[0], []) - eg.hypotheses_expected_final_entropy(pa[1], [])
import low_model as model
import world
import entropy_gains as eg

for action in world.possible_actions():
    print action, eg.hypotheses_expected_final_entropy(action, [])
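#Sketch: the same per-action entropies can drive a greedy choice, assuming (as
#in the learners above) that the expected final entropy is to be minimized.
best_action = min(world.possible_actions(),
                  key=lambda a: eg.hypotheses_expected_final_entropy(a, []))
print 'greedy action:', best_action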