def update_site(site_name, metric, form, original_metric_file, metric_file,
                metric_reasons_file, metric_log_file, metric_groups,
                metric_states, metric_adfs_groups):
    """Function called after form submit in the main page.
       1. Removing site from sites' list
       2. Removing old reason
       3. Adding updated site to sites' list
       4. Adding new reason to reasons' list
       5. Writing new event in log
    """
    metric_name = Sites(metric_file, original_metric_file)
    user = User(adfs_login, metric_name.sites, metric_groups, adfs_groups,
                metric_adfs_groups, metric)
    reasons = Reason(metric_reasons_file)
    if site_name in user.managed_sites:
        # drop the old entry for this site (only if the submitted statuses
        # are consistent with the current metric state)
        for site in metric_name.sites:
            if site[1] == site_name \
                    and site[2] == form.getvalue("old-status") \
                    and form.getvalue("new-status") in metric_states:
                metric_name.sites.remove(site)
        # drop the old reason for this site
        for site in reasons.sites:
            if site[0] == site_name:
                reasons.sites.remove(site)
        # add the updated entry and its reason, unless the override is removed
        if form.getvalue('new-status') != "no_override":
            color = find_color(metric, form.getvalue('new-status'))
            updated_site = [
                datetime.now().strftime("%Y-%m-%d %H:%M:%S"), site_name,
                form.getvalue("new-status"), color,
                form.getvalue("url")
            ]
            metric_name.sites.append(updated_site)
            reasons.sites.append(
                [site_name, re.sub(r'\s+', ' ', form.getvalue("reason"))])
        reasons.write_changes(reasons.sites)
        metric_name.write_changes(metric_name.sites)
        # log the event
        log = Log(metric_log_file)
        new_log = [
            datetime.now().strftime("%Y-%m-%d %H:%M:%S"), adfs_login,
            site_name, form.getvalue('new-status'), form.getvalue('reason')
        ]
        log.write_changes(new_log)
    # redirect back to the metric's main page
    url = "https://cmssst.web.cern.ch/cmssst/man_override/cgi/manualOverride.py/%s" % metric
    print "Status: 302 Moved"
    print "Location: %s" % url
    print
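# A minimal sketch (an assumption, not part of the original module) of how
# update_site could be driven from the CGI entry point. The form is a
# cgi.FieldStorage instance; all file paths, group names and metric states
# below are placeholder values, and adfs_login / adfs_groups are assumed to be
# populated elsewhere from the SSO headers, as in the functions above.
import cgi

def handle_submit():
    form = cgi.FieldStorage()
    update_site(site_name=form.getvalue('site'),
                metric='lifestatus',
                form=form,
                original_metric_file='lifestatus_original.txt',  # placeholder paths
                metric_file='lifestatus.txt',
                metric_reasons_file='lifestatus_reasons.txt',
                metric_log_file='lifestatus_log.txt',
                metric_groups=['cms-comp-ops'],                  # placeholder groups
                metric_states=['ok', 'warning', 'error', 'no_override'],
                metric_adfs_groups=['cms-comp-ops'])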
def index(metric, original_metric_file, metric_file, metric_reasons_file,
          metric_groups, metric_adfs_groups):
    """Function generates main page.
       For example - manualOverride.py/lifestatus
    """
    template = env.get_template('%s.html' % metric)
    metric_name = Sites(metric_file, original_metric_file)
    user = User(adfs_login, metric_name.sites, metric_groups, adfs_groups,
                metric_adfs_groups, metric)
    reasons = Reason(metric_reasons_file)
    metric_name.sites = sorted(metric_name.sites, key=itemgetter(1))
    # attach the stored reason (if any) to each site row
    for site in metric_name.sites:
        for item in reasons.sites:
            if site[1] == item[0]:
                site.append(item[1])
    print template.render(sites=metric_name.sites,
                          adfs_login=adfs_login,
                          user_info=user.user_info,
                          metricName=metric)
class Corrp():
    def __init__(self):
        self.obj = None
        self.decision = None
        self.time = None
        self.location = None
        self.suplearn = None

    def init_pred(self):
        # remember later to change it to an override method
        self.obj = Reason()
        time = ['(currenttime=rush)', '(currenttime=break)']
        location = ['(atlocation=at_entrance)', '(atlocation=at_exit)']
        lstm = ['(classifier=zero)', '(classifier=one)']
        decision = ['interested', 'not_interested']
        time_rand = random.choice(time)
        loc_rand = random.choice(location)
        lstm_rand = random.choice(lstm)
        int_prob = float(
            self.obj.query('reason.plog', self.decision, time_rand, lstm_rand,
                           loc_rand))
        self.obj.delete('reason.plog')
        print int_prob
        return int_prob

    def init_state(self):
        self.decision = random.choice(['interested', 'not_interested'])
        print 'The random decision is:', self.decision
        self.time = random.choice(['break', 'rush'])
        print 'The random time is ', self.time
        self.location = random.choice(['at_entrance', 'at_exit'])
        print 'The random location is : ', self.location
        self.lstm()
        print 'The classifier output is: ', self.classifier
        return self.decision

    def get_state_index(self, state):
        return self.model.states.index(state)

    def get_obs_index(self, obs):
        return self.model.observations.index(obs)

    def lstm(self):
        # simulated classifier output, sampled uniformly from [0.0, 0.9]
        pred = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        if random.choice(pred) >= 0.5:
            self.suplearn = 1
            self.classifier = 'one'
        else:
            self.suplearn = 0
            self.classifier = 'zero'

    def run(self):
        init = self.init_state()
        prob = self.init_pred()
        cost = 0
        success = 0
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        if prob > 0.5:
            if 'interested' == init:
                success = 1
                tp = 1
                print 'Trial was successful'
            elif 'not_interested' == init:
                success = 0
                fp = 1
                print 'Trial was unsuccessful'
        else:
            if 'not_interested' == init:
                tn = 1
                success = 1
                print 'Trial was successful'
            elif 'interested' == init:
                fn = 1
        return cost, success, tp, tn, fp, fn

    def trial_num(self, num):
        total_success = 0
        total_cost = 0
        total_tp = 0
        total_tn = 0
        total_fp = 0
        total_fn = 0
        for i in range(num):
            random.seed(i)
            c, s, tp, tn, fp, fn = self.run()
            total_cost += c
            total_success += s
            total_tp += tp
            total_tn += tn
            total_fp += fp
            total_fn += fn
        print 'Average total reward is:', float(total_cost) / num
        print 'Average total success is: ', float(total_success) / num
        Precision = float(total_tp) / (total_tp + total_fp)
        print 'Precision is ', Precision
        Recall = float(total_tp) / (total_tp + total_fn)
        print 'Recall is ', Recall
        F1score = 2.0 * Precision * Recall / (Precision + Recall)
        print 'F1 score', F1score
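# A minimal usage sketch (an assumption, not part of the original code): run the
# reasoning-only baseline above for a number of seeded trials; trial_num prints
# the success rate, precision, recall and F1 score. It assumes the Reason class
# and its 'reason.plog' program are available on disk.
if __name__ == '__main__':
    baseline = Corrp()
    baseline.trial_num(100)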
class Simulator:
    def __init__(self, pomdpfile='program.pomdp'):
        self.time = ['morning', 'afternoon', 'evening']
        self.location = ['classroom', 'library']
        self.identity = ['student', 'professor', 'visitor']
        self.intention = ['interested', 'not_interested']
        self.reason = Reason('reason0.plog')
        self.model = Model(filename='program.pomdp', parsing_print_flag=False)
        self.policy = Policy(5, 4, output='program.policy')
        self.instance = []
        self.results = {}
        self.learning = Learning('./', 'interposx.csv', 'interposy.csv')
        self.trajectory_label = 0

    def sample(self, alist, distribution):
        return np.random.choice(alist, p=distribution)

    def create_instance(self, i):
        random.seed(i)
        # identity is fixed to 'visitor' here; the other branches are kept for
        # experimenting with different populations
        #person = random.choice(self.identity)
        person = 'visitor'
        if person == 'student':
            place = self.sample(self.location, [0.7, 0.3])
            #time = self.sample(self.time, [0.15, 0.15, 0.7])
            #intention = self.sample(self.intention, [0.3, 0.7])
        elif person == 'professor':
            place = self.sample(self.location, [0.9, 0.1])
            time = self.sample(self.time, [0.8, 0.1, 0.1])
            intention = self.sample(self.intention, [0.1, 0.9])
        else:
            place = 'classroom'
            time = 'afternoon'
            intention = self.sample(self.intention, [0.8, 0.2])
            intention = 'interested'
        self.trajectory_label = self.learning.get_traj(intention)
        print('Sampling time, location and intention for the identity: ' +
              person)
        self.instance.append(person)      # 0
        self.instance.append(time)        # 1
        self.instance.append(place)       # 2
        self.instance.append(intention)   # 3
        self.instance.append('trajectory with label ' +
                             str(self.trajectory_label))
        print('Instance: ')
        print(self.instance[0], self.instance[1], self.instance[2],
              self.instance[3], self.instance[4])
        return self.instance

    def observe_fact(self, i):
        random.seed(i)
        time = self.instance[1]
        location = self.instance[2]
        return time, location

    def init_belief(self, int_prob):
        # seed the belief with the reasoned probability of being interested
        int_prob = float(int_prob)
        init_belief = [0, 0, 1.0 - int_prob, int_prob, 0]
        b = np.zeros(len(self.model.states))
        for i in range(len(self.model.states)):
            b[i] = init_belief[i] / sum(init_belief)
        print 'The normalized initial belief would be: '
        print b
        return b

    def get_state_index(self, state):
        return self.model.states.index(state)

    def init_state(self):
        state = random.choice(
            ['not_turned_not_interested', 'not_turned_interested'])
        s_idx = self.get_state_index(state)
        return s_idx, state

    def get_obs_index(self, obs):
        return self.model.observations.index(obs)

    def observe(self, a_idx, intention, pln_obs_acc):
        # noisy observation: 'pos' with probability pln_obs_acc when the true
        # intention is 'interested', 'neg' with that probability otherwise
        p = pln_obs_acc
        action = self.model.actions[a_idx]
        if action == 'move_forward' and intention == 'interested':
            obs = self.sample(['pos', 'neg'], [p, 1 - p])
        elif action == 'move_forward' and intention == 'not_interested':
            obs = self.sample(['pos', 'neg'], [1 - p, p])
        elif action == 'greet' and intention == 'interested':
            obs = self.sample(['pos', 'neg'], [p, 1 - p])
        elif action == 'greet' and intention == 'not_interested':
            obs = self.sample(['pos', 'neg'], [1 - p, p])
        elif action == 'turn' and intention == 'interested':
            obs = self.sample(['pos', 'neg'], [p, 1 - p])
        elif action == 'turn' and intention == 'not_interested':
            obs = self.sample(['pos', 'neg'], [1 - p, p])
        else:
            obs = 'na'
        o_idx = self.get_obs_index(obs)
        print('random observation is: ', self.model.observations[o_idx])
        return o_idx

    def update(self, a_idx, o_idx, b):
        # Bayesian belief update: prediction with the transition model,
        # correction with the observation likelihood, then normalization
        b = np.dot(b, self.model.trans_mat[a_idx, :])
        b = [
            b[i] * self.model.obs_mat[a_idx, i, o_idx]
            for i in range(len(self.model.states))
        ]
        b = b / sum(b)
        return b

    def run(self, strategy, time, location, r_thresh, l_thresh, pln_obs_acc):
        a_cnt = 0
        success = 0
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        cost = 0
        R = 0

        if strategy == 'corpp':
            prob = self.reason.query_nolstm(time, location, 'reason0.plog')
            print colored('\nSTRATEGY: ', 'red'), colored(strategy, 'red')
            print '\nOur POMDP model states are: '
            print self.model.states
            s_idx, temp = self.init_state()
            b = self.init_belief(prob)
            while True:
                a_idx = self.policy.select_action(b)
                a = self.model.actions[a_idx]
                a_cnt = a_cnt + 1
                if a_cnt > 20:
                    print('POLICY IS NOT REPORTING UNDER 20 ACTIONS')
                    sys.exit()
                print('action selected', a)
                o_idx = self.observe(a_idx, self.instance[3], pln_obs_acc)
                R = R + self.model.reward_mat[a_idx, s_idx]
                print 'Reward is : ', R
                b = self.update(a_idx, o_idx, b)
                print b
                if 'report' in a:
                    if 'not_interested' in a and 'not_interested' == self.instance[3]:
                        success = 1
                        tn = 1
                        print 'Trial was successful'
                    elif 'report_interested' in a and 'interested' == self.instance[3]:
                        success = 1
                        tp = 1
                        print 'Trial was successful'
                    elif 'report_interested' in a and 'not_interested' == self.instance[3]:
                        fp = 1
                        print 'Trial was unsuccessful'
                    elif 'not_interested' in a and 'interested' == self.instance[3]:
                        fn = 1
                    print('Finished\n ')
                    break
                cost = cost + self.model.reward_mat[a_idx, s_idx]
                print 'cost is : ', cost

        if strategy == 'planning':
            print colored('\nSTRATEGY', 'green'), colored(strategy, 'green')
            print '\nOur POMDP model states are: '
            print self.model.states
            s_idx, temp = self.init_state()
            # uniform initial belief: planning without reasoning or learning
            b = np.ones(len(self.model.states))
            for i in range(len(self.model.states)):
                b[i] = b[i] / len(self.model.states)
            print 'initial belief', b
            while True:
                a_idx = self.policy.select_action(b)
                a = self.model.actions[a_idx]
                print('action selected', a)
                o_idx = self.observe(a_idx, self.instance[3], pln_obs_acc)
                R = R + self.model.reward_mat[a_idx, s_idx]
                print 'R is : ', R
                b = self.update(a_idx, o_idx, b)
                print b
                if 'report' in a:
                    if 'not_interested' in a and 'not_interested' == self.instance[3]:
                        success = 1
                        tn = 1
                        print 'Trial was successful'
                    elif 'report_interested' in a and 'interested' == self.instance[3]:
                        success = 1
                        tp = 1
                        print 'Trial was successful'
                    elif 'report_interested' in a and 'not_interested' == self.instance[3]:
                        fp = 1
                        print 'Trial was unsuccessful'
                    elif 'not_interested' in a and 'interested' == self.instance[3]:
                        fn = 1
                    print('Finished\n ')
                    break
                cost = cost + self.model.reward_mat[a_idx, s_idx]
                print 'cost is : ', cost

        if strategy == 'reasoning':
            print colored('\nSTRATEGY is: ', 'yellow'), colored(strategy, 'yellow')
            prob = self.reason.query_nolstm(time, location, 'reason0.plog')
            print('P(interested) | observed time and location: '), prob
            prob = float(prob)
            if prob >= r_thresh and 'interested' == self.instance[3]:
                success = 1
                print('Greater than threshold = ' + str(r_thresh) +
                      ', -> human IS interested')
                print 'Trial was successful'
                tp = 1
            elif prob >= r_thresh and 'not_interested' == self.instance[3]:
                success = 0
                fp = 1
                print('Greater than threshold = ' + str(r_thresh) +
                      ', -> human IS interested')
                print 'Trial was unsuccessful'
            elif prob < r_thresh and 'interested' == self.instance[3]:
                success = 0
                print('Less than threshold = ' + str(r_thresh) +
                      ', -> human IS NOT interested')
                fn = 1
                print 'Trial was unsuccessful'
            else:
                success = 1
                print('Less than threshold = ' + str(r_thresh) +
                      ', -> human IS NOT interested')
                print 'Trial was successful'
                tn = 1

        if strategy == 'learning':
            print colored('\nStrategy is: ', 'blue'), colored(strategy, 'blue')
            res = self.learning.predict()
            if res > l_thresh and self.trajectory_label == 1.0:
                print('CASE I the trajectory shows person is interested')
                success = 1
                tp = 1
            elif res < l_thresh and self.trajectory_label == 0:
                print('CASE II the person is not interested')
                success = 1
                tn = 1
            elif res > l_thresh and self.trajectory_label == 0:
                success = 0
                fp = 1
                print('CASE III the trajectory shows person is interested')
            elif res < l_thresh and self.trajectory_label == 1.0:
                fn = 1
                success = 0
                print('CASE IV the person is not interested')

        if strategy == 'lstm-corpp':
            print colored('\nSTRATEGY is: ', 'magenta'), colored(strategy, 'magenta')
            res = self.learning.predict()
            if res > l_thresh:
                prob = self.reason.query(time, location, 'one', 'reason0.plog')
            else:
                prob = self.reason.query(time, location, 'zero', 'reason0.plog')
            print '\nOur POMDP model states are: '
            print self.model.states
            s_idx, temp = self.init_state()
            b = self.init_belief(prob)
            while True:
                a_idx = self.policy.select_action(b)
                a = self.model.actions[a_idx]
                print('action selected', a)
                o_idx = self.observe(a_idx, self.instance[3], pln_obs_acc)
                R = R + self.model.reward_mat[a_idx, s_idx]
                print 'R is : ', R
                b = self.update(a_idx, o_idx, b)
                print b
                if 'report' in a:
                    if 'not_interested' in a and 'not_interested' == self.instance[3]:
                        success = 1
                        tn = 1
                        print 'Trial was successful'
                    elif 'report_interested' in a and 'interested' == self.instance[3]:
                        success = 1
                        tp = 1
                        print 'Trial was successful'
                    elif 'report_interested' in a and 'not_interested' == self.instance[3]:
                        fp = 1
                        print 'Trial was unsuccessful'
                    elif 'not_interested' in a and 'interested' == self.instance[3]:
                        fn = 1
                    print('Finished\n ')
                    break
                cost = cost + self.model.reward_mat[a_idx, s_idx]
                print 'cost is : ', cost

        if strategy == 'lreasoning':
            print colored('\nStrategy is: learning + reasoning ', 'cyan')
            res = self.learning.predict()
            if res > l_thresh:
                prob = self.reason.query(time, location, 'one', 'reason0.plog')
            else:
                prob = self.reason.query(time, location, 'zero', 'reason0.plog')
            lr_thresh = r_thresh
            prob = float(prob)
            if prob >= r_thresh and 'interested' == self.instance[3]:
                success = 1
                print('Greater than threshold = ' + str(r_thresh) +
                      ', -> human IS interested')
                print 'Trial was successful'
                tp = 1
            elif prob >= r_thresh and 'not_interested' == self.instance[3]:
                success = 0
                fp = 1
                print('Greater than threshold = ' + str(r_thresh) +
                      ', -> human IS interested')
                print 'Trial was unsuccessful'
            elif prob < r_thresh and 'interested' == self.instance[3]:
                success = 0
                print('Less than threshold = ' + str(r_thresh) +
                      ', -> human IS NOT interested')
                fn = 1
                print 'Trial was unsuccessful'
            else:
                success = 1
                print('Less than threshold = ' + str(r_thresh) +
                      ', -> human IS NOT interested')
                print 'Trial was successful'
                tn = 1

        return cost, success, tp, tn, fp, fn, R

    def trial_num(self, num, strategylist, r_thresh, l_thresh, pln_obs_acc):
        df = pd.DataFrame()
        total_success = {}
        total_cost = {}
        total_tp = {}
        total_tn = {}
        total_fp = {}
        total_fn = {}
        prec = {}
        recall = {}
        SDcost = {}
        reward = {}
        SDreward = {}
        for strategy in strategylist:
            total_success[strategy] = 0
            total_cost[strategy] = 0
            total_tp[strategy] = 0
            total_tn[strategy] = 0
            total_fp[strategy] = 0
            total_fn[strategy] = 0
            prec[strategy] = 0
            recall[strategy] = 0
            reward[strategy] = 0
            SDreward[strategy] = []
            SDcost[strategy] = []
        for i in range(num):
            print colored('######TRIAL:', 'blue'), colored(i, 'red'), colored('#######', 'blue')
            del self.instance[:]
            self.create_instance(i)
            time, location = self.observe_fact(i)
            for strategy in strategylist:
                c, s, tp, tn, fp, fn, R = self.run(strategy, time, location,
                                                   r_thresh, l_thresh,
                                                   pln_obs_acc)
                reward[strategy] += R
                total_cost[strategy] += c
                total_success[strategy] += s
                total_tp[strategy] += tp
                total_tn[strategy] += tn
                total_fp[strategy] += fp
                total_fn[strategy] += fn
                SDcost[strategy].append(c)
                SDreward[strategy].append(R)
                # running averages, overwritten every trial; the values after
                # the last trial are the ones reported
                try:
                    df.at[strategy, 'Reward'] = float(reward[strategy]) / num
                    df.at[strategy, 'SDCost'] = statistics.stdev(SDcost[strategy])
                    df.at[strategy, 'SDreward'] = statistics.stdev(SDreward[strategy])
                    df.at[strategy, 'Cost'] = float(total_cost[strategy]) / num
                    df.at[strategy, 'Success'] = float(total_success[strategy]) / num
                    prec[strategy] = round(
                        float(total_tp[strategy]) /
                        (total_tp[strategy] + total_fp[strategy]), 2)
                    recall[strategy] = round(
                        float(total_tp[strategy]) /
                        (total_tp[strategy] + total_fn[strategy]), 2)
                    df.at[strategy, 'Precision'] = prec[strategy]
                    df.at[strategy, 'Recall'] = recall[strategy]
                    df.at[strategy, 'F1 Score'] = round(
                        2 * prec[strategy] * recall[strategy] /
                        (prec[strategy] + recall[strategy]), 2)
                except:
                    print 'Cannot divide by zero'
                    df.at[strategy, 'Precision'] = 0
                    df.at[strategy, 'Recall'] = 0
                    df.at[strategy, 'F1 Score'] = 0
        return df

    def print_results(self, df):
        print '\nWRAP UP OF RESULTS:'
        print df
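# A minimal driver sketch (an assumption, not part of the original code) showing
# how the Simulator's evaluation loop is meant to be invoked: every strategy is
# run on the same seeded instances, so the returned DataFrame can be compared
# row by row. The thresholds and observation accuracy are illustrative values.
if __name__ == '__main__':
    sim = Simulator(pomdpfile='program.pomdp')
    strategies = ['corpp', 'planning', 'reasoning', 'learning',
                  'lstm-corpp', 'lreasoning']
    results = sim.trial_num(num=50, strategylist=strategies,
                            r_thresh=0.5, l_thresh=0.5, pln_obs_acc=0.9)
    sim.print_results(results)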
class Corrp():
    def __init__(self, pomdpfile='program.pomdp'):
        self.obj = None
        self.model = Model(filename='program.pomdp', parsing_print_flag=False)
        self.policy = Policy(5, 4, output='program.policy')
        print self.model.states
        self.decision = None
        self.time = None
        self.location = None
        self.suplearn = None

    def init_belief(self):
        # remember later to change it to an override method
        self.obj = Reason()
        time = [
            '(currenttime=morning)', '(currenttime=afternoon)',
            '(currenttime=evening)'
        ]
        location = ['(atlocation=classroom)', '(atlocation=library)']
        decision = ['interested', 'not_interested']
        time_rand = random.choice(time)
        loc_rand = random.choice(location)
        lstm_rand = self.lstm()
        int_prob = float(
            self.obj.query('reason.plog', self.decision, time_rand, lstm_rand,
                           loc_rand))
        self.obj.delete('reason.plog')
        print int_prob
        init_belief = [1.0 - int_prob, int_prob, 1.0 - int_prob, int_prob, 0]
        b = np.zeros(len(init_belief))
        for i in range(len(init_belief)):
            b[i] = init_belief[i] / sum(init_belief)
        print b
        return b

    def init_state(self):
        self.decision = random.choice(['interested', 'not_interested'])
        print 'The random decision is:', self.decision
        self.time = random.choice(['morning', 'afternoon', 'evening'])
        print 'The random time is ', self.time
        self.location = random.choice(['library', 'classroom'])
        print 'The random location is : ', self.location
        self.lstm()
        print 'The classifier output is: ', self.classifier
        if self.decision == 'interested':
            state = random.choice(
                ['not_forward_interested', 'forward_interested'])
        else:
            state = random.choice(
                ['not_forward_not_interested', 'forward_not_interested'])
        print state
        s_idx = self.get_state_index(state)
        print s_idx
        return s_idx, state

    def get_state_index(self, state):
        return self.model.states.index(state)

    def get_obs_index(self, obs):
        return self.model.observations.index(obs)

    def random_observe(self, a_idx):
        if self.model.actions[a_idx] == 'move_forward':
            obs = random.choice(['physical', 'no_physical'])
        elif self.model.actions[a_idx] == 'greet':
            obs = random.choice(['verbal', 'no_verbal'])
        else:
            obs = 'na'
        o_idx = self.get_obs_index(obs)
        print('random observation is: ', self.model.observations[o_idx])
        return o_idx

    def observe_logical(self, a_idx):
        if self.model.actions[a_idx] == 'move_forward' and self.decision == 'interested':
            obs = 'physical'
        elif self.model.actions[a_idx] == 'move_forward' and self.decision == 'not_interested':
            obs = 'no_physical'
        elif self.model.actions[a_idx] == 'greet' and self.decision == 'interested':
            obs = 'verbal'
        elif self.model.actions[a_idx] == 'greet' and self.decision == 'not_interested':
            obs = 'no_verbal'
        else:
            obs = 'na'
        o_idx = self.get_obs_index(obs)
        print('random observation is: ', self.model.observations[o_idx])
        return o_idx

    def update(self, a_idx, o_idx, b):
        # Bayesian belief update: transition prediction, observation
        # correction, normalization
        b = np.dot(b, self.model.trans_mat[a_idx, :])
        b = [
            b[i] * self.model.obs_mat[a_idx, i, o_idx]
            for i in range(len(self.model.states))
        ]
        b = b / sum(b)
        return b

    def lstm(self):
        # simulated classifier: predone gives 'one' 70% of the time and is
        # sampled when the person is interested; prezero gives 'one' 30% of
        # the time and is sampled when the person is not interested
        predone = [1, 1, 1, 1, 1, 1, 1, 0, 0, 0]
        prezero = [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        if self.decision == 'interested':
            if random.choice(predone) == 1:
                self.classifier = 'one'
                return '(classifier=one)'
            else:
                self.classifier = 'zero'
                return '(classifier=zero)'
        else:
            if random.choice(prezero) == 1:
                self.classifier = 'one'
                return '(classifier=one)'
            else:
                self.classifier = 'zero'
                return '(classifier=zero)'

    def run(self):
        s_idx, temp = self.init_state()
        b = self.init_belief()
        cost = 0
        success = 0
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        while True:
            a_idx = self.policy.select_action(b)
            a = self.model.actions[a_idx]
            print('action selected', a)
            o_idx = self.random_observe(a_idx)
            #o_idx = self.observe_logical(a_idx)
            print s_idx
            cost = cost + self.model.reward_mat[a_idx, s_idx]
            print('Total reward is,', cost)
            b = self.update(a_idx, o_idx, b)
            print b
            if 'report' in a:
                if 'not_interested' in a and 'not_interested' in temp:
                    success = 1
                    tn = 1
                    print 'Trial was successful'
                elif 'report_interested' in a and 'forward_interested' in temp:
                    success = 1
                    tp = 1
                    print 'Trial was successful'
                elif 'report_interested' in a and 'forward_not_interested' in temp:
                    fp = 1
                    print 'Trial was unsuccessful'
                elif 'not_interested' in a and 'forward_interested' in temp:
                    fn = 1
                print('finished ')
                break
        return cost, success, tp, tn, fp, fn

    def trial_num(self, num):
        total_success = 0
        total_cost = 0
        total_tp = 0
        total_tn = 0
        total_fp = 0
        total_fn = 0
        for i in range(num):
            random.seed(i)
            c, s, tp, tn, fp, fn = self.run()
            total_cost += c
            total_success += s
            total_tp += tp
            total_tn += tn
            total_fp += fp
            total_fn += fn
        print 'Average total reward is:', float(total_cost) / num
        print 'Average total success is: ', float(total_success) / num
        Precision = float(total_tp) / (total_tp + total_fp)
        print 'Precision is ', Precision
        Recall = float(total_tp) / (total_tp + total_fn)
        print 'Recall is ', Recall
        F1score = 2.0 * Precision * Recall / (Precision + Recall)
        print 'F1 score', F1score
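# A minimal usage sketch (an assumption, not part of the original code) for the
# POMDP-based Corrp variant above: each seeded trial samples a state, seeds the
# belief through the P-log query, and follows the policy until a report action.
if __name__ == '__main__':
    agent = Corrp(pomdpfile='program.pomdp')
    agent.trial_num(100)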