def _load_json(path):
    """Return the parsed contents of the JSON file at ``path``.

    The file handle is closed before returning, including when parsing fails.
    """
    with open(path) as handle:
        return json.load(handle)


def _copy_or_none(labels):
    """Return a plain-dict copy of ``labels``, or None when it has no entries."""
    copied = {}
    for slot in labels:
        copied[slot] = labels[slot]
    return copied or None


def _build_stats(slots_informable, slots_requestable):
    """Create one accumulator per (component, schedule, label scheme, stat class).

    Returns a list of ``(component, (schedule, label_scheme), stat)`` triples
    where ``component`` is ``('goal', slot)``, ``('requested', slot)``,
    ``('method',)`` or ``('all',)`` and ``stat`` is a fresh instance of one of
    the ``Stat_*`` classes.
    """
    # what stats are there?
    stat_classes = [Stat_Accuracy, Stat_Probs, Stat_MRR, Stat_Updates, Stat_ROC]

    stats = []
    for schedule in SCHEDULES:
        for label_scheme in LABEL_SCHEMES:
            components = [
                ('goal', slot)
                for slot in slots_informable + ['all', 'joint', 'joint_independent']
            ]
            # Requested slots are only scored under label scheme "a".
            if label_scheme == "a":
                components += [('requested', slot) for slot in slots_requestable + ['all']]
            components += [('method',), ('all',)]

            for component in components:
                for stat_class in stat_classes:
                    stats.append((component, (schedule, label_scheme), stat_class()))
    return stats


def _score_sessions(tracker_output, sessions, ontology, slots_informable,
                    slots_requestable, stats):
    """Feed every tracked turn into the accumulators in ``stats``.

    Sessions from ``tracker_output['sessions']`` and ``sessions`` are paired
    positionally.  A failure while scoring one session is reported on stdout
    (traceback plus the session id) and scoring continues with the next one.
    The tracker's per-turn ``goal-labels`` and ``requested-slots`` dicts are
    normalised in place.

    Returns:
        The number of turns visited, as a float (turns seen before a failure
        in a session are still counted).
    """
    turn_counter = 0.0

    for session_tracker, session in zip(tracker_output['sessions'], sessions):
        for _, _, stat_class in stats:
            stat_class.newDialog()

        session_id = session.log['session-id']
        try:
            # these are the sets of values 'mentioned so far' per slot, i.e.
            # for schedule 2: a slot is only scored once its set is non-empty.
            S = defaultdict(set)
            S_requested = set()

            goal_labels_b, method_labels_b = misc.LabelsB(session, ontology)
            method_schedule_2 = False  # whether schedule 2 is active for method

            turns = zip(session, session_tracker['turns'])
            for turn_num, ((log_turn, label_turn), tracker_turn) in enumerate(turns):
                turn_counter += 1.0

                S_new = misc.S(log_turn, ontology)
                for slot in S_new:
                    S[slot] = S[slot].union(S_new[slot])

                # remove just informed slots from S_requested
                S_requested = S_requested.difference(misc.SysInformed(log_turn))
                # add in ones from slu hyps
                S_requested = S_requested.union(set(misc.S_requested(log_turn)))

                # A slot the tracker did not report is scored as "certainly None".
                tracker_goal_labels = tracker_turn["goal-labels"]
                for slot in slots_informable:
                    if slot in tracker_goal_labels:
                        tracker_goal_labels[slot] = normalise_dist(
                            tracker_goal_labels[slot].items(),
                            (session_id, turn_num, "goal." + slot))
                    else:
                        tracker_goal_labels[slot] = [(None, 1.0)]

                # prepare for joint goals scoring:
                tracker_goal_joint_labels = "independent"
                if "goal-labels-joint" in tracker_turn:
                    tracker_goal_joint_labels = tracker_turn["goal-labels-joint"]

                if tracker_goal_joint_labels != "independent":
                    # tracker_goal_joint_labels must be a list of joint hyps
                    joint_hyps = [(hyp["slots"], hyp["score"])
                                  for hyp in tracker_goal_joint_labels]
                    tracker_goal_joint_labels = normalise_dist(
                        joint_hyps, (session_id, turn_num, "goal.joint"))

                # also gather the correct joint label (None when nothing is labelled)
                true_goal_joint = _copy_or_none(label_turn["goal-labels"])
                true_goal_joint_b = _copy_or_none(goal_labels_b[turn_num])

                tracker_requested_slots = tracker_turn["requested-slots"]
                for slot in tracker_requested_slots:
                    prob = tracker_requested_slots[slot]
                    dist = [(True, prob), (False, 1.0 - prob)]
                    tracker_requested_slots[slot] = normalise_dist(
                        dist, (session_id, turn_num, "requested." + slot))

                tracker_method_label = normalise_dist(
                    tracker_turn["method-label"].items(),
                    (session_id, turn_num, "method"))

                # for method schedule 2, work out whether any slu-hyp has been
                # given which informs the method; once set, the flag stays set
                # for the rest of the session.
                if not method_schedule_2:
                    mact = log_turn["output"]["dialog-acts"]
                    for slu_hyp in log_turn["input"]["live"]["slu-hyps"]:
                        user_act = slu_hyp["slu-hyp"]
                        if misc.MethodLabel(user_act, mact) != "none":
                            method_schedule_2 = True
                            break

                for component, (schedule, label_scheme), stat_class in stats:
                    key = (session_id, turn_num, component, schedule, label_scheme)

                    if component[0] == "goal" and component[1] in ("joint", "joint_independent"):
                        # schedule 2 applies only once some informable slot
                        # has been mentioned
                        if schedule == 2 and not any(len(S[slot]) > 0 for slot in slots_informable):
                            continue

                        this_true_label = true_goal_joint
                        if label_scheme == "b":
                            this_true_label = true_goal_joint_b

                        if tracker_goal_joint_labels == "independent" or component[1] == "joint_independent":
                            stat_class.add(tracker_goal_labels, this_true_label, key, independent=True)
                        else:
                            stat_class.add(tracker_goal_joint_labels, this_true_label, key)

                    if component[0] in ("goal", "all") and (len(component) == 1 or "joint" not in component[1]):
                        if component[0] == "all" or component[1] == "all":
                            slots = slots_informable
                        else:
                            slots = [component[1]]

                        if label_scheme == "b":
                            turn_goals = goal_labels_b[turn_num]
                        else:
                            turn_goals = label_turn["goal-labels"]

                        for slot in slots:
                            if schedule == 2 and len(S[slot]) == 0:
                                continue
                            true_label = turn_goals[slot] if slot in turn_goals else None
                            stat_class.add(tracker_goal_labels[slot], true_label, key)

                    if component[0] in ("requested", "all"):
                        if component[0] == "all" or component[1] == "all":
                            slots = slots_requestable
                        else:
                            slots = [component[1]]

                        for slot in slots:
                            if schedule == 2 and slot not in S_requested:
                                continue
                            if slot in tracker_requested_slots:
                                dist = tracker_requested_slots[slot]
                            else:
                                # tracker said nothing: treat as "not requested"
                                dist = [(False, 1.0), (True, 0.0)]
                            true_label = slot in label_turn["requested-slots"]
                            stat_class.add(dist, true_label, key)

                    if component[0] in ("method", "all"):
                        if schedule == 2 and not method_schedule_2:
                            continue  # no slu hyp informing the method has been given yet.
                        true_label = label_turn["method-label"]
                        if label_scheme == "b":
                            true_label = method_labels_b[turn_num]
                        stat_class.add(tracker_method_label, true_label, key)
        except Exception:
            # Best effort: report the failing session and carry on.  Catching
            # Exception (not everything) lets KeyboardInterrupt and SystemExit
            # propagate.
            traceback.print_exc(file=sys.stdout)
            print("While scoring " + str(session_id))

    return turn_counter


def _write_results(csvfile, stats, rocdump):
    """Write the header row and one CSV row per result of every accumulator.

    When ``rocdump`` is set, each ``Stat_ROC`` accumulator additionally dumps
    its curve and raw scores to files named from that stem.
    """
    csvfile.write("state_component, stat, schedule, label_scheme, N, result\n")

    for component, (schedule, label_scheme), stat_class in stats:
        component_name = ".".join(component)
        for stat_subname, N, result in stat_class.results():
            result = "-" if result is None else "%.7f" % result
            csvfile.write("%s, %s, %i, %s, %i, %s\n" % (
                component_name, stat_subname, schedule, label_scheme, N, result))

        if isinstance(stat_class, Stat_ROC) and rocdump:
            stem = rocdump + '.schedule' + str(schedule) + str(label_scheme) + '.' + component_name
            stat_class.DumpROCToFile(stem + '.roc.csv')
            stat_class.DumpScoresToFile(stem + '.scores.csv')


def _write_summary(csvfile, tracker_output, sessions, turn_counter):
    """Append the 'basic' rows: wall time, session/turn counts and dataset name."""
    wall_time = tracker_output['wall-time']
    csvfile.write('basic,total_wall_time,,,,%s\n' % (wall_time,))
    csvfile.write('basic,sessions,,,,%s\n' % (len(sessions),))
    csvfile.write('basic,turns,,,,%i\n' % (int(turn_counter),))
    # With no scored turns the average is undefined; emit the same "-"
    # placeholder used for missing results instead of dividing by zero.
    per_turn = wall_time / turn_counter if turn_counter else "-"
    csvfile.write('basic,wall_time_per_turn,,,,%s\n' % (per_turn,))
    csvfile.write('basic,dataset,,,,%s\n' % (tracker_output['dataset'],))


def main(argv):
    """Score a belief tracker's output against labelled dialogs and write a CSV.

    Reads the tracker output and ontology JSON files named on the command
    line, walks the labelled dataset alongside the tracker output, accumulates
    every statistic for each schedule / label scheme / state component, and
    writes the results to the CSV file given by ``--scorefile``.

    Args:
        argv: Accepted for the usual ``main(sys.argv)`` call shape but not
            consulted: ``parse_args()`` is called without arguments, so
            argparse reads the process command line itself.
    """
    install_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    utils_dirname = os.path.join(install_path, 'lib')

    # dataset_walker is imported from <install_path>/lib, which has to be put
    # on sys.path first; hence the function-level import.
    sys.path.append(utils_dirname)
    from dataset_walker import dataset_walker

    parser = argparse.ArgumentParser(description='Evaluate output from a belief tracker.')
    parser.add_argument('--dataset', dest='dataset', action='store', metavar='DATASET', required=True,
                        help='The dataset to analyze')
    parser.add_argument('--dataroot', dest='dataroot', action='store', metavar='PATH', required=True,
                        help='Will look for corpus in <dataroot>/<dataset>/...')
    # Note the naming: --trackfile is stored as args.scorefile (the JSON that
    # is read) and --scorefile is stored as args.csv (the CSV that is written).
    parser.add_argument('--trackfile', dest='scorefile', action='store', metavar='JSON_FILE', required=True,
                        help='File containing score JSON')
    parser.add_argument('--scorefile', dest='csv', action='store', metavar='CSV_FILE', required=True,
                        help='File to write with CSV scoring data')
    parser.add_argument('--ontology', dest='ontology', action='store', metavar='JSON_FILE', required=True,
                        help='JSON Ontology file')
    parser.add_argument('--rocdump', dest='rocdump', action='store', metavar='FILE_STEM',
                        help='If present, use this file stem to write out ROC plot data: '
                             'filestem.<schedule>.<slot>.<type>.csv, where type is either roc '
                             '(which contains the ROC curve coordinates) or scores '
                             '(which contains the raw scores used to compute the ROC curves).')
    args = parser.parse_args()

    sessions = dataset_walker(args.dataset, dataroot=args.dataroot, labels=True)
    tracker_output = _load_json(args.scorefile)
    ontology = _load_json(args.ontology)

    # A real list is needed: the slot names are concatenated with extra
    # pseudo-slots when the accumulators are built.
    slots_informable = list(ontology["informable"].keys())
    slots_requestable = ontology["requestable"]

    # The CSV is opened (and truncated) before scoring starts, and is closed
    # even if scoring or writing fails part-way through.
    with open(args.csv, 'w') as csvfile:
        stats = _build_stats(slots_informable, slots_requestable)
        turn_counter = _score_sessions(tracker_output, sessions, ontology,
                                       slots_informable, slots_requestable, stats)

        # output to csv
        _write_results(csvfile, stats, args.rocdump)
        _write_summary(csvfile, tracker_output, sessions, turn_counter)
self.labels_filename = labels_filename f = open(applog_filename) self.log = json.load(f) f.close() if (labels_filename != None): f = open(labels_filename) self.labels = json.load(f) f.close() else: self.labels = None def __iter__(self): if (self.labels_filename != None): for (log, labels) in zip(self.log['turns'], self.labels['turns']): yield (log, labels) else: for log in self.log['turns']: yield (log, None) def __len__(self, ): return len(self.log['turns']) if __name__ == '__main__': import misc dataset = dataset_walker("HDCCN", dataroot="data", labels=True) for call in dataset: if call.log["session-id"] == "voip-f32f2cfdae-130328_192703": for turn, label in call: print(misc.S(turn))
self.labels_filename = labels_filename f = open(applog_filename) self.log = json.load(f) f.close() if (labels_filename != None): f = open(labels_filename) self.labels = json.load(f) f.close() else: self.labels = None def __iter__(self): if (self.labels_filename != None): for (log,labels) in zip(self.log['turns'],self.labels['turns']): yield (log,labels) else: for log in self.log['turns']: yield (log,None) def __len__(self, ): return len(self.log['turns']) if __name__ == '__main__': import misc dataset = dataset_walker("HDCCN", dataroot="data", labels=True) for call in dataset : if call.log["session-id"]=="voip-f32f2cfdae-130328_192703" : for turn, label in call : print misc.S(turn)