def create_single_dataset(database, path, adj_obj):
    """
    Create one pickled HMM-feature dataset per phase for a single adjective.

    For every entry of the module-level ``phases`` sequence a file named
    ``hmm_feature_<adjective>_<phase>.pkl`` is looked up inside ``path``.
    Files that already exist are left untouched; every missing one is
    written as a ``defaultdict`` holding a ``'train'`` and a ``'test'``
    entry taken from ``create_hmm_feature_set``.

    Parameters:
        database: file name handed to ``tables.openFile``.
        path: directory in which the ``.pkl`` files are looked up/written.
        adj_obj: object exposing an ``adjective`` string attribute; it is
            also forwarded unchanged to ``create_hmm_feature_set``.

    Returns:
        None.  The results are the files written to ``path``.
    """
    adjective = adj_obj.adjective

    # Test to see if any phase files already exist and only create the ones
    # that don't, or skip all
    phase_list = []
    for phase in phases:
        phase_obj = PhaseClass()
        phase_obj.phase = phase
        dataset_file_name = "_".join(("hmm_feature", adjective, phase)) + ".pkl"
        phase_obj.path_name = os.path.join(path, dataset_file_name)
        phase_obj.build = not os.path.exists(phase_obj.path_name)
        if phase_obj.build:
            print("Creating adjective %s and phase %s" % (adjective, phase))
        else:
            print("File %s already exists, skipping it." % phase_obj.path_name)
        phase_list.append(phase_obj)

    # Nothing left to build: avoid opening the database at all.  This is
    # derived from the phase list itself instead of a hard-coded count of 4.
    if not any(phase_obj.build for phase_obj in phase_list):
        print("All phases of adjective %s are already built. Moving on..." % adjective)
        return

    # Open database and get train/test split.  The handle is kept open until
    # all files are written (the feature sets may still reference it) and is
    # always closed afterwards.
    h5file = tables.openFile(database)
    try:
        train_objs, test_objs = get_train_test_objects(h5file, adjective)

        # Select the features from the feature objects
        feature_train_dict_list = create_hmm_feature_set(
            h5file, train_objs, adj_obj, phase_list)
        feature_test_dict_list = create_hmm_feature_set(
            h5file, test_objs, adj_obj, phase_list)

        for i, phase_object in enumerate(phase_list):
            if not phase_object.build:
                continue

            train_features = feature_train_dict_list[i]
            test_features = feature_test_dict_list[i]

            # The old check tested the wrapper dict, which always has the two
            # keys 'train' and 'test', so it could never fire.
            # NOTE(review): assumes each entry is a dict/sequence (or None)
            # so that truthiness means "has features" -- confirm.
            if not train_features and not test_features:
                print("Empty dataset for adj %s and phase %s" % (
                    adjective, phase_object.phase))
                continue

            # Store the train/test in a dataset
            dataset = defaultdict(dict)
            dataset['train'] = train_features
            dataset['test'] = test_features

            print("Saving dataset to file")

            # Save the results in the folder, one file per phase.
            # HIGHEST_PROTOCOL is a binary pickle format, so the file must be
            # opened in binary mode.
            with open(phase_object.path_name, "wb") as f:
                # Two spaces keep the output of the former
                # comma-separated print statement unchanged.
                print("Saving file:  %s" % phase_object.path_name)
                cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
    finally:
        h5file.close()
# Score every adjective classifier and record the results in a text report.
# NOTE(review): the live `import pdb; pdb.set_trace()` breakpoint that used
# to start this section was removed so the evaluation can run unattended.

# Initialize scores (running sums over the adjectives that were scored)
f1s = 0
precs = 0
recalls = 0
total = 0

# Setup text file to store values to.  The `with` block closes the report
# even when an exception other than ValueError escapes the loop.
with open("adjective_score_report.txt", "w") as adjective_report:
    adjective_report.write("Adjective, precision, recall, f1\n")

    for classifier in classifiers:
        try:
            # Pull out the objects that we want
            train_objs, test_objs = get_train_test_objects(
                database, classifier.adjective)

            # Compute score for each adjective; the report handle is passed
            # along (presumably test_adjective appends its own line -- verify)
            p, r, f1 = test_adjective(classifier, database, test_objs,
                                      adjective_report)
        except ValueError:
            # A ValueError from either call means this adjective is skipped
            # and does not count towards the totals.
            print("Skipping values")
            continue

        precs += p
        recalls += r
        f1s += f1
        total += 1
def create_single_dataset(database, path, adj_obj):
    """
    Create one pickled HMM-feature dataset per phase for a single adjective.

    For every entry of the module-level ``phases`` sequence a file named
    ``hmm_feature_<adjective>_<phase>.pkl`` is looked up inside ``path``.
    Files that already exist are left untouched; every missing one is
    written as a ``defaultdict`` holding a ``'train'`` and a ``'test'``
    entry taken from ``create_hmm_feature_set``.

    Parameters:
        database: file name handed to ``tables.openFile``.
        path: directory in which the ``.pkl`` files are looked up/written.
        adj_obj: object exposing an ``adjective`` string attribute; it is
            also forwarded unchanged to ``create_hmm_feature_set``.

    Returns:
        None.  The results are the files written to ``path``.
    """
    adjective = adj_obj.adjective

    # Test to see if any phase files already exist and only create the ones
    # that don't, or skip all
    phase_list = []
    for phase in phases:
        phase_obj = PhaseClass()
        phase_obj.phase = phase
        dataset_file_name = "_".join(("hmm_feature", adjective, phase)) + ".pkl"
        phase_obj.path_name = os.path.join(path, dataset_file_name)
        phase_obj.build = not os.path.exists(phase_obj.path_name)
        if phase_obj.build:
            print("Creating adjective %s and phase %s" % (adjective, phase))
        else:
            print("File %s already exists, skipping it." % phase_obj.path_name)
        phase_list.append(phase_obj)

    # Nothing left to build: avoid opening the database at all.  This is
    # derived from the phase list itself instead of a hard-coded count of 4.
    if not any(phase_obj.build for phase_obj in phase_list):
        print("All phases of adjective %s are already built. Moving on..." % adjective)
        return

    # Open database and get train/test split.  The handle is kept open until
    # all files are written (the feature sets may still reference it) and is
    # always closed afterwards.
    h5file = tables.openFile(database)
    try:
        train_objs, test_objs = get_train_test_objects(h5file, adjective)

        # Select the features from the feature objects
        feature_train_dict_list = create_hmm_feature_set(
            h5file, train_objs, adj_obj, phase_list)
        feature_test_dict_list = create_hmm_feature_set(
            h5file, test_objs, adj_obj, phase_list)

        for i, phase_object in enumerate(phase_list):
            if not phase_object.build:
                continue

            train_features = feature_train_dict_list[i]
            test_features = feature_test_dict_list[i]

            # The old check tested the wrapper dict, which always has the two
            # keys 'train' and 'test', so it could never fire.
            # NOTE(review): assumes each entry is a dict/sequence (or None)
            # so that truthiness means "has features" -- confirm.
            if not train_features and not test_features:
                print("Empty dataset for adj %s and phase %s" % (
                    adjective, phase_object.phase))
                continue

            # Store the train/test in a dataset
            dataset = defaultdict(dict)
            dataset['train'] = train_features
            dataset['test'] = test_features

            print("Saving dataset to file")

            # Save the results in the folder, one file per phase.
            # HIGHEST_PROTOCOL is a binary pickle format, so the file must be
            # opened in binary mode.
            with open(phase_object.path_name, "wb") as f:
                # Two spaces keep the output of the former
                # comma-separated print statement unchanged.
                print("Saving file:  %s" % phase_object.path_name)
                cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
    finally:
        h5file.close()
# Score every adjective classifier, record the results in a text report and
# print the mean f1 score.
# NOTE(review): the live `import pdb; pdb.set_trace()` breakpoint that used
# to start this section was removed so the evaluation can run unattended.

# Initialize scores (running sums over the adjectives that were scored)
f1s = 0
precs = 0
recalls = 0
total = 0

# Setup text file to store values to.  The `with` block closes the report
# even when an exception other than ValueError escapes the loop.
with open("adjective_score_report.txt", "w") as adjective_report:
    adjective_report.write("Adjective, precision, recall, f1\n")

    for classifier in classifiers:
        try:
            # Pull out the objects that we want
            train_objs, test_objs = get_train_test_objects(
                database, classifier.adjective)

            # Compute score for each adjective; the report handle is passed
            # along (presumably test_adjective appends its own line -- verify)
            p, r, f1 = test_adjective(classifier, database, test_objs,
                                      adjective_report)
        except ValueError:
            # A ValueError from either call means this adjective is skipped
            # and does not count towards the totals.
            print("Skipping values")
            continue

        precs += p
        recalls += r
        f1s += f1
        total += 1

if total:
    # Two spaces keep the output of the former comma-separated print
    # statement unchanged.
    print("Average f1s:  %s" % (f1s / total))
else:
    # Every classifier was skipped; dividing would raise ZeroDivisionError.
    print("Average f1s: n/a (no adjectives were scored)")