import os
import cPickle

import numpy as np
from sklearn import preprocessing

import utilities  # repo-local helpers: train_svm_gridsearch, train_gradient_boost, ...

# `phases` (the list of exploration-procedure phases to concatenate) and
# `remove_feature_tree_based` are defined elsewhere in this repo.


def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features stored
    in adjective_phase_set
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives")
    path_name = os.path.join(newpath, dataset_file_name)
    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    # Concatenate the features of every phase. The labels and object ids
    # are the same for each phase, so the last assignment wins.
    train_X = []
    for phase in phases:
        train_set = all_features[adjective][phase]['train']
        train_X.append(train_set['features'])
        train_Y = train_set['labels']
        object_ids = train_set['object_ids']
    train_X = np.concatenate(train_X, axis=1)

    print "Training adjective %s" % adjective

    if True:  # toggle: SVM grid search vs. gradient boosting
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)
    else:
        trained_clf = utilities.train_gradient_boost(
            train_X=train_X,
            train_Y=train_Y,
            object_ids=object_ids)

    dataset = all_features[adjective]
    dataset['adjective'] = adjective
    dataset['classifier'] = trained_clf
    dataset['scaler'] = scaler

    print "Saving trained_classifier"

    # Save the results in the folder ("wb": pickles are binary files)
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
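# --- Illustrative sketch (not from the original source) ---
# utilities.train_svm_gridsearch() is repo-local and not shown here. Passing
# object_ids suggests a leave-one-object-out cross-validation so that all
# samples from one object stay in the same fold. A minimal re-creation of
# that idea, using the sklearn APIs of this code's vintage; the parameter
# grid, CV scheme, and return values are assumptions for illustration only.
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import LeaveOneLabelOut

def train_svm_gridsearch_sketch(train_X, train_Y, object_ids, scale=True):
    # Optionally standardize, then grid-search C with grouped CV folds.
    scaler = preprocessing.StandardScaler().fit(train_X) if scale else None
    X = scaler.transform(train_X) if scale else train_X
    cv = LeaveOneLabelOut(object_ids)  # one fold per object
    grid = GridSearchCV(SVC(), {'C': [0.1, 1, 10, 100]}, cv=cv)
    grid.fit(X, train_Y)
    return grid.best_estimator_, scaler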
# Variant: redefines the trainer above to fall back to univariate feature
# selection for adjectives with few positive examples
# (output dir "trained_adjectives_univ_deep_search").
def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features stored
    in adjective_phase_set
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_univ_deep_search")
    path_name = os.path.join(newpath, dataset_file_name)
    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    train_X = []
    for phase in phases:
        train_set = all_features[adjective][phase]["train"]
        train_X.append(train_set["features"])
        train_Y = train_set["labels"]
        object_ids = train_set["object_ids"]
    train_X = np.concatenate(train_X, axis=1)

    # Scaling and tree-based feature removal, disabled in this variant:
    # scaler = preprocessing.StandardScaler().fit(train_X)
    # train_X = scaler.transform(train_X)
    # all_features[adjective]['scaler'] = scaler
    # all_features[adjective]['train'] = train_X  # store off scaled
    # all_features[adjective]['tree_features'] = remove_feature_tree_based(train_X, train_Y)
    # print np.shape(train_X)
    # train_X = all_features[adjective]['tree_features'][1]  # transformed features
    # print np.shape(train_X)

    print "Training adjective %s" % adjective

    # With binary labels, sum(train_Y) counts the positive examples.
    if sum(train_Y) < 180:
        trained_clf, scaler = utilities.train_univariate_selection(
            train_X, train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)
        print trained_clf
    else:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)

    dataset = all_features[adjective]
    dataset["scaler"] = scaler
    dataset["adjective"] = adjective
    dataset["classifier"] = trained_clf

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
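# --- Illustrative sketch (not from the original source) ---
# utilities.train_univariate_selection() is repo-local and not shown here.
# A minimal re-creation of the univariate-selection step it presumably
# performs, using sklearn's SelectKBest; the helper name, `k`, and the
# return shape are assumptions for illustration only.
from sklearn.feature_selection import SelectKBest, f_classif

def train_univariate_selection_sketch(train_X, train_Y, k=20):
    # Score each feature independently against the labels and keep the
    # k best; an SVM grid search would then run on the reduced matrix.
    selector = SelectKBest(f_classif, k=k).fit(train_X, train_Y)
    return selector, selector.transform(train_X)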
# Variant: redefines the trainer with tree-based feature selection before
# the SVM grid search (output dir "trained_adjectives_tree").
def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """
    Example function on how to access all of the features stored
    in adjective_phase_set
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_tree")
    path_name = os.path.join(newpath, dataset_file_name)
    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s" % adjective

    train_X = []
    for phase in phases:
        train_set = all_features[adjective][phase]['train']
        train_X.append(train_set['features'])
        train_Y = train_set['labels']
        object_ids = train_set['object_ids']
    train_X = np.concatenate(train_X, axis=1)

    # Scale the data
    scaler = preprocessing.StandardScaler().fit(train_X)
    train_X = scaler.transform(train_X)
    all_features[adjective]['scaler'] = scaler
    all_features[adjective]['train'] = train_X  # store off scaled features

    # Remove features!
    all_features[adjective]['tree_features'] = remove_feature_tree_based(
        train_X, train_Y)
    print np.shape(train_X)
    train_X = all_features[adjective]['tree_features'][1]  # transformed features
    print np.shape(train_X)

    print "Training adjective %s" % adjective

    # Univariate selection, disabled in this variant:
    # trained_clf, scaler = utilities.train_univariate_selection(
    #     train_X, train_Y, verbose=True, object_ids=object_ids,
    #     n_jobs=6, scale=True)
    # all_features[adjective][phase]['scaler'] = scaler
    # all_features[adjective][phase]['univ_select'] = trained_clf
    # print trained_clf

    if True:  # toggle: SVM grid search vs. gradient boosting
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=False)  # the data is already scaled above
    else:
        trained_clf = utilities.train_gradient_boost(
            train_X=train_X,
            train_Y=train_Y,
            object_ids=object_ids)

    dataset = all_features[adjective]
    dataset['adjective'] = adjective
    dataset['classifier'] = trained_clf

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
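# --- Illustrative sketch (not from the original source) ---
# remove_feature_tree_based() is defined elsewhere in the repo. A minimal
# re-creation returning a (selector, transformed_X) pair, matching the
# `[1]` indexing above; the estimator choice, n_estimators, and importance
# threshold are assumptions for illustration only.
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

def remove_feature_tree_based_sketch(train_X, train_Y):
    # Fit a forest, then drop features whose importance falls below the
    # default (mean-importance) threshold.
    forest = ExtraTreesClassifier(n_estimators=250, random_state=0)
    forest.fit(train_X, train_Y)
    selector = SelectFromModel(forest, prefit=True)
    return selector, selector.transform(train_X)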
def original_train_adjective_phase_classifier(path1, path2, adjective, phase,
                                              all_features1, all_features2,
                                              boost):
    """
    Example function on how to access all of the features stored
    in adjective_phase_set. Merges two feature sets for one phase.
    """
    # Note: path2 is accepted for symmetry but unused; everything is
    # written under path1.
    # File name
    dataset_file_name = "_".join(("trained", adjective, phase)) + ".pkl"
    newpath = os.path.join(path1, "trained_adjective_phase_merge")
    path_name = os.path.join(newpath, dataset_file_name)
    if os.path.exists(path_name):
        print "File %s already exists, skipping it." % path_name
        return

    print "Creating adjective %s and phase %s" % (adjective, phase)

    # First set of features
    train_set = all_features1[adjective][phase]['train']
    train_X1 = train_set['features']
    train_Y = train_set['labels']
    object_ids = train_set['object_ids']

    # Test set: merge the two feature sets
    test_set1 = all_features1[adjective][phase]['test']
    test_X1 = test_set1['features']
    test_set2 = all_features2[adjective][phase]['test']
    test_X2 = test_set2['features']
    test_X = np.concatenate((test_X1, test_X2), axis=1)
    all_features1[adjective][phase]['test'] = test_X

    # Second set of features
    train_set2 = all_features2[adjective][phase]['train']
    train_X2 = train_set2['features']

    # Merge the two for an n x 51 vector
    train_X = np.concatenate((train_X1, train_X2), axis=1)

    print "Training adjective %s and phase %s" % (adjective, phase)

    if not boost:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)
    else:
        trained_clf, scaler = utilities.train_gradient_boost(
            train_X=train_X,
            train_Y=train_Y,
            object_ids=object_ids,
            verbose=True,
            n_jobs=6,
            scale=True)

    dataset = all_features1[adjective][phase]
    dataset['adjective'] = adjective
    dataset['phase'] = phase
    dataset['classifier'] = trained_clf
    dataset['scaler'] = scaler

    print "Saving trained_classifier"

    # Save the results in the folder
    with open(path_name, "wb") as f:
        print "Saving file: ", path_name
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
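# --- Illustrative usage sketch (not from the original source) ---
# How one of the trainers above might be driven. The features pickle name
# and directory layout are assumptions; the real repo loads the
# adjective_phase_set features elsewhere. Note that with the duplicate
# definitions above, the name orig_train_adjective_phase_classifier is
# bound to the last (tree) variant at import time.
if __name__ == "__main__":
    import sys
    base_path = sys.argv[1]
    features_pkl = os.path.join(base_path, "adjective_phase_set.pkl")  # assumed name
    with open(features_pkl, "rb") as f:
        all_features = cPickle.load(f)
    for adjective in sorted(all_features):
        orig_train_adjective_phase_classifier(base_path, adjective, all_features)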