def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """Train and pickle one classifier for ``adjective`` using every phase.

    The training feature matrices of all entries in the module-level
    ``phases`` sequence are concatenated column-wise (``axis=1``).  Labels and
    object ids are taken from the last phase iterated (presumably they are
    identical across phases -- confirm against the feature extraction code).

    If the labels sum to fewer than 180 the classifier comes from
    ``utilities.train_univariate_selection``; otherwise from
    ``utilities.train_svm_gridsearch``.  (For 0/1 labels the sum is the number
    of positive examples -- assumption, confirm.)

    The result is written to
    ``<path>/trained_adjectives_univ_deep_search/trained_<adjective>.pkl``.
    If that file already exists nothing is trained or written.

    Parameters:
        path: base directory under which the output folder lives.
        adjective: adjective name; used as key into ``all_features`` and in
            the output file name.
        all_features: nested mapping indexed as
            ``all_features[adjective][phase]["train"]`` with the keys
            ``"features"``, ``"labels"`` and ``"object_ids"``.
            ``all_features[adjective]`` is modified in place: the keys
            ``"scaler"``, ``"adjective"`` and ``"classifier"`` are added, and
            that same dict is what gets pickled.

    Returns:
        None.
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_univ_deep_search")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print("File %s already exists, skipping it." % path_name)
        return

    # Create the output folder up front so that a missing directory is
    # detected before training instead of when the result is saved.  The
    # isdir re-check tolerates another process creating it concurrently.
    try:
        os.makedirs(newpath)
    except OSError:
        if not os.path.isdir(newpath):
            raise

    print("Creating adjective %s" % adjective)

    train_X = []
    for phase in phases:
        train_set = all_features[adjective][phase]["train"]
        train_X.append(train_set["features"])
        # Overwritten on every iteration: the values of the last phase win.
        train_Y = train_set["labels"]
        object_ids = train_set["object_ids"]

    train_X = np.concatenate(train_X, axis=1)

    if sum(train_Y) < 180:
        trained_clf, scaler = utilities.train_univariate_selection(
            train_X,
            train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True,
        )
        print(trained_clf)
    else:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True,
        )

    dataset = all_features[adjective]
    dataset["scaler"] = scaler
    dataset["adjective"] = adjective
    dataset["classifier"] = trained_clf

    print("Saving trained_classifier")

    # Save the results in the folder.  HIGHEST_PROTOCOL is a binary pickle
    # protocol, so the file has to be opened in binary mode.
    with open(path_name, "wb") as f:
        print("Saving file:  %s" % path_name)
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
def orig_train_adjective_phase_classifier(path, adjective, all_features):
    """Train and pickle one classifier for ``adjective`` using every phase.

    The training feature matrices of all entries in the module-level
    ``phases`` sequence are concatenated column-wise (``axis=1``).  Labels and
    object ids are taken from the last phase iterated (presumably they are
    identical across phases -- confirm against the feature extraction code).

    If the labels sum to fewer than 180 the classifier comes from
    ``utilities.train_univariate_selection``; otherwise from
    ``utilities.train_svm_gridsearch``.  (For 0/1 labels the sum is the number
    of positive examples -- assumption, confirm.)

    The result is written to
    ``<path>/trained_adjectives_univ_deep_search/trained_<adjective>.pkl``.
    If that file already exists nothing is trained or written.

    Parameters:
        path: base directory under which the output folder lives.
        adjective: adjective name; used as key into ``all_features`` and in
            the output file name.
        all_features: nested mapping indexed as
            ``all_features[adjective][phase]['train']`` with the keys
            ``'features'``, ``'labels'`` and ``'object_ids'``.
            ``all_features[adjective]`` is modified in place: the keys
            ``'scaler'``, ``'adjective'`` and ``'classifier'`` are added, and
            that same dict is what gets pickled.

    Returns:
        None.
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective)) + ".pkl"
    newpath = os.path.join(path, "trained_adjectives_univ_deep_search")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print("File %s already exists, skipping it." % path_name)
        return

    # Create the output folder up front so that a missing directory is
    # detected before training instead of when the result is saved.  The
    # isdir re-check tolerates another process creating it concurrently.
    try:
        os.makedirs(newpath)
    except OSError:
        if not os.path.isdir(newpath):
            raise

    print("Creating adjective %s" % adjective)

    train_X = []
    for phase in phases:
        train_set = all_features[adjective][phase]['train']
        train_X.append(train_set['features'])
        # Overwritten on every iteration: the values of the last phase win.
        train_Y = train_set['labels']
        object_ids = train_set['object_ids']

    train_X = np.concatenate(train_X, axis=1)

    if sum(train_Y) < 180:
        trained_clf, scaler = utilities.train_univariate_selection(
            train_X, train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)
        print(trained_clf)
    else:
        trained_clf, scaler = utilities.train_svm_gridsearch(
            train_X=train_X,
            train_Y=train_Y,
            verbose=True,
            object_ids=object_ids,
            n_jobs=6,
            scale=True)

    dataset = all_features[adjective]
    dataset['scaler'] = scaler
    dataset['adjective'] = adjective
    dataset['classifier'] = trained_clf

    print("Saving trained_classifier")

    # Save the results in the folder.  HIGHEST_PROTOCOL is a binary pickle
    # protocol, so the file has to be opened in binary mode.
    with open(path_name, "wb") as f:
        print("Saving file:  %s" % path_name)
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
def train_adjective_phase_classifier(path, adjective, phase, all_features, boost):
    """Train and pickle a classifier for one (adjective, phase) pair.

    The training set ``all_features[adjective][phase]['train']`` is passed to
    ``utilities.train_univariate_selection`` (with ``scale=True``), which
    returns a ``(classifier, scaler)`` pair.  The result is written to
    ``<path>/trained_adjective_phase_univ/trained_<adjective>_<phase>.pkl``.
    If that file already exists nothing is trained or written.

    Parameters:
        path: base directory under which the output folder lives.
        adjective: adjective name; key into ``all_features`` and part of the
            output file name.
        phase: phase name; key into ``all_features[adjective]`` and part of
            the output file name.
        all_features: nested mapping indexed as
            ``all_features[adjective][phase]['train']`` with the keys
            ``'features'``, ``'labels'`` and ``'object_ids'``.
            ``all_features[adjective][phase]`` is modified in place: the keys
            ``'scaler'``, ``'tree_features'``, ``'adjective'``, ``'phase'``
            and ``'classifier'`` are set, and that same dict is what gets
            pickled.  The trained classifier is stored under both
            ``'tree_features'`` and ``'classifier'``.
        boost: currently unused.  It is kept so existing callers keep
            working; the SVM grid-search / gradient-boost training it used to
            select between is disabled.

    Returns:
        None.
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective, phase)) + ".pkl"
    newpath = os.path.join(path, "trained_adjective_phase_univ")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print("File %s already exists, skipping it." % path_name)
        return

    # Create the output folder up front so that a missing directory is
    # detected before training instead of when the result is saved.  The
    # isdir re-check tolerates another process creating it concurrently.
    try:
        os.makedirs(newpath)
    except OSError:
        if not os.path.isdir(newpath):
            raise

    print("Creating adjective %s and phase %s" % (adjective, phase))

    train_set = all_features[adjective][phase]['train']
    train_X = train_set['features']
    train_Y = train_set['labels']
    object_ids = train_set['object_ids']

    trained_clf, scaler = utilities.train_univariate_selection(
        train_X, train_Y,
        verbose=True,
        object_ids=object_ids,
        n_jobs=6,
        scale=True)

    all_features[adjective][phase]['scaler'] = scaler
    all_features[adjective][phase]['tree_features'] = trained_clf

    print(trained_clf)

    dataset = all_features[adjective][phase]
    dataset['adjective'] = adjective
    dataset['phase'] = phase
    dataset['classifier'] = trained_clf

    print("Saving trained_classifier")

    # Save the results in the folder.  HIGHEST_PROTOCOL is a binary pickle
    # protocol, so the file has to be opened in binary mode.
    with open(path_name, "wb") as f:
        print("Saving file:  %s" % path_name)
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)
def train_adjective_phase_classifier(path, adjective, phase, all_features, boost):
    """Train and pickle a classifier for one (adjective, phase) pair.

    The training set ``all_features[adjective][phase]['train']`` is passed to
    ``utilities.train_univariate_selection`` (with ``scale=True``), which
    returns a ``(classifier, scaler)`` pair.  The result is written to
    ``<path>/trained_adjective_phase_univ/trained_<adjective>_<phase>.pkl``.
    If that file already exists nothing is trained or written.

    Parameters:
        path: base directory under which the output folder lives.
        adjective: adjective name; key into ``all_features`` and part of the
            output file name.
        phase: phase name; key into ``all_features[adjective]`` and part of
            the output file name.
        all_features: nested mapping indexed as
            ``all_features[adjective][phase]['train']`` with the keys
            ``'features'``, ``'labels'`` and ``'object_ids'``.
            ``all_features[adjective][phase]`` is modified in place: the keys
            ``'scaler'``, ``'tree_features'``, ``'adjective'``, ``'phase'``
            and ``'classifier'`` are set, and that same dict is what gets
            pickled.  The trained classifier is stored under both
            ``'tree_features'`` and ``'classifier'``.
        boost: currently unused.  It is kept so existing callers keep
            working; the SVM grid-search / gradient-boost training it used to
            select between is disabled.

    Returns:
        None.
    """
    # File name
    dataset_file_name = "_".join(("trained", adjective, phase)) + ".pkl"
    newpath = os.path.join(path, "trained_adjective_phase_univ")
    path_name = os.path.join(newpath, dataset_file_name)

    if os.path.exists(path_name):
        print("File %s already exists, skipping it." % path_name)
        return

    # Create the output folder up front so that a missing directory is
    # detected before training instead of when the result is saved.  The
    # isdir re-check tolerates another process creating it concurrently.
    try:
        os.makedirs(newpath)
    except OSError:
        if not os.path.isdir(newpath):
            raise

    print("Creating adjective %s and phase %s" % (adjective, phase))

    train_set = all_features[adjective][phase]['train']
    train_X = train_set['features']
    train_Y = train_set['labels']
    object_ids = train_set['object_ids']

    trained_clf, scaler = utilities.train_univariate_selection(
        train_X, train_Y, verbose=True, object_ids=object_ids,
        n_jobs=6, scale=True)

    phase_entry = all_features[adjective][phase]
    phase_entry['scaler'] = scaler
    phase_entry['tree_features'] = trained_clf

    print(trained_clf)

    # Same dict object as phase_entry; kept under the original local name.
    dataset = phase_entry
    dataset['adjective'] = adjective
    dataset['phase'] = phase
    dataset['classifier'] = trained_clf

    print("Saving trained_classifier")

    # Save the results in the folder.  HIGHEST_PROTOCOL is a binary pickle
    # protocol, so the file has to be opened in binary mode.
    with open(path_name, "wb") as f:
        print("Saving file:  %s" % path_name)
        cPickle.dump(dataset, f, protocol=cPickle.HIGHEST_PROTOCOL)