def train():
    """End-to-end learning-to-rank training pipeline.

    Loads the feature set into Elasticsearch, reads graded judgments from
    JUDGMENTS_FILE, logs feature values for each judged query/doc pair,
    writes a RankLib-format training file, then trains and uploads one
    model per RankLib model type (0-9).

    Relies on module-level config/helpers: FEATURE_SET_NAME, INDEX_NAME,
    JUDGMENTS_FILE, JUDGMENTS_FILE_FEATURES, elastic_connection,
    init_default_store, load_features, log_features,
    build_features_judgments_file, train_model, save_model, Logger.
    """
    from judgments import judgments_from_file, judgments_by_qid

    # Long timeout: feature logging / model upload can be slow.
    es = elastic_connection(timeout=1000)

    # Load features into Elasticsearch.
    init_default_store()
    load_features(FEATURE_SET_NAME)

    # Parse judgments, grouped by query id.
    movie_judgments = judgments_by_qid(
        judgments_from_file(filename=JUDGMENTS_FILE))

    # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to
    # generate a training set, output as "sample_judgments_wfeatures.txt".
    log_features(es, judgments_dict=movie_judgments, search_index=INDEX_NAME)
    build_features_judgments_file(movie_judgments,
                                  filename=JUDGMENTS_FILE_FEATURES)

    # Train each RankLib model type:
    # 0 MART, 1 RankNet, 2 RankBoost, 3 AdaRank, 4 Coordinate Ascent,
    # 5 (reserved), 6 LambdaMART, 7 ListNet, 8 Random Forests,
    # 9 Linear Regression.
    for model_type in range(10):
        Logger.logger.info("*** Training %s " % model_type)
        train_model(judgments_with_features_file=JUDGMENTS_FILE_FEATURES,
                    model_output='model.txt',
                    which_model=model_type)
        save_model(script_name="gsearch_model_%s" % model_type,
                   feature_set=FEATURE_SET_NAME,
                   model_fname='model.txt')
def gogo_kpca(fxpath, mpath):
    """Fit one RBF kernel-PCA model per subject and pickle it.

    For each of 5 dog subjects and 2 human subjects, stacks the ictal,
    interictal and test feature matrices, fits a KernelPCA on a strided
    subsample of the preprocessed rows, and writes the fitted model to
    ``<mpath>/kpca_rbf_<breed>_<subject>.pkl``.

    fxpath -- directory passed through to load_features
    mpath  -- output directory for the pickled KPCA models
    """
    params = dict(
        n_components=256, kernel='rbf', gamma=None, degree=3, coef0=1,
        kernel_params=None, alpha=1.0, fit_inverse_transform=False,
        eigen_solver='auto', tol=0, max_iter=None, remove_zero_eig=True,
    )
    out_template = '%s/kpca_rbf_{0}_{1}.pkl' % mpath

    # 5 dog subjects (1-5) followed by 2 human subjects (1-2).
    subjects = ([(1, 'dog', k) for k in range(1, 6)]
                + [(2, 'human', k) for k in (1, 2)])

    for nbreed, sbreed, nsubject in subjects:
        print('breed%d.subject%d..' % (nbreed, nsubject))

        # Stack interictal (class 2) then ictal (class 1) features,
        # freeing the parts eagerly to keep peak memory down.
        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)
        X = vstack((X_inter, X_ictal))
        del X_inter, X_ictal
        gc.collect()

        # Append the unlabeled test features (class 3) as well — the KPCA
        # is fit on the full train+test distribution.
        X_test = load_features(fxpath, nbreed, nsubject, 3)
        X = vstack((X, X_test))
        del X_test
        gc.collect()

        kpca = KernelPCA(**params)
        # Stride is derived from the raw matrix before preprocessing.
        step = get_skip_interval(X)
        X = kpca_preprocess_features(X)
        kpca.fit(X[::step])

        with open(out_template.format(sbreed, nsubject), 'wb') as fh:
            pickle.dump(kpca, fh)
        del X, kpca
        gc.collect()
def gogo_kpca(fxpath, mpath):
    """Fit one RBF kernel-PCA model per subject and pickle it.

    For each of 5 dog subjects and 2 human subjects, stacks ictal,
    interictal and test feature matrices, fits a KernelPCA on a strided
    subsample of the preprocessed rows, and writes the model to
    ``<mpath>/kpca_rbf_<breed>_<subject>.pkl``.

    Fixes: replaced the Python-2-only ``print`` statement and ``cPickle``
    module with the py2/py3-compatible ``print()`` and ``pickle``, matching
    the rest of the file.
    """
    import pickle  # cPickle was folded into pickle; works on py2 and py3

    kpca_params = {'n_components': 256, 'kernel': 'rbf', 'gamma': None,
                   'degree': 3, 'coef0': 1, 'kernel_params': None,
                   'alpha': 1.0, 'fit_inverse_transform': False,
                   'eigen_solver': 'auto', 'tol': 0, 'max_iter': None,
                   'remove_zero_eig': True}
    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath

    for i in range(7):
        # Subjects 0-4 are dogs 1-5; subjects 5-6 are humans 1-2.
        if i < 5:
            nbreed, sbreed, nsubject = 1, 'dog', i + 1
        else:
            nbreed, sbreed, nsubject = 2, 'human', 1 + abs(5 - i)
        print('breed%d.subject%d..' % (nbreed, nsubject))

        # Stack interictal (2) + ictal (1) features, releasing the parts
        # immediately to keep peak memory low.
        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)
        X = vstack((X_inter, X_ictal))
        del X_inter, X_ictal
        gc.collect()

        # Append the test features (3): KPCA is fit on train+test jointly.
        X_test = load_features(fxpath, nbreed, nsubject, 3)
        X = vstack((X, X_test))
        del X_test
        gc.collect()

        kpca = KernelPCA(**kpca_params)
        # Stride computed on the raw matrix, before preprocessing.
        skip_interval = get_skip_interval(X)
        X = kpca_preprocess_features(X)
        kpca.fit(X[::skip_interval])

        with open(kpca_fname.format(sbreed, nsubject), 'wb') as f:
            pickle.dump(kpca, f)
        del X, kpca
        gc.collect()
def main():
    """Train and evaluate a feed-forward neural network from a config file.

    Loads hyperparameters via ``load_data()``, splits each label's feature
    vectors into train/test at the configured sample count, trains the
    network, plots the loss curve, and prints test accuracy.

    Fixes: Python-2 ``print`` statement replaced with ``print()``; manual
    dict-building loops replaced with dict comprehensions; commented-out
    shuffle code removed.
    """
    data = load_data()
    features_cnt, label_features = load_features.load_features(data["inputFile"])

    # NOTE(review): "traininSamplesCnt" and "neorunsCnt" are misspelled in
    # the config schema; keys are kept as-is so existing configs still load.
    split = data["traininSamplesCnt"]
    training_data = {label: feats[:split]
                     for label, feats in label_features.items()}
    testing_data = {label: feats[split:]
                    for label, feats in label_features.items()}

    network = NeuralNetwork(features_cnt,
                            data["hiddenLayers"]["cnt"],
                            data["hiddenLayers"]["layersNeuronsCnt"],
                            data["hiddenLayers"]["layersActivFns"],
                            data["outputLayer"]["neorunsCnt"],
                            data["outputLayer"]["activFn"],
                            data["withBias"])

    # train() returns the fitted model and the per-epoch loss history;
    # only the loss curve is used here.
    _model, loss_curve = network.train(training_data, data["eta"],
                                       data["epochsNo"], data["stopMSE"],
                                       data["MSE"])
    draw_result.draw_training(loss_curve)

    accuracy = network.test(testing_data, data["outputLayer"]["neorunsCnt"]) * 100
    print("Accuracy: %f%%" % accuracy)
def training_pipeline():
    """Train a subset of RankLib model types from parsed judgments.

    Loads the feature set into Elasticsearch, obtains judgments via
    ``parse_data_and_get_judgement()``, logs feature values, writes the
    RankLib training file, then trains and uploads models 6 (LambdaMART),
    7 (ListNet) and 9 (Linear Regression).
    """
    from utils import elastic_connection

    es = elastic_connection()

    judgments = parse_data_and_get_judgement()
    print(judgments)

    # Register the feature set with the LTR store.
    init_default_store()
    load_features(FEATURE_SET_NAME)

    # Log feature values per judgment and emit the RankLib training file.
    log_features(es, judgments_dict=judgments, search_index=INDEX_NAME)
    build_features_judgments_file(judgments,
                                  filename=JUDGMENTS_FILE_FEATURES)

    # Only a subset of the ten RankLib model types is trained here.
    for which in (6, 7, 9):
        Logger.logger.info("*** Training %s " % which)
        train_model(judgments_with_features_file=JUDGMENTS_FILE_FEATURES,
                    model_output='model.txt',
                    which_model=which)
        save_model(script_name="test_%s" % which,
                   feature_set=FEATURE_SET_NAME,
                   model_fname='model.txt')
def upload_model():
    """Upload a previously trained RankLib model to the LTR store.

    Re-registers the feature set, then pushes the local ``model.txt`` under
    the script name ``test_9`` (model type 9, Linear Regression).
    """
    model_file = 'model.txt'
    init_default_store()
    load_features(FEATURE_SET_NAME)
    save_model(script_name="test_9",
               feature_set=FEATURE_SET_NAME,
               model_fname=model_file)
data=json.dumps(model_payload), headers=head, auth=ES_AUTH, verify=False) Logger.logger.info(resp.status_code) if resp.status_code >= 300: Logger.logger.error(resp.text) if __name__ == "__main__": from judgments import judgments_from_file, judgments_by_qid es = elastic_connection(timeout=1000) # Load features into Elasticsearch init_default_store() load_features(FEATURE_SET_NAME) # Parse a judgments movieJudgments = judgments_by_qid( judgments_from_file(filename=JUDGMENTS_FILE)) # Use proposed Elasticsearch queries (1.json.jinja ... N.json.jinja) to generate a training set # output as "sample_judgments_wfeatures.txt" log_features(es, judgments_dict=movieJudgments, search_index=INDEX_NAME) build_features_judgments_file(movieJudgments, filename=JUDGMENTS_FILE_FEATURES) # Train each ranklib model type #for modelType in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: for modelType in [6]: # 0, MART # 1, RankNet # 2, RankBoost # 3, AdaRank
# to change when small data: n_train and n_test in utils.py, n_components in fisher_feature_extractor.py folder_name = 'data/' # folder_name = 'data_small/' nclasses = 10 classifier = 'svm_ovo' do_validation = True validation = 0.2 do_prediction = False svm_kernel = LinearKernel() #svm_kernel = LaplacianRBFKernel(1.6) C = 1 Xtrain, Ytrain, Xtest = load_features(feature_extractor, overwrite_features, overwrite_kpca, kernel_pca, kernel_pca_kernel, cut_percentage, folder_name) #Xtrain = numpy.reshape(Xtrain, (Xtrain.shape[0], -1)) #Xtest = numpy.reshape(Xtest, (Xtest.shape[0], -1)) print(Xtrain.shape) print(Xtest.shape) assert Xtrain.ndim == 2 and Xtrain.shape[1] == Xtest.shape[1] print("Fitting on training data") if classifier == 'cross_entropy': Xtrain = concat_bias(Xtrain) Xtest = concat_bias(Xtest) model = CrossEntropyClassifier(nclasses) iterations = 500 lr = 0.01
def gogo_bagged_svm(fxpath, mpath, spath):
    """Train a bagged linear SVM per subject on kernel-PCA features.

    For each of 5 dog and 2 human subjects: loads ictal/interictal training
    features, optionally transforms them with the subject's pickled KPCA
    model from ``mpath``, fits a bagging classifier of LinearSVCs, predicts
    ictal probability on the test features, and writes per-subject
    predictions to ``<spath>/kpca_linear_svm<breed>_<subject>_preds.csv``.

    Fixes: removed dead locals (per-class sample weights computed but never
    used) and a large commented-out parameter block.
    """
    transform = True  # apply the stored KPCA transform to train/test

    svc_params = {'penalty': 'l2', 'loss': 'l2', 'dual': False, 'C': 33.0,
                  'intercept_scaling': 1e4, 'class_weight': 'auto',
                  'random_state': 42}
    bc_params = {'base_estimator': LinearSVC(**svc_params),
                 'n_estimators': 96,
                 'max_samples': 0.1,
                 'max_features': 0.8,
                 'oob_score': False,
                 # if you have tons of memory (i.e. 32gb ram + 32gb swap)
                 # increasing n_jobs may help performance. else,
                 # increasing it may cause "out of memory" errors.
                 'n_jobs': 1,
                 'verbose': 1,
                 'random_state': 42}

    preds = []
    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath
    s_fname = '%s/kpca_linear_svm{0}_{1}_preds.csv' % spath

    for i in range(7):
        # Subjects 0-4 are dogs 1-5; subjects 5-6 are humans 1-2.
        if i < 5:
            nbreed, sbreed, nsubject = 1, 'dog', i + 1
        else:
            nbreed, sbreed, nsubject = 2, 'human', 1 + abs(5 - i)
        print('breed%d.subject%d..' % (nbreed, nsubject))

        # Labels: 0 = interictal, 1 = ictal, in stacking order.
        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)
        X_train = vstack((X_inter, X_ictal))
        Y = [0] * len(X_inter) + [1] * len(X_ictal)
        # NOTE(review): the original computed per-class sample weights here
        # (W) but never passed them to fit(); removed as dead code. If class
        # weighting was intended, pass sample_weight=W to bc.fit below.
        del X_inter, X_ictal
        gc.collect()

        with open(kpca_fname.format(sbreed, nsubject), 'rb') as f:
            kpca = pickle.load(f)

        if transform:
            X_train = kpca_preprocess_features(X_train)
            X_train = kpca_incremental_transform(kpca, X_train)
            gc.collect()

        X_test = load_features(fxpath, nbreed, nsubject, 3)
        if transform:
            X_test = kpca_preprocess_features(X_test)
            X_test = kpca_incremental_transform(kpca, X_test)
            gc.collect()

        bc = BC(**bc_params)
        bc.fit(X_train, Y)

        # Probability of the positive (ictal) class.
        subject_preds = bc.predict_proba(X_test)[:, 1]
        preds.append(subject_preds)
        pd.DataFrame(subject_preds).to_csv(
            s_fname.format(sbreed, nsubject), index=False, header=None)

        del X_train, X_test
        gc.collect()
        sys.stdout.flush()
def gogo_bagged_svm(fxpath, mpath, spath):
    """Train a bagged linear SVM per subject on kernel-PCA features.

    For each of 5 dog and 2 human subjects: loads ictal/interictal training
    features, optionally transforms them with the subject's pickled KPCA
    model from ``mpath``, fits a bagging classifier of LinearSVCs, predicts
    ictal probability on the test features, and writes per-subject
    predictions to ``<spath>/kpca_linear_svm<breed>_<subject>_preds.csv``.

    Fixes: replaced the Python-2-only ``print`` statement and ``cPickle``
    with ``print()`` and ``pickle``; removed dead locals (sample weights
    computed but never used) and a commented-out parameter block.
    """
    import pickle  # cPickle was folded into pickle; works on py2 and py3

    transform = True  # apply the stored KPCA transform to train/test

    svc_params = {
        "penalty": "l2",
        "loss": "l2",
        "dual": False,
        "C": 33.0,
        "intercept_scaling": 1e4,
        "class_weight": "auto",
        "random_state": 42,
    }
    bc_params = {
        "base_estimator": LinearSVC(**svc_params),
        "n_estimators": 96,
        "max_samples": 0.1,
        "max_features": 0.8,
        "oob_score": False,
        # if you have tons of memory (i.e. 32gb ram + 32gb swap)
        # increasing n_jobs may help performance. else,
        # increasing it may cause "out of memory" errors.
        "n_jobs": 1,
        "verbose": 1,
        "random_state": 42,
    }

    preds = []
    kpca_fname = "%s/kpca_rbf_{0}_{1}.pkl" % mpath
    s_fname = "%s/kpca_linear_svm{0}_{1}_preds.csv" % spath

    for i in range(7):
        # Subjects 0-4 are dogs 1-5; subjects 5-6 are humans 1-2.
        if i < 5:
            nbreed, sbreed, nsubject = 1, "dog", i + 1
        else:
            nbreed, sbreed, nsubject = 2, "human", 1 + abs(5 - i)
        print("breed%d.subject%d.." % (nbreed, nsubject))

        # Labels: 0 = interictal, 1 = ictal, in stacking order.
        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)
        X_train = vstack((X_inter, X_ictal))
        Y = [0] * len(X_inter) + [1] * len(X_ictal)
        # NOTE(review): the original computed per-class sample weights here
        # (W) but never passed them to fit(); removed as dead code. If class
        # weighting was intended, pass sample_weight=W to bc.fit below.
        del X_inter, X_ictal
        gc.collect()

        with open(kpca_fname.format(sbreed, nsubject), "rb") as f:
            kpca = pickle.load(f)

        if transform:
            X_train = kpca_preprocess_features(X_train)
            X_train = kpca_incremental_transform(kpca, X_train)
            gc.collect()

        X_test = load_features(fxpath, nbreed, nsubject, 3)
        if transform:
            X_test = kpca_preprocess_features(X_test)
            X_test = kpca_incremental_transform(kpca, X_test)
            gc.collect()

        bc = BC(**bc_params)
        bc.fit(X_train, Y)

        # Probability of the positive (ictal) class.
        subject_preds = bc.predict_proba(X_test)[:, 1]
        preds.append(subject_preds)
        pd.DataFrame(subject_preds).to_csv(
            s_fname.format(sbreed, nsubject), index=False, header=None)

        del X_train, X_test
        gc.collect()
        sys.stdout.flush()