def run_test(features, labels, train_classes, test_classes, train_index, test_index):
    """Re-evaluate a previously trained joint model loaded from disk.

    Splits ``features``/``labels`` into seen rows (labels in
    ``train_classes``) and unseen rows (labels in ``test_classes``),
    refits the open-set rejection threshold on the training split, then
    writes binary predictions for the held-out seen split and for every
    unseen class under ``test_results/``.

    Relies on module-level globals: ``np``, ``sys``, ``pickle``, and the
    model saved by the training run as
    ``test_results/trained_model_<argv[1]>_joint.m``.
    """
    features = np.array(features)
    labels = np.array(labels)

    # Partition rows into the seen (training) and unseen (test) classes.
    seen = np.isin(labels, train_classes)
    unseen = np.isin(labels, test_classes)
    xtrain = features[seen, :]
    ytrain = labels[seen]
    xtest = features[unseen, :]
    ytest = labels[unseen]

    X_train, X_test = xtrain[train_index], xtrain[test_index]
    y_train, y_test = ytrain[train_index], ytrain[test_index]

    # BUG FIX: the trained model was loaded into an unused local (`model_s`)
    # while the threshold fit and all predictions ran on a freshly
    # constructed, untrained model.  Use the trained model instead, and
    # close the file handle (the original leaked it).
    # NOTE(review): pickle.load is unsafe on untrusted files — acceptable
    # here only because the path points at our own training output.
    with open('test_results/trained_model_' + sys.argv[1] + '_joint.m',
              'rb') as fh:
        model = pickle.load(fh)

    # Refit the open-set rejection threshold on the training split.
    model.evt_fit_threshold(X_train, y_train)

    # Binary predictions on the held-out seen split; second argument 0
    # presumably selects the top-level (risk vs non-risk) classifier —
    # TODO confirm against sor.HierarchicalClassifierModel.predict.
    y_pred = model.predict(X_test, 0)
    np.savetxt('test_results/' + sys.argv[1] + '_joint_seen.pred', y_pred)

    # Predictions for each entirely unseen class.
    for classk in range(len(test_classes)):
        print('test class', test_classes[classk])
        xtest_ri = xtest[ytest == test_classes[classk]]
        y_pred_ri = model.predict(xtest_ri, 0)
        np.savetxt('test_results/' + sys.argv[1] + '_joint_unseen_' +
                   str(classk) + '.pred', y_pred_ri)
def run_test(features, labels, train_classes, test_classes, train_index, test_index):
    """Tune, train, evaluate, and persist the hierarchical risk classifier.

    Pipeline: split rows into seen (``train_classes``) and unseen
    (``test_classes``) classes, grid-search l1/l2 regularization with a
    custom CV splitter, retrain with the best parameters, count
    risk/non-risk confusion on the held-out seen split, score every
    unseen class, and pickle the trained model.

    NOTE(review): this redefines ``run_test`` — a function with the same
    name appears earlier in the file; this later definition shadows it.
    Relies on module-level globals: ``np``, ``sys``, ``pickle``, ``sor``,
    ``risk_class_files``, ``cv_split``, ``GridSearchCV``.
    """
    cpu_time = 0  # NOTE(review): never updated or read — dead local.
    features = np.array(features)
    labels = np.array(labels)
    # Rows whose label is a seen (training) class vs. an unseen (test) class.
    xtrain = features[np.isin(labels, train_classes), :]
    ytrain = labels[np.isin(labels, train_classes)]
    xtest = features[np.isin(labels, test_classes), :]
    ytest = labels[np.isin(labels, test_classes)]
    # Confusion counters: R = risk, NR = non-risk, Ri = i-th unseen class.
    RR = 0      # risk samples predicted as risk
    R2Ri = 0    # of those, also assigned the correct risk subclass
    RNR = 0     # risk samples predicted as non-risk (misses)
    NRNR = 0    # non-risk samples predicted non-risk
    NRR = 0     # non-risk samples predicted risk (false alarms)
    RiR = np.zeros(len(test_classes))   # per unseen class: predicted risk
    RiNR = np.zeros(len(test_classes))  # per unseen class: predicted non-risk
    X_train, X_test = xtrain[train_index], xtrain[test_index]
    y_train, y_test = ytrain[train_index], ytrain[test_index]
    y_test_l = y_test.tolist()
    # Base estimator used only as the GridSearchCV template (l1=l2=0).
    model = sor.HierarchicalClassifierModel(input_size=X_train[0].size,
                                            num_classes=len(risk_class_files),
                                            learning_rate=1e-3,
                                            num_epochs=1000,
                                            batch_size=100,
                                            l1=0,
                                            l2=0,
                                            train_classes=train_classes)
    # Regularization grid; l2 grid also includes 0 (no penalty).
    parameters = {
        'l1': np.logspace(-2, 2, 5),
        'l2': np.append(np.logspace(-3, 1, 5), 0)
    }
    # Custom splitter — presumably holds out whole classes per fold to mimic
    # the unseen-class setting; verify against cv_split.UnseenTestSplit.
    splitter = cv_split.UnseenTestSplit()
    cmodel = GridSearchCV(model, parameters, cv=splitter, verbose=5, n_jobs=10)
    cmodel.fit(X_train, y_train.ravel())
    print('best params: l1=', cmodel.best_params_['l1'], 'l2=',
          cmodel.best_params_['l2'])
    # Retrain a fresh model on the full training split with the best params.
    model = sor.HierarchicalClassifierModel(input_size=X_train[0].size,
                                            num_classes=len(risk_class_files),
                                            learning_rate=1e-3,
                                            num_epochs=1000,
                                            batch_size=100,
                                            l1=cmodel.best_params_['l1'],
                                            l2=cmodel.best_params_['l2'],
                                            train_classes=train_classes)
    model.fit(X_train, y_train)
    # Training diagnostics for later inspection.
    np.savetxt('test_results/' + sys.argv[1] + '_loss.out', model.loss_trace)
    np.savetxt('test_results/' + sys.argv[1] + '_grad_norm.out',
               model.grad_norm)
    # Second argument 0 presumably selects the top-level risk/non-risk
    # classifier — TODO confirm against the model's predict API.
    y_pred = model.predict(X_test, 0)
    y_pred_score = model.predict_score(X_test, 0)
    np.savetxt('test_results/' + sys.argv[1] + '_joint_seen.out', y_pred_score)
    # Tally the confusion counters on the held-out seen split.
    for j in range(len(y_test)):
        if y_test_l[j] >= 1:  # true label is a risk class
            if y_pred[j] == 1:
                RR += 1
                # Second-stage prediction for the specific risk subclass.
                y_pred_class = model.predict(X_test[j, :], int(y_test[j]))
                if y_pred_class == 1:
                    R2Ri += 1
            else:
                RNR += 1
        else:
            if y_pred[j] < 1:
                NRNR += 1
            else:
                NRR += 1
    # Score each entirely unseen class and count risk/non-risk decisions.
    for classk in range(len(test_classes)):
        xtest_ri = xtest[ytest == test_classes[classk]]
        y_pred_ri = model.predict(xtest_ri, 0)
        y_pred_ri_score = model.predict_score(xtest_ri, 0)
        np.savetxt(
            'test_results/' + sys.argv[1] + '_joint_unseen_' + str(classk) +
            '.out', y_pred_ri_score)
        for j in range(len(y_pred_ri)):
            if y_pred_ri[j] == 1:
                RiR[classk] += 1
            else:
                RiNR[classk] += 1
    print(RR, RNR, NRR, NRNR, RiR, RiNR, R2Ri)
    # Persist the trained model so the evaluation-only run can reload it.
    pickle.dump(
        model,
        open('test_results/trained_model_' + sys.argv[1] + '_joint.m', 'wb'))
def run_setup(features, labels, train_classes, test_classes, test_fold):
    """Build and persist the per-fold training artifacts for one CV fold.

    Trains three 'wSOR' variants on the seen classes — raw features,
    TF-IDF + standardize + PCA, and TF-IDF + standardize + ICA — and
    saves the models, the fold split indices, and the fitted ICA
    transformer under ``test_data/``.

    Relies on module-level globals: ``np``, ``pickle``, ``sor``,
    ``risk_class_files``, ``TfidfVectorizer``, ``FastICA``, ``scaler``,
    ``pca``, and ``X`` (the raw corpus re-featurized below).
    """
    features = np.array(features)
    labels = np.array(labels)

    # Rows belonging to the seen (training) risk classes.
    seen = np.isin(labels, train_classes)
    xtrain = features[seen, :]
    ytrain = labels[seen]

    # Random 80/20 split of the seen rows.
    train_ids = np.arange(len(ytrain))
    np.random.shuffle(train_ids)
    split = int(len(ytrain) * 0.8)
    train_index = train_ids[:split]
    # BUG FIX: was train_ids[int(len(ytrain) * 0.8 + 1):], which silently
    # dropped the sample at position `split` from both partitions.
    test_index = train_ids[split:]
    X_train = xtrain[train_index]
    y_train = ytrain[train_index]

    # Variant 1: raw input features.
    model = sor.HierarchicalClassifierModel(input_size=X_train[0].size,
                                            num_classes=len(risk_class_files),
                                            learning_rate=1e-3,
                                            num_epochs=1000,
                                            batch_size=100,
                                            model_name='wSOR',
                                            l1=0.1,
                                            l2=0)
    model.fit_wk(X_train, y_train)
    model.save('test_data/trained_model_' + str(test_fold) + '_wk.m')

    # Persist the fold definition so evaluation runs reuse the same split.
    np.savez('test_data/test_' + str(test_fold) + '.npz',
             train_classes=train_classes,
             test_classes=test_classes,
             train_index=train_index,
             test_index=test_index)

    # Re-featurize the raw corpus X (module global) with word-level TF-IDF;
    # the token pattern keeps only tokens containing a lowercase letter.
    tfidf = TfidfVectorizer(ngram_range=(1, 1),
                            stop_words='english',
                            token_pattern=u'(?ui)\\b\\w*[a-z]+\\w*\\b')
    features = tfidf.fit_transform(X).toarray()
    features = np.array(features)
    xtrain = features[np.isin(labels, train_classes), :]
    ytrain = labels[np.isin(labels, train_classes)]
    X_train = xtrain[train_index]
    y_train = ytrain[train_index]

    # Variant 2: standardize then PCA (scaler/pca are module globals,
    # fitted here on this fold's training split).
    scaler.fit(X_train)
    X_train_std = scaler.transform(X_train)
    pca.fit(X_train_std)
    X_train_pca = pca.transform(X_train_std)
    model = sor.HierarchicalClassifierModel(input_size=X_train_pca[0].size,
                                            num_classes=len(risk_class_files),
                                            learning_rate=1e-3,
                                            num_epochs=1000,
                                            batch_size=100,
                                            model_name='wSOR',
                                            l1=0.1,
                                            l2=0)
    model.fit_wk(X_train_pca, y_train)
    model.save('test_data/trained_model_' + str(test_fold) + '_pca_wk.m')

    # Variant 3: ICA with the same number of components PCA retained.
    transformer = FastICA(n_components=pca.n_components_,
                          random_state=0,
                          max_iter=500)
    # fit_transform already returns the transformed training data; the
    # original recomputed transformer.transform(X_train_std) twice more
    # for identical results.
    xtrain_transformed = transformer.fit_transform(X_train_std)
    model = sor.HierarchicalClassifierModel(
        input_size=xtrain_transformed[0].size,
        num_classes=len(risk_class_files),
        learning_rate=1e-3,
        num_epochs=1000,
        batch_size=100,
        model_name='wSOR',
        l1=0.1,
        l2=0)
    model.fit_wk(xtrain_transformed, y_train)
    model.save('test_data/trained_model_' + str(test_fold) + '_ica_wk.m')

    # Save the fitted ICA transformer so test features can be projected
    # identically at evaluation time.
    pickle.dump(transformer,
                open('test_data/' + str(test_fold) + '_ica.sav', 'wb'),
                protocol=4)