def fit(self):
    x1, x2 = self.__resampeling()
    # Initial iteration: score and fit the two initial models.
    try:
        err1 = f1(
            self.Y1,
            cross_val_predict(self.model1_init, self.X1, self.Y1, cv=10))
    except Exception:
        err1 = 0
    try:
        err2 = f1(
            self.Y2,
            cross_val_predict(self.model2_init, self.X2, self.Y2, cv=10))
    except Exception:
        err2 = 0
    self.err.append([err1, err2])
    try:
        self.model1_init.fit(self.X1, self.Y1)
    except Exception:
        self.model1_init.fit(self.X1)
    try:
        self.model2_init.fit(self.X2, self.Y2)
    except Exception:
        self.model2_init.fit(self.X2)
    y1 = self.model1_init.predict(x1)
    if np.array_equal(np.unique(y1), [-1., 1.]):
        # Map {-1, 1} predictions to {1, 0}; compute both masks first so the
        # second assignment does not overwrite the first.
        neg, pos = y1 == -1., y1 == 1.
        y1[neg] = 1
        y1[pos] = 0
    y2 = self.model2_init.predict(x2)
    if np.array_equal(np.unique(y2), [-1., 1.]):
        neg, pos = y2 == -1., y2 == 1.
        y2[neg] = 1
        y2[pos] = 0
    self.X1 = pd.concat([self.X1, x2]).reset_index(drop=True)
    self.X2 = pd.concat([self.X2, x1]).reset_index(drop=True)
    self.Y1 = pd.concat([self.Y1, pd.Series(y2)]).reset_index(drop=True)
    self.Y2 = pd.concat([self.Y2, pd.Series(y1)]).reset_index(drop=True)
    for i in range(self.n_iter - 1):
        x1, x2 = self.__resampeling()
        err1 = f1(self.Y1,
                  cross_val_predict(self.model1, self.X1, self.Y1, cv=10))
        err2 = f1(self.Y2,
                  cross_val_predict(self.model2, self.X2, self.Y2, cv=10))
        self.err.append([err1, err2])
        self.model1.fit(self.X1, self.Y1)
        self.model2.fit(self.X2, self.Y2)
        y1 = self.model1.predict(x1)
        y2 = self.model2.predict(x2)
        self.X1 = pd.concat([self.X1, x2]).reset_index(drop=True)
        self.X2 = pd.concat([self.X2, x1]).reset_index(drop=True)
        self.Y1 = pd.concat([self.Y1, pd.Series(y2)]).reset_index(drop=True)
        self.Y2 = pd.concat([self.Y2, pd.Series(y1)]).reset_index(drop=True)
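# The fit() method above assumes NumPy, pandas, and the scikit-learn helpers
# are already in scope under the short aliases it uses. A minimal sketch of
# those imports (the `f1` alias mirrors how it is used elsewhere in this
# collection; this sketch is an assumption, not part of the original class):
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score as f1
from sklearn.model_selection import cross_val_predict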
def runSpacy(file, out):
    nlp = spacy.load("en_core_web_md")
    df = baseDF(file)
    xx = df["clean_text"]
    yy = df["userid"]
    xx, yy = shuffle(xx, yy)
    trainX, testX, trainY, testY = train_test_split(xx, yy, test_size=0.2)
    start = time.time()

    docs = {}
    for user in tqdm(trainY.unique().tolist()):
        tweets = trainX.loc[trainY == user].tolist()
        tweetDoc = ". ".join(tweets).replace("..", ".")
        docs[user] = nlp(tweetDoc)

    def getSimilarity(base, target):
        return docs[target].similarity(base)

    def getConfidence(row):
        if row["Best"] == 0:
            return 0
        diff = (row["Best"] - row["Second"]) / row["Best"]
        return diff

    evaluate = pd.concat([testX, testY], axis=1)
    evaluate["DOC"] = evaluate["clean_text"].progress_apply(lambda x: nlp(x))
    for col in tqdm(trainY.unique()):
        evaluate[col] = evaluate["DOC"].apply(getSimilarity, target=col)
    evaluate.drop(["DOC"], axis=1, inplace=True)
    evaluate["Guess"] = evaluate.loc[:, trainY.unique()].idxmax(axis=1)

    # Unweighted
    score = f1(evaluate["userid"], evaluate["Guess"], average='weighted')
    checkPrint("Unweighted F1: {}".format(score), out)
    mid = time.time()
    checkPrint("Time Taken: {} seconds".format(mid - start), out)

    # Weighted
    evaluate["Best"] = evaluate.loc[:, trainY.unique()].max(axis=1)
    evaluate["Second"] = evaluate.loc[:, trainY.unique()].apply(
        lambda row: row.nlargest(2).values[-1], axis=1)
    evaluate["Confidence"] = evaluate.apply(getConfidence, axis=1)
    score = f1(evaluate["userid"], evaluate["Guess"], average='weighted',
               sample_weight=evaluate["Confidence"])
    checkPrint("Confidence Weighted F1: {}".format(score), out)
    end = time.time()
    checkPrint("Time Taken: {} seconds".format(end - start), out)
def acc(loader):
    accuracy = 0
    num_batches = 0
    act = np.array([])
    pred = np.array([])
    for batch in loader:
        gpu = batch.question_text.to(device).long()
        preds = bid_lstm_cnn(gpu)
        target = batch.target.numpy()
        preds = preds.cpu().detach().numpy()
        preds = np.array([np.argmax(row) for row in preds])
        total_correct = sum(target == preds)
        act = np.concatenate((act, target))
        pred = np.concatenate((pred, preds))
        accuracy += total_correct
        num_batches += 1
    # Note: assumes every batch contains exactly batch_size examples.
    avg_acc = accuracy / (num_batches * batch_size)
    print(avg_acc)
    formula1 = f1(act, pred)
    print(formula1)
    tn, fp, fn, tp = cm(act, pred).ravel()
    print(
        'True positives -> {}\nFalse positives -> {}\nTrue negatives -> {}\nFalse negatives -> {}\n'
        .format(tp, fp, tn, fn))
    return avg_acc, formula1
def cm_f1_test(model, test_data, test_labels):
    test_pred = model.predict(test_data)
    scores = f1(test_labels, test_pred, average=None)
    argSort = scores.argsort()
    scores = scores[argSort]
    return cm(test_labels, test_pred), (argSort[:2], scores[:2])
def compute(ground_truth, predictiveDistribution):
    class_predictions = np.round(predictiveDistribution.get_all_means())
    # count number of classes occurring in ground truth
    classes_present = len(np.unique(ground_truth))
    score = f1(ground_truth, class_predictions, average=None)
    score = np.sum(score) / classes_present
    return score
def get_metrics(prediction, y_test):
    '''
    Computes accuracy, precision, recall, ROC-AUC and F1 metrics for the
    predictions produced by a ML model against the actual values of the
    dependent variable.

    Inputs:
        - prediction: an array with predictions.
        - y_test: an array with actual values.

    Returns a dictionary with metrics of a ML model.
    '''
    Accuracy = accuracy(prediction, y_test)
    Precision = precision(prediction, y_test)
    Recall = recall(prediction, y_test)
    try:
        AUC = roc_auc(prediction, y_test)
    except ValueError:
        AUC = 0
    F1 = f1(prediction, y_test)
    metrics_dict = {
        'Accuracy': Accuracy,
        'Precision': Precision,
        'Recall': Recall,
        'AUC': AUC,
        'F1': F1
    }
    return metrics_dict
def evaluate(model, iterator_function, _batch_count, cuda_device,
             output_buffer=sys.stderr):
    if output_buffer is not None:
        print(_batch_count, file=output_buffer)
    model.eval()
    with torch.no_grad():
        predictions = []
        expectations = []
        batch_generator = range(_batch_count)
        if output_buffer is not None:
            batch_generator = tqdm(batch_generator)
        for _ in batch_generator:
            features, targets = iterator_function()
            if cuda_device != -1:
                features = features.cuda(device=cuda_device)
            probs, _, _ = model(example_batch=features)
            batch_pred = np.argmax(probs.detach().cpu().numpy(),
                                   axis=-1).tolist()
            batch_tgt = targets.detach().cpu().numpy().tolist()
            predictions.extend(batch_pred)
            expectations.extend(batch_tgt)
    model.train()
    return acc(expectations, predictions) * 100, \
        pr(expectations, predictions) * 100, \
        rc(expectations, predictions) * 100, \
        f1(expectations, predictions) * 100
def evaluate(encoder, loc='./'):
    print('Preparing data...')
    traintext, testtext, labels = load_data(loc)

    print('Computing training skipthoughts...')
    trainA = encoder.encode(traintext[0])
    trainB = encoder.encode(traintext[1])
    C = 4

    print('Computing testing skipthoughts...')
    testA = encoder.encode(testtext[0])
    testB = encoder.encode(testtext[1])

    train_features = np.c_[np.abs(trainA - trainB), trainA * trainB,
                           feats(traintext[0], traintext[1])]
    test_features = np.c_[np.abs(testA - testB), testA * testB,
                          feats(testtext[0], testtext[1])]

    print('Evaluating...')
    clf = LogisticRegression(C=C)
    clf.fit(train_features, labels[0])
    yhat = clf.predict(test_features)
    print('Test accuracy: ', str(clf.score(test_features, labels[1])))
    print('Test F1: ', str(f1(labels[1], yhat)))
def present_results_simp(y_test, predictions):
    results_list = []
    for k, v in predictions.items():
        inter_list = [
            k,
            accuracy(v, y_test),
            precision(v, y_test),
            precision_top(v, y_test, 0.01),
            precision_top(v, y_test, 0.02),
            precision_top(v, y_test, 0.05),
            precision_top(v, y_test, 0.1),
            precision_top(v, y_test, 0.2),
            precision_top(v, y_test, 0.3),
            precision_top(v, y_test, 0.5),
            recall(v, y_test),
            recall_top(v, y_test, 0.01),
            recall_top(v, y_test, 0.02),
            recall_top(v, y_test, 0.05),
            recall_top(v, y_test, 0.1),
            recall_top(v, y_test, 0.2),
            recall_top(v, y_test, 0.3),
            recall_top(v, y_test, 0.5),
            f1(v, y_test)
        ]
        results_list.append(inter_list)
    df = pd.DataFrame(results_list)
    df.columns = [
        'Model', 'Accuracy', 'Precision', 'Precision top 1%',
        'Precision top 2%', 'Precision top 5%', 'Precision top 10%',
        'Precision top 20%', 'Precision top 30%', 'Precision top 50%',
        'Recall', 'Recall top 1%', 'Recall top 2%', 'Recall top 5%',
        'Recall top 10%', 'Recall top 20%', 'Recall top 30%',
        'Recall top 50%', 'F1'
    ]
    return df
def evaluate(encoder, k=10, seed=1234, evalcv=True, evaltest=False,
             use_feats=True, loc='./data/'):
    """
    Run experiment
    k: number of CV folds
    test: whether to evaluate on test set
    """
    print('Preparing data...')
    traintext, testtext, labels = load_data(loc)

    print('Computing training skipthoughts...')
    trainA = encoder.encode(traintext[0])
    trainB = encoder.encode(traintext[1])

    if evalcv:
        print('Running cross-validation...')
        C = eval_kfold(trainA, trainB, traintext, labels[0], shuffle=True,
                       k=10, seed=1234, use_feats=use_feats)

    if evaltest:
        if not evalcv:
            C = 4  # Best parameter found from CV (combine-skip with use_feats=True)

        print('Computing testing skipthoughts...')
        testA = encoder.encode(testtext[0])
        testB = encoder.encode(testtext[1])

        if use_feats:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB,
                                   feats(traintext[0], traintext[1])]
            test_features = np.c_[np.abs(testA - testB), testA * testB,
                                  feats(testtext[0], testtext[1])]
        else:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB]
            test_features = np.c_[np.abs(testA - testB), testA * testB]

        print('Evaluating...')
        clf = LogisticRegression(C=C)
        clf.fit(train_features, labels[0])
        yhat = clf.predict(test_features)
        print('Test accuracy: ' + str(clf.score(test_features, labels[1])))
        print('Test F1: ' + str(f1(labels[1], yhat)))

        vis_data = TSNE(n_components=2).fit_transform(train_features)
        vis_x = vis_data[:, 0]
        vis_y = vis_data[:, 1]
        plt.scatter(vis_x, vis_y, c=labels[0])  # , cmap=plt.cm.get_cmap('jet', 2))
        plt.savefig('tsne_msrp.png')
def evaluatesMLPredictions(y_true, y_pred):
    esal = sal(y_true, y_pred)
    ehl = hl(y_true, y_pred)
    ma = 1 - f1(y_true, y_pred, average='macro')
    mi = 1 - f1(y_true, y_pred, average='micro')
    if1 = 1 - instanceF1(y_true, y_pred)
    eji = 1 - ji(y_true, y_pred)
    mapre = 1 - precision_score(y_true, y_pred, average='macro')
    marec = 1 - recall_score(y_true, y_pred, average='macro')
    mipre = 1 - precision_score(y_true, y_pred, average='micro')
    mirec = 1 - recall_score(y_true, y_pred, average='micro')
    # probability metrics
    cov = coverage_error(y_true, y_pred)
    erl = rl(y_true, y_pred)
    return esal, ehl, ma, mi, if1, eji, mapre, marec, mipre, mirec, cov, erl
def build_classifier_and_test(train_X, train_y, test_X, test_y, clf,
                              print_train_result=True):
    clf.fit(train_X, train_y)
    if print_train_result:
        p_tr = clf.predict(train_X)
        print("Train Accuracy:\t", acc(train_y, p_tr))
        print("Train Precision:\t", pr(train_y, p_tr))
        print("Train Recall_score:\t", rc(train_y, p_tr))
        print("Train F-score:\t", f1(train_y, p_tr))
    predicted = clf.predict(test_X)
    print("Accuracy:\t", acc(test_y, predicted))
    print("Precision:\t", pr(test_y, predicted))
    print("Recall_score:\t", rc(test_y, predicted))
    print("F-score:\t", f1(test_y, predicted))
def update_metrics(gt, pre, f1_m, p_m, r_m, acc_m):
    f1_value = f1(gt, pre, average="micro")
    f1_m.update(f1_value)
    p_value = precision(gt, pre, average="micro", zero_division=0)
    p_m.update(p_value)
    r_value = recall(gt, pre, average="micro")
    r_m.update(r_value)
    acc_value = accuracy(gt, pre)
    acc_m.update(acc_value)
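# update_metrics() above assumes f1_m, p_m, r_m, and acc_m are running-average
# meters exposing an update() method. A minimal sketch of such a meter; the
# class name and fields are hypothetical, not taken from the original code:
class AverageMeter:
    """Tracks a running average of a scalar metric."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, value, n=1):
        # Accumulate n observations of value.
        self.sum += value * n
        self.count += n

    @property
    def avg(self):
        return self.sum / self.count if self.count else 0.0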
def instanceF1(y_true, y_pred):
    """
    Instance-wise (example-based) F1 for multi-label data.

    y_true : 2d array-like, of size n x q
    y_pred : 2d array-like, of size n x q
    """
    n, q = y_true.shape
    if1 = 0
    for i in np.arange(n):
        if1 = if1 + f1(y_true[i, :], y_pred[i, :])
    return if1 / n
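# A small usage sketch of instanceF1 on a toy 2 x 2 label matrix; the values
# are illustrative only:
import numpy as np

y_true = np.array([[1, 0], [0, 1]])
y_pred = np.array([[1, 0], [1, 1]])
# Row 0: F1 = 1.0; row 1: F1 = 2/3; instance F1 = (1.0 + 2/3) / 2 = 5/6
print(instanceF1(y_true, y_pred))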
def clone_analysis(data_paths):
    code = []
    labels = []
    positives = 0
    for file_name in data_paths:
        data = json.load(open(file_name))
        for example in data:
            code.append(example['tokenized'])
            l = 0
            # The label key is spelled inconsistently across data files.
            if 'label' in example.keys():
                l = int(example['label'])
            elif 'lebel' in example.keys():
                l = int(example['lebel'])
            elif 'leble' in example.keys():
                l = int(example['leble'])
            elif 'lable' in example.keys():
                l = int(example['lable'])
            if l > 1:
                l = 1
            positives += l
            labels.append(l)
    print(len(code), len(labels), positives, len(labels) - positives)
    vectorizer = TfidfVectorizer(lowercase=False, ngram_range=(1, 3))
    X = vectorizer.fit_transform(code)
    model = KMeans(n_clusters=10, max_iter=100)
    model.fit(X)
    y = model.predict(X)
    cluster_to_positive = [0] * 10
    cluster_to_negative = [0] * 10
    for pred, label in zip(y, labels):
        if label == 1:
            cluster_to_positive[pred] += 1
        else:
            cluster_to_negative[pred] += 1
    print(cluster_to_positive)
    print(cluster_to_negative)
    percentages = [
        float(p) / (p + n)
        for p, n in zip(cluster_to_positive, cluster_to_negative)
    ]
    for p in percentages:
        print(p)
    for _ in range(5):
        XTrain, XTest, YTrain, YTest = train_test_split(X, labels,
                                                        test_size=0.2)
        model = RandomForestClassifier()
        model.fit(XTrain, YTrain)
        predicted = model.predict(XTest)
        print('%.3f\t%.3f\t%.3f\t%.3f' %
              (acc(YTest, predicted) * 100, pr(YTest, predicted) * 100,
               rc(YTest, predicted) * 100, f1(YTest, predicted) * 100))
def eval_kfold(A, B, train, labels, shuffle=True, k=10, seed=1234,
               use_feats=False):
    """
    Perform k-fold cross validation
    """
    # features
    labels = np.array(labels)
    if use_feats:
        features = np.c_[np.abs(A - B), A * B, feats(train[0], train[1])]
    else:
        features = np.c_[np.abs(A - B), A * B]

    scan = [2**t for t in range(0, 9, 1)]
    npts = len(features)
    kf = StratifiedKFold(n_splits=k, shuffle=shuffle, random_state=seed)
    scores = []
    for s in scan:
        scanscores = []
        for train, test in kf.split(features, labels):
            # Split data
            X_train = features[train]
            y_train = labels[train]
            X_test = features[test]
            y_test = labels[test]

            # Train classifier
            clf = LogisticRegression(C=s)
            clf.fit(X_train, y_train)
            yhat = clf.predict(X_test)
            fscore = f1(y_test, yhat)
            scanscores.append(fscore)
            print((s, fscore))

        # Append mean score
        scores.append(np.mean(scanscores))
        print(scores)

    # Get the index of the best score
    s_ind = np.argmax(scores)
    s = scan[s_ind]
    print(scores)
    print(s)
    return s
def evaluate(encoder, k=10, seed=1234, evalcv=True, evaltest=False,
             use_feats=True, loc='./data/'):
    """
    Run experiment
    k: number of CV folds
    test: whether to evaluate on test set
    """
    print('Preparing data...')
    traintext, testtext, labels = load_data(loc)

    print('Computing training skipthoughts...')
    trainA = encoder.encode(traintext[0], verbose=False)
    trainB = encoder.encode(traintext[1], verbose=False)

    if evalcv:
        print('Running cross-validation...')
        C = eval_kfold(trainA, trainB, traintext, labels[0], shuffle=True,
                       k=10, seed=1234, use_feats=use_feats)

    if evaltest:
        if not evalcv:
            C = 4  # Best parameter found from CV (combine-skip with use_feats=True)

        print('Computing testing skipthoughts...')
        testA = encoder.encode(testtext[0], verbose=False)
        testB = encoder.encode(testtext[1], verbose=False)

        if use_feats:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB,
                                   feats(traintext[0], traintext[1])]
            test_features = np.c_[np.abs(testA - testB), testA * testB,
                                  feats(testtext[0], testtext[1])]
        else:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB]
            test_features = np.c_[np.abs(testA - testB), testA * testB]

        print('Evaluating...')
        clf = LogisticRegression(C=C)
        clf.fit(train_features, labels[0])
        yhat = clf.predict(test_features)
        print('Test accuracy: ' + str(clf.score(test_features, labels[1])))
        print('Test F1: ' + str(f1(labels[1], yhat)))
def evaluate(encoder, k=10, seed=1234, evalcv=False, evaltest=True,
             use_feats=True):
    """
    Run experiment
    k: number of CV folds
    test: whether to evaluate on test set
    """
    traintext, testtext, labels = load_data()

    trainA = encoder.encode(traintext[0], verbose=False, norm=True)
    trainB = encoder.encode(traintext[1], verbose=False, norm=True)

    if evalcv:
        print 'Running cross-validation...'
        C = eval_kfold(trainA, trainB, traintext, labels[0], shuffle=True,
                       k=k, seed=seed, use_feats=use_feats)
    else:
        C = 4

    if evaltest:
        print 'Computing testing skipthoughts...'
        testA = encoder.encode(testtext[0], verbose=False, norm=True)
        testB = encoder.encode(testtext[1], verbose=False, norm=True)

        if use_feats:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB,
                                   feats(traintext[0], traintext[1])]
            test_features = np.c_[np.abs(testA - testB), testA * testB,
                                  feats(testtext[0], testtext[1])]
        else:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB]
            test_features = np.c_[np.abs(testA - testB), testA * testB]

        print 'Evaluating...'
        clf = LogisticRegression(C=C)
        clf.fit(train_features, labels[0])
        yhat = clf.predict(test_features)
        acc = clf.score(test_features, labels[1])
        f1_score = f1(labels[1], yhat)
        print 'Test accuracy: ' + str(acc)
        print 'Test F1: ' + str(f1_score)
        return acc, f1_score
def on_epoch_end(self, epoch, logs={}):
    label_true = []
    label_pred = []
    for i in range(len(self.seq)):
        x_true, y_true = self.seq[i]
        lengths = self.get_lengths(y_true)
        y_pred = self.model.predict_on_batch(x_true)
        y_true = self.p.inverse_transform(y_true, lengths)
        y_pred = self.p.inverse_transform(y_pred, lengths)
        label_true.extend(y_true)
        label_pred.extend(y_pred)
    score = f1_score(label_true, label_pred)
    print(' - f1: {:04.2f}'.format(score * 100))
    print(classification_report(label_true, label_pred))

    label_true = [item for sublist in label_true for item in sublist]
    label_pred = [item for sublist in label_pred for item in sublist]
    classes = np.unique(label_true)
    # classes2 = np.unique(label_pred)
    # print('Classes: ', classes, classes2)
    bacc = balanced_accuracy_score(label_true, label_pred)
    print(' - BACC: {:04.2f}'.format(bacc * 100))
    score = f1(label_true, label_pred, average='micro')
    print(' - f1: {:04.2f}'.format(score * 100))
    print(clsrep(label_true, label_pred, labels=classes))

    self.history.append(bacc)
    self.model_checkpoint(bacc)
    # self.reduce_lr_on_plateau()
    # self.early_stopping_check()
    logs['f1'] = score

    # Stochastic weight averaging: start collecting weights at swa_epoch and
    # update the running average whenever balanced accuracy improves.
    if epoch == self.swa_epoch:
        self.swa_weights = self.model.get_weights()
    if epoch > self.swa_epoch and bacc > self.best_bacc:
        self.swa_control += 1
        for i in range(len(self.swa_weights)):
            self.swa_weights[i] = (self.swa_weights[i] * self.swa_control +
                                   self.model.get_weights()[i]) / (
                                       self.swa_control + 1)
def on_train_end(self, epoch, logs={}):
    x, y = next(self.val_gen)
    preds = self.model.predict(x, batch_size=self.batch_size)
    y = self.flat_and_binary(y)
    preds = self.flat_and_binary(preds)
    jac = jaccard(y, preds)
    dice = f1(y, preds, average='micro')
    logs['dice'] = dice
    logs['jac'] = jac
    logs['fold'] = self.fold
    print('\nTesting jac: {}, dice: {}\n'.format(jac, dice))
def computesMetric(y_true, y_pred, metric='HL'):
    if metric == 'HL':
        r = hl(y_true, y_pred)
    elif metric == 'SA':
        r = sal(y_true, y_pred)
    elif metric == 'Ma':
        r = 1 - f1(y_true, y_pred, average='macro')
    elif metric == 'Mi':
        r = 1 - f1(y_true, y_pred, average='micro')
    elif metric == 'IF1':
        # 1 - instance F1, consistent with the other error-style metrics here.
        r = 1 - instanceF1(y_true, y_pred)
    elif metric == 'IJ':
        r = 1 - ji(y_true, y_pred)
    elif metric == 'MaP':
        r = 1 - precision_score(y_true, y_pred, average='macro')
    elif metric == 'MiP':
        r = 1 - precision_score(y_true, y_pred, average='micro')
    elif metric == 'MaR':
        r = 1 - recall_score(y_true, y_pred, average='macro')
    elif metric == 'MiR':
        r = 1 - recall_score(y_true, y_pred, average='micro')
    return r
def validate(self, loader, model):
    act = np.array([])
    pred = np.array([])
    for batch in loader:
        gpu = batch.question_text.to(self.device).long()
        preds = model(gpu)
        target = batch.target.numpy()
        preds = preds.cpu().detach().numpy()
        preds = np.array([np.argmax(row) for row in preds])
        act = np.concatenate((act, target))
        pred = np.concatenate((pred, preds))
    formula1 = f1(act, pred)
    print(model.ID, 'val f1 ->', formula1)
    return formula1
def reportStats(weight, current_iteration, X_train, y_train, X_test, y_test):
    # Treat negative labels as class 0.
    y_train[y_train < 0] = 0
    y_test[y_test < 0] = 0
    ypred_is = predict_all(X_train, weight)
    ypred_oos = predict_all(X_test, weight)

    np_err_handling = np.seterr(invalid='ignore')

    is_acc = acc(y_train, ypred_is)
    is_mcc = mcc(y_train, ypred_is)
    is_f1 = f1(y_train, ypred_is)
    is_mse = mse(y_train, ypred_is)
    oos_acc = acc(y_test, ypred_oos)
    oos_mcc = mcc(y_test, ypred_oos)
    oos_f1 = f1(y_test, ypred_oos)
    oos_mse = mse(y_test, ypred_oos)
    is_tn, is_fp, is_fn, is_tp = confusion_matrix(y_train, ypred_is).ravel()
    oos_tn, oos_fp, oos_fn, oos_tp = confusion_matrix(y_test, ypred_oos).ravel()
    is_auprc = auprc(y_train, ypred_is)
    oos_auprc = auprc(y_test, ypred_oos)

    np.seterr(**np_err_handling)

    print(
        f"Consensus {current_iteration}: IS acc {is_acc:0.5f}. IS MCC {is_mcc:0.5f}. IS F1 {is_f1:0.5f}. IS MSE {is_mse:0.5f}. OOS acc {oos_acc:0.5f}. OOS MCC {oos_mcc:0.5f}. OOS F1 {oos_f1:0.5f}. OOS MSE {oos_mse:0.5f}."
    )
    print(
        f"Confusion {current_iteration}: IS TP: {is_tp}, IS FP: {is_fp}, IS TN: {is_tn}, IS FN: {is_fn}, IS AUPRC: {is_auprc:0.5f}. OOS TP: {oos_tp}, OOS FP: {oos_fp}, OOS TN: {oos_tn}, OOS FN: {oos_fn}, OOS AUPRC: {oos_auprc:0.5f}."
    )
    return is_acc, is_mcc, is_f1, is_mse, is_auprc, oos_acc, oos_mcc, oos_f1, oos_mse, oos_auprc
def best_f1(y_true, y_score):
    best_f1_score = -1000
    sorted_scores = sorted(y_score)
    for threshold in sorted_scores:
        temp_y_predict = []
        for actual_score in y_score:
            # Scores below the threshold are predicted as the positive class.
            if actual_score < threshold:
                temp_y_predict.append(1)
            else:
                temp_y_predict.append(0)
        now_f1 = f1(y_true, temp_y_predict)
        if now_f1 > best_f1_score:
            best_f1_score = now_f1
    return best_f1_score
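# Note the inverted convention in best_f1(): a sample is predicted positive
# when its score falls below the candidate threshold, matching how the
# silent-query scores are thresholded later in this collection. A small
# illustrative usage with made-up values:
y_true = [1, 1, 0, 0]
y_score = [0.1, 0.2, 0.8, 0.9]   # lower score = more likely positive
print(best_f1(y_true, y_score))  # scans every observed score as a threshold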
def eval_kfold(A, B, train, labels, shuffle=True, k=10, seed=1234,
               use_feats=False):
    """
    Perform k-fold cross validation
    """
    # features
    labels = np.array(labels)
    if use_feats:
        features = np.c_[np.abs(A - B), A * B, feats(train[0], train[1])]
    else:
        features = np.c_[np.abs(A - B), A * B]

    scan = [2**t for t in range(0, 9, 1)]
    npts = len(features)
    kf = KFold(npts, n_folds=k, shuffle=shuffle, random_state=seed)
    scores = []
    for s in scan:
        scanscores = []
        for train, test in kf:
            # Split data
            X_train = features[train]
            y_train = labels[train]
            X_test = features[test]
            y_test = labels[test]

            # Train classifier
            clf = LogisticRegression(C=s)
            clf.fit(X_train, y_train)
            yhat = clf.predict(X_test)
            fscore = f1(y_test, yhat)
            scanscores.append(fscore)
            print (s, fscore)

        # Append mean score
        scores.append(np.mean(scanscores))
        print scores

    # Get the index of the best score
    s_ind = np.argmax(scores)
    s = scan[s_ind]
    print scores
    print s
    return s
def evaluate(encoder, k=10, seed=3456, evalcv=True, evaltest=False,
             loc='./data/'):
    print 'Load Data...'
    traintext, testtext, labels = load_data(loc)

    print 'Convert to sentence embeddings...'
    trainA = encoder.encode(traintext[0], verbose=False)
    trainB = encoder.encode(traintext[1], verbose=False)

    if evalcv:
        print 'Perform cross-validation...'
        C = eval_kfold(trainA, trainB, traintext, labels[0], shuffle=True,
                       k=10, seed=3456)
        # print("Size of sentences: ", trainA.shape)

    if evaltest:
        if not evalcv:
            C = 4

        print 'Convert test data to skipthought vectors...'
        testA = encoder.encode(testtext[0], verbose=False)
        testB = encoder.encode(testtext[1], verbose=False)

        # concatenate the |u - v| and u * v features
        train_features = np.c_[np.abs(trainA - trainB), trainA * trainB]
        test_features = np.c_[np.abs(testA - testB), testA * testB]

        print 'Evaluate logistic regression...'
        clf = LogisticRegression(C=C)
        # fit model
        clf.fit(train_features, labels[0])
        # get prediction
        ypred = clf.predict(test_features)
        print 'Test accuracy: ' + str(clf.score(test_features, labels[1]))
        # get F1 score; labels[1] holds the true test labels
        print 'Test F1: ' + str(f1(labels[1], ypred))
def get_best_f1_threshold(y_true, y_score):
    best_f1_score = -1000
    best_f1_threshold = .0
    sorted_scores = sorted(y_score)
    for threshold in sorted_scores:
        # Skip the sentinel score (1000) assigned to queries filtered out
        # before scoring.
        if threshold == 1000:
            continue
        temp_y_predict = []
        for actual_score in y_score:
            if actual_score < threshold:
                temp_y_predict.append(1)
            else:
                temp_y_predict.append(0)
        now_f1 = f1(y_true, temp_y_predict)
        if now_f1 > best_f1_score:
            best_f1_score = now_f1
            best_f1_threshold = threshold
    return best_f1_threshold, best_f1_score
def eval_kfold(A, B, train, labels, shuffle=True, k=10, seed=3456):
    # features
    labels = np.array(labels)
    features = np.c_[np.abs(A - B), A * B]

    scan = [2**t for t in range(0, 9, 1)]
    npts = len(features)
    kf = KFold(npts, n_folds=k, shuffle=shuffle, random_state=seed)
    scores = []
    for s in scan:
        scanscores = []
        for train, test in kf:
            # Split data
            X_train = features[train]
            y_train = labels[train]
            X_test = features[test]
            y_test = labels[test]

            # Train classifier
            clf = LogisticRegression(C=s)
            clf.fit(X_train, y_train)
            yhat = clf.predict(X_test)
            fscore = f1(y_test, yhat)
            scanscores.append(fscore)
            print(s, fscore)

        # Append mean score
        scores.append(np.mean(scanscores))
        print scores

    # Get the index of the best score
    s_ind = np.argmax(scores)
    s = scan[s_ind]
    print scores
    print s
    return s
def test_classifiers(X, y, n=7, rname="results.txt"):
    clfs = {
        # "Bagging KNN": [BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5), [], [], [], []],
        "NN (kNN k=1)": [KNeighborsClassifier(n_neighbors=1), [], [], [], [], []],
        # "NN (kNN k=3)": [KNeighborsClassifier(n_neighbors=3), [], [], [], [], []],
        "NN (kNN k=3 w)": [KNeighborsClassifier(n_neighbors=3, weights='distance'), [], [], [], [], []],
        "NN (kNN k=5 w)": [KNeighborsClassifier(n_neighbors=5, weights='distance'), [], [], [], [], []],
        # "NN (kNN k=7 w)": [KNeighborsClassifier(n_neighbors=7, weights='distance'), [], [], [], []],
        # "SVM - Linear kernel": [svm.SVC(kernel="rbf", probability=True), [], [], [], []],
        # "Naive Bayes": [GaussianNB(), [], [], [], []],
        # "SVM Sigmoide": [svm.SVC(kernel="sigmoid"), [], [], [], []],
        # "ANN": [MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1), [], [], [], []],
    }
    # V = ["Voting KNN", [None, [], [], [], []]]
    skf = kfold(y, n_iter=n, random_state=None, train_size=0.7)
    output = open(rname, "w")
    for train, test in skf:
        Xt, Yt = X[train], y[train]
        Xv, Yv = X[test], y[test]
        votes = []
        for (k, v) in clfs.items():
            v[0].fit(Xt, Yt)
            # print(clfs[k])
            Yr = v[0].predict(Xv)
            # print(accs(Yv, Yr))
            v[1].append(accs(Yv, Yr))
            v[2].append(f1(Yv, Yr, average="macro"))
            v[3].append(recall(Yv, Yr, average="macro"))
            v[4].append(precision(Yv, Yr))
            v[5].append(kappa(Yv, Yr))
            # votes.append(Yr)
        # Yp = predict(votes)
    for k, v in clfs.items():
        fm = "%s | %s| %s | %s | %s\n"
        output.write(fm % (k, "Accuracy", np.mean(v[1]), min(v[1]), max(v[1])))
        # output.write(fm % (k, "Kappa", np.mean(v[5]), min(v[5]), max(v[5])))
        output.write(fm % (k, "F1", np.mean(v[2]), min(v[2]), max(v[2])))
        output.write(fm % (k, "Recall", np.mean(v[3]), min(v[3]), max(v[3])))
        output.write(fm % (k, "Precision", np.mean(v[4]), min(v[4]), max(v[4])))
def present_results(y_test, predictions):
    results_list = []
    for k, v in predictions.items():
        inter_list = [
            k,
            accuracy(v, y_test),
            precision(v, y_test),
            precision_top(v, y_test, 0.01),
            precision_top(v, y_test, 0.02),
            precision_top(v, y_test, 0.05),
            precision_top(v, y_test, 0.1),
            precision_top(v, y_test, 0.2),
            precision_top(v, y_test, 0.3),
            precision_top(v, y_test, 0.5),
            recall(v, y_test),
            recall_top(v, y_test, 0.01),
            recall_top(v, y_test, 0.02),
            recall_top(v, y_test, 0.05),
            recall_top(v, y_test, 0.1),
            recall_top(v, y_test, 0.2),
            recall_top(v, y_test, 0.3),
            recall_top(v, y_test, 0.5),
            f1(v, y_test)
        ]
        # ROC AUC is not reported for the decision-tree models.
        if k[:6] != 'd_tree':
            inter_list.append(roc_auc(v, y_test))
        else:
            inter_list.append('ND')
        results_list.append(inter_list)
    df = pd.DataFrame(results_list)
    df.columns = [
        'Model', 'Accuracy', 'Precision', 'Precision top 1%',
        'Precision top 2%', 'Precision top 5%', 'Precision top 10%',
        'Precision top 20%', 'Precision top 30%', 'Precision top 50%',
        'Recall', 'Recall top 1%', 'Recall top 2%', 'Recall top 5%',
        'Recall top 10%', 'Recall top 20%', 'Recall top 30%',
        'Recall top 50%', 'F1', 'ROC AUC'
    ]
    return df
def prec_rec_f1score(y_true, x_test, model):
    bce = tf.keras.losses.BinaryCrossentropy()
    y_hat = model.predict(x_test)
    y_pred = (np.greater_equal(y_hat, 0.51)).astype(int)
    pr_re_f1score_perclass = precision_recall_fscore_support(y_true, y_pred,
                                                             average=None)
    pr_re_f1score_average = precision_recall_fscore_support(y_true, y_pred,
                                                            average='micro')
    precision = precision_score(y_true, y_pred, average=None)
    recall = recall_score(y_true, y_pred, average=None)
    accuracy = accuracy_score(y_true, y_pred)
    f1_score = f1(y_true, y_pred)

    # per class
    precision_true = pr_re_f1score_perclass[0][1]
    precision_fake = pr_re_f1score_perclass[0][0]
    recall_true = pr_re_f1score_perclass[1][1]
    recall_fake = pr_re_f1score_perclass[1][0]
    f1score_true = pr_re_f1score_perclass[2][1]
    f1score_fake = pr_re_f1score_perclass[2][0]

    metrices_name = [
        'accuracy', 'precision_true', 'precision_fake', 'recall_true',
        'recall_fake', 'f1score_true', 'f1score_fake'
    ]
    metrices_value = [
        accuracy, precision_true, precision_fake, recall_true, recall_fake,
        f1score_true, f1score_fake
    ]
    for name, value in zip(metrices_name, metrices_value):
        print(name + ':', value)

    binary_loss = bce(y_true, y_hat).numpy()
    print('Binary_loss', binary_loss)
    return accuracy, precision_true, precision_fake, recall_true, recall_fake, f1score_true, f1score_fake, binary_loss
def evaluate(encoder, k=10, seed=1234, evalcv=True, evaltest=False,
             use_feats=True, loc='./data/'):
    """
    Run experiment
    k: number of CV folds
    test: whether to evaluate on test set
    """
    print 'Preparing data...'
    traintext, testtext, labels = load_data(loc)

    print 'Computing training skipthoughts...'
    trainA = encoder.encode(traintext[0], verbose=False)
    trainB = encoder.encode(traintext[1], verbose=False)

    if evalcv:
        print 'Running cross-validation...'
        C = eval_kfold(trainA, trainB, traintext, labels[0], shuffle=True,
                       k=10, seed=1234, use_feats=use_feats)

    if evaltest:
        if not evalcv:
            C = 4  # Best parameter found from CV (combine-skip with use_feats=True)

        print 'Computing testing skipthoughts...'
        testA = encoder.encode(testtext[0], verbose=False)
        testB = encoder.encode(testtext[1], verbose=False)

        if use_feats:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB,
                                   feats(traintext[0], traintext[1])]
            test_features = np.c_[np.abs(testA - testB), testA * testB,
                                  feats(testtext[0], testtext[1])]
        else:
            train_features = np.c_[np.abs(trainA - trainB), trainA * trainB]
            test_features = np.c_[np.abs(testA - testB), testA * testB]

        print 'Evaluating...'
        clf = LogisticRegression(C=C)
        clf.fit(train_features, labels[0])
        yhat = clf.predict(test_features)
        print 'Test accuracy: ' + str(clf.score(test_features, labels[1]))
        print 'Test F1: ' + str(f1(labels[1], yhat))
def compute_eval_stats(classifier, y_data, rankings, threshold):
    '''
    Takes: classifier object, true target data, predicted score rankings,
        ranking threshold cutoff
    Returns: accuracy, precision, recall, F1, and ROC AUC of the classifier's
        predictions against y_data
    '''
    predicted_test = np.where(rankings < threshold, 1, 0)

    # print(threshold)
    # print(predicted_test.sum())
    # print(predicted_test[0:10])
    # print("num unique ranks: ", pd.DataFrame(pred_scores)[0].unique().shape)
    # print("eval stats rankings are: ", rankings[0:10])

    stats = [
        accuracy(y_data, predicted_test),
        precision(y_data, predicted_test),
        recall(y_data, predicted_test),
        f1(y_data, predicted_test),
        roc(y_data, predicted_test)
    ]
    return stats
def train(self, data, dev, verbose=True, opter='adam', lr=0.01, epochs=100):
    trainloader = torch.utils.data.DataLoader(data, batch_size=5000)
    criterion = nn.CrossEntropyLoss()

    # create your optimizer
    if opter == 'adam':
        optimizer = optim.Adam(self.parameters(), lr=lr)
    elif opter == 'sgd':
        optimizer = optim.SGD(self.parameters(), lr=lr)

    for epoch in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = self.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if verbose and (i % 1 == 0):  # report after every mini-batch
                ys, y_stars = self.get_eval_data(dev)
                print('[%d, %5d] loss: %.3f\tDev F1: %.3f' %
                      (epoch + 1, i + 1, running_loss, f1(ys, y_stars)))
                running_loss = 0.0
def prec_rec_f1score(y_true, x_test, model, item):
    bce = tf.keras.metrics.BinaryCrossentropy()
    # print(model.summary())
    y_hat = model.predict(x_test)
    y_pred = (np.greater_equal(y_hat, 0.505)).astype(int)

    # For the pseudo-labelling (and VAT) setup, take the argmax over the two
    # class outputs instead of thresholding.
    # print(item + '********')
    if item == 'Psuedo_Label':  # or item == 'VAT_regularization':
        y_pred = tf.argmax(y_hat, 1).numpy()
        # y_hat = np.max(y_hat, axis=1)  # this one for calculating binary loss
        # print(y_hat)
        # print(y_pred)

    pr_re_f1score_perclass = precision_recall_fscore_support(y_true, y_pred,
                                                             average=None)
    pr_re_f1score_average = precision_recall_fscore_support(y_true, y_pred,
                                                            average='micro')
    precision = precision_score(y_true, y_pred, average=None)
    recall = recall_score(y_true, y_pred, average=None)
    accuracy = accuracy_score(y_true, y_pred)
    f1_score = f1(y_true, y_pred)

    # per class
    precision_true = pr_re_f1score_perclass[0][1]
    precision_fake = pr_re_f1score_perclass[0][0]
    recall_true = pr_re_f1score_perclass[1][1]
    recall_fake = pr_re_f1score_perclass[1][0]
    f1score_true = pr_re_f1score_perclass[2][1]
    f1score_fake = pr_re_f1score_perclass[2][0]

    fpr_, tpr_, _ = roc_curve(y_true, y_pred)

    if item == 'Psuedo_Label':  # or item == 'VAT_regularization':
        y_true = tf.one_hot(y_true, 2).numpy()
    binary_loss = bce(y_true, y_hat).numpy()
    return accuracy, precision_true, precision_fake, recall_true, recall_fake, f1score_true, f1score_fake, binary_loss, fpr_, tpr_, y_pred
def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("top_data_dir") parser.add_argument("--balanced","-bl",action="store_true") parser.add_argument('--method','-m',type=int,default=0,choices=range(6), help= """chose methods from: 0:linear_svc 1:logistic regression 2:naive bayes 3:decision tree 4:ExtraTreesClassifier 5:RandomForestClassifier """) args=parser.parse_args() print "load data from %s" %(args.top_data_dir) dataset = load_data(args.top_data_dir) clf = get_classifier(args.method) num_of_split = 10 skf = StratifiedKFold(n_splits=num_of_split,shuffle=True) # print dataset.X # print dataset.y f1_average = .0 f1_macro_average = .0 for training_index, test_index in skf.split(dataset.X, dataset.y): training_X = [] training_y = [] testing_X = [] testing_y = [] metrics = {} # print "%d training %d testing" %(len(training_index),len(test_index)) # print training_index # print test_index for i in training_index: training_X.append( dataset.X[i]) training_y.append( dataset.y[i]) for j in test_index: testing_X.append( dataset.X[j]) testing_y.append( dataset.y[j]) # print training_X # print testing_X clf.fit(training_X,training_y) predicted_y = clf.predict(testing_X) # print classification_report(testing_y, predicted_y) f1_macro_average += f1(testing_y, predicted_y,average="macro")/(1.0*num_of_split) f1_average += f1(testing_y, predicted_y)/(1.0*num_of_split) # f1_11_macro = f1(dataset_11.y, predicted_11,average="macro") # f1_11 = f1(dataset_11.y, predicted_11) # f1_average = (f1_1516+f1_11)/2.0 # f1_macro_average = (f1_1516_macro+f1_11_macro)/2.0 print "Positive f1: %f" %(f1_average) print "Average f1: %f" %(f1_macro_average) print "-"*20
def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--balanced","-bl",action="store_true") parser.add_argument('--method','-m',type=int,default=0,choices=range(6), help= """chose methods from: 0:linear_svc 1:logistic regression 2:naive bayes 3:decision tree 4:ExtraTreesClassifier 5:RandomForestClassifier """) parser.add_argument("--use_result","-ur",action="store_true") parser.add_argument("--feature_size","-fs",default=12,type=int) parser.add_argument("--top_dest_dir","-td",default="/infolab/headnode2/lukuang/2016-rts/code/my_code/post_analysis/predictor_analysis/sday_prediction_data") parser.add_argument("--predictor_data_dir","-pd",default="/infolab/headnode2/lukuang/2016-rts/code/my_code/post_analysis/predictor_analysis/predictor_data") parser.add_argument("--result_expansion","-re",choices=list(map(int, Expansion)),default=0,type=int, help=""" Choose the expansion: 0:raw 1:static: 2:dynamic """) parser.add_argument("--retrieval_method","-rm",choices=list(map(int, RetrievalMethod)),default=0,type=int, help=""" Choose the retrieval method: 0:f2exp 1:dirichlet 2:pivoted 3:bm25 """) parser.add_argument("--designate_dest_dir","-dr") args=parser.parse_args() args.result_expansion = Expansion(args.result_expansion) args.retrieval_method = RetrievalMethod(args.retrieval_method) # get single term queries single_term_queries = {} for year in Year: qrel_file = QREL_FILE[year] judged_qids = get_judged_qid(qrel_file) query_dir = Q_DIR[year][args.result_expansion] single_term_queries[year] = get_single_term_qids(query_dir,judged_qids) print single_term_queries feature_descrption_list = [ 'average_idf:raw', 'scq:raw', 'var:raw', 'max_pmi:raw', 'avg_pmi:raw', 'dev:raw', 'ndev:raw', 'nqc:raw', 'wig:raw', 'top_score:raw', 'clarity:raw', 'qf:raw'] sub_feature_lists = [] sub_feature_lists = itertools.combinations(feature_descrption_list, args.feature_size) # for length in range(len(feature_descrption_list)): # length += 1 # sub_feature_lists += itertools.combinations(feature_descrption_list, length) # best single term without result # sub_feature_lists = [ [ 'average_idf:raw', 'scq:raw', 'dev:raw', 'ndev:raw', 'nqc:raw', 'qf:raw' ]] # best single term with result sub_feature_lists = [ [ 'average_idf:raw', 'scq:raw', 'dev:raw' ]] max_average_f1 = -1 best_sub_features = [] for sub_feature_list in sub_feature_lists: data_preparor = DataPreparor( args.predictor_data_dir, sub_feature_list, args.use_result, args.result_expansion,args.top_dest_dir,args.retrieval_method, args.designate_dest_dir) data_preparor.prepare_data() dataset_11, dataset_1516 = load_data(data_preparor._dest_dir) clf = get_classifier(args.method) # print "cross validation:" # training_predicted = cross_validation.cross_val_predict(clf,training_dataset.X,training_dataset.y,cv=5) # print classification_report(training_dataset.y, training_predicted) # print "-"*20 # print "Test on 1516 data" # print "load data from %s" %(data_preparor._dest_dir) clf.fit(dataset_11.X,dataset_11.y) X_single_15, y_single_15 = prepare_single_term_query_data(dataset_1516, single_term_queries[Year.y2015],year.y2015 ) X_single_16, y_single_16 = prepare_single_term_query_data(dataset_1516, single_term_queries[Year.y2016],year.y2016 ) X_single_1516 = X_single_15 + X_single_16 y_single_1516 = y_single_16 + y_single_16 predicted_single_1516 = clf.predict(X_single_1516) # print classification_report(y_single_1516, predicted_single_1516) # print "Test on 11 data" clf.fit(dataset_1516.X,dataset_1516.y) X_single_11, y_single_11 = 
prepare_single_term_query_data(dataset_11, single_term_queries[Year.y2011],year.y2011 ) predicted_single_11 = clf.predict(X_single_11) # print y_single_11, predicted_single_11 # print classification_report(y_single_11, predicted_single_11) f1_1516 = f1(y_single_1516, predicted_single_1516) f1_11 = f1(y_single_11,predicted_single_11) f1_average = (f1_1516 + f1_11)/2.0 # print "Positive f1: %f" %(f1_average) if f1_average > max_average_f1: max_average_f1 = f1_average best_sub_features = sub_feature_list print sub_feature_list print f1_average print "-"*20 print "Best Average F1:%f" %(max_average_f1) print "Best Sub Features:" print best_sub_features
import pandas as pd

from sklearn.preprocessing import Imputer
from sklearn.cross_validation import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score as f1

from BagOfWords import BagOfWords
from CatsTransformer import CatsTransformer
from CatsLister import CatsLister
from Word2VecFeature import Word2VecFeature

df = pd.read_csv("../data/original.csv")

X = df[["keyword", "category_products_and_services"]].values
y = df["labels"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5,
                                                    random_state=1)

missing_words = []

cl = CatsLister(column=1)
ct = CatsTransformer(column=1)
bof = BagOfWords(column=0)
w2vf = Word2VecFeature(column=0, fname="../data/model",
                       missing_words=missing_words)

features = FeatureUnion([('ct', ct), ('bof', bof), ('w2vf', w2vf)])
clf = LogisticRegression()
pipeline = Pipeline([('cl', cl), ('features', features),
                     ('imp', Imputer(strategy="median")), ('clf', clf)])

pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

print "The f1 scores for the three classes are: %.3f, %.3f, and %.3f." % tuple(f1(y_test, y_pred, average=None))
print "Found %d missing words." % len(missing_words)
def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--tree_estimator_directory","-td",default="/infolab/node4/lukuang/2015-RTS/src/my_code/post_analysis/predictor_analysis/disk4-5/predictor_data/post/tree_estimator") parser.add_argument("--number_of_iterations","-ni",type=int,default=50) parser.add_argument("--error_threshold","-et",type=int,default=30) parser.add_argument("--silent_query_info_file","-sf",default="/infolab/node4/lukuang/2015-RTS/disk4-5/eval/silent_query_info") parser.add_argument("--retrieval_method","-rm",choices=list(map(int, RetrievalMethod)),default=0,type=int, help=""" Choose the retrieval method: 0:f2exp 1:dirichlet 2:pivoted 3:bm25 """) parser.add_argument("--use_auc","-ua",action="store_true") parser.add_argument("--title_only","-to",action="store_true") parser.add_argument("--metric_string","-ms",default="P_10") parser.add_argument("tree_store_dir") args=parser.parse_args() index_type = IndexType.processed eval_data = EvalData(index_type,args.metric_string) args.retrieval_method = RetrievalMethod(args.retrieval_method) result_dir = R_DIR[index_type][args.retrieval_method] print "result dir %s" %(result_dir) result_files = get_result_files(result_dir) query_data_file = os.path.join(args.tree_estimator_directory,index_type.name,args.retrieval_method.name) query_data_file = os.path.join(query_data_file,"data") print "get value pair %s" %(query_data_file) values = json.load(open(query_data_file)) all_metrics = {} for day in values: all_metrics[day] = eval_data.get_metric(result_files[day]) silent_query_info = json.load(open(args.silent_query_info_file)) # print all_metrics title_query_data = [] desc_query_data = [] query_data = [] silent_judgments = [] silent_days = {} day = "10" for qid in values.values()[0].keys(): # m = re.search("^(\d+)_",qid) # if m: # q_num = int(m.group(1)) # if q_num > 650: # continue # else: # raise RuntimeError("Mal qid format %s" %(qid)) day_qid = "10_%s" %(qid) # print day_qid # print results[day] if args.title_only: if "title" not in qid: continue if qid in all_metrics[day]: day_query_metric = all_metrics[day][qid] m = re.search("^(\d+)_",qid) if m: q_num = m.group(1) else: raise RuntimeError("Mal qid format %s" %(qid)) if q_num in silent_query_info : silent_days[day_qid] = 1 else: silent_days[day_qid] = 0 else: print "%s query has no metric!" 
%(qid) day_query_metric = .0 silent_days[day_qid] = 1 single_data = {} single_data["day_qid"] = day_qid single_data["metric"] = day_query_metric single_data["values"] = values[day][qid] if "title" in qid: title_query_data.append(single_data) else: desc_query_data.append(single_data) query_data.append(single_data) silent_judgments.append( silent_days[day_qid] ) title_tree = load_tree(args.tree_store_dir,QueryPart.title,args.retrieval_method,args.metric_string) title_predicted = title_tree.output_result(title_query_data) if not args.title_only: desc_tree = load_tree(args.tree_store_dir,QueryPart.desc,args.retrieval_method,args.metric_string) desc_predicted = desc_tree.output_result(desc_query_data) # print "There are %d queries" %(len(query_data)) # print "%d of them are silent" %(sum(silent_judgments)) print "There are %d samples" %(len(query_data)) # print thresholds num_of_split = 10 f1_macro_average = .0 f1_average = .0 skf = StratifiedKFold(n_splits=num_of_split,shuffle=True) for training_index, test_index in skf.split(query_data, silent_judgments): all_training_data = [] training_title_query_data = [] training_desc_query_data = [] # print "%d training %d testing" %(len(training_index),len(test_index)) for i in training_index: single_data = deepcopy(query_data[i]) day_qid = single_data["day_qid"] all_training_data.append(single_data ) if "title" in day_qid: training_title_query_data.append(single_data) else: if not args.title_only: training_desc_query_data.append(single_data) train_title_predicted = title_tree.output_result(training_title_query_data) if not args.title_only: train_desc_predicted = desc_tree.output_result(training_desc_query_data) else: train_desc_predicted = {0:0} thresholds = get_threshold(train_title_predicted.values(),train_desc_predicted.values(),args.title_only) best_tree_threshold = {} best_f1_score = -1000 best_f1_threshold = .0 for threshold in thresholds: sub_training_data = [] training_pre_y_true = [] training_pre_y_score = [] for single_data in all_training_data: day_qid = single_data["day_qid"] if "title" in day_qid: if (title_predicted[day_qid] <= threshold["title"]): sub_training_data.append(single_data ) else: training_pre_y_score.append(1000) training_pre_y_true.append(silent_days[day_qid]) else: if not args.title_only: if (desc_predicted[day_qid] <= threshold["desc"]): sub_training_data.append(single_data) else: training_pre_y_score.append(1000) training_pre_y_true.append(silent_days[day_qid]) forest = Forest(sub_training_data,args.error_threshold,args.number_of_iterations) forest.start_training() training_predicted_values = forest.output_result(sub_training_data) training_y_true, training_y_score = make_score_prediction_lists(training_predicted_values,silent_days) training_y_true = training_pre_y_true + training_y_true training_y_score = training_pre_y_score + training_y_score threshold_best_f1_threshold,theshold_best_f1_score = get_best_f1_threshold(training_y_true, training_y_score) if theshold_best_f1_score > best_f1_score: best_tree_threshold = threshold best_f1_score = theshold_best_f1_score best_f1_threshold = threshold_best_f1_threshold print "best f1 threshold:%f, best f1 %f:" %(best_f1_threshold,best_f1_score) print best_tree_threshold testing_data = [] testing_pre_y_true = [] testing_pre_y_score = [] for j in test_index: single_data = deepcopy(query_data[j]) day_qid = single_data["day_qid"] if "title" in day_qid: if (title_predicted[day_qid] <= best_tree_threshold["title"]): testing_data.append(single_data ) else: 
testing_pre_y_score.append(1000) testing_pre_y_true.append(silent_days[day_qid]) else: if not args.title_only: if (desc_predicted[day_qid] <= best_tree_threshold["desc"]): testing_data.append(single_data ) else: testing_pre_y_score.append(1000) testing_pre_y_true.append(silent_days[day_qid]) # test_forest = Forest(testing_data,args.error_threshold,args.number_of_iterations) # test_forest.start_training() test_predicted_values = forest.output_result(testing_data) testing_y_true, testing_y_score = make_score_prediction_lists(test_predicted_values,silent_days) testing_y_true = testing_pre_y_true + testing_y_true testing_y_score = testing_pre_y_score + testing_y_score test_y_predict = [] for single_score in testing_y_score: if single_score < best_f1_threshold: test_y_predict.append(1) else: test_y_predict.append(0) f1_macro_average += f1(testing_y_true, test_y_predict,average="macro")/(1.0*num_of_split) f1_average += f1(testing_y_true, test_y_predict)/(1.0*num_of_split) print "Positive f1: %f" %(f1_average) print "Average f1: %f" %(f1_macro_average) print "-"*20