def __scoreMulitClass(self, Predictions: Array, Actual: list) -> list:
    MiMa = self.__getMacroAndMicroScore(Predictions, Actual)
    Score = [
        MiMa[0],
        MiMa[1],
        F1(y_pred=Predictions, y_true=Actual, average='weighted')
    ]
    self.__makeFrameAndSave(
        'f1.csv',
        [Score],
        ['macro', 'micro', 'weighted'],
    )
    return Score
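# A minimal sketch of what a helper like `__makeFrameAndSave` above might do;
# the real implementation isn't shown, so this pandas-based body is an
# assumption, not the author's code.
import pandas as pd

def make_frame_and_save(path, rows, columns):
    # one row of scores, one column per averaging mode, written as CSV
    pd.DataFrame(rows, columns=columns).to_csv(path, index=False)

make_frame_and_save('f1.csv', [[0.71, 0.74, 0.73]], ['macro', 'micro', 'weighted'])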
def evaluate_answer(self, session, data_x, data_y, dataset_name,
                    sample=4000, log=False):
    """
    Evaluate the model's performance using the harmonic mean of F1 and
    Exact Match (EM) with the set of true answer labels.

    This step actually takes quite some time, so we only evaluate on a
    random sample of examples from either the training or the test set.

    :param session: session should always be centrally managed in train.py
    :param data_x, data_y: a representation of our data; in some
        implementations you can pass in multiple components (arguments)
        of one dataset to this function
    :param dataset_name: name used when logging the score
    :param sample: how many examples in the dataset we look at
    :param log: whether we print to the stdout stream
    :return: the computed score
    """
    random_indices = random.sample(range(len(data_x)), sample)
    sample_x = data_x[random_indices]
    sample_y = data_y[random_indices]
    if self.FLAGS.scoring == 'auc':
        yp = self.predict_proba(session, sample_x)[0]
        # one-liner for one-hot encoding
        sample_y = np.eye(self.FLAGS.output_size)[sample_y]
        score = AUC(sample_y, yp)
    elif self.FLAGS.scoring == 'f1_macro':
        yp = self.predict(session, sample_x)
        score = F1(sample_y, yp, average='macro')
    if log:
        logging.info("{} - Score: {}, for {} samples".format(
            dataset_name, score, sample))
    return score
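# The `np.eye(n)[y]` indexing above is a compact one-hot encoder; a
# self-contained demo with toy labels:
import numpy as np

y = np.array([0, 2, 1, 2])  # integer class labels
one_hot = np.eye(3)[y]      # row i of the identity matrix is the one-hot vector for class i
print(one_hot)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]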
# `X`, `Y`, the held-out split (`X_test`, `Y_test`), the fitted classifier
# `clf`, and its predictions `pred` come from earlier cells.
from time import time as T  # assumed alias: T() is used as a wall-clock timer below
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold as SKF
from sklearn.model_selection import cross_val_score as CVS

model = SVC(kernel='rbf', C=13, gamma=0.325)
folds = 5
start = T()
cross_val = SKF(n_splits=folds, shuffle=True, random_state=4)
scores = CVS(model, X, Y, scoring='accuracy', cv=cross_val)
end = T()
accuracy = scores.mean() * 100
print(f"SVC has mean accuracy of {accuracy:.3f}%\n" +
      f"Cross Validation took {(end - start) * 1000:.3f}ms")

# ### Calculate F1-Score of the model

# In[13]:

from sklearn.metrics import f1_score as F1

f1score = F1(Y_test, pred, average='weighted')
print(f"SVC has F1-Score = {f1score * 100:.3f}%")

# ### Plot Confusion Matrix

# In[14]:

from sklearn.metrics import plot_confusion_matrix as PCM

PCM(clf, X_test, Y_test)
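# Note for newer environments: `plot_confusion_matrix` was removed in
# scikit-learn 1.2; the equivalent call on current versions is the Display
# API (same fitted `clf` and held-out split as above):
from sklearn.metrics import ConfusionMatrixDisplay

ConfusionMatrixDisplay.from_estimator(clf, X_test, Y_test)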
    'XGB__subsample': [0.5, 0.6, 0.7],
    'XGB__colsample_bytree': [0.5, 0.6, 0.7]
}

model = RSC(pipe,
            RSCparameter,
            scoring='f1',
            n_iter=60,
            cv=5,
            return_train_score=True)
model.fit(xtrain, ytrain)
y_val_pred = model.predict(xval).astype(int)
print('CV Train F1: %s' % (max(model.cv_results_['mean_train_score'])))
print('CV Validation F1: %s' % (max(model.cv_results_['mean_test_score'])))
print('Validation F1: %s' % (F1(yval, y_val_pred)))

# pretty-print the best hyper-parameters, stripping the 'XGB__' pipeline prefix
parameter = []
for x, y in model.best_params_.items():
    parameter.append(" '{}' : {}".format(x.rpartition('__')[-1], y))
parameter = ', '.join(parameter)
print(parameter)

xtest = test.drop(columns=['Survived'])
prediction = model.predict(xtest).astype(int)
sub = pd.DataFrame({'PassengerId': test_id, 'Survived': prediction})
sub.to_csv('submission_1.csv', index=False)

a = model.best_estimator_.named_steps['XGB'].feature_importances_
print(a)
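# Caveat on the two `max(...)` prints above: the maximum train score and the
# maximum validation score can come from different hyper-parameter candidates.
# To report both numbers for the single candidate the search actually selected,
# index `cv_results_` with `best_index_` (a sketch against the fitted `model`):
best = model.best_index_
print('Best-candidate CV train F1: %s' % model.cv_results_['mean_train_score'][best])
print('Best-candidate CV validation F1: %s' % model.cv_results_['mean_test_score'][best])  # == model.best_score_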
def __getMacroAndMicroScore(self, Predicted: Array, Actual: list) -> tuple:
    return (F1(y_pred=Predicted, y_true=Actual, average='macro'),
            F1(y_pred=Predicted, y_true=Actual, average='micro'))
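# How the three averaging modes used by these helpers differ, on a toy
# imbalanced multi-class example (labels made up for illustration):
from sklearn.metrics import f1_score as F1

y_true = [0, 0, 0, 0, 1, 1, 2]
y_pred = [0, 0, 0, 1, 1, 0, 2]
for avg in ('macro', 'micro', 'weighted'):
    # macro: unweighted mean of per-class F1; micro: F1 over pooled
    # TP/FP/FN; weighted: per-class F1 weighted by class support
    print(avg, F1(y_true=y_true, y_pred=y_pred, average=avg))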
def main(args, mode="divide", emb=None): # data_path train_file = "data/" + args.dataset + "_train.txt" test_file = "data/" + args.dataset + "_test.txt" if mode == "divide": emb_file = "embedding/" + args.emb_file_name + ".pickle" # load train edges train_edges = [] with open(train_file, "r") as f: line = f.readline() while line: train_edges.append(list(map(int, line.split("\n")[0].split(",")))) line = f.readline() # load test edges test_edges = [] with open(test_file, "r") as f: line = f.readline() while line: test_edges.append(list(map(int, line.split("\n")[0].split(",")))) line = f.readline() # load embeddng if mode == "divide": with open(emb_file, "br") as f: emb = pickle.load(f) # preprocessing for train edges positive_train_edges = [edge for edge in train_edges if edge[2] == 1] negative_train_edges = [[edge[0], edge[1], 0] for edge in train_edges if edge[2] == -1] sample_size = min([ len(positive_train_edges), int(len(negative_train_edges) * float(args.ratio)) ]) sampled_edges = random.sample(positive_train_edges, sample_size) + negative_train_edges random.shuffle(sampled_edges) x_train = np.array([ np.concatenate((emb[edge[0]], emb[edge[1]])) for edge in sampled_edges ]) y_train = [edge[2] for edge in sampled_edges] x_valid = np.array( [np.concatenate((emb[edge[0]], emb[edge[1]])) for edge in test_edges]) y_valid = [1 if edge[2] == 1 else 0 for edge in test_edges] # train logisitic regression clf = LR().fit(x_train, y_train) # calc each metric and output log if mode == "divide": try: log_name = "embedding/" + args.emb_file_name + "_emb_log.txt" with open(log_name, "r") as f: log = f.read() except FileNotFoundError as e: log = "No embedding log\n" auc = roc_auc_score(y_valid, clf.predict_proba(x_valid)[:, 1]) print("auc", auc) y_ = clf.predict(x_valid) f1 = F1(y_valid, y_) print("F1", f1) macro = F1(y_valid, y_, labels=[0, 1], average="macro") print("macro F1", macro) if mode == "divide": if args.write_log: now = datetime.datetime.now() now_time = now.strftime("%Y%m%d%H:%M:%S") log_name = "log/" + now_time + "_relation_log.txt" with open(log_name, "w") as f: f.write("score\n") f.write("AUC:" + str(auc) + "\n") f.write("f1:" + str(f1) + "\n") f.write("macro f1:" + str(macro) + "\n") f.write("emb info\n") f.write(log) return auc, f1, macro
# assumed imports for the aliases used below
import datetime
import pickle
import random

import networkx as nx
import numpy as np
from sklearn.linear_model import LogisticRegression as LR
from sklearn.metrics import f1_score as F1


def main(args):
    # data path
    train_file = "data/" + args.dataset + "_train.txt"
    test_file = "data/" + args.dataset + "_test.txt"
    emb_file = "embedding/" + args.emb_file_name + ".pickle"
    nodes = set()

    # load train edges
    train_edges = []
    with open(train_file, "r") as f:
        line = f.readline()
        while line:
            line = list(map(int, line.split("\n")[0].split(",")))
            train_edges.append(line)
            nodes.add(line[0])
            nodes.add(line[1])
            line = f.readline()

    # load test edges
    test_edges = []
    with open(test_file, "r") as f:
        line = f.readline()
        while line:
            line = list(map(int, line.split("\n")[0].split(",")))
            test_edges.append(line)
            nodes.add(line[0])
            nodes.add(line[1])
            line = f.readline()
    n = len(nodes)

    # load embedding
    with open(emb_file, "br") as f:
        emb = pickle.load(f)

    # make graph
    G = nx.DiGraph()
    G.add_edges_from([[edge[0], edge[1]] for edge in train_edges])
    G.add_edges_from([[edge[0], edge[1]] for edge in test_edges])

    # preprocess test edges: class 0 = negative sign, class 1 = positive sign
    test_negative_edges = [[edge[0], edge[1], 0] for edge in test_edges
                           if edge[2] == -1]
    test_positive_edges = [[edge[0], edge[1], 1] for edge in test_edges
                           if edge[2] == 1]

    # sample non-existing edges as class 2 (note: randint's upper bound is exclusive)
    neg_edges = []
    while len(neg_edges) < len(test_negative_edges):
        edge = [np.random.randint(0, n - 1), np.random.randint(0, n - 1)]
        if G.has_edge(edge[0], edge[1]):
            continue
        else:
            neg_edges.append([edge[0], edge[1], 2])
    test_edges = test_negative_edges + test_positive_edges + neg_edges

    # preprocess train edges
    train_negative_edges = [[edge[0], edge[1], 0] for edge in train_edges
                            if edge[2] == -1]
    # sample positive train edges (same number as negative edges)
    train_positive_edges = random.sample(
        [edge for edge in train_edges if edge[2] == 1],
        len(train_negative_edges))
    # sample non-existing edges
    neg_edges = []
    while len(neg_edges) < len(train_negative_edges):
        edge = [np.random.randint(0, n - 1), np.random.randint(0, n - 1)]
        if G.has_edge(edge[0], edge[1]):
            continue
        else:
            neg_edges.append([edge[0], edge[1], 2])
    sampled_edges = train_negative_edges + train_positive_edges + neg_edges
    random.shuffle(sampled_edges)

    # make train and test sets: an edge is featurized as the
    # concatenation of its endpoint embeddings
    x_train = np.array([
        np.concatenate((emb[edge[0]], emb[edge[1]])) for edge in sampled_edges
    ])
    y_train = [edge[2] for edge in sampled_edges]
    x_valid = np.array(
        [np.concatenate((emb[edge[0]], emb[edge[1]])) for edge in test_edges])
    y_valid = [edge[2] for edge in test_edges]

    # train logistic regression
    clf = LR(multi_class="ovr").fit(x_train, y_train)

    # calc each metric and output log
    y_ = clf.predict(x_valid)
    if args.read_log:
        log_name = "embedding/" + args.emb_file_name + "_emb_log.txt"
        with open(log_name, "r") as f:
            log = f.read()
    else:
        log = "No log"
    macro = F1(y_valid, y_, labels=[0, 1, 2], average="macro")
    print("macro f1", macro)
    micro = F1(y_valid, y_, labels=[0, 1, 2], average="micro")
    print("micro f1", micro)

    if args.write_log:
        now = datetime.datetime.now()
        now_time = now.strftime("%Y%m%d%H:%M:%S")
        log_name = "log/" + now_time + "_link_prediction_log.txt"
        with open(log_name, "w") as f:
            f.write("score\n")
            f.write("micro f1:" + str(micro) + "\n")
            f.write("macro f1:" + str(macro) + "\n")
            f.write("emb info\n")
            f.write(log)

    return macro, micro
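# The negative-sampling loop above, lifted out: rejection-sample node pairs
# until enough non-edges are found. Standalone sketch (random graph and
# count are illustrative; numpy's randint upper bound is exclusive):
import networkx as nx
import numpy as np

def sample_non_edges(G, n_nodes, k):
    non_edges = []
    while len(non_edges) < k:
        u, v = np.random.randint(0, n_nodes), np.random.randint(0, n_nodes)
        if not G.has_edge(u, v):
            non_edges.append((u, v))
    return non_edges

G = nx.gnp_random_graph(50, 0.1, directed=True)
print(sample_non_edges(G, 50, 5))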
def evaluation(y_test, y_pred):
    # `amax` is assumed to reduce one-hot rows to class indices here
    # (i.e. an argmax over the class axis); plain numpy `amax` would
    # return the row maxima instead.
    print("test precision:",
          PS(amax(y_test), amax(y_pred), labels=range(10), average="macro"))
    print("test recall:",
          RS(amax(y_test), amax(y_pred), labels=range(10), average="macro"))
    print("test f1 score:",
          F1(amax(y_test), amax(y_pred), labels=range(10), average="macro"))
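# The usual decode step before label-based sklearn metrics, shown explicitly:
# argmax turns one-hot rows back into class indices (toy round-trip below).
import numpy as np
from sklearn.metrics import f1_score

y_true = np.argmax(np.eye(10)[[3, 1, 4, 1]], axis=1)  # one-hot -> class indices
y_pred = np.argmax(np.eye(10)[[3, 1, 5, 1]], axis=1)
print(f1_score(y_true, y_pred, labels=range(10), average="macro"))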
completenessList.append(completeness)
purityList.append(purity)
threshList.append(thresh)

# Makes predictions based on the calculated threshold
y_predicted = getPredictionsForThresh(y_probs, thresh)

# Calculates completeness-purity curve for these predictions
completenessCurve, purityCurve, threshCurve = getCompletenessPurityCurve(
    y_probs, y_test, step=False)
completenessCurves.append(completenessCurve)
purityCurves.append(purityCurve)
threshCurves.append(threshCurve)

# Stats!
f1AllSN = F1(y_test, [1] * len(y_test))
f1Actual = F1(y_test, y_predicted)
f1Inverse = F1(y_test, y_predicted, pos_label=0)
classTable = pd.crosstab(np.asarray(y_test),
                         np.asarray(y_predicted),
                         rownames=['Actual'],
                         colnames=['Pred'])

print("----- Dataset:", trainingFile, "-----")
print("Goal purity: ", asPct(purityGoal))
print("Achieved purity: ", asPct(purity))
print("Achieved completeness: ", asPct(completeness))
print("Threshold: ", asPct(thresh))
print("F1 (all SN): ", asPct(f1AllSN))
print("F1 (actual): ", asPct(f1Actual))
print("F1 (inverse):", asPct(f1Inverse))
print(classTable)
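# What `pos_label=0` buys in the "F1 (inverse)" line: the same predictions
# scored with class 0 treated as the detection of interest. Tiny illustration
# with made-up labels:
from sklearn.metrics import f1_score as F1

y_test = [1, 1, 0, 0, 1]
y_pred = [1, 0, 0, 1, 1]
print(F1(y_test, y_pred))               # F1 of class 1 (SN as positive)
print(F1(y_test, y_pred, pos_label=0))  # F1 of class 0 (non-SN as positive)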
        logit, loss = classifier(output, label)
        pred = torch.argmax(torch.softmax(logit, dim=1), dim=1).data.cpu().numpy()
        label = label.data.cpu().numpy()
        preds = np.concatenate((pred, preds))
        labels = np.concatenate((label, labels))
        loss = loss.mean()
        valid_loss += loss.item()

    print(len(preds[preds == 1]), len(labels[labels == 1]))
    # sklearn metrics take (y_true, y_pred): pass the gold labels first,
    # otherwise precision and recall are silently swapped
    acc = ACC(labels, preds)
    pre = P(labels, preds)
    rec = R(labels, preds)
    f1 = F1(labels, preds)
    print(
        'acc:{:.4f}, precision:{:.4f}, recall:{:.4f}, f1:{:.4f}, train_loss:{:.4f}, valid_loss:{:.4f}'
        .format(acc, pre, rec, f1, total_loss / len(train_dataloader),
                valid_loss / len(valid_dataloader)))

    model.eval()
    classifier.eval()
    with torch.no_grad():
        preds, ids = [], []
        for i, batch in enumerate(test_dataloader):
            data, mask = tensorized(batch[:, 0], vocab)
            id = np.array(list(batch[:, 1]))
            data, mask = data.to(DEVICE), mask.to(DEVICE)
            output = model(data, mask)
            logit, loss = classifier(output)
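# Why the metric calls pass the gold labels first: sklearn's convention is
# metric(y_true, y_pred), and swapping the arguments silently exchanges
# precision and recall. Quick demonstration on toy arrays:
import numpy as np
from sklearn.metrics import precision_score, recall_score

gold = np.array([1, 1, 1, 0, 0])
pred = np.array([1, 0, 0, 0, 0])
print(precision_score(gold, pred), recall_score(gold, pred))  # 1.0  0.333...
print(precision_score(pred, gold), recall_score(pred, gold))  # swapped: 0.333...  1.0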