def evaluate_test_set(session, tags, preds, fnames, lines, batch_limit=None):
    """Evaluate on the test queue and print precision, recall and F1.

    Two sets of counts are accumulated over the same batches: one from
    ``metrics.precision`` / ``metrics.recall`` with their default settings
    and one from the same functions called with ``binary=True``.

    Relies on the module-level names ``metrics``, ``reader`` and ``tf``.

    Args:
        session: object whose ``run`` method evaluates the fetches below.
        tags: fetch yielding the gold tags of one batch.
        preds: fetch yielding the predicted tags of one batch.
        fnames: fetch for the batch file names (fetched but not used here).
        lines: fetch for the batch line numbers (fetched but not used here).
        batch_limit: stop after this many batches; ``None`` keeps reading
            until ``tf.errors.OutOfRangeError`` is raised.

    Returns:
        None. Scores are printed, and only when at least one true positive
        was counted in the non-binary setting.
    """

    def ratio(numerator, denominator):
        # float() stops Python 2 from truncating ``int / int``; an empty
        # denominator yields 0.0 instead of raising ZeroDivisionError.
        return float(numerator) / denominator if denominator else 0.0

    batch_num = 0
    num_sequences = 0
    p_tp_total, p_fp_total, r_tp_total, r_fn_total = 0, 0, 0, 0
    p_tp_total_binary, p_fp_total_binary = 0, 0
    r_tp_total_binary, r_fn_total_binary = 0, 0

    while True:
        try:
            # File names and line numbers are fetched in the same run call
            # as before, but their values are not needed for the scores.
            y, y_, _, _ = session.run([tags, preds, fnames, lines])
        except tf.errors.OutOfRangeError:
            print('test queue is empty')
            break

        # Counts with the metric functions' default settings.
        p_tp, p_fp = metrics.precision(reader, y, y_, counts=True)
        r_tp, r_fn = metrics.recall(reader, y, y_, counts=True)
        p_tp_total += p_tp
        p_fp_total += p_fp
        r_tp_total += r_tp
        r_fn_total += r_fn

        # Counts for the same predictions scored with binary=True.
        p_tp_binary, p_fp_binary = metrics.precision(
            reader, y, y_, binary=True, counts=True)
        r_tp_binary, r_fn_binary = metrics.recall(
            reader, y, y_, binary=True, counts=True)
        p_tp_total_binary += p_tp_binary
        p_fp_total_binary += p_fp_binary
        r_tp_total_binary += r_tp_binary
        r_fn_total_binary += r_fn_binary

        # TODO: Train binary, eval binary setting
        num_sequences += len(y)
        batch_num += 1
        if batch_num == batch_limit:
            break

    if p_tp_total:
        precision = ratio(p_tp_total, p_tp_total + p_fp_total)
        recall = ratio(r_tp_total, r_tp_total + r_fn_total)
        f1 = metrics.f1(precision, recall)

        precision_binary = ratio(
            p_tp_total_binary, p_tp_total_binary + p_fp_total_binary)
        recall_binary = ratio(
            r_tp_total_binary, r_tp_total_binary + r_fn_total_binary)
        f1_binary = metrics.f1(precision_binary, recall_binary)

        # Single-argument print calls behave the same on Python 2 and 3;
        # the double space reproduces the old ``print 'label: ', value``.
        print('Evaluated {} sequences from test set'.format(num_sequences))
        print('Precision:  {}'.format(precision))
        print('Recall:  {}'.format(recall))
        print('f1:  {}'.format(f1))
        print('Precision Binary:  {}'.format(precision_binary))
        print('Recall Binary:  {}'.format(recall_binary))
        print('f1 Binary:  {}'.format(f1_binary))
def compute_all_metrics(execution_id, path_input, path_output, formula, append):
    """Compute all metrics and persist them in a CSV file.

    Each entry of the input JSON is classified as positive when its
    ``formula`` score for ``'positive'`` is greater than or equal to the one
    for ``'negative'``; the result is compared with the entry's label to
    fill the confusion-matrix counts.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the JSON file that contains the
            classifications
        path_output (string): path of the CSV file that will persist the
            metrics
        formula (string): mean_max | mean_mean
        append (boolean): True appends one row to an existing file, False
            overwrites the file and writes the header row first

    Raises:
        Exception: when the two scores of an entry are neither ``>=`` nor
            ``<`` each other (only possible for non-comparable values such
            as NaN).
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing the confusion-matrix counts
    tp = tn = fp = fn = 0
    for entry in data:
        values = entry['values']
        scores = values[formula]
        if scores['positive'] >= scores['negative']:
            if values['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif scores['positive'] < scores['negative']:
            if values['label'] == 'negative':
                tn += 1
            else:
                fn += 1
        else:
            # str() keeps a non-string id from turning this into a TypeError
            raise Exception(
                "Positive similarity equals to negative similarity to news " +
                str(entry['id']))

    # Results get their own names so the imported functions are not shadowed.
    accuracy_value = accuracy(tp, tn, fp, fn)
    recall_value = recall(tp, fn)
    precision_value = precision(tp, fp)
    f1_value = f1(precision_value, recall_value)
    specificity_value = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        if not append:
            writer.writerow([
                'execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
                'precision', 'recall', 'f1', 'specificity'
            ])
        writer.writerow([
            execution_id, tp, tn, fp, fn, accuracy_value, precision_value,
            recall_value, f1_value, specificity_value
        ])
def test_1(self):
    """Check confusion-matrix counts and the derived scores on ten labels."""
    actual = [1, 1, 0, 1, 1, 1, 0, 0, 1, 1]
    predicted = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]

    tp, fn, fp, tn = metrics.confusion_matrix(actual, predicted)
    self.assertEqual(tp, 3)
    self.assertEqual(fn, 4)
    self.assertEqual(fp, 1)
    self.assertEqual(tn, 2)

    # Ratios are compared approximately: the expected values are floats, and
    # float literals keep the expectations correct under Python 2 division.
    self.assertAlmostEqual(metrics.accuracy(actual, predicted), 0.5)
    self.assertAlmostEqual(metrics.precision(actual, predicted), 3 / 4.0)
    self.assertAlmostEqual(metrics.recall(actual, predicted), 3 / 7.0)
    self.assertAlmostEqual(metrics.f1(actual, predicted), 6 / 11.0)
def compute_all_metrics(execution_id, path_input, path_output, formula, append):
    """Compute all metrics and persist them in a CSV file.

    An entry is predicted positive when its ``formula`` score for
    ``'positive'`` is at least the score for ``'negative'``, negative
    otherwise; predictions are compared with ``entry['values']['label']``.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the JSON file with the classifications
        path_output (string): path of the CSV file that receives the metrics
        formula (string): mean_max | mean_mean
        append (boolean): True adds a row to the existing file; False
            rewrites the file, starting with the header row

    Raises:
        Exception: when an entry's two scores compare as neither ``>=`` nor
            ``<`` (only possible for non-comparable values such as NaN).
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics
    tp = tn = fp = fn = 0
    for item in data:
        item_values = item['values']
        positive_score = item_values[formula]['positive']
        negative_score = item_values[formula]['negative']
        if positive_score >= negative_score:
            if item_values['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif positive_score < negative_score:
            if item_values['label'] == 'negative':
                tn += 1
            else:
                fn += 1
        else:
            # format() accepts ids of any type, unlike string concatenation
            raise Exception(
                "Positive similarity equals to negative similarity to news "
                "{}".format(item['id']))

    # Separate result names keep the imported metric functions intact.
    accuracy_score = accuracy(tp, tn, fp, fn)
    recall_score = recall(tp, fn)
    precision_score = precision(tp, fp)
    f1_score = f1(precision_score, recall_score)
    specificity_score = specificity(tn, fp)

    # persisting the results
    header = ['execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
              'precision', 'recall', 'f1', 'specificity']
    row = [execution_id, tp, tn, fp, fn, accuracy_score, precision_score,
           recall_score, f1_score, specificity_score]
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if not append:
            spamwriter.writerow(header)
        spamwriter.writerow(row)
def evaluate(model, data_loader, weights, suppress_output=True):
    """Run the model over ``data_loader`` and return (mean loss, F1).

    The model and class weights are moved to CUDA when available, otherwise
    to the CPU. Each batch is unpacked into inputs, mask and labels; the mask
    is repeated ``cfg.getint('model', 'num_heads')`` times along its first
    axis before the forward pass. Predictions are the argmax over axis 1 of
    the logits.

    Args:
        model: callable taking ``(inputs, mask)`` and returning logits.
        data_loader: iterable of 3-tuples of tensors.
        weights: tensor passed to ``torch.nn.CrossEntropyLoss``.
        suppress_output: forwarded unchanged to ``metrics.f1``.

    Returns:
        Tuple of the loss averaged over batches and the value returned by
        ``metrics.f1``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    weights = weights.to(device)
    model.to(device)
    criterion = torch.nn.CrossEntropyLoss(weights)
    model.eval()

    loss_sum = 0
    step_count = 0
    gold = []
    predicted = []

    for raw_batch in data_loader:
        inputs, mask, labels = tuple(t.to(device) for t in raw_batch)
        head_count = cfg.getint('model', 'num_heads')
        mask = mask.repeat(head_count, 1, 1)

        with torch.no_grad():
            logits = model(inputs, mask)
            batch_loss = criterion(logits, labels)

        scores = logits.detach().cpu().numpy()
        label_array = labels.to('cpu').numpy()
        best_classes = np.argmax(scores, axis=1)

        gold.extend(label_array.tolist())
        predicted.extend(best_classes.tolist())

        loss_sum += batch_loss.item()
        step_count += 1

    score = metrics.f1(gold, predicted, reldata.int2label,
                       reldata.label2int, suppress_output)
    mean_loss = loss_sum / step_count
    return mean_loss, score
def metric_fn(label_ids, predict, num_labels, answer_num):
    """Build the precision, recall and F1 metrics for class index 1.

    A sequence mask derived from ``answer_num`` (maximum length
    ``FLAGS.max_answer_num``) is passed as ``weights`` to every metric.

    Returns:
        Dict with keys ``"precision"``, ``"recall"`` and ``"f1_score"``
        holding whatever the corresponding ``metrics`` functions return.
    """
    mask = tf.sequence_mask(answer_num, FLAGS.max_answer_num)

    scorers = (
        ("precision", metrics.precision),
        ("recall", metrics.recall),
        ("f1_score", metrics.f1),
    )
    # Every scorer gets the same arguments; a fresh [1] list is built per
    # call, as in the separate calls this loop replaces.
    return {
        key: scorer(label_ids, predict, num_classes=num_labels,
                    weights=mask, pos_indices=[1])
        for key, scorer in scorers
    }
def evaluate_test_set_binary(session, tags, preds, fnames, lines, batch_limit=None):
    """Evaluate binary tagging on the test queue and print the scores.

    The gold tags and predictions are read from the ``tags`` and ``preds``
    arguments. (Previously the arguments were ignored and the module-level
    ``test_binary_tags`` / ``test_preds`` were fetched instead, so callers
    must pass the binary tag fetch here.)

    Relies on the module-level names ``metrics``, ``reader`` and ``tf``.

    Args:
        session: object whose ``run`` method evaluates the fetches below.
        tags: fetch yielding the gold binary tags of one batch.
        preds: fetch yielding the predicted tags of one batch.
        fnames: unused; kept so the signature matches ``evaluate_test_set``.
        lines: unused; kept so the signature matches ``evaluate_test_set``.
        batch_limit: stop after this many batches; ``None`` keeps reading
            until ``tf.errors.OutOfRangeError`` is raised.

    Returns:
        None. Scores are printed, and only when at least one true positive
        was counted.
    """

    def ratio(numerator, denominator):
        # float() stops Python 2 from truncating ``int / int``; an empty
        # denominator yields 0.0 instead of raising ZeroDivisionError.
        return float(numerator) / denominator if denominator else 0.0

    batch_num = 0
    num_sequences = 0
    p_tp_total, p_fp_total, r_tp_total, r_fn_total = 0, 0, 0, 0

    while True:
        try:
            # Train binary, eval binary
            y, y_ = session.run([tags, preds])
        except tf.errors.OutOfRangeError:
            print('test queue is empty')
            break

        p_tp, p_fp = metrics.precision_binary(reader, y, y_, counts=True)
        r_tp, r_fn = metrics.recall_binary(reader, y, y_, counts=True)
        p_tp_total += p_tp
        p_fp_total += p_fp
        r_tp_total += r_tp
        r_fn_total += r_fn

        num_sequences += len(y)
        batch_num += 1
        if batch_num == batch_limit:
            break

    if p_tp_total:
        precision = ratio(p_tp_total, p_tp_total + p_fp_total)
        recall = ratio(r_tp_total, r_tp_total + r_fn_total)
        f1 = metrics.f1(precision, recall)

        # Single-argument print calls behave the same on Python 2 and 3;
        # the double space reproduces the old ``print 'label: ', value``.
        print('Evaluated {} sequences from test set'.format(num_sequences))
        print('Precision:  {}'.format(precision))
        print('Recall:  {}'.format(recall))
        print('f1:  {}'.format(f1))
# Optionally resume from a saved checkpoint before training: a non-zero
# warm_start_init_step selects 'checkpoints/<model_name>-<step>'.
if warm_start_init_step != 0:
    ckpt_file = 'checkpoints/{}-{}'.format(model_name, warm_start_init_step)
    saver.restore(session, ckpt_file)

# Main training loop: one session.run of the training step per iteration.
for step_num in range(training_steps):
    # filenames and line_nums are fetched alongside the loss but are not
    # used in the visible part of this loop.
    _, batch_loss, filenames, line_nums = \
        session.run([step, loss, fnames, lines])

    # logging to stdout for sanity checks every 50 steps
    if step_num % 50 == 0:
        # NOTE(review): this is a second session.run, separate from the
        # training step above — confirm whether it is meant to score the
        # same batch or a fresh one.
        x, y, y_ = session.run([tokens, binary_tags, preds])
        precision = metrics.precision_binary(reader, y, y_)
        recall = metrics.recall_binary(reader, y, y_)
        f1 = metrics.f1(precision, recall)

        # print some info about the batch; only the first 15 positions of
        # the first sequence are shown
        print 'Loss: ', batch_loss
        print 'Precision: ', precision
        print 'Recall: ', recall
        print 'f1: ', f1
        print 'Sentence: ', reader.decode_tokens(x[0, :15])
        print 'Truth: ', y[0, :15]
        print 'Pred: ', y_[0, :15]
        print

    # write train accuracy to log files every 100 steps
    if step_num % 100 == 0:
        train_loss = 0
        train_eval_size = 50
def main():
    """Train a random forest on growing training subsets and plot the results.

    ``sys.argv[1]`` is the data directory; it must contain ``train`` and
    ``test`` sub-directories readable by ``read_data``. Ten percent of the
    training data is held out as a developer set. The forest is then fitted
    on 10%, 20%, ..., 100% of the remaining training data and its accuracy is
    printed for the training subset, the developer set and the test set.
    Finally three figures are shown: learning curves, a precision-recall
    curve and an F1-versus-threshold curve, the last two computed from the
    test-set probabilities of the final iteration.
    """
    import os  # local import: only this function needs it

    # os.path.join with a trailing '' keeps the trailing separator that the
    # former '\\train\\' concatenation produced, but uses the separator of
    # the current platform instead of a hard-coded backslash.
    base_dir = sys.argv[1]
    train_path = os.path.join(base_dir, 'train', '')
    test_path = os.path.join(base_dir, 'test', '')

    # load training data
    print(f'[INFO] - Loading training data from {train_path}')
    res = read_data(train_path)
    train_data, train_target = res[0], res[1]
    print(f'[INFO] - Total train data: {len(train_data)}')

    print(f'[INFO] - Loading testing data from {test_path}')
    res = read_data(test_path)
    test_data, test_target = res[0], res[1]
    print(f'[INFO] - Total test data: {len(test_data)}')

    # 10% of training data will go to developer data set
    print('[INFO] - Splitting training data into training data and developer data (keeping 10% for developer data)')
    res = train_test_split(train_data, train_target, test_size=0.1)
    train_data, dev_data = res[0], res[1]
    train_target, dev_target = res[2], res[3]
    print(f'[INFO] - Total training data after split {len(train_data)}')
    print(f'[INFO] - Total developer data {len(dev_data)}')

    rf = RandomForest(100, 10)

    # Single source of truth for the training fractions; also the x axis of
    # the learning-curve plot.
    train_fractions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    accuracy_train = []
    accuracy_test = []

    for counter, train_size in enumerate(train_fractions, start=1):
        print(f'\n[INFO] - Iteration No.{counter} (using {int(train_size*100)}% of 90% of train data).')

        if train_size != 1.0:
            res = train_test_split(train_data, train_target, train_size=train_size, shuffle=False)
            fold_data = res[0]
            fold_target = res[2]
        else:
            # The full fraction needs no split.
            fold_data = train_data
            fold_target = train_target

        feature_size = 100
        # NOTE(review): the vocabulary is built from the whole training set,
        # not from the current fold — confirm this is intended.
        vocabulary = frequent_features(train_data, feature_size)

        print('[INFO] - Fitting Random forest classifier using', feature_size, ' features...')
        rf.fit(fold_data, fold_target, vocabulary)

        print('[INFO] - Predicting with Random Forest classifier using train data...')
        rf_targets, _ = rf.predict(fold_data, vocabulary)
        accuracy_score = accuracy(fold_target, rf_targets)
        accuracy_train.append(accuracy_score)
        print(f'[INFO] - Accuracy: {accuracy_score}')

        # Developer accuracy is only reported, not collected for plotting.
        print('[INFO] - Predicting with Random Forest classifier using developer data...')
        rf_targets, _ = rf.predict(dev_data, vocabulary)
        accuracy_score = accuracy(dev_target, rf_targets)
        print(f'[INFO] - Accuracy: {accuracy_score}')

        print('[INFO] - Predicting with Random Forest classifier using test data...')
        rf_targets, probabilities = rf.predict(test_data, vocabulary)
        accuracy_score = accuracy(test_target, rf_targets)
        accuracy_test.append(accuracy_score)
        print(f'[INFO] - Accuracy: {accuracy_score}')

    # Plot titles name the classifier that is actually trained above.
    learning_curves_plot = plt.figure(1)
    plt.plot(train_fractions, accuracy_train, label='train')
    plt.plot(train_fractions, accuracy_test, label='test')
    plt.title('Learning Curves (Random Forest)')
    plt.legend(loc='lower right')
    plt.xlabel('Number of Train Data')
    plt.ylabel('Accuracy')

    # `probabilities` comes from the last loop iteration (100% of the data).
    precision_recall_plot = plt.figure(2)
    average_precision, average_recall, thresholds = precision_recall(probabilities, test_target, 10)
    plt.step(average_recall, average_precision, where='post')
    plt.title('Precision-Recall Curve (Random Forest)')
    plt.xlabel('Recall')
    plt.ylabel('Precision')

    f1_plot = plt.figure(3)
    f1_score = f1(average_precision, average_recall)
    plt.plot(thresholds, f1_score)
    plt.title('F1 Curve (Random Forest)')
    plt.xlabel('Thresholds')
    plt.ylabel('F1 Measure')

    plt.show()
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
    """Build the evaluation metrics dictionary.

    Predictions are the argmax of ``logits`` over the last axis. Accuracy
    and mean loss are weighted by ``is_real_example``; macro-averaged
    precision, recall and F1 are computed for 20 classes, and a 20x20
    confusion matrix is accumulated in a local variable.

    Returns:
        Dict with keys ``eval_accuracy``, ``eval_loss``, ``eval_precision``,
        ``eval_recall``, ``eval_f`` and ``conf_mat``.
    """
    class_count = 20

    def eval_confusion_matrix(labels, predictions, num_classes):
        """Return (accumulated confusion matrix, update op)."""
        with tf.variable_scope("eval_confusion_matrix"):
            batch_matrix = tf.confusion_matrix(
                labels=labels,
                predictions=predictions,
                num_classes=num_classes)
            # Running total kept as a non-trainable local variable.
            running_total = tf.Variable(
                tf.zeros(shape=(num_classes, num_classes), dtype=tf.int32),
                trainable=False,
                name="confusion_matrix_result",
                collections=[tf.GraphKeys.LOCAL_VARIABLES])
            accumulate = tf.assign_add(running_total, batch_matrix)
            return tf.convert_to_tensor(running_total), accumulate

    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

    accuracy = tf.metrics.accuracy(
        labels=label_ids, predictions=predictions, weights=is_real_example)
    loss = tf.metrics.mean(values=per_example_loss, weights=is_real_example)

    # precision, recall, f1 score
    # NOTE(review): unlike accuracy and loss, these three are not given
    # is_real_example as weights — confirm that is intended.
    precision = metrics.precision(
        label_ids, predictions, class_count, average="macro")
    recall = metrics.recall(
        label_ids, predictions, class_count, average="macro")
    f = metrics.f1(label_ids, predictions, class_count, average="macro")

    # confusion matrix
    conf_mat = eval_confusion_matrix(
        label_ids, predictions, num_classes=class_count)

    return {
        'eval_accuracy': accuracy,
        'eval_loss': loss,
        "eval_precision": precision,
        "eval_recall": recall,
        "eval_f": f,
        "conf_mat": conf_mat
    }
def test_regression(model, test_features, test_labels):
    """Score the model's outputs on the test features with ``f1``.

    The model is switched to eval mode and the forward pass and scoring run
    with gradient tracking disabled.

    Returns:
        Whatever ``f1(model(test_features), test_labels)`` returns.
    """
    with torch.no_grad():
        model.eval()
        outputs = model(test_features)
        score = f1(outputs, test_labels)
        return score
# Assemble the test labels and features by concatenating the per-part
# frames row-wise; only three label columns are kept.
y_test = pd.concat(test_lab, axis=0).reset_index()
y_test = y_test[['id','segment_id','prediction_topic']]
X_test = pd.concat(test_feat, axis=0).reset_index()

# Train + devel joined together, used for the final fit below.
y_traindevel = np.concatenate((y_train, y_devel))
X_traindevel = np.concatenate((X_train, X_devel))

print('\n Begin training SVM... (may take a while)')
uar_scores,fone_scores = [],[]
# Fit one linear SVM per complexity value on the training set and score it
# on the devel set.
for comp in complexities:
    print('\nComplexity {0:.6f}'.format(comp))
    clf = svm.LinearSVC(C=comp, random_state=0)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_devel)
    uar_scores.append(metric.uar(y_devel, y_pred))
    fone_scores.append(metric.f1(y_devel, y_pred))
    print('UAR on Devel {0:.3f}'.format(uar_scores[-1]))
    print('F1 on Devel {0:.3f}'.format(fone_scores[-1]))

# The complexity is selected by best devel UAR only.
optimum_complexity = complexities[np.argmax(uar_scores)]
uar = np.max(uar_scores)
# NOTE(review): this is the best F1 over all complexities, which may come
# from a different complexity than the one selected by UAR above — confirm
# that reporting the two maxima together is intended.
f1 = np.max(fone_scores)
print('\nOptimum complexity: {0:.6f}, maximum UAR: {1:.3f}, F1: {2:.3f}'.format(optimum_complexity, uar, f1))
print('Devel Combined Score: {0:.3f} '.format(metric.combined_task2(f1,uar)))

# Refit on train + devel with the selected complexity before predicting.
print('\nCalculating Test Predictions')
clf = svm.LinearSVC(C=optimum_complexity, random_state=0)
clf.fit(X_traindevel, y_traindevel)
if label == 'arousal':
# Classify the test set with the Naive Bayes model and plot its ROC data.
Y_pred, roc_preds = Naive.image_classifer(X_test)
roc_nb = metrics.ROC(roc_preds, Y_test)
metrics.gphs(roc_nb, 'Naive Bayes')

# Two timers are stopped: one is measured against start_time, the other
# against start_time_nb (both started outside this fragment).
stop_time = time.process_time()
stop_time_nb = time.process_time()
# The same timing line goes to stdout and to the open report file `f`.
print(
    'Y Predictions Calculated finished {:.2f} \nTotal elapsed time for Naive Bayes {:.2f}'
    .format(stop_time - start_time, stop_time_nb - start_time_nb))
f.write(
    'Y Predictions Calculated finished {:.2f} \nTotal elapsed time for Naive Bayes {:.2f}\n'
    .format(stop_time - start_time, stop_time_nb - start_time_nb))

# Derive the scores from the confusion-matrix entries. Going by the messages
# printed below, cfm[0] is the true-positive count and cfm[1] the
# false-positive count; cfm[3] is presumably the false-negative count —
# TODO confirm against metrics.confusionmatrix.
cfm = metrics.confusionmatrix(Y_test, Y_pred)
nb_accuracy = metrics.accuracy(cfm[0], cfm[1], cfm[2], cfm[3])
nb_precision = metrics.precision(cfm[0], cfm[1])
nb_recall = metrics.recall(cfm[0], cfm[3])
nb_f1 = metrics.f1(nb_precision, nb_recall)
# Rates relative to the class totals in val_dict (key 1: positives,
# key 0: negatives, per the messages below).
np_TPR = cfm[0] / val_dict[1]
np_FPR = cfm[1] / val_dict[0]

print('Naive Bayes Accuracy {:.2f}'.format(nb_accuracy))
print('Naive Bayes Precision {:.2f}'.format(nb_precision))
print('Naive Bayes Recall {:.2f}'.format(nb_recall))
print('Naive Bayes F1 {:.2f}'.format(nb_f1))
print(
    'Naive Bayes True positives:{} Out of {} positives TPR:{:.2f}'.format(
        cfm[0], val_dict[1], np_TPR))
print('Naive Bayes False Positives:{} Out of {} negatives FPR:{:.2f}\n'.
      format(cfm[1], val_dict[0], np_FPR))

# Mirror the scores into the report file.
f.write('Naive Bayes Accuracy {:.2f}\n'.format(nb_accuracy))
f.write('Naive Bayes Precision {:.2f}\n'.format(nb_precision))
f.write('Naive Bayes Recall {:.2f}\n'.format(nb_recall))
def model_fn(features, labels, mode, params):
    """Estimator model function: char LSTM + word embeddings + LSTM + CRF.

    Args:
        features: either ``((words, nwords), (chars, nchars))`` or, for
            serving, a dict with the keys ``'words'``, ``'nwords'``,
            ``'chars'`` and ``'nchars'``.
        labels: tag strings, looked up in the tag vocabulary file (not used
            in PREDICT mode).
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict read for the keys ``'dropout'``, ``'words'``,
            ``'chars'``, ``'tags'``, ``'glove'``, ``'num_oov_buckets'``,
            ``'dim_chars'``, ``'dim'``, ``'char_lstm_size'`` and
            ``'lstm_size'``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for PREDICT, EVAL or TRAIN mode.
    """
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'], features['nchars']))

    # Read vocabs and inputs
    (words, nwords), (chars, nchars) = features
    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        # Line indices of every tag except 'O'; passed to the metric
        # functions further down.
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open() as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']

    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable(
        'chars_embeddings', [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings, rate=dropout,
                                        training=training)

    # Char LSTM: every word becomes one sequence of characters
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(
        t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(
        t, dtype=tf.float32, sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
    # The concatenated forward/backward state is twice the char LSTM size.
    # Deriving the width from params (instead of the former literal 50)
    # keeps the reshape valid for any 'char_lstm_size', and is identical for
    # the size of 25 that the literal assumed.
    char_output_dim = 2 * params['char_lstm_size']
    char_embeddings = tf.reshape(output, [-1, dim_words, char_output_dim])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    glove = np.load(params['glove'])['embeddings']  # np.array
    # One extra all-zero row is appended after the pretrained vectors.
    variable = np.vstack([glove, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings], axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics, masked to the real length of each sentence
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        # Each metric is a pair; its second element is written as a summary.
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op)
def run_(data, args, g, features, labels, train_mask,val_mask,test_mask, percent, criteria, fold, degree,n_epochs):
    # Train an SGCLayer model on graph `g`, print per-epoch validation
    # accuracy, evaluate on the test mask and append one tab-separated result
    # row to "<args.dir_>/<criteria>_results_deg2.csv".
    #
    # Inputs read here: args.gpu, args.lr, args.weight_decay, args.dir_,
    # args.dataset and data.num_labels. `percent`, `criteria`, `fold` and
    # `degree` are written to the result row; `degree` is also passed to the
    # model as K. Nothing is returned.

    # Earlier experiments, kept for reference:
    #dir_, dataset, percent, criteria = 'test_plots/', 'test', '1','betweenness'
    #g,features,labels,train_mask,val_mask,test_mask = na.network_preprocess(G, temp_G, node_list, features, labels, args.dir_, args.dataset, percent, criteria)
    #print('train_mask:',sum(train_mask))
    #print('val_mask:',sum(val_mask))
    #print('test_mask:',sum(test_mask))

    # Convert the inputs to torch tensors.
    features = torch.FloatTensor(features)
    labels = torch.LongTensor(labels)
    train_mask = torch.ByteTensor(train_mask)
    val_mask = torch.ByteTensor(val_mask)
    test_mask = torch.ByteTensor(test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = g.number_of_edges()
    n_nodes = g.number_of_nodes()
    ##############################################
    # The string literal below is disabled code kept as an expression
    # statement; it has no effect at run time.
    ''' features = torch.FloatTensor(data.features) #***************** commented ****************** labels = torch.LongTensor(data.labels) #***************** commented ****************** train_mask = torch.ByteTensor(data.train_mask) #***************** commented ****************** val_mask = torch.ByteTensor(data.val_mask) #***************** commented ****************** test_mask = torch.ByteTensor(data.test_mask) #***************** commented ****************** in_feats = features.shape[1] #***************** commented ****************** n_classes = data.num_labels #***************** commented ****************** n_edges = data.graph.number_of_edges() #***************** commented ****************** n_nodes = data.graph.number_of_nodes() #***************** commented ****************** '''
    print("""----Data statistics------' #Nodes %d #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % (n_nodes,n_edges, n_classes, train_mask.sum().item(), val_mask.sum().item(), test_mask.sum().item()))

    # A negative args.gpu means CPU; otherwise move every tensor to that GPU.
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    # (again a disabled-code string literal with no run-time effect)
    ''' g = DGLGraph(data.graph) #***************** commented ****************** '''
    # Edge count is re-read here, before the self loops are added.
    n_edges = g.number_of_edges()
    # add self loop
    g.add_edges(g.nodes(), g.nodes()) ##@@
    # normalization: in-degree ** -0.5, with infinities (degree 0) set to 0
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create SGC model
    model = SGCLayer(g, in_feats, n_classes, K=degree)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # initialize graph
    dur = []
    for epoch in range(n_epochs):
        model.train()
        # The first three epochs are excluded from the timing statistics.
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        # Loss is computed on the training nodes only.
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch >= 3:
            dur.append(time.time() - t0)
        acc , output , preds = evaluate(model, features, labels, val_mask)
        # NOTE(review): `dur` is still empty during the first three epochs,
        # so np.mean(dur) is taken over an empty list there.
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(), acc, n_edges / np.mean(dur) / 1000))

    print()
    # Final evaluation on the test nodes.
    acc , output , preds = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
    # mt.f1 returns 14 values, all written to the result row below.
    micro, macro, fpr, tpr, threshold,roc_auc, acc_, prc_mac, prc_mic, prc_wei, tn, fp, fn, tp = mt.f1(output.tolist(), preds.tolist())
    # Append one tab-separated row per run.
    # NOTE(review): the file name always says "deg2" although `degree` is a
    # parameter — confirm.
    with open("%s/%s_results_deg2.csv"%(args.dir_, criteria), "a") as myfile:
        myfile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"%(args.dataset, micro, macro, fpr, tpr, threshold,roc_auc, acc_, prc_mac, prc_mic, prc_wei, tn, fp, fn, tp,criteria,percent, fold, degree))
    print(args.dataset, micro, macro, fpr, tpr, threshold,roc_auc, acc_, prc_mac, prc_mic, prc_wei, tn, fp, fn, tp,criteria,percent, fold,degree)
def train_model(device, config, model, dataloaders, dataset_sizes, criterion,
                optimizer, scheduler, name, early_stopping, train_per_epoch):
    """Train ``model`` with a train/val phase per epoch and early stopping.

    Args:
        device: device that inputs and labels are moved to.
        config: dict read for ``'MAX_EPOCHS'``, ``'FREEZE'``, ``'UNFREEZE'``,
            ``'head'``, ``'batch_lr_adjustment'`` and ``'TRACKING_MEASURE'``.
        model: network to train; called as ``model(*inputs)``.
        dataloaders: dict with ``'train'`` and ``'val'`` iterables yielding
            ``(inputs, labels)``, where ``inputs`` is a sequence of tensors.
        dataset_sizes: dict with the number of samples per phase, used to
            average the loss.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler, stepped at the end of every
            phase and, with ``batch_lr_adjustment`` set, every ``steps``
            training batches.
        name: model name; selects the format of the progress line.
        early_stopping: callable invoked with the validation scores and the
            model; also provides ``early_stop`` and ``restore(model)``.
        train_per_epoch: number of training batches per epoch, used for the
            progress percentage.

    Returns:
        Tuple ``(model, history)``; ``history`` maps ``'acc'``, ``'f1'``,
        ``'loss'`` and their ``'val_'`` counterparts to per-epoch lists.
    """
    since = time.time()
    best_score = 0.0
    history = {}

    for epoch in range(config['MAX_EPOCHS']):
        print('Epoch {}/{}'.format(epoch, config['MAX_EPOCHS'] - 1))

        if config['FREEZE'] is not None:
            if config['FREEZE']['epochs'] == epoch:
                model = freeze_unfreeze(False, model,
                                        config['FREEZE']['layers_from_bottom'],
                                        config['FREEZE']['layers_from_top'])

        # For each epoch run training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            # Flat lists of gold labels and predictions for this phase.
            running_labels = []
            running_preds = []

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                # A separate comprehension variable avoids shadowing the
                # batch index `i`.
                inputs = [tensor.float().to(device) for tensor in inputs]
                labels = labels.long().to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    if '_aux' in config['head']:
                        outputs, aux = model(*inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                        loss2 = criterion(aux, labels)
                        loss_total = 0.6 * loss + 0.4 * loss2
                    elif '_daux' in config['head']:
                        outputs, aux1, aux2 = model(*inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                        loss2 = criterion(aux1, labels)
                        loss3 = criterion(aux2, labels)
                        # The second auxiliary loss now contributes its own
                        # term; it used to be computed but replaced by
                        # loss2 in the sum.
                        loss_total = loss + 0.8 * loss2 + 0.1 * loss3
                    else:
                        outputs = model(*inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)
                        loss_total = loss

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss_total.backward()
                        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
                        optimizer.step()

                        if config['FREEZE'] is not None:
                            if epoch + 1 == config['FREEZE']['epochs']:
                                model = freeze_unfreeze(
                                    False, model,
                                    config['FREEZE']['layers_from_bottom'],
                                    config['FREEZE']['layers_from_top'])
                        if config['UNFREEZE'] is not None:
                            if epoch + 1 == config['UNFREEZE']['epochs']:
                                model = freeze_unfreeze(
                                    True, model,
                                    config['UNFREEZE']['layers_from_bottom'],
                                    config['UNFREEZE']['layers_from_top'])

                        if config['batch_lr_adjustment'] is not None:
                            try:
                                steps = config['batch_lr_adjustment'][
                                    'defined'][epoch + 1]
                            except KeyError:
                                steps = config['batch_lr_adjustment'][
                                    'steps_non_defined']
                            # Parenthesised: `i + 1 % steps` parsed as
                            # `i + (1 % steps)` and almost never hit zero.
                            if (i + 1) % steps == 0:
                                print(epoch,
                                      "Reduce scheduler at step {}".format(i))
                                scheduler.step()

                # store measures for averaging
                running_loss += loss_total.item() * inputs[0].size(0)
                running_labels.extend(labels.data.cpu().tolist())
                running_preds.extend(preds.cpu().tolist())

                # output during epoch
                # NOTE(review): the progress format is selected by `name`
                # while the losses are selected by config['head']; both must
                # agree for loss2/loss3 to exist here.
                if phase == 'train':
                    if '_aux' in name:
                        print(
                            "Progress {:2.1%} - L1: {:8.4} L2: {:8.4}".format(
                                i / train_per_epoch, float(loss.item()),
                                float(loss2.item())),
                            end="\r")
                    elif '_doubleaux' in name:
                        print(
                            "Progress {:2.1%} - L All: {:8.4} L2: {:8.4} L3: {:8.4}"
                            .format(i / train_per_epoch, float(loss.item()),
                                    float(loss2.item()), float(loss3.item())),
                            end="\r")
                    else:
                        print("Progress {:2.1%} - {:8.4}".format(
                            i / train_per_epoch, float(loss.item())),
                              end="\r")

            # if config['batch_lr_adjustment'] is None:
            print("Reduce scheduler at epoch {}".format(epoch))
            scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = acc(running_labels, running_preds)
            epoch_f1 = f1(running_labels, running_preds)

            print('{} Loss: {:.4f} Acc: {:.4f} F1: {:.4f}'.format(
                phase, epoch_loss, epoch_acc, epoch_f1))

            # store previous run
            if phase == 'train':
                history.setdefault('acc', []).append(epoch_acc)
                history.setdefault('f1', []).append(epoch_f1)
                history.setdefault('loss', []).append(round(epoch_loss, 4))

            if phase == 'val':  # and epoch_acc > best_acc
                history.setdefault(phase + '_acc', []).append(epoch_acc)
                history.setdefault(phase + '_f1', []).append(epoch_f1)
                history.setdefault(phase + '_loss',
                                   []).append(round(epoch_loss, 4))

                # early_stopping
                # needs a measure to check if it has decreased,
                # and if so, make a checkpoint of the current model
                if config['TRACKING_MEASURE'] == 'val_acc':
                    best_score = early_stopping(epoch_acc, model, epoch_acc)
                elif config['TRACKING_MEASURE'] == 'val_loss':
                    best_score = early_stopping(epoch_acc, model, epoch_loss)
                else:
                    print("Tracking measure {} not known".format(
                        config['TRACKING_MEASURE']))
                    exit()

        if early_stopping.early_stop:
            print("Early stopping")
            model = early_stopping.restore(model)
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best score: {:4f}'.format(best_score))

    return model, history
def unet_model_fn(features, labels, mode, params):
    """Estimator model function for the U-Net segmentation model.

    Args:
        features: dict with the key ``'image'`` (and ``'path'``, read only in
            PREDICT mode).
        labels: dict with the key ``'label'`` (read in TRAIN and EVAL mode).
        mode: an ``estimator.ModeKeys`` value.
        params: dict passed to ``Unet``; also read for ``'lr'``,
            ``'decay_steps'``, ``'decay_rate'`` (TRAIN) and ``'eval_path'``,
            ``'eval_steps'`` (EVAL).

    Returns:
        An ``estimator.EstimatorSpec``; fields that do not apply to the
        current mode stay ``None``.
    """
    # The returned op is discarded here.
    tf.local_variables_initializer()
    # Every EstimatorSpec field defaults to None and is filled per mode.
    loss, train_op, = None, None
    eval_metric_ops, training_hooks, evaluation_hooks = None, None, None
    predictions_dict = None

    unet = Unet(params=params)
    logits = unet.model(input_tensor=features['image'])
    y_pred = tf.math.softmax(logits, axis=-1)
    # Arg-max class index scaled by 255 and cast to uint8, with a trailing
    # axis added.
    output_img = tf.expand_dims(tf.cast(tf.math.argmax(y_pred, axis=-1) * 255, dtype=tf.uint8), axis=-1)

    if mode in (estimator.ModeKeys.TRAIN, estimator.ModeKeys.EVAL):
        with tf.name_scope('Loss_Calculation'):
            loss = Losses(logits=logits, labels=labels['label'])
            loss = loss.custom_loss()
        with tf.name_scope('Dice_Score_Calculation'):
            # `dice` is indexed with [1] and used as a metric op below.
            dice = f1(labels=labels['label'], predictions=y_pred)
        with tf.name_scope('Images_{}'.format(mode)):
            with tf.name_scope('Reformat_Outputs'):
                label = tf.expand_dims(tf.cast(tf.argmax(labels['label'], -1) * 255, dtype=tf.uint8), axis=-1)
                # NOTE(review): the numerator subtracts the maximum, giving
                # (x - max) / (max - min); a min-max rescale would subtract
                # the minimum instead — confirm which is intended.
                image = tf.math.divide(features['image'] - tf.reduce_max(features['image'], [0, 1, 2]), tf.reduce_max(features['image'], [0, 1, 2]) - tf.reduce_min(features['image'], [0, 1, 2]))
            # One image per summary: input, arg-max output, the prediction's
            # channel 1, and the label.
            summary.image('1_Medical_Image', image, max_outputs=1)
            summary.image('2_Output', output_img, max_outputs=1)
            summary.image('3_Output_pred', tf.expand_dims(y_pred[:, :, :, 1], -1), max_outputs=1)
            summary.image('4_Output_label', label, max_outputs=1)

    if mode == estimator.ModeKeys.TRAIN:
        with tf.name_scope('Learning_Rate'):
            global_step = tf.compat.v1.train.get_or_create_global_step()
            learning_rate = tf.compat.v1.train.exponential_decay(params['lr'], global_step=global_step, decay_steps=params['decay_steps'], decay_rate=params['decay_rate'], staircase=False)
        with tf.name_scope('Optimizer_conf'):
            train_op = Adam(learning_rate=learning_rate).minimize(loss=loss, global_step=global_step)
        with tf.name_scope('Metrics'):
            summary.scalar('Output_DSC', dice[1])
            summary.scalar('Learning_Rate', learning_rate)

    if mode == estimator.ModeKeys.EVAL:
        eval_metric_ops = {'Metrics/Output_DSC': dice}
        # Saves all merged summaries every params['eval_steps'] steps.
        eval_summary_hook = tf.estimator.SummarySaverHook(output_dir=params['eval_path'], summary_op=summary.merge_all(), save_steps=params['eval_steps'])
        evaluation_hooks = [eval_summary_hook]

    if mode == estimator.ModeKeys.PREDICT:
        predictions_dict = {'image': features['image'], 'y_preds': y_pred[:, :, :, 1], 'output_img': output_img, 'path': features['path']}

    return estimator.EstimatorSpec(mode, predictions=predictions_dict, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops, training_hooks=training_hooks, evaluation_hooks=evaluation_hooks)
def get_f1(self):
    """Return ``metrics.f1`` applied to the four stored counters.

    The counters are passed in the order ``p_num``, ``p_den``, ``r_num``,
    ``r_den`` (presumably the precision and recall numerators and
    denominators — confirm against ``metrics.f1``).
    """
    score_fn = metrics.f1
    counters = (self.p_num, self.p_den, self.r_num, self.r_den)
    return score_fn(*counters)
def test_regression(model, test_features, test_labels):
    """Switch the model to eval mode and score its outputs with ``f1``.

    Returns:
        Whatever ``f1(model(test_features), test_labels)`` returns.
    """
    model.eval()
    outputs = model(test_features)
    score = f1(outputs, test_labels)
    return score
def main():
    """Fine-tune a BERT sequence classifier and evaluate it after every epoch.

    Parses command-line options, then overrides several of them with
    hard-coded values (task, data/model/output paths, batch size, learning
    rate, epochs). Builds the processor, tokenizer, model and optimizer,
    trains for ``num_train_epochs`` epochs and, when ``do_eval`` is set,
    evaluates on the dev set at the end of each epoch. Whenever the dev
    accuracy improves, the metrics are written to ``eval_results-best.txt``
    and the model is saved to ``model-export-best.hkl`` in ``output_dir``.

    Raises:
        ValueError: if ``gradient_accumulation_steps`` < 1, if neither
            ``do_train`` nor ``do_eval`` is set, or if the task name is not
            in ``processors``.
        ImportError: if fp16 is requested and ``apex`` is not installed.
    """
    parser = argparse.ArgumentParser()

    ## Required parameters
    parser.add_argument("--data_dir",
                        default=None,
                        type=str,
                        # required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
    parser.add_argument("--bert_model", default=None, type=str,
                        # required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default=None,
                        type=str,
                        # required=True,
                        help="The name of the task to train.")
    parser.add_argument("--output_dir",
                        default=None,
                        type=str,
                        # required=True,
                        help="The output directory where the model checkpoints will be written.")

    ## Other parameters
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. \n"
                             "Sequences longer than this will be truncated, and sequences shorter \n"
                             "than this will be padded.")
    parser.add_argument("--do_train",
                        default=False,
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        default=False,
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_lower_case",
                        default=False,
                        action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=3e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_proportion",
                        default=0.1,
                        type=float,
                        help="Proportion of training to perform linear learning rate warmup for. "
                             "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        default=False,
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=82,
                        help="random seed for initialization")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument('--optimize_on_cpu',
                        default=False,
                        action='store_true',
                        help="Whether to perform optimization and keep the optimizer averages on CPU")
    parser.add_argument('--fp16',
                        default=False,
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale',
                        type=float, default=128,
                        help='Loss scaling, positive power of 2 values can improve fp16 convergence.')

    args = parser.parse_args()

    # configuration
    # NOTE(review): every assignment below overwrites the value parsed from
    # the command line, so the corresponding flags currently have no effect.
    # args.task_name = "RITC"
    # args.data_dir = "/mnt/disks/disk-01/ai-dev/jupyter/rit/rit/RITC_clean_unique/"
    # args.task_name = "SECB"
    # args.data_dir = "/home/francisco.tacoa/work/jupyter/flair/secb/201902/20190221/20190224210431/"
    args.task_name = "JAIT"
    args.data_dir = "/home/weicheng.zhu/experiment/pytorch-pretrained-BERT/glue_data/JAIT"
    args.do_train = True
    args.do_eval = True
    args.fp16 = False
    args.bert_model = "/home/weicheng.zhu/pretrained/multi_cased_L-12_H-768_A-12"
    # args.bert_model = "/home/weicheng.zhu/pretrained/japanese_sentencepiece_L-12_H-768_A-12-finetuned"
    args.max_seq_length = 128
    args.train_batch_size = 32
    args.learning_rate = 5e-5
    args.num_train_epochs = 15.0
    args.output_dir = "/tmp/jait_output_best/"

    # A training run starts from a clean output directory: any existing one
    # is deleted recursively.
    if os.path.exists(args.output_dir) and args.do_train:
        import shutil
        shutil.rmtree(args.output_dir)

    # Maps the lower-cased task name to the processor class that loads it.
    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "jait": JaitProcessor,
        "enit": EnitProcessor,
        "ritc": RitcProcessor,
        "secb": SecbProcessor,
    }

    # num_labels_task = {
    #     "cola": 2,
    #     "mnli": 3,
    #     "mrpc": 2,
    #     "jait": 30,
    #     "enit": 67,
    #     "ritc": 3,
    # }

    # Single-process mode (local_rank == -1, or CUDA disabled) uses every
    # visible GPU; otherwise this process is pinned to the GPU at local_rank.
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')
        if args.fp16:
            logger.info("16-bits training currently not supported in distributed training")
            args.fp16 = False  # (see https://github.com/pytorch/pytorch/pull/13496)
    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
            args.gradient_accumulation_steps))

    # train_batch_size becomes the per-step batch; the requested total is
    # reached by accumulating gradients over several steps.
    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)

    # Seed every RNG in use.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

    # Create the output directory unless it already exists and is non-empty.
    if not (os.path.exists(args.output_dir) and os.listdir(args.output_dir)):
        # raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
        os.makedirs(args.output_dir, exist_ok=True)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    # num_labels = num_labels_task[task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Use the tokenizer built from the wiki-ja model/vocab files when both
    # exist in the model directory; otherwise fall back to BertTokenizer.
    model_file = os.path.join(args.bert_model, "wiki-ja.model")
    vocab_file = os.path.join(args.bert_model, "wiki-ja.vocab")
    if os.path.exists(model_file) and os.path.exists(vocab_file):
        tokenizer = tokenization.FullTokenizer(
            model_file=model_file, vocab_file=vocab_file,
            do_lower_case=args.do_lower_case)
    else:
        tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        # NOTE(review): num_train_epochs is a float, so this product is a
        # float as well, not an int.
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()

    # Prepare model
    model = BertForSequenceClassification.from_pretrained(args.bert_model, num_labels=num_labels)
    # model = BertForClassification.from_pretrained(args.bert_model,
    #     num_labels=num_labels)
    # model.freeze_bert()
    # model = BertForMultiLabelSequenceClassification.from_pretrained(args.bert_model,
    #     num_labels=num_labels)
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
                                                          output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    # Parameters whose name contains an entry of no_decay get weight decay
    # 0.0; all others get 0.01.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        # A loss_scale of 0 selects dynamic loss scaling; any other value is
        # used as a static scale.
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_optimization_steps)

    # summary_writer = SummaryWriter()

    global_step = 0
    if args.do_train:
        train_features = convert_examples_to_features(
            train_examples, label_list, args.max_seq_length, tokenizer)
        logger.info("***** Running training *****")
        logger.info(" Num examples = %d", len(train_examples))
        logger.info(" Batch size = %d", args.train_batch_size)
        logger.info(" Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

        # Best dev accuracy seen so far; gates the export at the end of each
        # epoch's evaluation.
        best_eval_accuracy = 0
        for epoch in trange(int(args.num_train_epochs), desc="Epoch"):
            # if epoch == 0:
            #     model.module.freeze_bert_encoder()
            # elif epoch == 1:
            #     model.module.unfreeze_bert_encoder()

            model.train()
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                # With labels supplied, the value returned by the model is
                # used as the loss.
                loss = model(input_ids, segment_ids, input_mask, label_ids)
                # if step == 0:
                #     summary_writer.add_graph(model, (input_ids, segment_ids, input_mask, label_ids))
                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                # Step the optimizer only once every
                # gradient_accumulation_steps batches.
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify learning rate with special warm up BERT uses
                        # if args.fp16 is False, BertAdam is used that handles this automatically
                        lr_this_step = args.learning_rate * warmup_linear(global_step / num_train_optimization_steps,
                                                                          args.warmup_proportion)
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step
                    optimizer.step()
                    optimizer.zero_grad()
                    global_step += 1

            # summary_writer.add_scalar("training/loss", tr_loss)

            # Per-epoch evaluation; in distributed runs only rank 0 evaluates.
            # NOTE(review): the dev examples are reloaded and re-featurized on
            # every epoch.
            if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
                eval_examples = processor.get_dev_examples(args.data_dir)
                eval_features = convert_examples_to_features(
                    eval_examples, label_list, args.max_seq_length, tokenizer)
                logger.info("***** Running evaluation *****")
                logger.info(" Num examples = %d", len(eval_examples))
                logger.info(" Batch size = %d", args.eval_batch_size)
                all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
                all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
                all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
                all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
                eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
                # Run prediction for full data
                eval_sampler = SequentialSampler(eval_data)
                eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

                model.eval()
                eval_loss, eval_accuracy, eval_precision, eval_recall, eval_f1 = 0, 0, 0, 0, 0
                nb_eval_steps, nb_eval_examples = 0, 0

                for input_ids, input_mask, segment_ids, label_ids in eval_dataloader:
                    input_ids = input_ids.to(device)
                    input_mask = input_mask.to(device)
                    segment_ids = segment_ids.to(device)
                    label_ids = label_ids.to(device)

                    # Two forward passes per batch: with labels (result used
                    # as the loss) and without (result used as the logits).
                    with torch.no_grad():
                        tmp_eval_loss = model(input_ids, segment_ids, input_mask, label_ids)
                        logits = model(input_ids, segment_ids, input_mask)

                    logits = logits.detach().cpu().numpy()
                    label_ids = label_ids.to('cpu').numpy()
                    tmp_eval_accuracy = accuracy(logits, label_ids)
                    tmp_eval_precision = precision(logits, label_ids)
                    tmp_eval_recall = recall(logits, label_ids)
                    tmp_eval_f1 = f1(logits, label_ids)
                    # logger.info(" %s = %s", "tmp_eval_accuracy", tmp_eval_accuracy)
                    # logger.info(" %s: %s", "input_ids", input_ids.to('cpu').numpy())
                    # # logger.info(" %s: %s", "input_mask", input_mask.to('cpu').numpy())
                    # # logger.info(" %s: %s", "segment_ids", segment_ids.to('cpu').numpy())
                    # logger.info(" %s: %s", "logits", logits)
                    # logger.info(" %s: %s", "label_ids", label_ids)

                    # Per-batch metrics are weighted by batch size here and
                    # divided by the example count below; the loss is instead
                    # averaged per step.
                    eval_loss += tmp_eval_loss.mean().item()
                    eval_accuracy += tmp_eval_accuracy * input_ids.size(0)
                    eval_precision += tmp_eval_precision * input_ids.size(0)
                    eval_recall += tmp_eval_recall * input_ids.size(0)
                    eval_f1 += tmp_eval_f1 * input_ids.size(0)

                    nb_eval_examples += input_ids.size(0)
                    nb_eval_steps += 1

                eval_loss = eval_loss / nb_eval_steps
                eval_accuracy = eval_accuracy / nb_eval_examples
                eval_precision = eval_precision / nb_eval_examples
                eval_recall = eval_recall / nb_eval_examples
                eval_f1 = eval_f1 / nb_eval_examples

                # summary_writer.add_scalar("validation/loss", eval_loss)
                # summary_writer.add_scalar("validation/accuracy", eval_accuracy)

                result = {'eval_loss': eval_loss,
                          'eval_accuracy': eval_accuracy,
                          'eval_precision': eval_precision,
                          'eval_recall': eval_recall,
                          'eval_f1': eval_f1,
                          'epoch': epoch,
                          'global_step': global_step,
                          'loss': tr_loss / nb_tr_steps}

                logger.info("***** Eval results *****")
                for key in sorted(result.keys()):
                    logger.info(" %s = %s", key, str(result[key]))

                # Export only when dev accuracy improves; both files are
                # overwritten on each improvement.
                if eval_accuracy > best_eval_accuracy:
                    best_eval_accuracy = eval_accuracy
                    output_eval_file = os.path.join(args.output_dir, "eval_results-best.txt")
                    with open(output_eval_file, "w") as writer:
                        for key in sorted(result.keys()):
                            writer.write("%s = %s\n" % (key, str(result[key])))
                    output_model_file = os.path.join(args.output_dir, "model-export-best.hkl")
                    # NOTE(review): this saves the whole model object (the
                    # DataParallel/DDP wrapper when one was applied above),
                    # not just a state_dict.
                    torch.save(model, output_model_file)
def run_epoch(dataloader, should_train, should_teach_force, should_teach_force_answer, summaries_writer, experiment,
              experiment_context, epoch, quiet):
    """Run one pass over ``dataloader`` and report the epoch-level metrics.

    Args:
        dataloader: Iterable of batches; each batch is indexed with the keys
            ``"story"``, ``"query"``, ``"answer"``, ``"supporting"`` and
            ``"story_mask"``.
        should_train: When true, the network is put in training mode, every
            batch triggers a backward pass and optimizer step, and a sample
            taken from the last batch is printed after the loop.
        should_teach_force: When true, the supporting-facts targets are
            passed to the network.
        should_teach_force_answer: When true, the answer targets are passed
            to the network.
        summaries_writer: Forwarded to ``write_summaries``.
        experiment: Optional experiment tracker; metrics are logged to it
            when it is not ``None``.
        experiment_context: Callable returning the context manager under
            which the experiment logging runs.
        epoch: Epoch index used as the logging step.
        quiet: When true, the progress bar is disabled.

    Returns:
        Tuple ``(mean_loss, mean_qa_loss, mean_supp_facts_loss,
        mean_qa_accuracy, mean_supp_facts_f1)``.
    """
    # Per-batch histories, reduced to epoch-level figures after the loop.
    total_loss_log = []
    qa_loss_log = []
    qa_target_log = []
    qa_prediction_log = []
    facts_loss_log = []
    facts_target_log = []
    facts_prediction_log = []

    entnet.train(mode=should_train)

    batches = dataloader if quiet else tqdm(dataloader)
    for batch in batches:
        story = batch["story"].to(device)
        query = batch["query"].to(device)
        qa_target = batch["answer"].to(device)
        supp_facts_target = batch["supporting"].float().to(device)
        story_mask = batch["story_mask"].float().to(device)

        # Targets are only handed to the network under teacher forcing.
        forced_facts = supp_facts_target if should_teach_force else None
        forced_answers = qa_target if should_teach_force_answer else None
        qa_predicted, supp_facts_alignment, supp_facts_attention = entnet(
            story, story_mask, query,
            supporting_facts=forced_facts,
            answers=forced_answers)

        qa_loss = qa_criterion(qa_predicted, qa_target)
        supp_facts_loss = supp_facts_criterion(supp_facts_alignment, supp_facts_target)
        loss = args.qa_lambda * qa_loss + args.supporting_facts_lambda * supp_facts_loss

        if should_train:
            loss.backward()
            nn.utils.clip_grad_norm_(entnet.parameters(), args.gradient_clipping)
            optimizer.step()
            optimizer.zero_grad()

        total_loss_log.append(loss.item())
        qa_loss_log.append(qa_loss.item())
        qa_target_log.append(qa_target.tolist())
        qa_prediction_log.append(qa_predicted.argmax(dim=1).tolist())
        facts_loss_log.append(supp_facts_loss.item())
        facts_target_log.append(supp_facts_target.tolist())
        facts_prediction_log.append(supp_facts_attention.tolist())

    if should_train:
        # Show the last example of the last batch as a qualitative check.
        translated_story, translated_query, translated_answer = train_dataset.translate_story(
            story[-1], query[-1], qa_target[-1])
        print("\nSTORY:", translated_story)
        print("QUERY:", translated_query)
        print("ANSWER:", translated_answer)
        print("\nSupporting facts:", supp_facts_target[-1, :])
        print("Attended:", supp_facts_attention[-1, :], "\n")

    mean_loss = np.mean(total_loss_log)
    mean_qa_loss = np.mean(qa_loss_log)
    mean_supp_facts_loss = np.mean(facts_loss_log)
    mean_qa_accuracy = accuracy(qa_target_log, qa_prediction_log)
    mean_supp_facts_f1 = f1(facts_target_log, facts_prediction_log)

    # Write summaries
    write_summaries(mean_loss, mean_qa_loss, mean_supp_facts_loss, mean_qa_accuracy, mean_supp_facts_f1,
                    facts_target_log, facts_prediction_log, entnet.named_parameters(), summaries_writer, epoch)

    if experiment is not None:
        with experiment_context():
            epoch_metrics = {
                "loss": mean_loss,
                "qa_loss": mean_qa_loss,
                "supp_facts_loss": mean_supp_facts_loss,
                "qa_accuracy": mean_qa_accuracy,
                "supp_facts_f1": mean_supp_facts_f1
            }
            experiment.log_metrics(epoch_metrics, step=epoch)
            experiment.log_epoch_end(args.epochs, step=epoch)

    return mean_loss, mean_qa_loss, mean_supp_facts_loss, mean_qa_accuracy, mean_supp_facts_f1
X, X_test, _, y, y_test, _, relevant_features_ind, opt_accuracy_test = generate_dataset_exp1(
    n, n_features, n_relevant_features, n_test=n_test)

# Initialise settings of regularisation parameter
# (13 log-spaced values of C from 1e3 down to 1e-3)
Cs = np.logspace(3, -3, 13)

# Run through different settings of regularisation parameters
for C in Cs:
    # Initialise and fit classifier.
    # The solver is named explicitly: scikit-learn >= 0.22 defaults to
    # 'lbfgs', which rejects penalty='l1'. 'liblinear' was the default
    # before that release, so older installs behave as before.
    LR_sparse = LogisticRegression(C=C, penalty='l1', solver='liblinear')
    LR_sparse.fit(X, y)

    # Calculate relative sparsity level and number of non-zero weights
    coefs = LR_sparse.coef_.ravel()
    nonzero_coefs = np.nonzero(coefs)[0]

    # Score the recovered support (indices of non-zero weights) against the
    # indices of the truly relevant features.
    print("F1 score: %f" % f1(relevant_features_ind, nonzero_coefs))

from datetime import datetime
from csv import DictReader
from math import exp, log, sqrt

# TL; DR, the main training process starts on line: 250,
# you may want to start reading the code from there

##############################################################################
# parameters #################################################################
##############################################################################
# Number of full batches, used only as the denominator of the progress
# display. Clamped to 1 so that a data set smaller than BATCH_SIZE does not
# raise ZeroDivisionError in the progress print below.
train_per_epoch = max(1, len(data_set) // BATCH_SIZE)

if args.partition in ['train', 'val']:
    # Labelled partitions yield (inputs, target) pairs; the target is ignored.
    for i, (inputs, _) in enumerate(dataloaders[args.partition]):
        print("Progress {:2.1%} ".format(i / train_per_epoch), end="\r")
        inputs = [tensor.float().to(device) for tensor in inputs]
        outputs = model(*inputs)
        # Predicted class = index of the maximum along dimension 1.
        _, y_pred = torch.max(outputs, 1)
        prediction.setdefault(args.partition, []).append(y_pred.cpu().tolist())
else:
    # Any other partition yields the inputs only.
    for i, (inputs) in enumerate(dataloaders[args.partition]):
        print("Progress {:2.1%} ".format(i / train_per_epoch), end="\r")
        inputs = [tensor.float().to(device) for tensor in inputs]
        outputs = model(*inputs)
        _, y_pred = torch.max(outputs, 1)
        prediction.setdefault(args.partition, []).append(y_pred.cpu().tolist())

if args.partition in ['train', 'val']:
    # show results
    # Flatten the per-batch prediction lists once and reuse the result for
    # every metric and for the plot.
    flat_predictions = [item for sublist in prediction[args.partition] for item in sublist]
    _acc = acc(labels[args.partition], flat_predictions)
    _f1 = f1(labels[args.partition], flat_predictions)
    print('{} - acc: {}'.format(args.partition, _acc))
    print('{} - f1: {}'.format(args.partition, _f1))
    print('confusion matrix for {} exported.'.format(args.partition))
    plot_confusion_matrix(labels[args.partition], flat_predictions,
                          normalize=True,
                          ticklabels=[str(i) for i in range(1, 9)],
                          title='Confusion matrix {}'.format(args.partition),
                          path=config['log_dir'])
else:
    # Unlabelled partition: export the per-batch (nested) predictions.
    export_predictions(prediction[args.partition], test_filenames, data_set, name)
def classification_report(y_true, y_pred):
    """Print accuracy, recall, precision and F1 for the given predictions.

    Each score is computed by the matching function in ``metrics`` and
    converted to a plain number with ``.item()`` before printing.
    """
    report_rows = (
        ("Accuracy", "accuracy"),
        ("Recall", "recall"),
        ("Precision", "precision"),
        ("F1", "f1"),
    )
    for title, metric_name in report_rows:
        # Look the metric up lazily so each one is evaluated and printed in
        # turn.
        score = getattr(metrics, metric_name)(y_true, y_pred)
        print(title, score.item())