def append_results(pred, prob, test, aucs, cm, count, obj):
    obj.preds.append(pred)
    obj.probs.append(prob)
    obj.tests.append(test)
    obj.aucs.append(aucs)
    auroc_and_auprc_prob(prob, test, obj)
    obj.cm.append(cm)
    calculate_metrics(count, obj)
def evaluate1(test_predictions, class_labels):
    # Results of evaluation 1
    evaluation_results = []
    # Ground truth labels
    test_actual_labels = read_test_data(class_labels)['labels']
    for i in range(len(class_labels)):
        # Binarize predictions one-vs-rest for class i: 1 for class i, 0 otherwise
        predictions = test_predictions[:, 0].copy()
        predictions[predictions != i] = -1
        predictions[predictions == i] = 1
        predictions[predictions == -1] = 0
        # Binarize the ground truth the same way
        ground_truth = test_actual_labels.copy()
        ground_truth[ground_truth != i] = -1
        ground_truth[ground_truth == i] = 1
        ground_truth[ground_truth == -1] = 0
        # Confusion-matrix metrics for class i
        metrics = calculate_metrics(predictions=predictions, actuals=ground_truth)
        evaluation_results.append({'label': i, 'metrics': metrics})
    return evaluation_results
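# NOTE: calculate_metrics is not defined in this excerpt. Below is a minimal,
# hypothetical sketch of a confusion-matrix version matching the call above
# (precision/recall/F1 from binarized 0/1 arrays); the project's actual
# implementation may differ.
import numpy as np

def calculate_metrics(predictions, actuals):
    predictions = np.asarray(predictions)
    actuals = np.asarray(actuals)
    tp = np.sum((predictions == 1) & (actuals == 1))
    fp = np.sum((predictions == 1) & (actuals == 0))
    fn = np.sum((predictions == 0) & (actuals == 1))
    tn = np.sum((predictions == 0) & (actuals == 0))
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return {'precision': precision, 'recall': recall, 'f1': f1,
            'accuracy': (tp + tn) / len(actuals)}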
def test_model(model):
    """Calculates the metrics on the validation data."""
    no_examples = len(dh.val_images)
    no_batches = int(np.ceil(float(no_examples) / params.batch_size))
    preds = np.empty((no_examples, dh.no_classes[args.dataset]), dtype=np.float32)
    labels = np.empty((no_examples, dh.no_classes[args.dataset]), dtype=np.float32)
    gen = dh.generator('val', aug=False, shuffle_batches=False)
    for ind_batch in range(no_batches):
        images_batch, labels_batch = next(gen)  # gen.next() is Python 2 only
        preds_batch = model.predict(images_batch, batch_size=params.batch_size)
        if ind_batch == no_batches - 1:
            # The last batch may be partial: align it with the end of the arrays.
            preds[no_examples - params.batch_size:no_examples] = preds_batch
            labels[no_examples - params.batch_size:no_examples] = labels_batch
        else:
            preds[ind_batch * params.batch_size:(ind_batch + 1) * params.batch_size] = preds_batch
            labels[ind_batch * params.batch_size:(ind_batch + 1) * params.batch_size] = labels_batch
    return metrics.calculate_metrics(labels, preds)
def main():
    # load arguments
    args = parse_args()

    # load config
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # define logging level and format
    level = logging.INFO
    if args.debug:
        level = logging.DEBUG
    logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        level=level)

    # load splits
    test_doc_ids = utils.read_split(config["split"])
    logging.info(f"Number of test documents: {len(test_doc_ids)}")

    # load dataset
    dataset = utils.read_jsonl(config["dataset"], dict_key="id")

    # create word embeddings for scene labels
    if os.path.basename(config["scene_labels"]) == "places365_en.txt":
        language = "en"
    else:  # places365_de.txt
        language = "de"

    logging.info('Generate word embedding for scene labels ...')
    scene_labels = read_scene_labels(config["scene_labels"])
    scene_word_embeddings = get_scene_word_embeddings(scene_labels,
                                                      fasttext_bin_folder=args.fasttext,
                                                      language=language)

    # generate results for each document
    testset_similarities = {}
    with multiprocessing.Pool(args.threads) as p:
        pool_args = [(doc, test_doc_ids, scene_word_embeddings, config) for doc in dataset.values()]
        cnt_docs = 0
        for document_result in p.imap(calculate_results, pool_args):
            if document_result is None:
                continue
            cnt_docs += 1
            if cnt_docs % 100 == 0:
                logging.info(f"{cnt_docs} / {len(test_doc_ids)} documents processed ...")
            for key, val in document_result.items():
                if key not in testset_similarities:
                    testset_similarities[key] = []
                testset_similarities[key].append(val)

    results = metrics.calculate_metrics(testset_similarities)
    metrics.print_results(results)
    return 0
def predict(self):
    validation_data = self.validation_data
    data, true_labels = validation_data[0], validation_data[1]
    predictions = self.model.predict(data, batch_size=self.config.batch_size)
    accuracy, micro_precision, micro_recall, micro_f1 = calculate_metrics(predictions, true_labels)
    print(f'val_f1: {micro_f1}, acc: {accuracy}')
    self.metrics.append(f'{micro_f1}, {accuracy}')
    return micro_f1, accuracy
def predict(self):
    validation_data = self.validation_data
    data = [validation_data[0], validation_data[1]]
    true_labels = validation_data[2]
    predictions = self.model.predict(data, batch_size=self.config.batch_size)
    output = ''
    accuracy, micro_precision, micro_recall, micro_f1 = calculate_metrics(predictions, true_labels)
    output += f'{micro_f1}, {accuracy}, '
    print(f'val_f1: {micro_f1}, acc: {accuracy}')
    regularized_predictions = others_class_regularizer(predictions, self.config.others_class_regularizer_param)
    accuracy, micro_precision, micro_recall, micro_f1 = calculate_metrics(regularized_predictions, true_labels)
    output += f'{micro_f1}, {accuracy}'
    print(f'val_f1 after regularization: {micro_f1}, acc after regularization: {accuracy}')
    self.metrics.append(output)
    return micro_f1, accuracy
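# NOTE: both predict() variants unpack (accuracy, micro_precision, micro_recall,
# micro_f1) from calculate_metrics, which is not shown here. Below is a
# hypothetical sketch of micro-averaged metrics over argmaxed softmax outputs
# with one-hot labels; whether the real function excludes any class (as the
# 'others' regularizer hints) is unknown, so treat it as illustrative only.
import numpy as np

def micro_metrics(predictions, true_labels):
    pred_ids = predictions.argmax(axis=-1)
    true_ids = true_labels.argmax(axis=-1)
    accuracy = float(np.mean(pred_ids == true_ids))
    tp = fp = fn = 0
    for c in range(predictions.shape[-1]):
        tp += np.sum((pred_ids == c) & (true_ids == c))
        fp += np.sum((pred_ids == c) & (true_ids != c))
        fn += np.sum((pred_ids != c) & (true_ids == c))
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return accuracy, precision, recall, f1

# When every class participates in the micro average, micro P/R/F1 all collapse
# to accuracy; the distinction only matters once some class is excluded.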
def main():
    # load arguments
    args = parse_args()

    # load config
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # define logging level and format
    level = logging.INFO
    if args.debug:
        level = logging.DEBUG
    logging.basicConfig(format="%(asctime)s %(levelname)s:%(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S",
                        level=level)

    # load splits
    test_doc_ids = utils.read_split(config["split"])
    logging.info(f"Number of test documents: {len(test_doc_ids)}")

    # load dataset
    dataset = utils.read_jsonl(config["dataset"], dict_key="id")

    # generate results for each document
    testset_similarities = {}
    with multiprocessing.Pool(args.threads) as p:
        pool_args = [(doc, test_doc_ids, config) for doc in dataset.values()]
        cnt_docs = 0
        for document_result in p.imap(calculate_results, pool_args):
            if document_result is None:
                continue
            cnt_docs += 1
            if cnt_docs % 100 == 0:
                logging.info(f"{cnt_docs} / {len(test_doc_ids)} documents processed ...")
            for key, val in document_result.items():
                if key not in testset_similarities:
                    testset_similarities[key] = []
                testset_similarities[key].append(val)

    results = metrics.calculate_metrics(testset_similarities)
    metrics.print_results(results)
    return 0
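# NOTE: a small design observation, not project code: the key-accumulation loop
# in both main() variants can be written with collections.defaultdict, which
# removes the membership check.
from collections import defaultdict

testset_similarities = defaultdict(list)
for key, val in document_result.items():
    testset_similarities[key].append(val)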
def train_nn(X_train, Y_train, X_test, Y_test):
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, Y_train, epochs=200, batch_size=128, verbose=1)
    y_prob = model.predict(X_test)
    y_classes = y_prob.argmax(axis=-1)
    # Y_test is one-hot; column 1 holds the positive-class indicator.
    y_true = Y_test[:, 1]
    acc = calculate_metrics(y_true, y_classes)
    return acc
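# NOTE: here calculate_metrics(y_true, y_classes) is consumed as a single
# accuracy-like score. A minimal sketch with scikit-learn, assuming that is all
# it returns; the real function may compute more.
from sklearn.metrics import accuracy_score

def calculate_metrics(y_true, y_pred):
    return accuracy_score(y_true, y_pred)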
def test_all_users(classifier_all_user, xfeatures_test, y_test, yforg_test, global_threshold):
    genuinePreds = []
    randomPreds = []
    skilledPreds = []
    users = np.unique(y_test)
    for user in users:
        model = classifier_all_user[user]

        # Test the performance for the user without replicates
        skilled_forgeries = xfeatures_test[(y_test == user) & (yforg_test == 1)]
        test_genuine = xfeatures_test[(y_test == user) & (yforg_test == 0)]
        random_forgeries = xfeatures_test[(y_test != user) & (yforg_test == 0)]

        genuinePredUser = model.decision_function(test_genuine)
        skilledPredUser = model.decision_function(skilled_forgeries)
        randomPredUser = model.decision_function(random_forgeries)

        genuinePreds.append(genuinePredUser)
        skilledPreds.append(skilledPredUser)
        randomPreds.append(randomPredUser)

    # Calculate all metrics (EER, FAR, FRR and AUC) with the decision threshold at 0 (global_threshold)
    all_metrics = metrics.calculate_metrics(global_threshold, genuinePreds, randomPreds, skilledPreds)
    results = {
        'all_metrics': all_metrics,
        'predictions': {
            'genuinePreds': genuinePreds,
            'randomPreds': randomPreds,
            'skilledPreds': skilledPreds
        }
    }
    print(all_metrics['EER'], all_metrics['EER_userthresholds'])
    return results
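# NOTE: a hypothetical sketch of the global-threshold error rates implied above,
# assuming higher decision scores mean "genuine": FRR is the fraction of genuine
# scores below the threshold, FAR the fraction of forgery scores at or above it.
# The real metrics.calculate_metrics (EER, AUC, per-user thresholds) is not shown.
import numpy as np

def far_frr_at_threshold(genuine_scores, forgery_scores, threshold=0.0):
    genuine_scores = np.asarray(genuine_scores)
    forgery_scores = np.asarray(forgery_scores)
    frr = float(np.mean(genuine_scores < threshold))   # genuine signatures rejected
    far = float(np.mean(forgery_scores >= threshold))  # forgeries accepted
    return far, frr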
def validate(file_name):
    with open(file_name, "r", encoding='utf-8') as f:
        json_data = f.read()
    data = json.loads(json_data)

    bleu_2scores = 0
    bleu_4scores = 0
    nist_2scores = 0
    nist_4scores = 0
    meteor_scores = 0
    sentences = []

    for d in tqdm(data):
        reference = list(d['reference'])
        predict = list(d['predict'])
        temp_bleu_2, temp_bleu_4, temp_nist_2, temp_nist_4, temp_meteor_scores = \
            calculate_metrics(predict, reference)
        bleu_2scores += temp_bleu_2
        bleu_4scores += temp_bleu_4
        nist_2scores += temp_nist_2
        nist_4scores += temp_nist_4
        meteor_scores += temp_meteor_scores
        sentences.append(" ".join(predict))

    entro, dist = cal_entropy(sentences)
    mean_len, var_len = cal_length(sentences)
    num = len(sentences)
    print(f'avg: {mean_len}, var: {var_len}')
    print(f'entro: {entro}')
    print(f'dist: {dist}')
    print(f'bleu_2scores: {bleu_2scores / num}')
    print(f'bleu_4scores: {bleu_4scores / num}')
    print(f'nist_2scores: {nist_2scores / num}')
    print(f'nist_4scores: {nist_4scores / num}')
    print(f'meteor_scores: {meteor_scores / num}')
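# NOTE: a plausible shape for calculate_metrics on token lists, sketched with
# NLTK's sentence-level BLEU/NIST and METEOR. Whether the project actually uses
# NLTK, applies smoothing, or handles NIST's short-hypothesis edge cases is an
# assumption; recent NLTK versions expect pre-tokenized input for meteor_score.
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.nist_score import sentence_nist
from nltk.translate.meteor_score import meteor_score

def calculate_metrics(predict, reference):
    smooth = SmoothingFunction().method1
    bleu_2 = sentence_bleu([reference], predict, weights=(0.5, 0.5), smoothing_function=smooth)
    bleu_4 = sentence_bleu([reference], predict, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=smooth)
    nist_2 = sentence_nist([reference], predict, n=2)
    nist_4 = sentence_nist([reference], predict, n=4)
    meteor = meteor_score([reference], predict)
    return bleu_2, bleu_4, nist_2, nist_4, meteor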
import tensorflow as tf
from tensorflow.contrib.keras.api.keras.losses import mean_absolute_error, mean_absolute_percentage_error
from metrics import calculate_metrics

a = tf.constant([[4.0, 4.0, 4.0], [3.0, 3.0, 3.0], [1.0, 1.0, 1.0]])
b = tf.constant([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [2.0, 2.0, 2.0]])

with tf.Session() as sess:
    # print(sess.run(tf.reshape(a, [-1])))
    # mae = mean_absolute_error(tf.reshape(a, [-1]), tf.reshape(b, [-1]))
    mmae = tf.reduce_mean(tf.abs(tf.reshape(a, [-1]) - tf.reshape(b, [-1])))
    print(sess.run(mmae))
    mae, mape, rmse = calculate_metrics(tf.reshape(a, [-1]), tf.reshape(b, [-1]))
    print(sess.run(mae))
    print(sess.run(mape))
    print(sess.run(rmse))
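# NOTE: given the three tensors unpacked above, calculate_metrics plausibly
# builds MAE / MAPE / RMSE as TF1 graph ops. A hypothetical sketch (the epsilon
# guard and the argument order are my assumptions):
def calculate_metrics(y_true, y_pred):
    diff = y_true - y_pred
    mae = tf.reduce_mean(tf.abs(diff))
    mape = tf.reduce_mean(tf.abs(diff) / (tf.abs(y_true) + 1e-8)) * 100.0
    rmse = tf.sqrt(tf.reduce_mean(tf.square(diff)))
    return mae, mape, rmse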
import argparse

import matplotlib.pyplot as plt

from metrics import calculate_metrics, plot_metric

if __name__ == '__main__':
    # parse args
    parser = argparse.ArgumentParser()
    parser.add_argument('results_path', action='store',
                        help='path to file containing results in csv format')
    parser.add_argument('--scalable', action='store_true',
                        help='switch to scalable metrics')
    parser.add_argument('--output', action='store',
                        help='path to output plot file')
    args = parser.parse_args()

    # load results
    results = load_results(args.results_path, grouping_col='npoints')
    results = sorted(results.items())  # .iteritems() is Python 2 only

    # calculate metrics
    results = calculate_metrics(results, scalable=args.scalable)

    # plot metrics
    plt.figure(figsize=(22, 16), dpi=80)
    metrics_type = 'scalable' if args.scalable else 'non-scalable'
    plot_title = 'Monte Carlo approximation of PI number - %s metrics of parallel computation' % metrics_type
    plt.suptitle(plot_title, size=24)
    plot_opts = {'label': lambda npoints: '%s points' % npoints}
    legend_opts = {'loc': 'upper center', 'ncol': 3}
    plt.subplot(2, 2, 1)
    plot_metric('ncores', 'speedup', results,
                metric_arg_name='Number of cores',
                metric_val_name='Speedup',
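# NOTE: for context, the classic "non-scalable" metrics this script contrasts
# with scalable ones. A sketch assuming runtimes are keyed by core count; the
# actual schema produced by load_results is not shown.
def speedup_and_efficiency(times_by_ncores):
    # times_by_ncores maps ncores -> wall-clock runtime in seconds
    t1 = times_by_ncores[1]
    speedup = {n: t1 / t for n, t in times_by_ncores.items()}
    efficiency = {n: s / n for n, s in speedup.items()}
    return speedup, efficiency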
def check_average_precision(verification_df, weights, k_range):
    df = apply_weights(verification_df, weights)
    return calculate_metrics(df, k_range)
def nn_train(model_config, train_data, dev_data, ner_cate2id_dict, classifier_cate2id_dict):
    if os.path.exists(model_config.save_dir):
        print("SaveVariable dir already exists")
    else:
        os.mkdir(model_config.save_dir)
        print("Making SaveVariable dir")

    train_features, train_labels = train_data
    dev_features, dev_labels = dev_data
    train_nums = len(train_features)
    num_batch = int((train_nums - 1) / model_config.batch_size) + 1  # total number of batches

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(visible_device_list=model_config.gpu_id, allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True))
        # initializer = tf.contrib.layers.xavier_initializer()
        with sess.as_default():
            myModel = globals()[model_config.model_name](model_config)
            global_step = tf.Variable(0, name="global_step", trainable=False)
            learning_rate = tf.train.exponential_decay(model_config.lr, global_step, num_batch, 0.98, True)
            optimizer = tf.train.RMSPropOptimizer(learning_rate)  # define the optimizer
            train_op = optimizer.minimize(myModel.loss, global_step=global_step)

            print("initializing...")
            print("=" * 40)
            print("Nums of TrainData:", train_nums)
            print("Batch Size:", model_config.batch_size)
            print("Steps per epoch:", train_nums // model_config.batch_size)
            print("=" * 40)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=50)

            def train_action(batch_xs, batch_ys):
                entity_pad, token_pad, real_chars_list, seq_lens_list, ner_vec_list, classifier_vec_list = convert_data(
                    batch_xs, batch_ys)
                feed_dict = {
                    myModel.entity_pad: entity_pad,
                    myModel.token_pad: token_pad,
                    myModel.real_chars_list: real_chars_list,
                    myModel.seq_lens_list: seq_lens_list,
                    myModel.ner_vec_list: ner_vec_list,
                    myModel.classifier_vec_list: classifier_vec_list,
                    myModel.keep_prob: 0.5,
                }
                _, step_loss = sess.run((train_op, myModel.loss), feed_dict=feed_dict)
                return step_loss

            epoch_counter = 0
            first_flag = True
            min_loss, max_F1 = float("inf"), 0
            while True:
                epoch_counter += 1
                train_batch = batch_iter(train_features, train_labels, batch_size=model_config.batch_size)
                for batch_xs, batch_ys in train_batch:  # one pass over train_batch is one epoch
                    step_loss = train_action(batch_xs, batch_ys)
                    current_step = tf.train.global_step(sess, global_step)  # step + 1
                    if current_step % 50 == 0:  # every 50 steps, report the loss on the current training batch
                        print("steps: %-7d\tloss:%2.4f" % (current_step, step_loss))
                    if current_step % model_config.dev_per_steps == 0:
                        print("=" * 40)
                        print("evaluating in dev...")
                        dev_batch = batch_iter(dev_features, dev_labels, is_random=False, batch_size=20)
                        loss_sum = 0
                        total_nums = 0
                        ner_pred_list, classifier_pred_list = [], []
                        ner_label_list, classifier_label_list = [], []
                        mask_list = []
                        for one_batch in dev_batch:
                            batch_xs, batch_ys = one_batch
                            current_batch = len(batch_xs)
                            entity_pad, token_pad, real_chars_list, seq_lens_list, ner_vec_list, classifier_vec_list = convert_data(
                                batch_xs, batch_ys)
                            feed_dict = {
                                myModel.entity_pad: entity_pad,
                                myModel.token_pad: token_pad,
                                myModel.real_chars_list: real_chars_list,
                                myModel.seq_lens_list: seq_lens_list,
                                myModel.ner_vec_list: ner_vec_list,
                                myModel.classifier_vec_list: classifier_vec_list,
                                myModel.keep_prob: 1,
                            }
                            batch_loss, batch_ner_pred, batch_ner_label, batch_classifier_pred, batch_classifier_label = sess.run(
                                (
                                    myModel.loss,
                                    myModel.decode_tags,
                                    myModel.ner_labels,
                                    myModel.classifier_predictions,
                                    myModel.classifier_labels,
                                ),
                                feed_dict=feed_dict,
                            )
                            loss_sum += batch_loss * current_batch
                            total_nums += current_batch
                            ner_pred_list.append(batch_ner_pred)
                            ner_label_list.append(batch_ner_label)
                            classifier_pred_list.append(batch_classifier_pred)
                            classifier_label_list.append(batch_classifier_label)
                            mask_list.append(real_chars_list)

                        ner_pred_list = np.concatenate(ner_pred_list, 0).tolist()
                        ner_label_list = np.concatenate(ner_label_list, 0).tolist()
                        mask_list = np.concatenate(mask_list, 0).tolist()
                        classifier_pred_list = np.concatenate(classifier_pred_list, 0).tolist()
                        classifier_label_list = np.concatenate(classifier_label_list, 0).tolist()

                        prediction_list = merge_tag(
                            ner_pred_list,
                            classifier_pred_list,
                            mask_list,
                            ner_cate2id_dict,
                            classifier_cate2id_dict,
                        )
                        label_list = merge_tag(
                            ner_label_list,
                            classifier_label_list,
                            mask_list,
                            ner_cate2id_dict,
                            classifier_cate2id_dict,
                        )
                        loss_ = loss_sum / total_nums
                        prediction_list_pro = metrics.process_labels(prediction_list, model_config.dev_path)
                        label_list_pro = metrics.process_labels(label_list, model_config.dev_path)
                        precision, recall, F1 = metrics.calculate_metrics(prediction_list_pro, label_list_pro)
                        print("step:%-7d\tloss:%2.4f" % (current_step, loss_))
                        print("Precision:%2.4f Recall:%2.4f F1:%2.4f" % (precision, recall, F1))
                        print("After manual processing...")
                        prediction_list_pro = metrics.further_process_labels(prediction_list_pro, model_config.dev_path)
                        precision, recall, F1 = metrics.calculate_metrics(prediction_list_pro, label_list_pro)
                        print("Precision:%2.4f Recall:%2.4f F1:%2.4f" % (precision, recall, F1))

                        if loss_ < min_loss or F1 > max_F1:  # save the model
                            min_loss = min(loss_, min_loss)
                            max_F1 = max(F1, max_F1)
                            save_path = saver.save(
                                sess,
                                model_config.models_dir + str(current_step) + ".ckpt",
                            )
                            print("save to path:", save_path)
                            if first_flag:
                                log_path = model_config.models_dir + "best_model_id.log"
                                if os.path.exists(log_path):
                                    os.remove(log_path)
                                fw = open(log_path, "w", encoding="utf8")
                                first_flag = False
                            else:
                                fw = open(model_config.models_dir + "best_model_id.log", "a", encoding="utf8")
                            fw.write(
                                "step:%-7d\tloss:%2.4f\tPrecision:%2.4f Recall:%2.4f F1:%2.4f\n"
                                % (current_step, loss_, precision, recall, F1))
                            fw.close()
                        print("=" * 40)
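# NOTE: both the train and dev loops above depend on a batch_iter helper that is
# not shown. A minimal, hypothetical sketch of such a generator; the exact
# shuffling behavior behind is_random is an assumption.
import numpy as np

def batch_iter(features, labels, batch_size=64, is_random=True):
    order = np.arange(len(features))
    if is_random:
        np.random.shuffle(order)
    for start in range(0, len(features), batch_size):
        idx = order[start:start + batch_size]
        yield [features[i] for i in idx], [labels[i] for i in idx]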
else:
    if sys.argv[1] == 'naivebayes' or sys.argv[1] == 'knn':
        annotated_texts = read('blog-gender-dataset.xlsx')
        training_set_len = 0.7 * len(annotated_texts)
        training_set = []
        test_set = []
        for (text, gender) in annotated_texts:
            if 'M' in gender:
                gender = 'M'
            else:
                gender = 'F'
            if len(training_set) < training_set_len:
                training_set.append((preprocess(text), gender))
            else:
                test_set.append((preprocess(text), gender))
        if sys.argv[1] == 'naivebayes':
            classifier = NaiveBayesClassifier(training_set)
        else:
            classifier = KNNClassifier(training_set, 5)
        print(calculate_metrics(test_set, classifier))
    else:
        print('Invalid classifier name. Choose from [naivebayes, knn]')
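# NOTE: calculate_metrics(test_set, classifier) is undefined in this excerpt. A
# minimal, hypothetical sketch that scores accuracy over (text, gender) pairs;
# the classify() method name on the custom classifiers is an assumption.
def calculate_metrics(test_set, classifier):
    correct = sum(1 for text, gender in test_set
                  if classifier.classify(text) == gender)  # classify() assumed
    return correct / len(test_set)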
def nn_test(model_config, test_data, ner_cate2id_dict, classifier_cate2id_dict):
    id2cate_dict = {v: k for k, v in classifier_cate2id_dict.items()}
    print(id2cate_dict)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(visible_device_list=model_config.gpu_id, allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True))
        # initializer = tf.contrib.layers.xavier_initializer()
        myModel = globals()[model_config.model_name](model_config)

        print("initializing...")
        sess.run(tf.global_variables_initializer())
        print("loading model by id:", model_config.model_id)
        saver = tf.train.Saver()
        saver.restore(sess, model_config.models_dir + str(model_config.model_id) + ".ckpt")

        test_features, test_labels = test_data
        # for val in test_labels:
        #     print(val)
        test_batch = batch_iter(test_features, test_labels, is_random=False, batch_size=20)
        ner_pred_list, classifier_pred_list = [], []
        ner_label_list, classifier_label_list = [], []
        mask_list = []
        for batch_xs, batch_ys in test_batch:
            entity_pad, token_pad, real_chars_list, seq_lens_list, ner_vec_list, classifier_vec_list = convert_data(
                batch_xs, batch_ys)
            feed_dict = {
                myModel.entity_pad: entity_pad,
                myModel.token_pad: token_pad,
                myModel.real_chars_list: real_chars_list,
                myModel.seq_lens_list: seq_lens_list,
                myModel.ner_vec_list: ner_vec_list,
                myModel.classifier_vec_list: classifier_vec_list,
                myModel.keep_prob: 1,
            }
            batch_ner_pred, batch_ner_label, batch_classifier_pred, batch_classifier_label = sess.run(
                (
                    myModel.decode_tags,
                    myModel.ner_labels,
                    myModel.classifier_predictions,
                    myModel.classifier_labels,
                ),
                feed_dict=feed_dict,
            )
            ner_pred_list.append(batch_ner_pred)
            ner_label_list.append(batch_ner_label)
            classifier_pred_list.append(batch_classifier_pred)
            classifier_label_list.append(batch_classifier_label)
            mask_list.append(real_chars_list)

        ner_pred_list = np.concatenate(ner_pred_list, 0).tolist()
        ner_label_list = np.concatenate(ner_label_list, 0).tolist()
        classifier_pred_list = np.concatenate(classifier_pred_list, 0).tolist()
        classifier_label_list = np.concatenate(classifier_label_list, 0).tolist()
        mask_list = np.concatenate(mask_list, 0).tolist()

        prediction_list = merge_tag(
            ner_pred_list,
            classifier_pred_list,
            mask_list,
            ner_cate2id_dict,
            classifier_cate2id_dict,
        )
        label_list = merge_tag(
            ner_label_list,
            classifier_label_list,
            mask_list,
            ner_cate2id_dict,
            classifier_cate2id_dict,
        )
        prediction_list_pro = metrics.process_labels(prediction_list, model_config.test_path)
        label_list_pro = metrics.process_labels(label_list, model_config.test_path)
        precision, recall, F1 = metrics.calculate_metrics(prediction_list_pro, label_list_pro)
        print("Precision:%2.4f Recall:%2.4f F1:%2.4f" % (precision, recall, F1))
        print("After manual processing...")
        prediction_list_pro = metrics.further_process_labels(prediction_list_pro, model_config.test_path)
        precision, recall, F1 = metrics.calculate_metrics(prediction_list_pro, label_list_pro)
        print("Precision:%2.4f Recall:%2.4f F1:%2.4f" % (precision, recall, F1))

        # Write predictions and labels; context managers ensure the files are closed
        # (the original reassigned fw without closing the first handle).
        with open("test.tag", "w", encoding="utf8") as fw:
            for ssi in prediction_list:
                fw.write(str(ssi) + "\n")
        with open("label.tag", "w", encoding="utf8") as fw:
            for ssi in label_list:
                fw.write(str(ssi) + "\n")
# In[11]:

# Decomposing highway pixel to superpixel
decom_super_coh = decomposition_highway(centroids_highway, highway, highway_coh, coordinates_high, input_super)

# Decomposing super pixel to pixel-level
coh = decomposition_super(centroids_super, super_pixel, decom_super_coh, coordinates_super, input)

# In[12]:

# Calculating evaluation metrics
mse, rmse, ssim = calculate_metrics(coh_3vg, coh)

# In[13]:

# Displaying the results
plt = display_result(coh_3vg, coh)
plt.savefig(os.path.join(path, "Experiment 1"))

# ### Experiment 2. Differential Decomposition with level-specific weight

# In[14]:

# Decomposing highway pixel to superpixel
decom_super_coh = decomposition_highway1(centroids_highway, highway, highway_coh, coordinates_high, input_super)
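# NOTE: the calculate_metrics(coh_3vg, coh) call in cell In[12] returns MSE,
# RMSE, and SSIM. A hypothetical sketch with NumPy and scikit-image, assuming
# both inputs are 2-D arrays on a comparable value range (the data_range choice
# is mine).
import numpy as np
from skimage.metrics import structural_similarity

def calculate_metrics(reference, estimate):
    reference = np.asarray(reference, dtype=np.float64)
    estimate = np.asarray(estimate, dtype=np.float64)
    mse = np.mean((reference - estimate) ** 2)
    rmse = np.sqrt(mse)
    ssim = structural_similarity(reference, estimate,
                                 data_range=reference.max() - reference.min())
    return mse, rmse, ssim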
    thrower=thrower,
    train=True,
    lr=(0.1 if epoch_idx < 100 else 0.001),
)
cprint(
    "epoch_idx=",
    epoch_idx,
    "| grip_loss=",
    grip_loss,
    "| throw_loss=",
    throw_loss,
)
gripper.save("/tmp/grip-and-throw/example_gripper")
thrower.save("/tmp/grip-and-throw/example_thrower")
gripper.load("/tmp/grip-and-throw/example_gripper")
thrower.load("/tmp/grip-and-throw/example_thrower")
eval_dataset = generate_data(
    gripper=gripper,
    aux_gripper=aux_gripper,
    thrower=thrower,
    image_transformer=image_transformer,
    seeds=[7],
    length=2,
    on_cuda=False,
    workdir_path="/tmp/grip-and-throw/example_eval_dataset",
)
cprint(metrics.calculate_metrics(eval_dataset))