def external_entry_point(hypo_to_use, path_for_data_set=r"dataset\HC_Body_Temperature.txt",
                         num_of_runs=8, length_of_round=100):
    data_set = data_prep.get_data_set_from_path(path_for_data_set)  # read data set from path
    for i in range(1, num_of_runs + 1):
        list_of_acc_on_train = []
        list_of_acc_on_test = []
        for j in range(0, length_of_round):
            # shuffle data every iteration
            (shuffled_train_set, shuffled_test_set) = data_prep.shuffle_dataset(data_set)
            H_set_of_hypos = AdaBoost_Algo(shuffled_train_set, i, hypo_to_use)  # get i best hypos
            # calc train acc on this hypo group
            train_acc = utils.calc_accuracy(shuffled_train_set, H_set_of_hypos, hypo_to_use)
            list_of_acc_on_train.append(train_acc)  # collect to calc avg
            test_acc = utils.calc_accuracy(shuffled_test_set, H_set_of_hypos, hypo_to_use)
            list_of_acc_on_test.append(test_acc)
        print("avg TRAIN acc for round: ", i, " is: ",
              "%.3f" % utils.calc_avg(list_of_acc_on_train))
        print("avg TEST acc for round: ", i, " is: ",
              "%.3f" % utils.calc_avg(list_of_acc_on_test))
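# The snippet above relies on utils.calc_accuracy(data_set, H_set_of_hypos, hypo_to_use), whose
# implementation and data layout are not shown. The following is only a minimal sketch of such a
# helper, assuming each sample is a (point, label) pair with labels in {-1, +1} and each entry of
# H_set_of_hypos is an (alpha, hypothesis) pair evaluated through hypo_to_use; all names here are
# hypothetical.
import numpy as np

def calc_accuracy_sketch(data_set, weighted_hypos, hypo_to_use):
    """Fraction of samples whose weighted-vote sign matches the label (assumed layout)."""
    correct = 0
    for point, label in data_set:
        vote = sum(alpha * hypo_to_use(hypo, point) for alpha, hypo in weighted_hypos)
        if np.sign(vote) == label:
            correct += 1
    return correct / len(data_set)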
def log_confusion_matrices(self, print_matrix=True, mod_name=''):
    cm = generate_cm(self.best_model_preds)
    if print_matrix:
        print(cm)
    self.experiment.log_confusion_matrix(labels=self.train_loader.dataset.dataset.classes,
                                         matrix=cm.tolist(),
                                         title=mod_name + "Confusion matrix, individual clips",
                                         file_name=mod_name + "individual_clips.json")
    cm = confusion_matrix(*calc_accuracy(self.best_model_preds, method='sum_predictions',
                                         export_for_cm=True))
    self.experiment.log_confusion_matrix(labels=self.train_loader.dataset.dataset.classes,
                                         matrix=cm,
                                         title=mod_name + "Confusion matrix, sum predictions",
                                         file_name=mod_name + "sum_predictions.json")
    cm = confusion_matrix(*calc_accuracy(self.best_model_preds, method='majority_vote',
                                         export_for_cm=True))
    self.experiment.log_confusion_matrix(labels=self.train_loader.dataset.dataset.classes,
                                         matrix=cm,
                                         title=mod_name + "Confusion matrix, majority vote",
                                         file_name=mod_name + "majority_vote.json")
def train(n_epochs=50, lbcnn_depth=2, learning_rate=1e-2, momentum=0.9, weight_decay=1e-4,
          lr_scheduler_step=5):
    start = time.time()
    models_dir = os.path.dirname(MODEL_PATH)
    if not os.path.exists(models_dir):
        os.makedirs(models_dir)
    train_loader = get_mnist_loader(train=True)
    test_loader = get_mnist_loader(train=False)
    model = Lbcnn(depth=lbcnn_depth)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
    best_accuracy = 0.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(filter(lambda param: param.requires_grad, model.parameters()),
                          lr=learning_rate, momentum=momentum, weight_decay=weight_decay,
                          nesterov=True)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=lr_scheduler_step)
    for epoch in range(n_epochs):
        for batch_id, (inputs, labels) in enumerate(
                tqdm(train_loader, desc="Epoch {}/{}".format(epoch, n_epochs))):
            if use_cuda:
                inputs = inputs.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        accuracy_train = calc_accuracy(model, loader=train_loader)
        accuracy_test = calc_accuracy(model, loader=test_loader)
        print("Epoch {} accuracy: train={:.3f}, test={:.3f}".format(
            epoch, accuracy_train, accuracy_test))
        if accuracy_train > best_accuracy:
            best_accuracy = accuracy_train
            torch.save((lbcnn_depth, model.state_dict()), MODEL_PATH)
        scheduler.step(epoch=epoch)
    train_duration_sec = int(time.time() - start)
    print('Finished Training. Total training time: {} sec'.format(train_duration_sec))
def log_cv_confusion_matrices(self, mod_name=''):
    cm_all_videos = np.empty((10, 6, 6))
    cm_sum_predictions = np.empty((10, 6, 6))
    cm_majority_vote = np.empty((10, 6, 6))
    for i, preds in enumerate(self.best_folds_model_preds):
        cm_all_videos[i] = generate_cm(preds)
        cm_sum_predictions[i] = confusion_matrix(*calc_accuracy(preds, method='sum_predictions',
                                                                export_for_cm=True))
        cm_majority_vote[i] = confusion_matrix(*calc_accuracy(preds, method='majority_vote',
                                                              export_for_cm=True))
    cm = cm_all_videos.sum(axis=0)
    self.experiment.log_confusion_matrix(labels=self.train_loader.dataset.dataset.classes,
                                         matrix=cm,
                                         title=mod_name + "Confusion matrix, individual clips",
                                         file_name=mod_name + "individual_clips.json")
    cm = cm_sum_predictions.sum(axis=0)
    self.experiment.log_confusion_matrix(labels=self.train_loader.dataset.dataset.classes,
                                         matrix=cm,
                                         title=mod_name + "Confusion matrix, sum predictions",
                                         file_name=mod_name + "sum_predictions.json")
    cm = cm_majority_vote.sum(axis=0)
    self.experiment.log_confusion_matrix(labels=self.train_loader.dataset.dataset.classes,
                                         matrix=cm,
                                         title=mod_name + "Confusion matrix, majority vote",
                                         file_name=mod_name + "majority_vote.json")
def validate(data_loader, model):
    losses = AverageMeter()
    accuracy = AverageMeter()
    model.eval()
    with torch.no_grad():
        tq = tqdm(data_loader, desc="Val progress: ")
        for idx, (input_seq, target, _) in enumerate(tq):
            input_seq = input_seq.to(cuda)
            target = target.to(cuda)
            B = input_seq.size(0)
            output, _ = model(input_seq)
            [_, N, D] = output.size()
            output = output.view(B * N, D)
            target = target.repeat(1, N).view(-1)
            loss = criterion(output, target)
            acc = calc_accuracy(output, target)
            losses.update(loss.item(), B)
            accuracy.update(acc.item(), B)
            tq.set_postfix({
                'loss': losses.avg,
                'acc': accuracy.avg,
            })
    print('Val - Loss {loss.avg:.4f}\t'
          'Acc: {acc.avg:.4f} \t'.format(loss=losses, acc=accuracy))
    return losses.avg, accuracy.avg
def train():
    net.train()
    losses = []
    accuracies = []
    for image, question, answer in tqdm(train_dataloader, desc='train'):
        image, question, answer = image.cuda(), question.cuda(), answer.cuda()
        pred, _ = net(image, question)
        loss = criterion(pred, answer)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        tb.add_scalar('train_loss', loss)
        tb.iter()
        losses.append(loss.item())
        accuracy = calc_accuracy(pred, answer)
        accuracies += [accuracy] * answer.size(0)
    return {
        'loss': sum(losses) / len(losses),
        'acc': sum(accuracies) / len(accuracies),
    }
def train(epoch, run, mod_name=''):
    total_train_loss = 0
    total_train_correct = 0
    incorrect_classifications_train = []
    epoch_classifications_train = []
    run.model.train()
    for batch_number, (images, labels, paths) in enumerate(run.train_loader):
        # for i, (image, label, path) in enumerate(zip(images, labels, paths)):
        #     save_plot_clip_frames(image, label, path, added_info_to_path=epoch)
        if run.grayscale:
            images = torch.unsqueeze(images, 1).double()  # add channel dimension (grayscale)
        else:
            images = images.float().permute(0, 4, 1, 2, 3).float()
        labels = labels.long()
        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()
        # PyTorch accumulates gradients, so reset them each batch.
        run.optimizer.zero_grad()
        preds = run.model(images)       # pass batch
        loss = run.criterion(preds, labels)  # calculate loss
        total_train_loss += loss.item()
        loss.backward()                 # calculate gradients
        run.optimizer.step()            # update weights
        num_correct = get_num_correct(preds, labels)
        total_train_correct += num_correct
        run.experiment.log_metric(mod_name + "Train batch accuracy",
                                  num_correct / len(labels) * 100,
                                  step=run.log_number_train)
        run.experiment.log_metric(mod_name + "Avg train batch loss", loss.item(),
                                  step=run.log_number_train)
        run.log_number_train += 1
        # print('Train: Batch number:', batch_number, 'Num correct:', num_correct,
        #       'Accuracy:', "{:.2%}".format(num_correct / len(labels)), 'Loss:', loss.item())
        incorrect_classifications_train.append(get_mistakes(preds, labels, paths))
        for prediction in zip(preds, labels, paths):
            epoch_classifications_train.append(prediction)
    epoch_accuracy = calc_accuracy(epoch_classifications_train)
    run.experiment.log_metric(mod_name + "Train epoch accuracy", epoch_accuracy, step=epoch)
    run.experiment.log_metric(mod_name + "Avg train epoch loss",
                              total_train_loss / batch_number, step=epoch)
    print('\nTrain: Epoch:', epoch, 'num correct:', total_train_correct, 'Accuracy:',
          str(epoch_accuracy) + '%')
def evaluate_data(evaluation_type, test_labels, parameter_string, fold, all_probs, sess,
                  init_opt, ys):
    logging.info('# {} evaluation'.format(evaluation_type))
    test_probs = []
    test_results = []
    for _ in range(num_test_bathces):
        tmp_probs, tmp_results, tmp_labels = sess.run([probs_test, pred_test, ys])
        test_probs.extend(tmp_probs)
        test_results.extend(tmp_results)
        test_labels.extend(tmp_labels)
    accuracy = calc_accuracy(test_results, test_labels)
    logging.info('The {} accuracy of parameter {} fold {} is {}'.format(
        evaluation_type, parameter_string, fold, accuracy))
    if evaluation_type == 'Test':
        if fold == 0:
            all_probs = np.array(test_probs)
        else:
            all_probs += np.array(test_probs)
        logging.info('Reset the test iteration')
        if fold != cfg.fold_num - 1:
            test_labels = []
    if evaluation_type == 'Dev':
        sess.run(init_opt)
    else:
        pass
    return all_probs, test_labels, accuracy
def validate(data_loader, model):
    losses = AverageMeter()
    accuracy = AverageMeter()
    model.eval()
    with torch.no_grad():
        for idx, (input_seq, target) in tqdm(enumerate(data_loader), total=len(data_loader)):
            input_seq = input_seq.to(cuda)
            target = target.to(cuda)
            B = input_seq.size(0)
            output, _ = model(input_seq)
            [_, N, D] = output.size()
            output = output.view(B * N, D)
            target = target.repeat(1, N).view(-1)
            loss = criterion(output, target)
            acc = calc_accuracy(output, target)
            losses.update(loss.item(), B)
            accuracy.update(acc.item(), B)
    print('Loss {loss.avg:.4f}\t'
          'Acc: {acc.avg:.4f} \t'.format(loss=losses, acc=accuracy))
    return losses.avg, accuracy.avg
def train(data_loader, model, optimizer, epoch):
    losses = AverageMeter()
    accuracy = AverageMeter()
    model.train()
    global iteration
    for idx, (input_seq, target) in enumerate(data_loader):
        tic = time.time()
        input_seq = input_seq.to(cuda)
        target = target.to(cuda)
        B = input_seq.size(0)
        output, _ = model(input_seq)
        # visualize
        if (iteration == 0) or (iteration == args.print_freq):
            if B > 2:
                input_seq = input_seq[0:2, :]
            writer_train.add_image(
                'input_seq',
                de_normalize(vutils.make_grid(
                    input_seq.transpose(2, 3).contiguous().view(-1, 3, args.img_dim, args.img_dim),
                    nrow=args.num_seq * args.seq_len)),
                iteration)
        del input_seq
        [_, N, D] = output.size()
        output = output.view(B * N, D)
        target = target.repeat(1, N).view(-1)
        loss = criterion(output, target)
        acc = calc_accuracy(output, target)
        del target
        losses.update(loss.item(), B)
        accuracy.update(acc.item(), B)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.local_avg:.4f})\t'
                  'Acc: {acc.val:.4f} ({acc.local_avg:.4f}) T:{3:.2f}\t'.format(
                      epoch, idx, len(data_loader), time.time() - tic,
                      loss=losses, acc=accuracy))
            total_weight = 0.0
            decay_weight = 0.0
            for m in model.parameters():
                if m.requires_grad:
                    decay_weight += m.norm(2).data
                total_weight += m.norm(2).data
            print('Decay weight / Total weight: %.3f/%.3f' % (decay_weight, total_weight))
            writer_train.add_scalar('local/loss', losses.val, iteration)
            writer_train.add_scalar('local/accuracy', accuracy.val, iteration)
            iteration += 1
    return losses.local_avg, accuracy.local_avg
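# The train/validate loops above call calc_accuracy(output, target) on flattened logits and then
# use acc.item(), so the helper presumably returns a 0-dim tensor holding top-1 accuracy. A
# minimal sketch under that assumption (the real implementation may differ):
import torch

def calc_accuracy_sketch(output, target):
    """Top-1 accuracy for logits of shape [B*N, D] and integer targets of shape [B*N]."""
    with torch.no_grad():
        pred = output.argmax(dim=1)             # predicted class per row
        return (pred == target).float().mean()  # 0-dim tensor; callers use .item()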
def main(argv=None):
    all_data = pickle.load(open(FLAGS.CUB_data + 'cub_image_dict.pkl', 'r'))
    attention_dict = pickle.load(open(FLAGS.CUB_data + 'cub_attention_dict.pkl', 'r'))
    knowledge_dict = pickle.load(open(FLAGS.CUB_data + 'cub_knowledge_dict.pkl', 'r'))
    train_files, train_labels, test_files, test_labels = load_data()
    test_f = []
    test_l = []
    test_fine_f = []
    test_fine_l = []
    for i in xrange(len(test_labels)):
        if 'layer_fine' in all_data[test_labels[i]].keys():
            # continue
            test_f.append(test_files[i])
            test_l.append(test_labels[i])
            test_fine_f.append(test_files[i] + 'f')
            test_fine_l.append(test_labels[i])
        else:
            test_f.append(test_files[i])
            test_l.append(test_labels[i])
    train_data = Dataset(train_files, train_labels, 64, attention_dict, knowledge_dict, all_data)
    test_data = Dataset(test_f, test_l, 64, attention_dict, knowledge_dict, all_data,
                        is_train=False)
    test_data_fine = Dataset(test_fine_f, test_fine_l, 64, attention_dict, knowledge_dict,
                             all_data, is_train=False)
    the_model = Model(lr=0.002)
    fine_predict = the_model.classifier.predict(input_fn=test_data_fine.input_fn)
    coarse_predict = the_model.classifier.predict(input_fn=test_data.input_fn)
    accuracy = utils.calc_accuracy(test_l, fine_predict, coarse_predict, all_data)
    print(accuracy)
    # the_model.classifier.evaluate(input_fn=test_data.input_fn)
    '''
    for i in xrange(FLAGS.epoch):
        the_model.classifier.evaluate(input_fn=test_data_fine.input_fn)
        fine_predict = the_model.classifier.predict(input_fn=test_data_fine.input_fn)
        coarse_predict = the_model.classifier.predict(input_fn=test_data.input_fn)
        the_model.classifier.train(input_fn=train_data.input_fn, hooks=the_model.train_hooks,
                                   steps=200)
        the_model.classifier.evaluate(input_fn=test_data.input_fn)
    '''
def test(model=None):
    if model is None:
        assert os.path.exists(MODEL_PATH), "Train a model first"
        lbcnn_depth, state_dict = torch.load(MODEL_PATH)
        model = Lbcnn(depth=lbcnn_depth)
        model.load_state_dict(state_dict)
    loader = get_mnist_loader(train=False)
    accuracy = calc_accuracy(model, loader=loader, verbose=True)
    print("MNIST test accuracy: {:.3f}".format(accuracy))
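# test() and the LBCNN train() above call calc_accuracy(model, loader=..., verbose=...), i.e. the
# helper evaluates a model over an entire DataLoader. A minimal sketch under that assumption; the
# verbose handling here is illustrative only.
import torch

def calc_accuracy_sketch(model, loader, verbose=False):
    """Average top-1 accuracy of `model` over all batches in `loader` (assumed behavior)."""
    model.eval()
    use_cuda = torch.cuda.is_available()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    if verbose:
        print("correct={} / total={}".format(correct, total))
    return correct / total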
def reduced_rule2(rules, training_corpus, predictions, accuracy):
    reduced_rules = []
    for temprule in rules.keys():
        if rules[temprule] > 500:
            copypredictions = copy.deepcopy(predictions)
            singlerule = [temprule]
            new_predictions = apply_rule2(copypredictions, singlerule)
            newaccuracy = utils.calc_accuracy(training_corpus, new_predictions)
            if newaccuracy > accuracy:
                reduced_rules.append(temprule)
    return reduced_rules
def reduced_rule1(rules, train_sents, predict_sents, accuracy):
    reduced_rules = []
    for temprule in rules.keys():
        if rules[temprule] > 500:
            copypredict_sents = copy.deepcopy(predict_sents)
            singlerule = [temprule]
            newPrediction = apply_rule1(copypredict_sents, singlerule)
            newaccuracy = utils.calc_accuracy(train_sents, newPrediction)
            if newaccuracy > accuracy:
                reduced_rules.append(temprule)
    return reduced_rules
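# reduced_rule1/reduced_rule2 compare utils.calc_accuracy(gold_sents, predicted_sents) before and
# after applying a candidate rule. The utility itself is not shown; this is only a plausible
# minimal sketch, assuming both arguments are lists of sentences made of (word, tag) pairs.
def calc_accuracy_sketch(gold_sents, predicted_sents):
    """Token-level tag accuracy between two parallel tagged corpora (assumed layout)."""
    correct = 0
    total = 0
    for gold_sent, pred_sent in zip(gold_sents, predicted_sents):
        for (word, gold_tag), (_, pred_tag) in zip(gold_sent, pred_sent):
            total += 1
            if gold_tag == pred_tag:
                correct += 1
    return float(correct) / total if total else 0.0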
def val_(dataloader):
    net.eval()
    losses = []
    accuracies = []
    with torch.no_grad():
        for image, question, answer in tqdm(dataloader, desc='val'):
            image, question, answer = image.cuda(), question.cuda(), answer.cuda()
            pred, _ = net(image, question)
            loss = criterion(pred, answer)
            losses.append(loss.item())
            accuracy = calc_accuracy(pred, answer)
            accuracies += [accuracy] * answer.size(0)
    return sum(losses) / len(losses), sum(accuracies) / len(accuracies)
def test_run(data_type, lower_and_remove_punctuation, remove_stop_words, distance_method):
    """
    Performs a test run according to the given parameters.

    :param data_type: defines how to store the sentences; expects 'boolean' / 'tf' / 'tfidf'
    :param lower_and_remove_punctuation: bool; if True, lower-cases all words and removes punctuation
    :param remove_stop_words: bool; if True, removes all stop words
    :param distance_method: defines how to calculate distance; expects 'euclidean' / 'cosine'
    :return: accuracy, the accuracy of the test run
    """
    file_name = "./dataset/amazon_cells_labelled_full.txt"
    train_file_name = "./dataset/amazon_cells_labelled_train.txt"
    test_file_name = "./dataset/amazon_cells_labelled_test.txt"
    data = FileReader(file_name, lower_and_remove_punctuation, remove_stop_words)
    train_set, _ = data.build_set(data_type, train_file_name)
    test_set, _ = data.build_set(data_type, test_file_name)
    classifier = RocchioClassifier(train_set)
    accuracy = calc_accuracy(test_set, classifier, distance_method)
    return accuracy
def test_solver():
    num_test_cases = 50
    # Generate test cases
    generate_test_cases(100, 450, 3, num_test_cases)
    # Obtain labels from online solver
    obtain_labels(num_test_cases)
    row_solution = []
    time_taken_total = 0
    # Loop through test cases
    for i in range(1, num_test_cases + 1):
        file = os.path.join(base_dir, f"test_case_{i}.cnf")
        _, _, cnf = read_data(file)
        start = time.perf_counter()
        # Run our solver
        sat, _ = CDCL(cnf, single_UIP, branching_heuristic)
        time_taken = time.perf_counter() - start
        time_taken_total += time_taken
        # Store solution
        row_solution.append([i, sat, time_taken])
        print(time_taken)
    # Save solutions
    with open(os.path.join(base_dir, 'predictions.csv'), 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        for row in row_solution:
            spamwriter.writerow(row)
    # Print metrics
    accuracy, average_time_slower = calc_accuracy(num_test_cases)
    print(f"Accuracy = {accuracy * 100}%")
    print(f"Average Time Taken: {time_taken_total / num_test_cases}")
    print(f"Slower by {average_time_slower}s on average")
def calc_distinct(results_dict, config):
    tf.reset_default_graph()
    print('Calculating metrics for: Distinct')
    # ============= Metrics Folder - Distinct =============
    output_dir = os.path.join(config['log_dir'], config['name'], 'test', 'metrics', 'distinct')
    logs_dir = os.path.join(output_dir, 'logs')
    if not os.path.exists(logs_dir):
        os.makedirs(logs_dir)
    # ============= Experiment Parameters =============
    BATCH_SIZE = config['metrics_batch_size']
    EPOCHS = config['metrics_epochs']
    TEST_RATIO = config['metrics_test_ratio']
    NUM_BINS = config['num_bins']
    if 'k_dim' in config.keys():
        N_KNOBS = config['k_dim']
    elif 'w_dim' in config.keys():
        N_KNOBS = config['w_dim']
    else:
        print('Number of knobs not specified. Returning...')
        return {}
    TARGET_CLASS = config['target_class']
    if N_KNOBS <= 1:
        print('This model has only one dimension. Distinctness metrics are not applicable.')
        return {}
    channels = config['num_channel']
    input_size = config['input_size']
    dataset = config['dataset']
    # ============= Data =============
    data = _EMPTY_ARR
    labels = _EMPTY_ARR
    source_len = len(results_dict['real_imgs'])
    for dim in range(N_KNOBS):
        for bin_i in range(NUM_BINS):
            data_dim_bin = np.append(results_dict['real_imgs'],
                                     results_dict['fake_t_imgs'][:, dim, bin_i], axis=-1)
            # dimension dim has been switched
            switched_dim = np.ones(source_len, dtype=int) * dim
            # unless the real probability and fake target probability are the same,
            # in which case no dimension has been switched
            fixed_indices = (np.around(results_dict['real_ps'][:, dim, bin_i, TARGET_CLASS],
                                       decimals=2)
                             == results_dict['fake_target_ps'][:, dim, bin_i])
            labels_dim_bin = np.eye(N_KNOBS)[switched_dim]
            labels_dim_bin[fixed_indices] = 0
            data = safe_append(data, data_dim_bin)
            labels = safe_append(labels, labels_dim_bin)
    data_len = len(data)
    data_inds = np.array(range(data_len))
    np.random.shuffle(data_inds)
    train_inds = data_inds[int(data_len * TEST_RATIO):]
    test_inds = data_inds[:int(data_len * TEST_RATIO)]
    print('The size of the training set: ', train_inds.shape[0])
    print('The size of the testing set: ', test_inds.shape[0])
    # ============= placeholder =============
    with tf.name_scope('input'):
        x_ = tf.placeholder(tf.float32, [None, input_size, input_size, channels * 2],
                            name='x-input')
        y = tf.placeholder(tf.float32, [None, N_KNOBS], name='y-input')
        isTrain = tf.placeholder(tf.bool)
    # ============= Model =============
    logit, prediction = classifier_distinct_64(x_, num_dims=N_KNOBS, isTrain=isTrain)
    classif_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=y, logits=logit)
    acc = calc_accuracy(prediction=prediction, labels=y)
    loss = tf.losses.get_total_loss()
    # ============= Optimization functions =============
    train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
    # ============= summary =============
    cls_loss = tf.summary.scalar('distinct/cls_loss', classif_loss)
    total_loss = tf.summary.scalar('distinct/loss', loss)
    cls_acc = tf.summary.scalar('distinct/acc', acc)
    summary_tf = tf.summary.merge([cls_loss, total_loss, cls_acc])
    # ============= Variables =============
    # Note that this list of variables only includes the weights and biases in the model.
    lst_vars = []
    for v in tf.global_variables():
        lst_vars.append(v)
    # ============= Session =============
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(var_list=lst_vars)
    writer = tf.summary.FileWriter(output_dir + '/train', sess.graph)
    writer_test = tf.summary.FileWriter(output_dir + '/test', sess.graph)
    # ============= Training =============
    train_loss = []
    itr_train = 0
    for epoch in range(EPOCHS):
        total_loss = 0.0
        np.random.shuffle(train_inds)
        num_batch = math.ceil(train_inds.shape[0] / BATCH_SIZE)
        for i in range(0, num_batch):
            start = i * BATCH_SIZE
            xs = data[train_inds[start:start + BATCH_SIZE]]
            ys = labels[train_inds[start:start + BATCH_SIZE]]
            [_, _loss, summary_str] = sess.run([train_step, loss, summary_tf],
                                               feed_dict={x_: xs, isTrain: True, y: ys})
            writer.add_summary(summary_str, itr_train)
            itr_train += 1
            total_loss += _loss
        total_loss /= num_batch
        print("Epoch: " + str(epoch) + " loss: " + str(total_loss) + '\n')
        train_loss.append(total_loss)
        checkpoint_name = os.path.join(output_dir, 'cp_epoch_{}.ckpt'.format(epoch))
        saver.save(sess, checkpoint_name)
        np.save(os.path.join(output_dir, 'logs', 'train_loss.npy'), np.asarray(train_loss))
    # ============= Testing =============
    test_preds = _EMPTY_ARR
    test_loss = []
    itr_test = 0
    total_test_loss = 0.0
    num_batch = math.ceil(test_inds.shape[0] / BATCH_SIZE)
    for i in range(0, num_batch):
        start = i * BATCH_SIZE
        xs = data[test_inds[start:start + BATCH_SIZE]]
        ys = labels[test_inds[start:start + BATCH_SIZE]]
        [_loss, summary_str, _pred] = sess.run([loss, summary_tf, prediction],
                                               feed_dict={x_: xs, isTrain: False, y: ys})
        writer_test.add_summary(summary_str, itr_test)
        itr_test += 1
        total_test_loss += _loss
        test_preds = safe_append(test_preds, _pred, axis=0)
    total_test_loss /= num_batch
    print("Epoch: " + str(epoch) + " Test loss: " + str(total_loss) + '\n')
    test_loss.append(total_test_loss)
    np.save(os.path.join(output_dir, 'logs', 'test_loss.npy'), np.asarray(test_loss))
    np.save(os.path.join(output_dir, 'logs', 'test_preds.npy'), np.asarray(test_preds))
    np.save(os.path.join(output_dir, 'logs', 'test_ys.npy'), np.asarray(labels[test_inds]))
    np.save(os.path.join(output_dir, 'logs', 'test_xs.npy'), np.asarray(data[test_inds]))
    accuracy, precision_per_dim, recall_per_dim = calc_metrics_arr(np.round(test_preds),
                                                                   labels[test_inds],
                                                                   average=None)
    _, precision_micro, recall_micro = calc_metrics_arr(np.round(test_preds), labels[test_inds],
                                                        average='micro')
    _, precision_macro, recall_macro = calc_metrics_arr(np.round(test_preds), labels[test_inds],
                                                        average='macro')
    print('Distinct - accuracy: {:.3f}, '
          'precision: per dim: {}, micro: {:.3f}, macro: {:.3f}, '
          'recall: per dim: {}, micro: {:.3f}, macro: {:.3f}'.format(
              accuracy, precision_per_dim, precision_micro, precision_macro,
              recall_per_dim, recall_micro, recall_macro))
    metrics_dict = {}
    for metric in ['accuracy', 'precision_per_dim', 'precision_micro', 'precision_macro',
                   'recall_per_dim', 'recall_micro', 'recall_macro']:
        metrics_dict.update({'distinct_{}'.format(metric): [eval(metric)]})
    print('Metrics successfully calculated: Distinct')
    return metrics_dict
def calc_realistic(results_dict, config):
    tf.reset_default_graph()
    print('Calculating metrics for: Realistic')
    # ============= Metrics Folder - Realistic =============
    output_dir = os.path.join(config['log_dir'], config['name'], 'test', 'metrics', 'realistic')
    logs_dir = os.path.join(output_dir, 'logs')
    if not os.path.exists(logs_dir):
        os.makedirs(logs_dir)
    # ============= Experiment Parameters =============
    BATCH_SIZE = config['metrics_batch_size']
    EPOCHS = config['metrics_epochs']
    TEST_RATIO = config['metrics_test_ratio']
    channels = config['num_channel']
    input_size = config['input_size']
    NUM_BINS = config['num_bins']
    if 'k_dim' in config.keys():
        N_KNOBS = config['k_dim']
    elif 'w_dim' in config.keys():
        N_KNOBS = config['w_dim']
    else:
        print('Number of knobs not specified. Returning...')
        return {}
    # ============= Data =============
    half_len = len(results_dict['real_imgs'])
    data_real = results_dict['real_imgs']
    fake_inds = np.arange(half_len)
    fake_knob = np.random.randint(low=0, high=N_KNOBS, size=half_len)
    # fake_bin = np.random.randint(low=0, high=NUM_BINS, size=half_len)
    fake_bin = np.random.randint(low=0, high=2, size=half_len)
    fake_bin = fake_bin * (NUM_BINS - 1)
    data_fake = results_dict['fake_t_imgs'][fake_inds, fake_knob, fake_bin]
    data = np.append(data_real, data_fake, axis=0)
    labels = np.append(np.ones(half_len), np.zeros(half_len), axis=0)
    data_len = len(data)
    data_inds = np.array(range(data_len))
    np.random.shuffle(data_inds)
    train_inds = data_inds[int(data_len * TEST_RATIO):]
    test_inds = data_inds[:int(data_len * TEST_RATIO)]
    print('The size of the training set: ', train_inds.shape[0])
    print('The size of the testing set: ', test_inds.shape[0])
    # ============= placeholder =============
    with tf.name_scope('input'):
        x_ = tf.placeholder(tf.float32, [None, input_size, input_size, channels], name='x-input')
        y_ = tf.placeholder(tf.int64, [None], name='y-input')
        isTrain = tf.placeholder(tf.bool)
    # ============= Model =============
    y = tf.one_hot(y_, 2, on_value=1.0, off_value=0.0, axis=-1)
    logit, prediction = classifier_realistic_64(x_, n_label=2, isTrain=isTrain)
    classif_loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logit)
    acc = calc_accuracy(prediction=prediction, labels=y)
    loss = tf.losses.get_total_loss()
    # ============= Optimization functions =============
    train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
    # ============= summary =============
    cls_loss = tf.summary.scalar('realistic/cls_loss', classif_loss)
    total_loss = tf.summary.scalar('realistic/loss', loss)
    cls_acc = tf.summary.scalar('realistic/acc', acc)
    summary_tf = tf.summary.merge([cls_loss, total_loss, cls_acc])
    # ============= Variables =============
    # Note that this list of variables only includes the weights and biases in the model.
    lst_vars = []
    for v in tf.global_variables():
        lst_vars.append(v)
    # ============= Session =============
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(var_list=lst_vars)
    writer = tf.summary.FileWriter(output_dir + '/train', sess.graph)
    writer_test = tf.summary.FileWriter(output_dir + '/test', sess.graph)
    # ============= Training =============
    train_loss = []
    itr_train = 0
    for epoch in range(EPOCHS):
        total_loss = 0.0
        np.random.shuffle(train_inds)
        num_batch = math.ceil(train_inds.shape[0] / BATCH_SIZE)
        for i in range(0, num_batch):
            start = i * BATCH_SIZE
            xs = data[train_inds[start:start + BATCH_SIZE]]
            ys = labels[train_inds[start:start + BATCH_SIZE]]
            [_, _loss, summary_str] = sess.run([train_step, loss, summary_tf],
                                               feed_dict={x_: xs, isTrain: True, y_: ys})
            writer.add_summary(summary_str, itr_train)
            itr_train += 1
            total_loss += _loss
        total_loss /= num_batch
        print("Epoch: " + str(epoch) + " loss: " + str(total_loss) + '\n')
        train_loss.append(total_loss)
        checkpoint_name = os.path.join(output_dir, 'cp_epoch_{}.ckpt'.format(epoch))
        saver.save(sess, checkpoint_name)
        np.save(os.path.join(output_dir, 'logs', 'train_loss.npy'), np.asarray(train_loss))
    # ============= Testing =============
    test_preds = _EMPTY_ARR
    test_loss = []
    itr_test = 0
    total_test_loss = 0.0
    num_batch = math.ceil(test_inds.shape[0] / BATCH_SIZE)
    for i in range(0, num_batch):
        start = i * BATCH_SIZE
        xs = data[test_inds[start:start + BATCH_SIZE]]
        ys = labels[test_inds[start:start + BATCH_SIZE]]
        [_loss, summary_str, _pred] = sess.run([loss, summary_tf, prediction],
                                               feed_dict={x_: xs, isTrain: False, y_: ys})
        writer_test.add_summary(summary_str, itr_test)
        itr_test += 1
        total_test_loss += _loss
        test_preds = safe_append(test_preds, _pred, axis=0)
    total_test_loss /= num_batch
    print("Epoch: " + str(epoch) + " Test loss: " + str(total_loss) + '\n')
    test_loss.append(total_test_loss)
    np.save(os.path.join(output_dir, 'logs', 'test_loss.npy'), np.asarray(test_loss))
    np.save(os.path.join(output_dir, 'logs', 'test_preds.npy'), np.asarray(test_preds))
    np.save(os.path.join(output_dir, 'logs', 'test_ys.npy'), np.asarray(labels[test_inds]))
    np.save(os.path.join(output_dir, 'logs', 'test_xs.npy'), np.asarray(data[test_inds]))
    accuracy, precision, recall = calc_metrics_arr(np.argmax(test_preds, axis=1),
                                                   labels[test_inds])
    print('Realistic - accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}'.format(
        accuracy, precision, recall))
    metrics_dict = {}
    for metric in ['accuracy', 'precision', 'recall']:
        metrics_dict.update({'realistic_{}'.format(metric): [eval(metric)]})
    print('Metrics successfully calculated: Realistic')
    return metrics_dict
def evaluate(epoch, run, mod_name=''):
    incorrect_classifications_val = []
    total_val_loss = 0
    total_val_correct = 0
    best_val_acc = 0
    epoch_classifications_val = []
    run.model.eval()
    with torch.no_grad():
        for batch_number, (images, labels, paths) in enumerate(run.val_loader):
            if run.grayscale:
                images = torch.unsqueeze(images, 1).double()  # add channel dimension (grayscale)
            else:
                images = images.float().permute(0, 4, 1, 2, 3).float()
            labels = labels.long()
            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda()
            preds = run.model(images)            # pass batch
            loss = run.criterion(preds, labels)  # calculate loss
            total_val_loss += loss.item()
            num_correct = get_num_correct(preds, labels)
            total_val_correct += num_correct
            run.experiment.log_metric(mod_name + "Val batch accuracy",
                                      num_correct / len(labels) * 100,
                                      step=run.log_number_val)
            run.experiment.log_metric(mod_name + "Avg val batch loss", loss.item(),
                                      step=run.log_number_val)
            run.log_number_val += 1
            # print('Val: Batch number:', batch_number, 'Num correct:', num_correct,
            #       'Accuracy:', "{:.2%}".format(num_correct / len(labels)), 'Loss:', loss.item())
            # print_mistakes(preds, labels, paths)
            incorrect_classifications_val.append(get_mistakes(preds, labels, paths))
            for prediction in zip(preds, labels, paths):
                epoch_classifications_val.append(prediction)
    epoch_accuracy = calc_accuracy(epoch_classifications_val)
    run.experiment.log_metric(mod_name + "Val epoch accuracy", epoch_accuracy, step=epoch)
    run.experiment.log_metric(mod_name + "Avg val epoch loss",
                              total_val_loss / batch_number, step=epoch)
    print('Val Epoch:', epoch, 'num correct:', total_val_correct, 'Accuracy:',
          str(epoch_accuracy) + '%')
    is_best = (epoch_accuracy > run.best_val_acc) | (
        (epoch_accuracy >= run.best_val_acc) &
        (total_val_loss / batch_number < run.best_val_loss))
    if is_best:
        print("Best run so far! Updating params...")
        run.best_val_acc = epoch_accuracy
        run.best_val_loss = total_val_loss / batch_number
        run.best_model_preds = epoch_classifications_val
        run.best_model_mistakes = incorrect_classifications_val
    save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': run.model.state_dict(),
            'best_acc1': run.best_val_acc,
            'optimizer': run.optimizer.state_dict(),
        }, is_best)
    # Step lr_scheduler
    run.lr_scheduler.step()
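# train()/evaluate() above pass a list of (preds, label, path) tuples to calc_accuracy and print
# the result with a '%' suffix, so the helper apparently returns a percentage. A minimal sketch
# under those assumptions; the names below are hypothetical.
import torch

def calc_accuracy_sketch(epoch_classifications):
    """Percentage of entries whose argmax prediction matches the label (assumed layout)."""
    correct = 0
    for pred, label, _path in epoch_classifications:
        if torch.argmax(pred).item() == label.item():
            correct += 1
    return correct / len(epoch_classifications) * 100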
X, Y = get_classification_data(sd=10, m=50)
adaboost = AdaBoost(n_models=20)
adaboost.fit(X, Y)
print("This is the final prediction:", adaboost.final_prediction(X))
print("This is the original labels:", Y)
print(adaboost.final_prediction(X) == Y)
print("Shape:", adaboost.final_prediction(X).shape)
print("type:", type(adaboost.final_prediction(X)))
visualise_predictions(adaboost.final_prediction, X)
print(f'accuracy: {calc_accuracy(adaboost.final_prediction(X), Y)}')
show_data(X, Y)
print("Evaluate for a point: ", adaboost.final_prediction(np.array([[1, 1]])))

# %%
import sklearn.ensemble

adaBoost = sklearn.ensemble.AdaBoostClassifier()
adaBoost.fit(X, Y)
predictions = adaBoost.predict(X)
calc_accuracy(predictions, Y)
# visualise_predictions(adaBoost.predict, X, Y)
# show_data(X, Y)
print("Adaboosts sklearn predictions:", predictions)
print(predictions.shape)
print(type(predictions))
print(f'accuracy: {calc_accuracy(predictions, Y)}')
# %%
if options.debug:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.CRITICAL)

rules = []
training_file = args[0]
training_sents = utils.read_tokens(training_file)
test_file = args[1]
test_sents = utils.read_tokens(test_file)

model = create_model(training_sents)

sents = utils.read_tokens(training_file)
predictions = predict_tags(sents, model)
accuracy = utils.calc_accuracy(training_sents, predictions)
print "Accuracy in training before rules applied [%s sentences]: %s" % (len(sents), accuracy)

rules = template1(training_sents, predictions)
reduced_rules = reduced_rule1(rules, training_sents, predictions, accuracy)
new_predictions = apply_rule1(predictions, reduced_rules)
accuracy = utils.calc_accuracy(training_sents, new_predictions)
print "Accuracy in training after rule1 applied [%s sentences]: %s" % (len(sents), accuracy)

test_sents1 = utils.read_tokens(test_file)
def main():
    X_train = np.loadtxt('../datasets/gisette/gisette_train_data.txt')
    y_train = np.loadtxt('../datasets/gisette/gisette_train_labels.txt', dtype=int)
    X_test = np.loadtxt('../datasets/gisette/gisette_valid_data.txt')
    y_test = np.loadtxt('../datasets/gisette/gisette_valid_labels.txt', dtype=int)
    y_train = trans(y_train)
    y_test = trans(y_test)
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    X_train, y_train = X_train[:200], y_train[:200]
    X_test, y_test = X_test[:100], y_test[:100]
    n_train = X_train.shape[0]
    n0_list = [1, 4, 16, 64]
    M_list = [1, 4, 16, 64, 256]
    r = 1
    accuracy = -1
    n0_best = -1
    M_best = -1
    n_folds = 3  # requirement: 10
    classes = 2
    # train:
    #   10-fold CV
    #   n0 in {1, 4, 16, 64}
    #   M in {1, 4, 16, 64, 256}
    #   get the best pair of (n0, M)
    print('------------------now begin--------------------------')
    for n0 in n0_list:
        for M in M_list:
            cur_accuracy_list = []
            skf = StratifiedKFold(n_splits=n_folds)
            cv = [(t, v) for (t, v) in skf.split(range(n_train), y_train)]
            for k in range(n_folds):
                train_idx, val_idx = cv[k]
                comp_rf = CompRF(n0, M, r, "Classification", classes)
                time_start = time.time()
                y_predict = comp_rf.train_then_predict(X_train[train_idx], y_train[train_idx],
                                                       X_train[val_idx])
                time_end = time.time()
                cur_accuracy = calc_accuracy(y_train[val_idx], y_predict)
                cur_accuracy_list.append(cur_accuracy)
                print("(n0={0}, M={1}, fold={3}): {2:.2f}% [time={4:.2f}]".format(
                    n0, M, cur_accuracy * 100, k, time_end - time_start))
            cur_accuracy = sum(cur_accuracy_list) / len(cur_accuracy_list)
            print("(n0={0}, M={1}, average): {2:.2f}%".format(n0, M, cur_accuracy * 100))
            if cur_accuracy > accuracy:
                accuracy = cur_accuracy
                n0_best = n0
                M_best = M
    print("best_accuracy={0}%, best_n_0={1}, best_M={2}".format(accuracy * 100, n0_best, M_best))
    # test:
    comp_rf = CompRF(n0_best, M_best, r)
    y_predict = comp_rf.train_then_predict(X_train, y_train, X_test)
    accuracy = calc_accuracy(y_test, y_predict)
    print("accuracy: ", accuracy)
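# Here calc_accuracy(y_true, y_predict) is compared fold by fold, which suggests a plain
# elementwise comparison of label vectors. A minimal NumPy sketch under that assumption:
import numpy as np

def calc_accuracy_sketch(y_true, y_predict):
    """Fraction of matching labels between two equal-length 1-D arrays."""
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return float(np.mean(y_true == y_predict))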
def train(self, dataset_path, num_classes, batch_size, lr_base, lr_decay, step_size,
          max_iteration, pretrained_model=None):
    '''
    @description: Build the VGG-Net16 network, train the model, write logs during training,
                  and save the trained model.
    @params:
        - dataset_path: path containing the txt files that list the training and validation samples
        - num_classes: number of classes
        - batch_size: number of samples fed to the network per training step
        - lr_base: initial learning rate
        - lr_decay: learning-rate decay factor
        - step_size: learning-rate decay speed; lr = lr_base * lr_decay ^ (global_step / step_size)
        - max_iteration: maximum number of iterations
        - pretrained_model: path to a pretrained model
    @return: None
    '''
    train_file_name = dataset_path + 'train_list.txt'
    valid_file_name = dataset_path + 'valid_list.txt'
    log_dir = './log/vgg'
    model_dir = './model/vgg'
    vgg = VGG(weight_decay=0.0005, keep_prob=0.5, num_classes=num_classes)
    train_summary_list = []
    valid_summary_list = []
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        with tf.name_scope('input'):
            # read training data through input queues
            train_image, train_label = get_batch(train_file_name, self._image_H,
                                                 self._image_W, batch_size)
            valid_image, valid_label = get_batch(valid_file_name, self._image_H,
                                                 self._image_W, 250, is_train=False)
            x = tf.placeholder(tf.float32, [None, self._image_H, self._image_W,
                                            self._image_channels], name='x')
            y = tf.placeholder(tf.int64, [None], name='y')
        # loss, accuracy, train_op
        logits, _ = vgg.vgg16(x)
        loss = utils.calc_loss(logits, y)
        accuracy = utils.calc_accuracy(logits, y)
        train_op, learning_rate, global_step = utils.optimizer(lr_base, step_size, lr_decay, loss)
        # summary
        train_summary_list.append(tf.summary.scalar('train_loss', loss))
        valid_summary_list.append(tf.summary.scalar('valid_loss', loss))
        train_summary_list.append(tf.summary.scalar('train_accuracy', accuracy))
        valid_summary_list.append(tf.summary.scalar('test_accuracy', accuracy))
        train_summary_list.append(tf.summary.scalar('learning rate', learning_rate))
        valid_summary_list.append(tf.summary.scalar('learning rate', learning_rate))
        for var in tf.trainable_variables():
            valid_summary_list.append(tf.summary.histogram(var.name, var))
        train_summary = tf.summary.merge(train_summary_list)
        valid_summary = tf.summary.merge(valid_summary_list)
        # session
        saver = tf.train.Saver(max_to_keep=50)
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                              log_device_placement=True)) as sess:
            train_writer = tf.summary.FileWriter(log_dir + 'train', sess.graph)
            test_writer = tf.summary.FileWriter(log_dir + 'valid')
            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()
            # start the queue-runner threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # load a pretrained model, if given
            if pretrained_model != None:
                ckpt = tf.train.get_checkpoint_state(pretrained_model)
                print('Restoring pretrained model: %s' % ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            train_time = 0
            for step in range(max_iteration):
                # export a frozen graph (disabled)
                # graph_def = tf.get_default_graph().as_graph_def()
                # output_graph_def = graph_util.convert_variables_to_constants(
                #     sess, graph_def, ['input/x', 'deepid/Relu'])
                # with tf.gfile.GFile(model_dir + 'deepid_model.pb', 'wb') as file:
                #     file.write(output_graph_def.SerializeToString())
                # break
                start_time = time.time()
                image, label = sess.run([train_image, train_label])
                _, train_loss, summary_str, train_step = sess.run(
                    [train_op, loss, train_summary, global_step],
                    feed_dict={x: image, y: label})
                train_writer.add_summary(summary_str, global_step=train_step)
                train_writer.flush()
                duration = time.time() - start_time
                train_time += duration
                # validate and save the model
                if step % 1000 == 0 or (step + 1) == max_iteration:
                    image, label = sess.run([valid_image, valid_label])
                    lr, summary_str, valid_loss, validation_accuracy, train_step = sess.run(
                        [learning_rate, valid_summary, loss, accuracy, global_step],
                        feed_dict={x: image, y: label})
                    test_writer.add_summary(summary_str, global_step=train_step)
                    test_writer.flush()
                    print('Step %d: train loss = %.3f, valid loss = %.3f, '
                          'valid accuracy = %.3f%%, lr = %.6f (%.3f sec)' %
                          (train_step, train_loss, valid_loss, validation_accuracy,
                           lr, train_time))
                    saver.save(sess, model_dir + 'model.ckpt', global_step=train_step)
                    with open(log_dir + 'valid_result.txt', 'at') as file_writer:
                        file_writer.write('%d\t%.3f%%\t%.5f\t%d\r\n' %
                                          (train_step, validation_accuracy, lr, train_time))
            # stop the queue-runner threads
            coord.request_stop()
            coord.join(threads)
def train(data_loader, model, optimizer, epoch):
    losses = AverageMeter()
    accuracy = AverageMeter()
    model.train()
    global iteration
    tq = tqdm(data_loader, desc="Train progress: Ep {}".format(epoch))
    for idx, (input_seq, target, _) in enumerate(tq):
        tic = time.time()
        input_seq = input_seq.to(cuda)
        target = target.to(cuda)
        B = input_seq.size(0)
        output, _ = model(input_seq)
        # visualize
        if (iteration == 0) or (iteration == args.print_freq):
            if B > 2:
                input_seq = input_seq[0:2, :]
            writer_train.add_image(
                'input_seq',
                de_normalize(vutils.make_grid(
                    input_seq[:, :3, ...].transpose(2, 3).contiguous().view(
                        -1, 3, args.img_dim, args.img_dim),
                    nrow=args.num_seq * args.seq_len)),
                iteration)
        del input_seq
        [_, N, D] = output.size()
        output = output.view(B * N, D)
        target = target.repeat(1, N).view(-1)
        loss = criterion(output, target)
        acc = calc_accuracy(output, target)
        del target
        losses.update(loss.item(), B)
        accuracy.update(acc.item(), B)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_weight = 0.0
        decay_weight = 0.0
        for m in model.parameters():
            if m.requires_grad:
                decay_weight += m.norm(2).data
            total_weight += m.norm(2).data
        tq_stats = {
            'loss': losses.local_avg,
            'acc': accuracy.local_avg,
            'decay_wt': decay_weight.item(),
            'total_wt': total_weight.item(),
        }
        tq.set_postfix(tq_stats)
        if idx % args.print_freq == 0:
            writer_train.add_scalar('local/loss', losses.val, iteration)
            writer_train.add_scalar('local/accuracy', accuracy.val, iteration)
            iteration += 1
    return losses.local_avg, accuracy.local_avg
    gt_labels = np.array(gt_labels)
    return logits, gt_labels


if __name__ == '__main__':
    model = NaiveBayes()
    tokenizer = stemmedTokenizer
    model.create_dict(json_reader("col774_yelp_data/train.json"), tokenizer)
    model.train(json_reader("col774_yelp_data/train.json"), tokenizer)
    # outputs = model.predict(json_reader("col774_yelp_data/test.json"), tokenizer)
    # f = open("outputs_stemmed_test.pickle", "wb")
    # pickle.dump(outputs, f)
    # f.close()
    logits, gt_labels = _load_object("outputs_stemmed_test.pickle")
    conf_matrix = create_confusion_matrix(logits, gt_labels)
    print(calc_accuracy(logits, gt_labels) * 100)
    print(conf_matrix)
    plot_confusion_matrix(conf_matrix, model.classes)
    probs = logits_to_prob_vector(logits)
    plot_roc_curve(logits, gt_labels)
logging.info("# dev evaluation") sess.run(dev_init_opt) dev_results = [] dev_labels = [] cnt=0 for _ in range(num_dev_batches): # cnt+=1 # print(cnt) tmp_pred,tmp_target = sess.run([pred_eval,ys]) dev_results.extend(tmp_pred) dev_labels.extend(tmp_target) # print('DEV3') # print(len(dev_results)) # print(len(dev_labels)) accuracy = calc_accuracy(dev_results,dev_labels) dev_history.append(accuracy) if accuracy > dev_best: dev_best = accuracy stop_times = 0 else: stop_times += 1 if stop_times > cfg.patience: logging.info('The model did not improve after{} times, you have got an excellent' +'enough model.') break logging.info('# The dev accuracy is:{}'.format(accuracy))
def train(data_loader, model, optimizer, epoch):
    losses = AverageMeter()
    model.train()
    global iteration
    dissimilarity_score_dict = {}
    target_dict = {}
    number_of_chunks_dict = {}
    for idx, (video_seq, audio_seq, target, audiopath) in enumerate(data_loader):
        tic = time.time()
        video_seq = video_seq.to(cuda)
        audio_seq = audio_seq.to(cuda)
        target = target.to(cuda)
        B = video_seq.size(0)
        vid_out = model.module.forward_lip(video_seq)
        aud_out = model.module.forward_aud(audio_seq)
        vid_class = model.module.final_classification_lip(vid_out)
        aud_class = model.module.final_classification_aud(aud_out)
        del video_seq
        del audio_seq
        loss1 = calc_loss(vid_out, aud_out, target, args.hyper_param)
        loss2 = criterion(vid_class, target.view(-1))
        loss3 = criterion(aud_class, target.view(-1))
        acc = calc_accuracy(vid_out, aud_out, target, args.threshold)
        loss = loss1 + loss2 + loss3
        losses.update(loss.item(), B)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        for batch in range(B):
            vid_name = audiopath[batch].split('/')[-2]
            dist = torch.dist(vid_out[batch, :].view(-1), aud_out[batch, :].view(-1), 2)
            tar = target[batch, :].view(-1).item()
            if dissimilarity_score_dict.get(vid_name):
                dissimilarity_score_dict[vid_name] += dist
                number_of_chunks_dict[vid_name] += 1
            else:
                dissimilarity_score_dict[vid_name] = dist
                number_of_chunks_dict[vid_name] = 1
            if target_dict.get(vid_name):
                pass
            else:
                target_dict[vid_name] = tar
        if idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.local_avg:.4f})\t'.format(
                      epoch, idx, len(data_loader), time.time() - tic, loss=losses))
            total_weight = 0.0
            decay_weight = 0.0
            for m in model.parameters():
                if m.requires_grad:
                    decay_weight += m.norm(2).data
                total_weight += m.norm(2).data
            print('Decay weight / Total weight: %.3f/%.3f' % (decay_weight, total_weight))
            writer_train.add_scalar('local/loss', losses.val, iteration)
            iteration += 1
    avg_score_real, avg_score_fake = get_scores(dissimilarity_score_dict,
                                                number_of_chunks_dict, target_dict)
    return losses.local_avg, avg_score_real, avg_score_fake
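# In the audio-visual loop above, calc_accuracy(vid_out, aud_out, target, args.threshold) takes
# the two embeddings plus a distance threshold, so it presumably thresholds the pairwise L2
# distance and compares the resulting decision with the binary target. This is only a sketch of
# that idea; which side of the threshold maps to which label is an assumption.
import torch

def calc_accuracy_sketch(vid_out, aud_out, target, threshold):
    """Fraction of samples whose thresholded embedding distance matches the binary target."""
    B = vid_out.size(0)
    dists = torch.norm(vid_out.view(B, -1) - aud_out.view(B, -1), p=2, dim=1)
    pred = (dists > threshold).long()  # assumed convention: large distance -> label 1
    return (pred == target.view(-1).long()).float().mean()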
action="store_true", help="turn on debug mode") (options, args) = parser.parse_args() if len(args) != 2: parser.error("Please provide required arguments") if options.debug: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.CRITICAL) training_file = args[0] training_sents = utils.read_tokens(training_file) test_file = args[1] test_sents = utils.read_tokens(test_file) model = create_model(training_sents) ## read sentences again because predict_tags(...) rewrites the tags sents = utils.read_tokens(training_file) predictions = predict_tags(sents, model) accuracy = utils.calc_accuracy(training_sents, predictions) print "Accuracy in training [%s sentences]: %s" % (len(sents), accuracy) ## read sentences again because predict_tags(...) rewrites the tags sents = utils.read_tokens(test_file) predictions = predict_tags(sents, model) accuracy = utils.calc_accuracy(test_sents, predictions) print "Accuracy in training [%s sentences]: %s" % (len(sents), accuracy)
def eval_full_spectrograms(dataset, model_id, predictions_path, pred_threshold=0.5,
                           overlap_threshold=0.1, smooth=True, in_seconds=False,
                           use_call_bounds=False, min_call_length=10, visualize=False,
                           hierarchical_model=True):
    """
    After saving predictions for the test set of full spectrograms, calculate evaluation metrics
    on each of these spectrograms. First we load the sigmoid (0, 1) predictions and use the given
    threshold and smoothing flag to convert them into binary 0/1 predictions for each time slice.
    Then we convert the time-slice predictions into full-call (start, end) time predictions so
    that we can calculate elephant-call-specific evaluation metrics. Below are the metrics that
    are calculated individually for each full spectrogram, as well as across all the test
    spectrograms.

    Metrics:
    - Call Prediction True Positives
    - Call Prediction False Positives
    - Call Recall True Positives
    - Call Recall False Negatives
    - F-score
    - Accuracy
    - Old Call Precision --- to be implemented
    - Old Call Recall --- to be implemented
    """
    # Maps spectrogram ids to a dictionary of results for each spectrogram.
    # Additionally includes a key "summary" that aggregates statistics
    # over the entire test set of spectrograms.
    results = {}
    results['summary'] = {'true_pos': 0,
                          'false_pos': 0,
                          'true_pos_recall': 0,
                          'false_neg': 0,
                          'f_score': 0,
                          'accuracy': 0}
    # Used to track the number of total calls for averaging aggregated statistics
    num_preds = 0
    num_gt = 0
    for data in dataset:
        spectrogram = data[0]
        labels = data[1]
        gt_call_path = data[2]
        # Get the spec id
        tags = gt_call_path.split('/')
        tags = tags[-1].split('_')
        data_id = tags[0] + '_' + tags[1]
        print("Generating Prediction for:", data_id)
        predictions = np.load(os.path.join(predictions_path, model_id, data_id + '.npy'))
        binary_preds, smoothed_predictions = get_binary_predictions(predictions,
                                                                    threshold=pred_threshold,
                                                                    smooth=smooth)
        # Process the predictions to get predicted elephant calls.
        # Figure out a better way to try different combinations of this.
        # Note that processed_preds zeros out predictions that are not long
        # enough to be an elephant call.
        predicted_calls, processed_preds = find_elephant_calls(binary_preds,
                                                               min_call_length=min_call_length,
                                                               in_seconds=in_seconds)
        print("Num predicted calls", len(predicted_calls))
        # Use the calls as defined in the original hand-labeled file.
        # This avoids issues of overlapping calls appearing as single
        # very large calls in the gt labeling.
        if use_call_bounds:
            print("Using CSV file with ground truth call start and end times")
            gt_calls = process_ground_truth(gt_call_path, in_seconds=in_seconds)
        else:
            print("Using spectrogram labeling to generate GT calls")
            # We should never compute this in seconds.
            # Also, keep all the calls, i.e. set min_call_length = 0.
            gt_calls, _ = find_elephant_calls(labels, min_call_length=0)
        print("Number of ground truth calls", len(gt_calls))
        # Visualize the predictions around the gt calls (not essential)
        if visualize:
            visual_full_recall(spectrogram, smoothed_predictions, labels, processed_preds)
        # Precision metrics:
        # Call Prediction True Positives / Call Prediction False Positives
        true_pos, false_pos = call_prec_recall(predicted_calls, gt_calls,
                                               threshold=overlap_threshold, is_truth=False,
                                               spectrogram=spectrogram, preds=binary_preds,
                                               gt_labels=labels)
        # Recall metrics:
        # Call Recall True Positives / Call Recall False Negatives
        true_pos_recall, false_neg = call_prec_recall(gt_calls, predicted_calls,
                                                      threshold=overlap_threshold, is_truth=True)
        f_score = f1_score(labels, binary_preds)
        accuracy = calc_accuracy(binary_preds, labels)
        results[data_id] = {'true_pos': true_pos,
                            'false_pos': false_pos,
                            'true_pos_recall': true_pos_recall,
                            'false_neg': false_neg,
                            'f_score': f_score,
                            'predictions': smoothed_predictions,
                            'binary_preds': processed_preds,
                            'accuracy': accuracy}
        # If doing hierarchical modeling, save the model_0 predictions
        # specifically for visualization!
        if hierarchical_model:
            model_0_predictions = np.load(os.path.join(predictions_path, model_id, 'Model_0',
                                                       data_id + '.npy'))
            _, model_0_smoothed_predictions = get_binary_predictions(model_0_predictions,
                                                                     threshold=pred_threshold,
                                                                     smooth=smooth)
            results[data_id]['model_0_predictions'] = model_0_smoothed_predictions
        # Update summary stats
        results['summary']['true_pos'] += len(true_pos)
        results['summary']['false_pos'] += len(false_pos)
        results['summary']['true_pos_recall'] += len(true_pos_recall)
        results['summary']['false_neg'] += len(false_neg)
        results['summary']['f_score'] += f_score
        results['summary']['accuracy'] += accuracy
    # Calculate averaged statistics
    results['summary']['f_score'] /= len(dataset)
    results['summary']['accuracy'] /= len(dataset)
    return results