Esempio n. 1
0
def external_entry_point(hypo_to_use,
                         path_for_data_set=r"dataset\HC_Body_Temperature.txt",
                         num_of_runs=8,
                         length_of_round=100):
    """Run the AdaBoost experiment and print average accuracies per round.

    The data set is read once from ``path_for_data_set``. For every value
    ``i`` in ``1..num_of_runs`` the data set is re-shuffled
    ``length_of_round`` times; each time ``AdaBoost_Algo`` is called with the
    shuffled train split, ``i`` and ``hypo_to_use``, and the returned
    hypothesis set is scored on both splits with ``utils.calc_accuracy``.
    The per-round averages (``utils.calc_avg``) are printed to stdout.

    Args:
        hypo_to_use: forwarded unchanged to ``AdaBoost_Algo`` and
            ``utils.calc_accuracy``.
        path_for_data_set: location handed to
            ``data_prep.get_data_set_from_path``.
        num_of_runs: largest value of ``i`` passed to ``AdaBoost_Algo``
            (presumably the number of hypotheses to pick -- confirm there).
        length_of_round: number of shuffle/train/score repetitions averaged
            for each ``i``.
    """
    samples = data_prep.get_data_set_from_path(path_for_data_set)

    for round_no in range(1, num_of_runs + 1):
        train_scores, test_scores = [], []

        for _ in range(length_of_round):
            # A fresh shuffle for every repetition.
            train_split, test_split = data_prep.shuffle_dataset(samples)
            hypotheses = AdaBoost_Algo(train_split, round_no, hypo_to_use)

            train_scores.append(
                utils.calc_accuracy(train_split, hypotheses, hypo_to_use))
            test_scores.append(
                utils.calc_accuracy(test_split, hypotheses, hypo_to_use))

        avg_train = utils.calc_avg(train_scores)
        print("avg TRAIN acc for round: ", round_no, " is: ",
              "%.3f" % avg_train)
        avg_test = utils.calc_avg(test_scores)
        print("avg TEST acc for round: ", round_no, " is: ",
              "%.3f" % avg_test)
 def log_confusion_matrices(self, print_matrix = True, mod_name = ''):
     """Log three confusion matrices for ``self.best_model_preds``.

     The per-clip matrix comes from ``generate_cm`` (optionally printed) and
     is logged as a nested list. The two aggregated matrices are built by
     ``confusion_matrix`` from what ``calc_accuracy`` exports for the
     'sum_predictions' and 'majority_vote' methods. ``mod_name`` is
     prepended to every title and file name.
     """
     clip_cm = generate_cm(self.best_model_preds)
     if print_matrix:
         print(clip_cm)
     self.experiment.log_confusion_matrix(
         labels=self.train_loader.dataset.dataset.classes,
         matrix=clip_cm.tolist(),
         title=mod_name + "Confusion matrix, individual clips",
         file_name=mod_name + "individual_clips.json")

     # (aggregation method, title suffix, file name suffix)
     aggregated_variants = (
         ('sum_predictions', "Confusion matrix, sum predictions", "sum_predictions.json"),
         ('majority_vote', "Confusion matrix, majority vote", "majority_vote.json"),
     )
     for method, title, file_name in aggregated_variants:
         exported = calc_accuracy(self.best_model_preds, method=method, export_for_cm=True)
         agg_cm = confusion_matrix(*exported)
         self.experiment.log_confusion_matrix(
             labels=self.train_loader.dataset.dataset.classes,
             matrix=agg_cm,
             title=mod_name + title,
             file_name=mod_name + file_name)
Esempio n. 3
0
def train(n_epochs=50,
          lbcnn_depth=2,
          learning_rate=1e-2,
          momentum=0.9,
          weight_decay=1e-4,
          lr_scheduler_step=5):
    """Train an ``Lbcnn`` model on MNIST and checkpoint the best epoch.

    The model is optimised with Nesterov SGD over its trainable parameters
    and a ``StepLR`` schedule. After every epoch the accuracy on the train
    and test loaders is printed; whenever the *train* accuracy improves, the
    tuple ``(lbcnn_depth, state_dict)`` is saved to ``MODEL_PATH``.

    Args:
        n_epochs: number of passes over the training loader.
        lbcnn_depth: ``depth`` argument of ``Lbcnn``; stored with the
            checkpoint so the model can be rebuilt.
        learning_rate: initial SGD learning rate.
        momentum: SGD momentum.
        weight_decay: SGD weight decay.
        lr_scheduler_step: ``step_size`` of the ``StepLR`` scheduler, i.e.
            the number of epochs between learning-rate decays.
    """
    start = time.time()
    models_dir = os.path.dirname(MODEL_PATH)
    # dirname() is '' when MODEL_PATH has no directory part; makedirs('')
    # would raise, so only create a directory when there is one to create.
    if models_dir and not os.path.exists(models_dir):
        os.makedirs(models_dir)

    train_loader = get_mnist_loader(train=True)
    test_loader = get_mnist_loader(train=False)
    model = Lbcnn(depth=lbcnn_depth)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model = model.cuda()
    best_accuracy = 0.
    criterion = nn.CrossEntropyLoss()
    # Only parameters with requires_grad=True are handed to the optimizer.
    optimizer = optim.SGD(filter(lambda param: param.requires_grad,
                                 model.parameters()),
                          lr=learning_rate,
                          momentum=momentum,
                          weight_decay=weight_decay,
                          nesterov=True)

    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=lr_scheduler_step)

    for epoch in range(n_epochs):
        for batch_id, (inputs, labels) in enumerate(
                tqdm(train_loader, desc="Epoch {}/{}".format(epoch,
                                                             n_epochs))):
            if use_cuda:
                inputs = inputs.cuda()
                labels = labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        accuracy_train = calc_accuracy(model, loader=train_loader)
        accuracy_test = calc_accuracy(model, loader=test_loader)
        print("Epoch {} accuracy: train={:.3f}, test={:.3f}".format(
            epoch, accuracy_train, accuracy_test))
        # Model selection is driven by the train accuracy, not the test one.
        if accuracy_train > best_accuracy:
            best_accuracy = accuracy_train
            torch.save((lbcnn_depth, model.state_dict()), MODEL_PATH)
        # Let the scheduler keep its own epoch counter: passing `epoch=` is
        # deprecated and, with the zero-based index, delayed every decay by
        # one epoch.
        scheduler.step()
    train_duration_sec = int(time.time() - start)
    print('Finished Training. Total training time: {} sec'.format(
        train_duration_sec))
    def log_cv_confusion_matrices(self, mod_name = ''):
        """Log confusion matrices summed over all cross-validation folds.

        For every entry of ``self.best_folds_model_preds`` three matrices are
        built: the per-clip matrix from ``generate_cm`` and the
        'sum_predictions' / 'majority_vote' matrices from
        ``confusion_matrix(*calc_accuracy(..., export_for_cm=True))``. Each
        family is summed element-wise across folds and logged to
        ``self.experiment``; ``mod_name`` prefixes titles and file names.

        The number of folds and the matrix size are taken from the data
        rather than being fixed at 10 folds of 6x6, so fewer folds no longer
        add uninitialised memory to the sums and more folds no longer raise
        IndexError. Nothing is logged when there are no folds.
        """
        per_clip, summed_preds, majority = [], [], []
        for preds in self.best_folds_model_preds:
            # Cast to float so the logged sums keep the float64 dtype the
            # original pre-allocated buffers had.
            per_clip.append(np.asarray(generate_cm(preds), dtype=float))
            summed_preds.append(np.asarray(
                confusion_matrix(*calc_accuracy(preds, method = 'sum_predictions', export_for_cm=True)),
                dtype=float))
            majority.append(np.asarray(
                confusion_matrix(*calc_accuracy(preds, method = 'majority_vote', export_for_cm=True)),
                dtype=float))

        if not per_clip:
            # No folds recorded: there is nothing meaningful to sum or log.
            return

        # (per-fold matrices, title suffix, file name suffix)
        families = (
            (per_clip, "Confusion matrix, individual clips", "individual_clips.json"),
            (summed_preds, "Confusion matrix, sum predictions", "sum_predictions.json"),
            (majority, "Confusion matrix, majority vote", "majority_vote.json"),
        )
        for fold_matrices, title, file_name in families:
            cm = np.sum(fold_matrices, axis=0)
            self.experiment.log_confusion_matrix(
                labels=self.train_loader.dataset.dataset.classes,
                matrix=cm,
                title=mod_name + title,
                file_name=mod_name + file_name)
Esempio n. 5
0
def validate(data_loader, model):
    """Evaluate ``model`` on ``data_loader`` and return (avg loss, avg acc).

    Runs under ``torch.no_grad()`` with the model in eval mode. Each batch
    yields ``(input_seq, target, <ignored>)``; the model output is flattened
    from ``(B, N, D)`` to ``(B * N, D)`` and the target is repeated ``N``
    times to match before the module-level ``criterion`` and
    ``calc_accuracy`` are applied. Running averages are shown on the tqdm
    bar and printed once at the end.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.eval()

    with torch.no_grad():
        progress = tqdm(data_loader, desc="Val progress: ")
        for input_seq, target, _ in progress:
            input_seq = input_seq.to(cuda)
            target = target.to(cuda)
            batch_size = input_seq.size(0)
            output, _ = model(input_seq)

            # Flatten the N per-sample outputs into the batch dimension and
            # replicate each target so the two line up row by row.
            _, n_out, dim = output.size()
            flat_output = output.view(batch_size * n_out, dim)
            flat_target = target.repeat(1, n_out).view(-1)

            batch_loss = criterion(flat_output, flat_target)
            batch_acc = calc_accuracy(flat_output, flat_target)

            loss_meter.update(batch_loss.item(), batch_size)
            acc_meter.update(batch_acc.item(), batch_size)

            progress.set_postfix({
                'loss': loss_meter.avg,
                'acc': acc_meter.avg,
            })

    summary = ('Val - Loss {loss.avg:.4f}\t'
               'Acc: {acc.avg:.4f} \t')
    print(summary.format(loss=loss_meter, acc=acc_meter))
    return loss_meter.avg, acc_meter.avg
Esempio n. 6
0
def train():
    """Run one training pass over ``train_dataloader`` with the global ``net``.

    Every batch is moved to CUDA, the loss from the module-level
    ``criterion`` is back-propagated and the optimizer stepped; the loss is
    also sent to ``tb``. Returns a dict with the mean of the per-batch losses
    (``'loss'``) and the accuracy averaged with each batch weighted by its
    number of answers (``'acc'``).
    """
    net.train()
    batch_losses = []
    per_sample_acc = []

    for image, question, answer in tqdm(train_dataloader, desc='train'):
        image = image.cuda()
        question = question.cuda()
        answer = answer.cuda()

        pred, _ = net(image, question)
        loss = criterion(pred, answer)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        tb.add_scalar('train_loss', loss)
        tb.iter()

        batch_losses.append(loss.item())
        batch_acc = calc_accuracy(pred, answer)
        # Repeat the batch accuracy once per sample so the final mean is
        # weighted by batch size.
        per_sample_acc.extend([batch_acc] * answer.size(0))

    mean_loss = sum(batch_losses) / len(batch_losses)
    mean_acc = sum(per_sample_acc) / len(per_sample_acc)
    return {
        'loss': mean_loss,
        'acc': mean_acc,
    }
def train(epoch, run, mod_name=''):
    """Train ``run.model`` for one epoch and log metrics to ``run.experiment``.

    Args:
        epoch: epoch index, used as the ``step`` of the epoch-level metrics
            and in the printed summary.
        run: object providing ``model``, ``train_loader``, ``grayscale``,
            ``optimizer``, ``criterion``, ``experiment`` and the mutable
            counter ``log_number_train`` (incremented once per batch).
        mod_name: prefix added to every metric name.

    Per batch the accuracy (in percent) and loss are logged; per epoch the
    accuracy returned by ``calc_accuracy`` over all collected
    ``(prediction, label, path)`` triples and the mean batch loss are
    logged. The function returns nothing.
    """
    total_train_loss = 0
    total_train_correct = 0
    num_batches = 0
    incorrect_classifications_train = []
    epoch_classifications_train = []
    run.model.train()
    for images, labels, paths in run.train_loader:
        num_batches += 1

        if run.grayscale:
            # Insert the missing channel dimension for grayscale input.
            images = torch.unsqueeze(images, 1).double()
        else:
            # Move the last axis to position 1; presumably channels-last ->
            # channels-first, confirm against the loader.
            images = images.float().permute(0, 4, 1, 2, 3)
        labels = labels.long()

        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()

        # Gradients accumulate across backward() calls, so clear them first.
        run.optimizer.zero_grad()
        preds = run.model(images)

        loss = run.criterion(preds, labels)
        total_train_loss += loss.item()
        loss.backward()
        run.optimizer.step()

        num_correct = get_num_correct(preds, labels)
        total_train_correct += num_correct

        run.experiment.log_metric(mod_name + "Train batch accuracy",
                                  num_correct / len(labels) * 100,
                                  step=run.log_number_train)
        run.experiment.log_metric(mod_name + "Avg train batch loss",
                                  loss.item(),
                                  step=run.log_number_train)
        run.log_number_train += 1

        incorrect_classifications_train.append(
            get_mistakes(preds, labels, paths))
        epoch_classifications_train.extend(zip(preds, labels, paths))
    epoch_accuracy = calc_accuracy(epoch_classifications_train)

    # Divide by the number of batches seen. The previous code divided by the
    # last zero-based batch index, which over-estimated the mean and raised
    # ZeroDivisionError when the loader held a single batch.
    avg_epoch_loss = total_train_loss / num_batches if num_batches else 0.0

    run.experiment.log_metric(mod_name + "Train epoch accuracy",
                              epoch_accuracy,
                              step=epoch)
    run.experiment.log_metric(mod_name + "Avg train epoch loss",
                              avg_epoch_loss,
                              step=epoch)

    print('\nTrain: Epoch:', epoch, 'num correct:', total_train_correct,
          'Accuracy:',
          str(epoch_accuracy) + '%')
Esempio n. 8
0
def evaluate_data(evaluation_type, test_labels, parameter_string, fold,
                  all_probs, sess, init_opt, ys):
    """Run the evaluation batches through ``sess`` and report the accuracy.

    Args:
        evaluation_type: label used in log messages; ``'Test'`` additionally
            accumulates probabilities and ``'Dev'`` re-runs ``init_opt``.
        test_labels: list that the fetched labels are appended to (the
            caller's list is extended in place).
        parameter_string: only used in the log message.
        fold: current fold index.
        all_probs: running sum of test probabilities; replaced on fold 0 and
            added to in place on later folds when ``evaluation_type`` is
            ``'Test'``.
        sess: session used to fetch ``probs_test``, ``pred_test`` and ``ys``.
        init_opt: op run after a ``'Dev'`` evaluation.
        ys: label tensor fetched together with the predictions.

    Returns:
        ``(all_probs, test_labels, accuracy)``; ``test_labels`` is a fresh
        empty list unless ``fold`` is the last fold (``cfg.fold_num - 1``).
    """
    logging.info('# {} evaluation'.format(evaluation_type))

    collected_probs, collected_preds = [], []
    for _ in range(num_test_bathces):
        batch_probs, batch_preds, batch_labels = sess.run(
            [probs_test, pred_test, ys])
        collected_probs.extend(batch_probs)
        collected_preds.extend(batch_preds)
        test_labels.extend(batch_labels)

    accuracy = calc_accuracy(collected_preds, test_labels)
    logging.info('The {} accuracy of parameter {} fold {} is {}'.format(
        evaluation_type, parameter_string, fold, accuracy))

    if evaluation_type == 'Test':
        fold_probs = np.array(collected_probs)
        if fold == 0:
            all_probs = fold_probs
        else:
            # In-place add: the caller's array is updated as well.
            all_probs += fold_probs

    logging.info('Reset the test iteration')
    is_last_fold = fold == cfg.fold_num - 1
    if not is_last_fold:
        test_labels = []

    if evaluation_type == 'Dev':
        sess.run(init_opt)
    return all_probs, test_labels, accuracy
Esempio n. 9
0
def validate(data_loader, model):
    """Evaluate ``model`` on ``data_loader`` and return (avg loss, avg acc).

    The model is put in eval mode and run under ``torch.no_grad()``. For
    each ``(input_seq, target)`` batch the output is reshaped from
    ``(B, N, D)`` to ``(B * N, D)`` and the target repeated ``N`` times
    before the module-level ``criterion`` and ``calc_accuracy`` are applied.
    The averaged loss and accuracy are printed and returned.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.eval()

    with torch.no_grad():
        batches = tqdm(enumerate(data_loader), total=len(data_loader))
        for _idx, (input_seq, target) in batches:
            input_seq = input_seq.to(cuda)
            target = target.to(cuda)
            batch_size = input_seq.size(0)
            output, _ = model(input_seq)

            # One row per (sample, output) pair, with the target replicated
            # to match.
            _, n_out, dim = output.size()
            flat_output = output.view(batch_size * n_out, dim)
            flat_target = target.repeat(1, n_out).view(-1)

            batch_loss = criterion(flat_output, flat_target)
            batch_acc = calc_accuracy(flat_output, flat_target)

            loss_meter.update(batch_loss.item(), batch_size)
            acc_meter.update(batch_acc.item(), batch_size)

    summary = ('Loss {loss.avg:.4f}\t'
               'Acc: {acc.avg:.4f} \t')
    print(summary.format(loss=loss_meter, acc=acc_meter))
    return loss_meter.avg, acc_meter.avg
Esempio n. 10
0
def train(data_loader, model, optimizer, epoch):
    """Train ``model`` for one epoch and return (local avg loss, local avg acc).

    Uses the module-level ``criterion``, ``cuda`` device, ``args``,
    ``writer_train`` and the global counter ``iteration``. ``iteration`` is
    only advanced on batches where ``idx % args.print_freq == 0``, which are
    also the batches on which progress is printed and scalars are written.
    An image grid of (at most two) input sequences is written while
    ``iteration`` equals 0 or ``args.print_freq``.
    """
    global iteration
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.train()

    for step, (input_seq, target) in enumerate(data_loader):
        started = time.time()
        input_seq = input_seq.to(cuda)
        target = target.to(cuda)
        batch_size = input_seq.size(0)
        output, _ = model(input_seq)

        # visualize
        if (iteration == 0) or (iteration == args.print_freq):
            if batch_size > 2:
                input_seq = input_seq[0:2, :]
            frames = input_seq.transpose(2, 3).contiguous().view(
                -1, 3, args.img_dim, args.img_dim)
            grid = vutils.make_grid(frames,
                                    nrow=args.num_seq * args.seq_len)
            writer_train.add_image('input_seq', de_normalize(grid), iteration)
        # Drop the reference to the input before the backward pass.
        del input_seq

        _, n_out, dim = output.size()
        output = output.view(batch_size * n_out, dim)
        target = target.repeat(1, n_out).view(-1)

        loss = criterion(output, target)
        acc = calc_accuracy(output, target)

        del target

        loss_meter.update(loss.item(), batch_size)
        acc_meter.update(acc.item(), batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % args.print_freq != 0:
            continue

        print('Epoch: [{0}][{1}/{2}]\t'
              'Loss {loss.val:.4f} ({loss.local_avg:.4f})\t'
              'Acc: {acc.val:.4f} ({acc.local_avg:.4f}) T:{3:.2f}\t'.format(
               epoch, step, len(data_loader), time.time() - started,
               loss=loss_meter, acc=acc_meter))

        # L2 norm of trainable parameters versus all parameters.
        total_weight = 0.0
        decay_weight = 0.0
        for param in model.parameters():
            if param.requires_grad:
                decay_weight += param.norm(2).data
            total_weight += param.norm(2).data
        print('Decay weight / Total weight: %.3f/%.3f' % (decay_weight, total_weight))

        writer_train.add_scalar('local/loss', loss_meter.val, iteration)
        writer_train.add_scalar('local/accuracy', acc_meter.val, iteration)

        iteration += 1

    return loss_meter.local_avg, acc_meter.local_avg
def main(argv=None):
    """Evaluate the classifier on the CUB test split and print the accuracy.

    Loads three pickled dictionaries from the ``FLAGS.CUB_data`` prefix,
    splits the test files into a "coarse" set (every test file) and a "fine"
    set (files whose label entry in ``all_data`` contains ``'layer_fine'``,
    with ``'f'`` appended to the file name), runs ``predict`` on both and
    passes the predictions to ``utils.calc_accuracy``.

    Args:
        argv: unused.
    """

    def _load_pickle(file_name):
        # Pickle data is binary, so open in 'rb', and close the handle
        # deterministically instead of leaking it.
        # NOTE: pickle.load can execute arbitrary code; only point
        # FLAGS.CUB_data at trusted files.
        with open(FLAGS.CUB_data + file_name, 'rb') as handle:
            return pickle.load(handle)

    all_data = _load_pickle('cub_image_dict.pkl')
    attention_dict = _load_pickle('cub_attention_dict.pkl')
    knowledge_dict = _load_pickle('cub_knowledge_dict.pkl')
    train_files, train_labels, test_files, test_labels = load_data()

    test_f = []
    test_l = []
    test_fine_f = []
    test_fine_l = []
    for test_file, test_label in zip(test_files, test_labels):
        # Every test sample belongs to the coarse set ...
        test_f.append(test_file)
        test_l.append(test_label)
        # ... and additionally to the fine set when its label has a fine layer.
        if 'layer_fine' in all_data[test_label]:
            test_fine_f.append(test_file + 'f')
            test_fine_l.append(test_label)

    train_data = Dataset(train_files, train_labels, 64, attention_dict,
                         knowledge_dict, all_data)
    test_data = Dataset(test_f,
                        test_l,
                        64,
                        attention_dict,
                        knowledge_dict,
                        all_data,
                        is_train=False)
    test_data_fine = Dataset(test_fine_f,
                             test_fine_l,
                             64,
                             attention_dict,
                             knowledge_dict,
                             all_data,
                             is_train=False)
    the_model = Model(lr=0.002)
    fine_predict = the_model.classifier.predict(
        input_fn=test_data_fine.input_fn)
    coarse_predict = the_model.classifier.predict(input_fn=test_data.input_fn)
    accuracy = utils.calc_accuracy(test_l, fine_predict, coarse_predict,
                                   all_data)
    print(accuracy)
    #the_model.classifier.evaluate(input_fn=test_data.input_fn)
    '''
    for i in xrange(FLAGS.epoch):
        the_model.classifier.evaluate(input_fn=test_data_fine.input_fn)
        fine_predict = the_model.classifier.predict(input_fn=test_data_fine.input_fn)
        coarse_predict = the_model.classifier.predict(input_fn=test_data.input_fn)
        the_model.classifier.train(input_fn=train_data.input_fn,
                                hooks=the_model.train_hooks,
                                steps=200)
        the_model.classifier.evaluate(input_fn=test_data.input_fn)
    '''
    '''
Esempio n. 12
0
def test(model=None):
    """Print the MNIST test accuracy of ``model``.

    When ``model`` is ``None`` the checkpoint at ``MODEL_PATH`` is loaded: it
    holds a ``(lbcnn_depth, state_dict)`` pair from which an ``Lbcnn`` is
    rebuilt. An ``AssertionError`` ("Train a model first") is raised if the
    checkpoint file does not exist.
    """
    if model is None:
        assert os.path.exists(MODEL_PATH), "Train a model first"
        depth, weights = torch.load(MODEL_PATH)
        model = Lbcnn(depth=depth)
        model.load_state_dict(weights)

    test_loader = get_mnist_loader(train=False)
    test_accuracy = calc_accuracy(model, loader=test_loader, verbose=True)
    print("MNIST test accuracy: {:.3f}".format(test_accuracy))
Esempio n. 13
0
def reduced_rule2(rules, training_corpus, predictions, accuracy):
    """Return the rules that individually improve on ``accuracy``.

    Only rules whose value in ``rules`` exceeds 500 are tried. Each candidate
    is applied on its own, via ``apply_rule2``, to a deep copy of
    ``predictions``; it is kept when ``utils.calc_accuracy`` against
    ``training_corpus`` is strictly greater than ``accuracy``.
    """

    def _improves(rule):
        # Work on a copy so one trial cannot affect the next.
        trial = apply_rule2(copy.deepcopy(predictions), [rule])
        return utils.calc_accuracy(training_corpus, trial) > accuracy

    return [rule for rule, count in rules.items()
            if count > 500 and _improves(rule)]
Esempio n. 14
0
def reduced_rule1(rules, train_sents, predict_sents, accuracy):
    """Return the rules that individually improve on ``accuracy``.

    Only rules whose value in ``rules`` exceeds 500 are tried. Each candidate
    is applied on its own, via ``apply_rule1``, to a deep copy of
    ``predict_sents``; it is kept when ``utils.calc_accuracy`` against
    ``train_sents`` is strictly greater than ``accuracy``.
    """

    def _improves(rule):
        # Work on a copy so one trial cannot affect the next.
        trial = apply_rule1(copy.deepcopy(predict_sents), [rule])
        return utils.calc_accuracy(train_sents, trial) > accuracy

    return [rule for rule, count in rules.items()
            if count > 500 and _improves(rule)]
Esempio n. 15
0
def val_(dataloader):
    """Evaluate the global ``net`` on ``dataloader``.

    Runs in eval mode under ``torch.no_grad()`` with every batch moved to
    CUDA. Returns ``(mean batch loss, accuracy)`` where the accuracy is
    averaged with each batch weighted by its number of answers.
    """
    net.eval()
    batch_losses = []
    per_sample_acc = []

    with torch.no_grad():
        for image, question, answer in tqdm(dataloader, desc='val'):
            image = image.cuda()
            question = question.cuda()
            answer = answer.cuda()

            pred, _ = net(image, question)
            batch_losses.append(criterion(pred, answer).item())

            batch_acc = calc_accuracy(pred, answer)
            # One entry per sample so the mean is weighted by batch size.
            per_sample_acc.extend([batch_acc] * answer.size(0))

    mean_loss = sum(batch_losses) / len(batch_losses)
    mean_acc = sum(per_sample_acc) / len(per_sample_acc)
    return mean_loss, mean_acc
Esempio n. 16
0
def test_run(data_type, lower_and_remove_punctuation, remove_stop_words, distance_method):
    """
    Run one train/test experiment with a Rocchio classifier.

    A ``FileReader`` is built over the full data file, the train and test
    sets are built from their own files, a ``RocchioClassifier`` is created
    from the train set and scored on the test set with ``calc_accuracy``.

    :param data_type: how sentences are stored, expects: 'boolean' / 'tf' / 'tfidf'
    :param lower_and_remove_punctuation: bool, forwarded to ``FileReader``;
        if true words are lower-cased and punctuation removed
    :param remove_stop_words: bool, forwarded to ``FileReader``; if true stop
        words are removed
    :param distance_method: how to calculate distance, expects: 'euclidean' / 'cosine'
    :return: the accuracy of the test run
    """
    reader = FileReader("./dataset/amazon_cells_labelled_full.txt",
                        lower_and_remove_punctuation,
                        remove_stop_words)

    # build_set returns a pair; only its first element is needed here.
    train_set, _ = reader.build_set(
        data_type, "./dataset/amazon_cells_labelled_train.txt")
    test_set, _ = reader.build_set(
        data_type, "./dataset/amazon_cells_labelled_test.txt")

    rocchio = RocchioClassifier(train_set)
    return calc_accuracy(test_set, rocchio, distance_method)
Esempio n. 17
0
def test_solver():
    """Benchmark the CDCL solver on 50 generated CNF test cases.

    Generates the cases and their reference labels, solves every
    ``test_case_<i>.cnf`` under ``base_dir`` while timing the solver call,
    writes ``[i, sat, seconds]`` rows to ``predictions.csv`` and prints the
    accuracy and timing figures returned by ``calc_accuracy``.
    """
    num_test_cases = 50
    # Create the instances, then fetch their labels from the online solver.
    generate_test_cases(100, 450, 3, num_test_cases)
    obtain_labels(num_test_cases)

    results = []
    total_seconds = 0
    for case_id in range(1, num_test_cases + 1):
        cnf_path = os.path.join(base_dir, f"test_case_{case_id}.cnf")
        _, _, cnf = read_data(cnf_path)

        # Time only the solver call itself.
        t0 = time.perf_counter()
        sat, _ = CDCL(cnf, single_UIP, branching_heuristic)
        elapsed = time.perf_counter() - t0

        total_seconds += elapsed
        results.append([case_id, sat, elapsed])
        print(elapsed)

    predictions_path = os.path.join(base_dir, 'predictions.csv')
    with open(predictions_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile,
                            delimiter=' ',
                            quotechar='|',
                            quoting=csv.QUOTE_MINIMAL)
        writer.writerows(results)

    accuracy, average_time_slower = calc_accuracy(num_test_cases)
    print(f"Accuracy = {accuracy * 100}%")
    print(f"Average Time Taken: {total_seconds / num_test_cases}")
    print(f"Slower by {average_time_slower}s on average")
Esempio n. 18
0
def calc_distinct(results_dict, config):
    """Train and evaluate the "distinct" classifier and return its metrics.

    Pairs of (real image, fake image for knob ``dim`` and bin ``bin_i``) are
    concatenated along the last axis and labelled with a one-hot vector for
    the switched knob, or all zeros when the rounded real probability equals
    the fake target probability. ``classifier_distinct_64`` is trained on
    these pairs with a sigmoid cross-entropy loss and evaluated on a
    held-out split.

    Args:
        results_dict: must provide ``'real_imgs'``, ``'fake_t_imgs'``,
            ``'real_ps'`` and ``'fake_target_ps'``.
        config: experiment configuration; reads ``log_dir``, ``name``,
            ``metrics_batch_size``, ``metrics_epochs``,
            ``metrics_test_ratio``, ``num_bins``, ``k_dim`` or ``w_dim``,
            ``target_class``, ``num_channel`` and ``input_size``.

    Returns:
        A dict mapping ``'distinct_<metric>'`` to a one-element list for
        accuracy and per-dim / micro / macro precision and recall. An empty
        dict is returned when the number of knobs is missing or <= 1.

    Side effects:
        Writes checkpoints, TensorBoard summaries and ``.npy`` logs under
        ``<log_dir>/<name>/test/metrics/distinct``.
    """
    tf.reset_default_graph()
    print('Calculating metrics for: Distinct')

    # ============= Metrics Folder - Distinct =============
    output_dir = os.path.join(config['log_dir'], config['name'], 'test', 'metrics', 'distinct')
    logs_dir = os.path.join(output_dir, 'logs')
    if not os.path.exists(logs_dir):
        os.makedirs(logs_dir)

    # ============= Experiment Parameters =============
    BATCH_SIZE = config['metrics_batch_size']
    EPOCHS = config['metrics_epochs']
    TEST_RATIO = config['metrics_test_ratio']
    NUM_BINS = config['num_bins']
    if 'k_dim' in config:
        N_KNOBS = config['k_dim']
    elif 'w_dim' in config:
        N_KNOBS = config['w_dim']
    else:
        print('Number of knobs not specified. Returning...')
        return {}
    TARGET_CLASS = config['target_class']
    if N_KNOBS <= 1:
        print('This model has only one dimension. Distinctness metrics are not applicable.')
        return {}
    channels = config['num_channel']
    input_size = config['input_size']
    # ============= Data =============
    data = _EMPTY_ARR
    labels = _EMPTY_ARR
    source_len = len(results_dict['real_imgs'])
    for dim in range(N_KNOBS):
        for bin_i in range(NUM_BINS):
            # Real and fake image side by side along the last axis.
            data_dim_bin = np.append(results_dict['real_imgs'], results_dict['fake_t_imgs'][:, dim, bin_i], axis=-1)
            # dimension dim has been switched
            switched_dim = np.ones(source_len, dtype=int)*dim
            # unless the real probability and fake target probability are the same,
            # in which no dimension has been switched
            fixed_indices = (np.around(results_dict['real_ps'][:, dim, bin_i, TARGET_CLASS], decimals=2) ==
                             results_dict['fake_target_ps'][:, dim, bin_i])
            labels_dim_bin = np.eye(N_KNOBS)[switched_dim]
            labels_dim_bin[fixed_indices] = 0
            data = safe_append(data, data_dim_bin)
            labels = safe_append(labels, labels_dim_bin)
    data_len = len(data)
    data_inds = np.array(range(data_len))
    np.random.shuffle(data_inds)

    # The first TEST_RATIO share of the shuffled indices is held out.
    split_at = int(data_len * TEST_RATIO)
    train_inds = data_inds[split_at:]
    test_inds = data_inds[:split_at]

    print('The size of the training set: ', train_inds.shape[0])
    print('The size of the testing set: ', test_inds.shape[0])
    # ============= placeholder =============
    with tf.name_scope('input'):
        x_ = tf.placeholder(tf.float32, [None, input_size, input_size, channels*2], name='x-input')
        y = tf.placeholder(tf.float32, [None, N_KNOBS], name='y-input')
        isTrain = tf.placeholder(tf.bool)
    # ============= Model =============
    logit, prediction = classifier_distinct_64(x_, num_dims=N_KNOBS, isTrain=isTrain)
    classif_loss = tf.losses.sigmoid_cross_entropy(multi_class_labels=y, logits=logit)
    acc = calc_accuracy(prediction=prediction, labels=y)
    loss = tf.losses.get_total_loss()
    # ============= Optimization functions =============
    train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
    # ============= summary =============
    cls_loss_summary = tf.summary.scalar('distinct/cls_loss', classif_loss)
    total_loss_summary = tf.summary.scalar('distinct/loss', loss)
    cls_acc_summary = tf.summary.scalar('distinct/acc', acc)
    summary_tf = tf.summary.merge([cls_loss_summary, total_loss_summary, cls_acc_summary])
    # ============= Variables =============
    # All global variables that exist at this point are saved.
    lst_vars = list(tf.global_variables())
    # ============= Session =============
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(var_list=lst_vars)
    writer = tf.summary.FileWriter(output_dir + '/train', sess.graph)
    writer_test = tf.summary.FileWriter(output_dir + '/test', sess.graph)
    # ============= Training =============
    train_loss = []
    itr_train = 0
    for epoch in range(EPOCHS):
        epoch_loss = 0.0
        np.random.shuffle(train_inds)
        num_batch = math.ceil(train_inds.shape[0] / BATCH_SIZE)
        for i in range(0, num_batch):
            start = i * BATCH_SIZE
            batch_inds = train_inds[start:start + BATCH_SIZE]
            xs = data[batch_inds]
            ys = labels[batch_inds]
            [_, _loss, summary_str] = sess.run([train_step, loss, summary_tf],
                                               feed_dict={x_: xs, isTrain: True, y: ys})
            writer.add_summary(summary_str, itr_train)
            itr_train += 1
            epoch_loss += _loss
        epoch_loss /= num_batch
        print("Epoch: " + str(epoch) + " loss: " + str(epoch_loss) + '\n')
        train_loss.append(epoch_loss)

        checkpoint_name = os.path.join(output_dir, 'cp_epoch_{}.ckpt'.format(epoch))
        saver.save(sess, checkpoint_name)
        np.save(os.path.join(logs_dir, 'train_loss.npy'), np.asarray(train_loss))

    # ============= Testing =============
    test_preds = _EMPTY_ARR
    test_loss = []
    itr_test = 0

    total_test_loss = 0.0
    num_batch = math.ceil(test_inds.shape[0] / BATCH_SIZE)
    for i in range(0, num_batch):
        start = i * BATCH_SIZE
        batch_inds = test_inds[start:start + BATCH_SIZE]
        xs = data[batch_inds]
        ys = labels[batch_inds]
        [_loss, summary_str, _pred] = sess.run([loss, summary_tf, prediction],
                                               feed_dict={x_: xs, isTrain: False, y: ys})
        writer_test.add_summary(summary_str, itr_test)
        itr_test += 1
        total_test_loss += _loss
        test_preds = safe_append(test_preds, _pred, axis=0)
    total_test_loss /= num_batch
    # Report the test loss itself; the previous code printed the last
    # training-epoch loss here. EPOCHS - 1 is the index of the last epoch
    # and stays defined even when no training epoch ran.
    print("Epoch: " + str(EPOCHS - 1) + " Test loss: " + str(total_test_loss) + '\n')
    test_loss.append(total_test_loss)

    # Everything that needs the graph has run; release the session and
    # flush/close the summary writers instead of leaking them.
    writer.close()
    writer_test.close()
    sess.close()

    test_labels = labels[test_inds]
    np.save(os.path.join(logs_dir, 'test_loss.npy'), np.asarray(test_loss))
    np.save(os.path.join(logs_dir, 'test_preds.npy'), np.asarray(test_preds))
    np.save(os.path.join(logs_dir, 'test_ys.npy'), np.asarray(test_labels))
    np.save(os.path.join(logs_dir, 'test_xs.npy'), np.asarray(data[test_inds]))

    rounded_preds = np.round(test_preds)
    accuracy, precision_per_dim, recall_per_dim = calc_metrics_arr(rounded_preds, test_labels, average=None)
    _, precision_micro, recall_micro = calc_metrics_arr(rounded_preds, test_labels, average='micro')
    _, precision_macro, recall_macro = calc_metrics_arr(rounded_preds, test_labels, average='macro')

    print('Distinct - accuracy: {:.3f}, '
          'precision: per dim: {}, micro: {:.3f}, macro: {:.3f}, '
          'recall: per dim: {}, micro: {:.3f}, macro: {:.3f}'.format(
        accuracy, precision_per_dim, precision_micro, precision_macro,
        recall_per_dim, recall_micro, recall_macro))

    # Explicit name -> value pairs instead of eval() on local variable names.
    named_metrics = (
        ('accuracy', accuracy),
        ('precision_per_dim', precision_per_dim),
        ('precision_micro', precision_micro),
        ('precision_macro', precision_macro),
        ('recall_per_dim', recall_per_dim),
        ('recall_micro', recall_micro),
        ('recall_macro', recall_macro),
    )
    metrics_dict = {}
    for metric_name, metric_value in named_metrics:
        metrics_dict['distinct_{}'.format(metric_name)] = [metric_value]

    print('Metrics successfully calculated: Distinct')
    return metrics_dict
Esempio n. 19
0
def calc_realistic(results_dict, config):
    """Train a real-vs-fake classifier and report its held-out metrics.

    A balanced data set is assembled from ``results_dict['real_imgs']``
    (label 1) and one randomly selected image per sample taken from
    ``results_dict['fake_t_imgs']`` (label 0).  ``classifier_realistic_64`` is
    trained on the training split and evaluated on the test split.
    Checkpoints, TensorBoard summaries and ``.npy`` logs are written below
    ``<log_dir>/<name>/test/metrics/realistic``.

    Args:
        results_dict: mapping with the entries ``'real_imgs'`` and
            ``'fake_t_imgs'``; the latter is indexed as
            ``[sample, knob, bin]``.
        config: mapping providing ``log_dir``, ``name``,
            ``metrics_batch_size``, ``metrics_epochs``,
            ``metrics_test_ratio``, ``num_channel``, ``input_size``,
            ``num_bins`` and either ``k_dim`` or ``w_dim`` (number of knobs).

    Returns:
        dict mapping ``realistic_accuracy``, ``realistic_precision`` and
        ``realistic_recall`` to one-element lists, or an empty dict when
        neither ``k_dim`` nor ``w_dim`` is present in ``config``.
    """
    tf.reset_default_graph()
    print('Calculating metrics for: Realistic')

    # ============= Metrics Folder - Realistic =============
    output_dir = os.path.join(config['log_dir'], config['name'], 'test', 'metrics', 'realistic')
    logs_dir = os.path.join(output_dir, 'logs')
    if not os.path.exists(logs_dir):
        os.makedirs(logs_dir)

    # ============= Experiment Parameters =============
    BATCH_SIZE = config['metrics_batch_size']
    EPOCHS = config['metrics_epochs']
    TEST_RATIO = config['metrics_test_ratio']
    channels = config['num_channel']
    input_size = config['input_size']
    NUM_BINS = config['num_bins']
    if 'k_dim' in config.keys():
        N_KNOBS = config['k_dim']
    elif 'w_dim' in config.keys():
        N_KNOBS = config['w_dim']
    else:
        print('Number of knobs not specified. Returning...')
        return {}
    # ============= Data =============
    half_len = len(results_dict['real_imgs'])
    data_real = results_dict['real_imgs']
    fake_inds = np.arange(half_len)
    fake_knob = np.random.randint(low=0, high=N_KNOBS, size=half_len)
    # Only the two extreme bins (0 and NUM_BINS - 1) are sampled.
    fake_bin = np.random.randint(low=0, high=2, size=half_len)
    fake_bin = fake_bin * (NUM_BINS-1)
    data_fake = results_dict['fake_t_imgs'][fake_inds, fake_knob, fake_bin]

    data = np.append(data_real, data_fake, axis=0)
    labels = np.append(np.ones(half_len), np.zeros(half_len), axis=0)
    data_len = len(data)
    data_inds = np.array(range(data_len))
    np.random.shuffle(data_inds)

    # The first TEST_RATIO share of the shuffled indices is held out.
    train_inds = data_inds[int(data_len * TEST_RATIO):]
    test_inds = data_inds[:int(data_len * TEST_RATIO)]

    print('The size of the training set: ', train_inds.shape[0])
    print('The size of the testing set: ', test_inds.shape[0])
    # ============= placeholder =============
    with tf.name_scope('input'):
        x_ = tf.placeholder(tf.float32, [None, input_size, input_size, channels], name='x-input')
        y_ = tf.placeholder(tf.int64, [None], name='y-input')
        isTrain = tf.placeholder(tf.bool)
    # ============= Model =============
    y = tf.one_hot(y_, 2, on_value=1.0, off_value=0.0, axis=-1)
    logit, prediction = classifier_realistic_64(x_, n_label=2, isTrain=isTrain)
    classif_loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logit)
    acc = calc_accuracy(prediction=prediction, labels=y)
    loss = tf.losses.get_total_loss()
    # ============= Optimization functions =============
    train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)
    # ============= summary =============
    # Summary ops get their own names so they cannot be confused with the
    # Python floats that accumulate the per-epoch losses below.
    cls_loss_summary = tf.summary.scalar('realistic/cls_loss', classif_loss)
    total_loss_summary = tf.summary.scalar('realistic/loss', loss)
    cls_acc_summary = tf.summary.scalar('realistic/acc', acc)
    summary_tf = tf.summary.merge([cls_loss_summary, total_loss_summary, cls_acc_summary])
    # ============= Variables =============
    lst_vars = list(tf.global_variables())
    # ============= Session =============
    sess = tf.Session()
    writer = None
    writer_test = None
    try:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(var_list=lst_vars)
        writer = tf.summary.FileWriter(output_dir + '/train', sess.graph)
        writer_test = tf.summary.FileWriter(output_dir + '/test', sess.graph)
        # ============= Training =============
        train_loss = []
        itr_train = 0
        epoch = -1  # keeps the test log line valid when EPOCHS == 0
        for epoch in range(EPOCHS):
            epoch_loss = 0.0
            np.random.shuffle(train_inds)
            num_batch = math.ceil(train_inds.shape[0] / BATCH_SIZE)
            for i in range(0, num_batch):
                start = i * BATCH_SIZE
                xs = data[train_inds[start:start + BATCH_SIZE]]
                ys = labels[train_inds[start:start + BATCH_SIZE]]
                [_, _loss, summary_str] = sess.run([train_step, loss, summary_tf],
                                                   feed_dict={x_: xs, isTrain: True, y_: ys})
                writer.add_summary(summary_str, itr_train)
                itr_train += 1
                epoch_loss += _loss
            epoch_loss /= num_batch
            print("Epoch: " + str(epoch) + " loss: " + str(epoch_loss) + '\n')
            train_loss.append(epoch_loss)

            checkpoint_name = os.path.join(output_dir, 'cp_epoch_{}.ckpt'.format(epoch))
            saver.save(sess, checkpoint_name)
            np.save(os.path.join(output_dir, 'logs', 'train_loss.npy'), np.asarray(train_loss))

        # ============= Testing =============
        test_preds = _EMPTY_ARR
        test_loss = []
        itr_test = 0

        total_test_loss = 0.0
        num_batch = math.ceil(test_inds.shape[0] / BATCH_SIZE)
        for i in range(0, num_batch):
            start = i * BATCH_SIZE
            xs = data[test_inds[start:start + BATCH_SIZE]]
            ys = labels[test_inds[start:start + BATCH_SIZE]]
            [_loss, summary_str, _pred] = sess.run([loss, summary_tf, prediction],
                                                   feed_dict={x_: xs, isTrain: False, y_: ys})
            writer_test.add_summary(summary_str, itr_test)
            itr_test += 1
            total_test_loss += _loss
            test_preds = safe_append(test_preds, _pred, axis=0)
        total_test_loss /= num_batch
        # Report the test loss itself (previously the last training loss was
        # printed under the "Test loss" label).
        print("Epoch: " + str(epoch) + " Test loss: " + str(total_test_loss) + '\n')
        test_loss.append(total_test_loss)

        np.save(os.path.join(output_dir, 'logs', 'test_loss.npy'), np.asarray(test_loss))
        np.save(os.path.join(output_dir, 'logs', 'test_preds.npy'), np.asarray(test_preds))
        np.save(os.path.join(output_dir, 'logs', 'test_ys.npy'), np.asarray(labels[test_inds]))
        np.save(os.path.join(output_dir, 'logs', 'test_xs.npy'), np.asarray(data[test_inds]))
    finally:
        # Flush the event files and release the session even on failure.
        for summary_writer in (writer, writer_test):
            if summary_writer is not None:
                summary_writer.close()
        sess.close()

    accuracy, precision, recall = calc_metrics_arr(np.argmax(test_preds, axis=1), labels[test_inds])

    print('Realistic - accuracy: {:.3f}, precision: {:.3f}, recall: {:.3f}'.format(accuracy, precision, recall))
    # Explicit mapping instead of eval() on variable names.
    metrics_dict = {
        'realistic_accuracy': [accuracy],
        'realistic_precision': [precision],
        'realistic_recall': [recall],
    }

    print('Metrics successfully calculated: Realistic')
    return metrics_dict
def evaluate(epoch, run, mod_name=''):
    """Run one validation pass, log its metrics and checkpoint the model.

    Iterates over ``run.val_loader`` without gradients, logs per-batch and
    per-epoch accuracy/loss to ``run.experiment``, updates the "best so far"
    fields on ``run`` when this epoch wins, saves a checkpoint and finally
    steps ``run.lr_scheduler``.

    Args:
        epoch: index of the epoch being evaluated; used as the logging step
            and stored (as ``epoch + 1``) in the checkpoint.
        run: object carrying the model, data loader, criterion, experiment
            logger, optimizer, scheduler and the ``best_*`` / ``log_number_val``
            bookkeeping attributes read and written here.
        mod_name: prefix prepended to every logged metric name.
    """
    incorrect_classifications_val = []
    total_val_loss = 0
    total_val_correct = 0
    epoch_classifications_val = []
    num_batches = 0
    run.model.eval()
    with torch.no_grad():
        for images, labels, paths in run.val_loader:
            num_batches += 1

            if run.grayscale:
                images = torch.unsqueeze(
                    images, 1).double()  # added channel dimensions (grayscale)
            else:
                images = images.float().permute(0, 4, 1, 2, 3)
            labels = labels.long()

            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda()

            preds = run.model(images)  # Pass Batch
            loss = run.criterion(preds, labels)  # Calculate Loss
            total_val_loss += loss.item()

            num_correct = get_num_correct(preds, labels)
            total_val_correct += num_correct

            run.experiment.log_metric(mod_name + "Val batch accuracy",
                                      num_correct / len(labels) * 100,
                                      step=run.log_number_val)
            run.experiment.log_metric(mod_name + "Avg val batch loss",
                                      loss.item(),
                                      step=run.log_number_val)
            run.log_number_val += 1

            incorrect_classifications_val.append(
                get_mistakes(preds, labels, paths))

            for prediction in zip(preds, labels, paths):
                epoch_classifications_val.append(prediction)

        epoch_accuracy = calc_accuracy(epoch_classifications_val)

        # Average over the number of batches.  The previous code divided by
        # the last zero-based batch index, which over-estimated the loss and
        # raised ZeroDivisionError for a single-batch loader.
        avg_val_loss = total_val_loss / max(num_batches, 1)

        run.experiment.log_metric(mod_name + "Val epoch accuracy",
                                  epoch_accuracy,
                                  step=epoch)
        run.experiment.log_metric(mod_name + "Avg val epoch loss",
                                  avg_val_loss,
                                  step=epoch)
        print('Val Epoch:', epoch, 'num correct:', total_val_correct,
              'Accuracy:',
              str(epoch_accuracy) + '%')

    # Better accuracy wins; on an accuracy tie the lower loss wins.
    is_best = (epoch_accuracy > run.best_val_acc) | (
        (epoch_accuracy >= run.best_val_acc) &
        (avg_val_loss < run.best_val_loss))
    if is_best:
        print("Best run so far! updating params...")
        run.best_val_acc = epoch_accuracy
        run.best_val_loss = avg_val_loss
        run.best_model_preds = epoch_classifications_val
        run.best_model_mistakes = incorrect_classifications_val
    save_checkpoint(
        {
            'epoch': epoch + 1,
            'state_dict': run.model.state_dict(),
            'best_acc1': run.best_val_acc,
            'optimizer': run.optimizer.state_dict(),
        }, is_best)

    # Step lr_scheduler
    run.lr_scheduler.step()
Esempio n. 21
0
# Demo cell 1: fit the project's own AdaBoost implementation (20 models) on
# data produced by get_classification_data and inspect its predictions.
X, Y = get_classification_data(sd=10, m=50)
adaboost = AdaBoost(n_models=20)
adaboost.fit(X, Y)
# NOTE(review): final_prediction(X) is re-evaluated for every line below
# rather than being computed once and reused.
print("This is the final prediction:", adaboost.final_prediction(X))
print("This is the original labels:", Y)
# Element-wise comparison of predictions against the labels.
print(adaboost.final_prediction(X) == Y)
print("Shape:", adaboost.final_prediction(X).shape)
print("type:", type(adaboost.final_prediction(X)))
# The bound method itself (not its result) is handed to the plotting helper.
visualise_predictions(adaboost.final_prediction, X)
print(f'accuracy: {calc_accuracy(adaboost.final_prediction(X), Y)}')

show_data(X, Y)
# Predict for a single 2-feature point, passed as a 1x2 array.
print("Evaluate for a point: ", adaboost.final_prediction(np.array([[1, 1]])))

# %%
# Demo cell 2: same data, but with scikit-learn's AdaBoostClassifier (default
# settings) for comparison.
import sklearn.ensemble

adaBoost = sklearn.ensemble.AdaBoostClassifier()
adaBoost.fit(X, Y)
predictions = adaBoost.predict(X)
# Return value is discarded here; the accuracy is printed at the end of the cell.
calc_accuracy(predictions, Y)
#visualise_predictions(adaBoost.predict, X,Y)
#show_data(X, Y)
print("Adaboosts sklearn predictions:", predictions)
print(predictions.shape)
print(type(predictions))
print(f'accuracy: {calc_accuracy(predictions, Y)}')

# %%
Esempio n. 22
0
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.CRITICAL)

    rules = []

    training_file = args[0]
    training_sents = utils.read_tokens(training_file)
    test_file = args[1]
    test_sents = utils.read_tokens(test_file)

    model = create_model(training_sents)

    sents = utils.read_tokens(training_file)
    predictions = predict_tags(sents, model)
    accuracy = utils.calc_accuracy(training_sents, predictions)
    print "Accuracy in training before rules applied [%s sentences]: %s" % (
        len(sents), accuracy)

    rules = template1(training_sents, predictions)

    reduced_rules = reduced_rule1(rules, training_sents, predictions, accuracy)

    new_predictions = apply_rule1(predictions, reduced_rules)

    accuracy = utils.calc_accuracy(training_sents, new_predictions)

    print "Accuracy in training after rule1 applied [%s sentences]: %s" % (
        len(sents), accuracy)

    test_sents1 = utils.read_tokens(test_file)
Esempio n. 23
0
def main():
    """Grid-search CompRF hyper-parameters on Gisette and score the winner.

    Loads the Gisette train/validation files, keeps the first 200 training
    and 100 validation rows, picks the ``(n0, M)`` pair with the best mean
    stratified k-fold accuracy and finally trains on the whole (truncated)
    training set to report accuracy on the held-out rows.  All results are
    printed; nothing is returned.
    """
    X_train = np.loadtxt('../datasets/gisette/gisette_train_data.txt')
    y_train = np.loadtxt('../datasets/gisette/gisette_train_labels.txt',
                         dtype=int)
    X_test = np.loadtxt('../datasets/gisette/gisette_valid_data.txt')
    y_test = np.loadtxt('../datasets/gisette/gisette_valid_labels.txt',
                        dtype=int)
    y_train = trans(y_train)
    y_test = trans(y_test)
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)

    # Work on a small prefix of each split.
    X_train, y_train = X_train[:200], y_train[:200]
    X_test, y_test = X_test[:100], y_test[:100]

    n_train = X_train.shape[0]

    n0_list = [1, 4, 16, 64]
    M_list = [1, 4, 16, 64, 256]
    r = 1

    accuracy = -1
    n0_best = -1
    M_best = -1
    n_folds = 3  # requirement: 10
    classes = 2

    # train:
    # k-fold CV (see n_folds above)
    # n0 \in {1,4,16,64}
    # M \in {1,4,16,64,256}
    # get the best pair of (n, M)

    print('------------------now begin--------------------------')

    # The folds do not depend on (n0, M), so they are computed once and
    # shared by every configuration instead of being rebuilt per pair.
    skf = StratifiedKFold(n_splits=n_folds)
    cv_splits = list(skf.split(range(n_train), y_train))

    for n0 in n0_list:
        for M in M_list:
            cur_accuracy_list = []

            for k, (train_idx, val_idx) in enumerate(cv_splits):
                comp_rf = CompRF(n0, M, r, "Classification", classes)

                time_start = time.time()
                y_predict = comp_rf.train_then_predict(X_train[train_idx],
                                                       y_train[train_idx],
                                                       X_train[val_idx])
                time_end = time.time()

                cur_accuracy = calc_accuracy(y_train[val_idx], y_predict)

                cur_accuracy_list.append(cur_accuracy)

                print("(n0={0}, M={1}, fold={3}): {2:.2f}% [time={4:.2f}]".
                      format(n0, M, cur_accuracy * 100, k,
                             time_end - time_start))

            cur_accuracy = sum(cur_accuracy_list) / len(cur_accuracy_list)
            print("(n0={0}, M={1}, average): {2:.2f}%".format(
                n0, M, cur_accuracy * 100))
            if cur_accuracy > accuracy:
                accuracy = cur_accuracy
                n0_best = n0
                M_best = M

    print("best_accuracy={0}%, best_n_0={1}, best_M={2}".format(
        accuracy * 100, n0_best, M_best))

    # test:
    # Build the final model exactly like the cross-validated ones so the
    # reported score belongs to the configuration that was actually tuned.
    comp_rf = CompRF(n0_best, M_best, r, "Classification", classes)
    y_predict = comp_rf.train_then_predict(X_train, y_train, X_test)
    accuracy = calc_accuracy(y_test, y_predict)

    print("accuracy: ", accuracy)
Esempio n. 24
0
    def train(self,dataset_path,num_classes,batch_size,lr_base,lr_decay,step_size,\
              max_iteration,pretrained_model=None):
        '''
        @description: Build the VGG-Net16 graph, train the model, write the
            training logs and save the model checkpoints.
        @params:
            - dataset_path: location prefix of the txt files listing the
              training and validation samples ('train_list.txt' and
              'valid_list.txt' are appended to it verbatim)
            - num_classes: number of classes
            - batch_size: number of samples fed to the network per training step
            - lr_base: initial learning rate
            - lr_decay: learning-rate decay factor
            - step_size: learning-rate decay speed   lr = lr_base * lr_decay ^ (global_step / step_size)
            - max_iteration: maximum number of iterations
            - pretrained_model: path of a pretrained model to restore, or None
        @return: None
        '''

        train_file_name = dataset_path + 'train_list.txt'
        valid_file_name = dataset_path + 'valid_list.txt'

        # NOTE: these are used as plain string prefixes below (e.g.
        # log_dir + 'train'), not as directories joined with a separator.
        log_dir = './log/vgg'
        model_dir = './model/vgg'

        vgg = VGG(weight_decay=0.0005, keep_prob=0.5, num_classes=num_classes)

        train_summary_list = []
        valid_summary_list = []

        with tf.Graph().as_default(), tf.device('/gpu:0'):

            with tf.name_scope('input'):
                # read the training data through input queues
                train_image, train_label = get_batch(
                    train_file_name, self._image_H, self._image_W, batch_size)
                valid_image, valid_label = get_batch(
                    valid_file_name, self._image_H, self._image_W, 250,
                    is_train=False)

                x = tf.placeholder(
                    tf.float32,
                    [None, self._image_H, self._image_W, self._image_channels],
                    name='x')
                y = tf.placeholder(tf.int64, [None], name='y')

            #loss, accuracy, train_op
            logits, _ = vgg.vgg16(x)
            loss = utils.calc_loss(logits, y)
            accuracy = utils.calc_accuracy(logits, y)
            train_op, learning_rate, global_step = utils.optimizer(
                lr_base, step_size, lr_decay, loss)

            #summary
            train_summary_list.append(tf.summary.scalar('train_loss', loss))
            valid_summary_list.append(tf.summary.scalar('valid_loss', loss))
            train_summary_list.append(
                tf.summary.scalar('train_accuracy', accuracy))
            valid_summary_list.append(
                tf.summary.scalar('test_accuracy', accuracy))
            train_summary_list.append(
                tf.summary.scalar('learning rate', learning_rate))
            valid_summary_list.append(
                tf.summary.scalar('learning rate', learning_rate))
            for var in tf.trainable_variables():
                valid_summary_list.append(tf.summary.histogram(var.name, var))
            train_summary = tf.summary.merge(train_summary_list)
            valid_summary = tf.summary.merge(valid_summary_list)

            #session
            saver = tf.train.Saver(max_to_keep=50)
            session_config = tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=True)
            with tf.Session(config=session_config) as sess:
                train_writer = tf.summary.FileWriter(log_dir + 'train',
                                                     sess.graph)
                test_writer = tf.summary.FileWriter(log_dir + 'valid')
                tf.global_variables_initializer().run()
                tf.local_variables_initializer().run()

                # start the queue-runner threads
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                try:
                    # load the pretrained model
                    if pretrained_model is not None:
                        ckpt = tf.train.get_checkpoint_state(pretrained_model)
                        print('Restoring pretrained model: %s' %
                              ckpt.model_checkpoint_path)
                        saver.restore(sess, ckpt.model_checkpoint_path)

                    train_time = 0
                    for step in range(max_iteration):

                        # model persistence (freeze graph), kept for reference:
                        #                    graph_def = tf.get_default_graph().as_graph_def()
                        #                    output_graph_def = graph_util.convert_variables_to_constants(sess,graph_def,['input/x','deepid/Relu'])
                        #                    with tf.gfile.GFile(model_dir+'deepid_model.pb','wb') as file:
                        #                        file.write(output_graph_def.SerializeToString())
                        #                    break

                        start_time = time.time()
                        image, label = sess.run([train_image, train_label])
                        _, train_loss, summary_str, train_step = sess.run(
                            [train_op, loss, train_summary, global_step],
                            feed_dict={
                                x: image,
                                y: label
                            })
                        train_writer.add_summary(summary_str,
                                                 global_step=train_step)
                        train_writer.flush()
                        duration = time.time() - start_time
                        train_time += duration

                        #valid and save model
                        if step % 1000 == 0 or (step + 1) == max_iteration:
                            image, label = sess.run([valid_image, valid_label])
                            (lr, summary_str, valid_loss, validation_accuracy,
                             train_step) = sess.run(
                                 [learning_rate, valid_summary, loss, accuracy,
                                  global_step],
                                 feed_dict={x: image, y: label})
                            test_writer.add_summary(summary_str,
                                                    global_step=train_step)
                            test_writer.flush()
                            print('Step %d: train loss = %.3f, valid loss = %.3f,valid accuracy = %.3f%%, lr = %.6f (%.3f sec)' %
                                  (train_step, train_loss, valid_loss,
                                   validation_accuracy, lr, train_time))
                            saver.save(sess,
                                       model_dir + 'model.ckpt',
                                       global_step=train_step)
                            with open(log_dir + 'valid_result.txt',
                                      'at') as file_writer:
                                file_writer.write('%d\t%.3f%%\t%.5f\t%d\r\n' %
                                                  (train_step, validation_accuracy,
                                                   lr, train_time))
                finally:
                    # stop the queue-runner threads, also when training
                    # aborts with an exception
                    coord.request_stop()
                    coord.join(threads)
Esempio n. 25
0
def train(data_loader, model, optimizer, epoch):
    """Run one training epoch and return the running loss and accuracy.

    Each batch is moved to the ``cuda`` device and pushed through ``model``;
    the output is flattened to ``(B * N, D)`` and scored with the module-level
    ``criterion`` and ``calc_accuracy``.  Progress is shown on a tqdm bar and
    scalars are written to ``writer_train`` every ``args.print_freq`` batches,
    which is also when the global ``iteration`` counter advances.

    Returns:
        Tuple ``(losses.local_avg, accuracy.local_avg)``.
    """
    global iteration

    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    model.train()

    progress = tqdm(data_loader, desc="Train progress: Ep {}".format(epoch))

    for batch_idx, (input_seq, target, _) in enumerate(progress):
        tic = time.time()  # batch start stamp (currently not reported)
        input_seq = input_seq.to(cuda)
        target = target.to(cuda)
        batch_size = input_seq.size(0)
        output, _ = model(input_seq)

        # visualize: log at most two input sequences as an image grid
        if iteration in (0, args.print_freq):
            if batch_size > 2:
                input_seq = input_seq[0:2, :]
            frames = input_seq[:, :3, ...].transpose(2, 3).contiguous()
            frames = frames.view(-1, 3, args.img_dim, args.img_dim)
            grid = vutils.make_grid(frames, nrow=args.num_seq * args.seq_len)
            writer_train.add_image('input_seq', de_normalize(grid), iteration)
        del input_seq

        # Collapse the batch and sequence axes so every step is one sample.
        _, num_steps, num_dims = output.size()
        output = output.view(batch_size * num_steps, num_dims)
        target = target.repeat(1, num_steps).view(-1)

        loss = criterion(output, target)
        acc = calc_accuracy(output, target)

        del target

        loss_meter.update(loss.item(), batch_size)
        acc_meter.update(acc.item(), batch_size)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # L2 norms: over all parameters and over the trainable ones only.
        total_weight = 0.0
        decay_weight = 0.0
        for param in model.parameters():
            if param.requires_grad:
                decay_weight += param.norm(2).data
            total_weight += param.norm(2).data

        progress.set_postfix({
            'loss': loss_meter.local_avg,
            'acc': acc_meter.local_avg,
            'decay_wt': decay_weight.item(),
            'total_wt': total_weight.item(),
        })

        if batch_idx % args.print_freq == 0:
            writer_train.add_scalar('local/loss', loss_meter.val, iteration)
            writer_train.add_scalar('local/accuracy', acc_meter.val, iteration)

            iteration += 1

    return loss_meter.local_avg, acc_meter.local_avg
Esempio n. 26
0
        gt_labels = np.array(gt_labels)

        return logits, gt_labels

# Script entry point: train a NaiveBayes model on the Yelp training file with
# the stemming tokenizer, then evaluate previously pickled test outputs.
if __name__ == '__main__':
    model = NaiveBayes()
    tokenizer = stemmedTokenizer

    # Two passes over the training file: one to build the dictionary, one to train.
    model.create_dict(json_reader("col774_yelp_data/train.json"), tokenizer)
    model.train(json_reader("col774_yelp_data/train.json"), tokenizer)

    # The block below shows how "outputs_stemmed_test.pickle" can be
    # regenerated from the test file; it is disabled and the cached file is
    # loaded instead.
    # outputs = model.predict(json_reader("col774_yelp_data/test.json"), tokenizer)
    # f = open("outputs_stemmed_test.pickle","wb")
    # pickle.dump(outputs, f)
    # f.close()

    # Load the cached (logits, ground-truth labels) pair.
    logits, gt_labels = _load_object("outputs_stemmed_test.pickle")
    conf_matrix = create_confusion_matrix(logits, gt_labels)

    # Accuracy is printed scaled by 100.
    print(calc_accuracy(logits, gt_labels) * 100)
    print(conf_matrix)

    plot_confusion_matrix(conf_matrix, model.classes)

    # NOTE(review): `probs` is computed but the ROC plot is fed `logits`.
    probs = logits_to_prob_vector(logits)
    plot_roc_curve(logits, gt_labels)
    



Esempio n. 27
0
					logging.info("# dev evaluation")
					sess.run(dev_init_opt)
					dev_results = []
					dev_labels = []
					cnt=0
					for _ in range(num_dev_batches):
						# cnt+=1
						# print(cnt)
						tmp_pred,tmp_target = sess.run([pred_eval,ys])
						dev_results.extend(tmp_pred)
						dev_labels.extend(tmp_target)
					# print('DEV3')
					# print(len(dev_results))
					# print(len(dev_labels))
					accuracy = calc_accuracy(dev_results,dev_labels)

					dev_history.append(accuracy)
					if accuracy > dev_best:
						dev_best = accuracy
						stop_times = 0
					else:
						stop_times += 1

					if stop_times > cfg.patience:
						logging.info('The model did not improve after{} times, you have got an excellent'
									 +'enough model.')
						break


					logging.info('# The dev accuracy is:{}'.format(accuracy))
Esempio n. 28
0
def train(data_loader, model, optimizer, epoch):
    """Run one training epoch of the audio/video model.

    For every batch the video and audio branches of ``model.module`` are
    evaluated, three losses are summed (``calc_loss`` on the two embeddings
    plus ``criterion`` on each branch's classification output) and one
    optimizer step is taken.  The L2 distance between the video and audio
    embedding of every sample is accumulated per video name, where the name
    is the second-to-last ``/``-separated component of the sample's audio
    path.

    Returns:
        Tuple ``(losses.local_avg, avg_score_real, avg_score_fake)`` where the
        two scores come from ``get_scores`` applied to the accumulated
        per-video distances, chunk counts and targets.
    """
    global iteration
    losses = AverageMeter()
    model.train()
    dissimilarity_score_dict = {}
    target_dict = {}
    number_of_chunks_dict = {}
    for idx, (video_seq, audio_seq, target, audiopath) in enumerate(data_loader):
        video_seq = video_seq.to(cuda)
        audio_seq = audio_seq.to(cuda)
        target = target.to(cuda)
        B = video_seq.size(0)

        vid_out = model.module.forward_lip(video_seq)
        aud_out = model.module.forward_aud(audio_seq)

        vid_class = model.module.final_classification_lip(vid_out)
        aud_class = model.module.final_classification_aud(aud_out)

        del video_seq
        del audio_seq

        loss1 = calc_loss(vid_out, aud_out, target, args.hyper_param)
        loss2 = criterion(vid_class, target.view(-1))
        loss3 = criterion(aud_class, target.view(-1))
        # NOTE(review): computed but not reported anywhere in this function.
        acc = calc_accuracy(vid_out, aud_out, target, args.threshold)

        loss = loss1 + loss2 + loss3

        losses.update(loss.item(), B)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        for batch in range(B):
            vid_name = audiopath[batch].split('/')[-2]
            # Detach so the per-video running sums do not keep every batch's
            # autograd graph alive for the whole epoch.
            dist = torch.dist(vid_out[batch, :].view(-1),
                              aud_out[batch, :].view(-1), 2).detach()
            tar = target[batch, :].view(-1).item()
            # Test for key membership, not truthiness of the stored value:
            # a zero distance (or a target of 0) is a legitimate entry.
            if vid_name in dissimilarity_score_dict:
                dissimilarity_score_dict[vid_name] += dist
                number_of_chunks_dict[vid_name] += 1
            else:
                dissimilarity_score_dict[vid_name] = dist
                number_of_chunks_dict[vid_name] = 1

            # The first target seen for a video is kept.
            target_dict.setdefault(vid_name, tar)

        if idx % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.local_avg:.4f})\t'.format(
                epoch, idx, len(data_loader), loss=losses))

            # L2 norms: over all parameters and over the trainable ones only.
            total_weight = 0.0
            decay_weight = 0.0
            for m in model.parameters():
                if m.requires_grad: decay_weight += m.norm(2).data
                total_weight += m.norm(2).data
            print('Decay weight / Total weight: %.3f/%.3f' % (decay_weight, total_weight))

            writer_train.add_scalar('local/loss', losses.val, iteration)

            # The global counter only advances on logging steps.
            iteration += 1

    avg_score_real, avg_score_fake = get_scores(dissimilarity_score_dict, number_of_chunks_dict, target_dict)
    return losses.local_avg, avg_score_real, avg_score_fake
Esempio n. 29
0
                      action="store_true",
                      help="turn on debug mode")

    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error("Please provide required arguments")

    if options.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.CRITICAL)

    training_file = args[0]
    training_sents = utils.read_tokens(training_file)
    test_file = args[1]
    test_sents = utils.read_tokens(test_file)

    model = create_model(training_sents)

    ## read sentences again because predict_tags(...) rewrites the tags
    sents = utils.read_tokens(training_file)
    predictions = predict_tags(sents, model)
    accuracy = utils.calc_accuracy(training_sents, predictions)
    print "Accuracy in training [%s sentences]: %s" % (len(sents), accuracy)

    ## read sentences again because predict_tags(...) rewrites the tags
    sents = utils.read_tokens(test_file)
    predictions = predict_tags(sents, model)
    accuracy = utils.calc_accuracy(test_sents, predictions)
    print "Accuracy in training [%s sentences]: %s" % (len(sents), accuracy)
Esempio n. 30
0
def eval_full_spectrograms(dataset, model_id, predictions_path, pred_threshold=0.5, overlap_threshold=0.1, smooth=True,
            in_seconds=False, use_call_bounds=False, min_call_length=10, visualize=False, hierarchical_model=True):
    """
        Compute evaluation metrics for each full test spectrogram.

        After saving predictions for the test set of full spectrograms, we
        now want to calculate evaluation metrics on each of these spectrograms.
        First we load in the sigmoid (0, 1) predictions and use the defined
        threshold and smoothing flag to convert the predictions into binary
        0/1 predictions for each time slice. Then, we convert time slice
        predictions to full call (start, end) time predictions so that
        we can calculate elephant call specific evaluation metrics. Below
        we list the metrics that are calculated individually for each
        full spectrogram, as well as across all the test spectrograms.

        Metrics:
        - Call Prediction True Positives
        - Call Prediction False Positives
        - Call Recall True Positives
        - Call Recall False Negatives
        - F-score
        - Accuracy
        - Old Call Precision --- To be implemented
        - Old Call Recall --- To be implemented

        Parameters:
        - dataset: iterable of items where item[0] is the spectrogram,
            item[1] the per-slice labels and item[2] the path of the ground
            truth call file. Only iteration is required (no len()).
        - model_id: name of the sub-directory of predictions_path holding
            the saved '<data_id>.npy' predictions.
        - predictions_path: root directory of the saved predictions.
        - pred_threshold: forwarded to get_binary_predictions as 'threshold'.
        - overlap_threshold: forwarded to call_prec_recall as 'threshold'.
        - smooth: forwarded to get_binary_predictions.
        - in_seconds: forwarded to find_elephant_calls (for the predictions)
            and to process_ground_truth.
        - use_call_bounds: if True the ground truth calls come from
            process_ground_truth(gt_call_path); otherwise they are derived
            from the labels with find_elephant_calls(min_call_length=0).
        - min_call_length: forwarded to find_elephant_calls when extracting
            the predicted calls.
        - visualize: if True call visual_full_recall for every spectrogram.
        - hierarchical_model: if True also load the predictions stored under
            the 'Model_0' sub-directory and keep their smoothed version in
            the per-spectrogram results as 'model_0_predictions'.

        Returns:
        A dictionary mapping each spectrogram id to its result dictionary,
        plus a 'summary' entry. In the summary the call-level entries are
        accumulated counts (len() of the per-spectrogram values) and
        'f_score' / 'accuracy' are averaged over the processed spectrograms.
        If the dataset yields nothing the summary is returned with all
        entries left at 0.

    """
    # Maps spectrogram ids to dictionary of results for each spect
    # Additionally includes a key "summary" that computes aggregated
    # statistics over the entire test set of spectrograms
    results = {}
    results['summary'] = {'true_pos': 0,
                            'false_pos': 0,
                            'true_pos_recall': 0,
                            'false_neg': 0,
                            'f_score': 0,
                            'accuracy': 0
                            }

    # Number of spectrograms actually processed; used to average the
    # summed f_score / accuracy. Counting here (instead of calling
    # len(dataset)) supports plain iterables and lets us guard the
    # division for an empty dataset.
    num_spectrograms = 0
    for data in dataset:
        spectrogram = data[0]
        labels = data[1]
        gt_call_path = data[2]

        # Get the spec id: the first two '_'-separated fields of the
        # ground truth file name
        tags = gt_call_path.split('/')
        tags = tags[-1].split('_')
        data_id = tags[0] + '_' + tags[1]
        print("Generating Prediction for:", data_id)

        predictions = np.load(os.path.join(predictions_path, model_id, data_id + '.npy'))

        binary_preds, smoothed_predictions = get_binary_predictions(predictions, threshold=pred_threshold, smooth=smooth)

        # Process the predictions to get predicted elephant calls
        # Figure out better way to try different combinations of this
        # Note that processed_preds zeros out predictions that are not long
        # enough to be an elephant call
        predicted_calls, processed_preds = find_elephant_calls(binary_preds, min_call_length=min_call_length, in_seconds=in_seconds)
        print("Num predicted calls", len(predicted_calls))

        # Use the calls as defined in the original hand labeled file.
        # This looks to avoid issues of overlapping calls seeming like
        # single very large calls in the gt labeling
        if use_call_bounds:
            print("Using CSV file with ground truth call start and end times")
            gt_calls = process_ground_truth(gt_call_path, in_seconds=in_seconds)
        else:
            print("Using spectrogram labeling to generate GT calls")
            # We should never compute this in seconds
            # Also let us keep all the calls, i.e. set min_length = 0
            gt_calls, _ = find_elephant_calls(labels, min_call_length=0)

        print("Number of ground truth calls", len(gt_calls))

        # Visualize the predictions around the gt calls
        if visualize: # This is not super important
            visual_full_recall(spectrogram, smoothed_predictions, labels, processed_preds)

        # Look at precision metrics
        # Call Prediction True Positives
        # Call Prediction False Positives
        true_pos, false_pos = call_prec_recall(predicted_calls, gt_calls, threshold=overlap_threshold, is_truth=False,
                                                spectrogram=spectrogram, preds=binary_preds, gt_labels=labels)

        # Look at recall metrics
        # Call Recall True Positives
        # Call Recall False Negatives
        true_pos_recall, false_neg = call_prec_recall(gt_calls, predicted_calls, threshold=overlap_threshold, is_truth=True)

        # Slice-level metrics are computed on the thresholded predictions
        # (binary_preds), not on the length-filtered processed_preds
        f_score = f1_score(labels, binary_preds)
        accuracy = calc_accuracy(binary_preds, labels)

        results[data_id] = {'true_pos': true_pos,
                            'false_pos': false_pos,
                            'true_pos_recall': true_pos_recall,
                            'false_neg': false_neg,
                            'f_score': f_score,
                            'predictions': smoothed_predictions,
                            'binary_preds': processed_preds,
                            'accuracy': accuracy
                            }

        # If doing hierarchical modeling, save the model_0 predictions
        # specifically for visualization!
        if hierarchical_model:
            model_0_predictions = np.load(os.path.join(predictions_path, model_id, 'Model_0', data_id + '.npy'))
            _, model_0_smoothed_predictions = get_binary_predictions(model_0_predictions, threshold=pred_threshold, smooth=smooth)
            results[data_id]['model_0_predictions'] = model_0_smoothed_predictions

        # Update summary stats
        results['summary']['true_pos'] += len(true_pos)
        results['summary']['false_pos'] += len(false_pos)
        results['summary']['true_pos_recall'] += len(true_pos_recall)
        results['summary']['false_neg'] += len(false_neg)
        results['summary']['f_score'] += f_score
        results['summary']['accuracy'] += accuracy
        num_spectrograms += 1

    # Calculate averaged statistics; skip when nothing was processed so an
    # empty dataset yields an all-zero summary instead of ZeroDivisionError
    if num_spectrograms > 0:
        results['summary']['f_score'] /= num_spectrograms
        results['summary']['accuracy'] /= num_spectrograms

    return results