Example #1
def eval_loop(args,
              dataset,
              eval_accuracy,
              eval_metric,
              eval_accuracy2=None,
              mode="Validation"):
    for (x_batch_dev, x_batch_lemmas, mask,
         true_idxs), y_batch_dev in dataset:
        lemmas = tf.boolean_mask(x_batch_lemmas, mask)
        if args.wsd_method == "classification":
            possible_synsets = [[
                synset2id.get(syn, synset2id["<UNK>"])
                for syn in lemma2synsets[lemma.numpy().decode("utf-8")]
            ] for lemma in lemmas]
        elif args.wsd_method == "context_embedding":
            possible_synsets = [
                lemma2synset_ids[lemma.numpy()] for lemma in lemmas
            ]
        elif args.wsd_method == "multitask":
            possible_synsets = ([[
                synset2id.get(syn, synset2id["<UNK>"])
                for syn in lemma2synsets[lemma.numpy().decode("utf-8")]
            ] for lemma in lemmas], [
                lemma2synset_ids[lemma.numpy()] for lemma in lemmas
            ])
        outputs = model((x_batch_dev, mask))
        if args.wsd_method == "classification" or args.wsd_method == "context_embedding":
            outputs = tf.boolean_mask(outputs, mask)
            true_preds = tf.boolean_mask(y_batch_dev, mask)
        elif args.wsd_method == "multitask":
            outputs = (tf.boolean_mask(outputs[0], mask),
                       tf.boolean_mask(outputs[1], mask))
            true_preds = (tf.boolean_mask(y_batch_dev[0], mask),
                          tf.boolean_mask(y_batch_dev[1], mask))
        if eval_metric is not None:
            if args.wsd_method == "context_embedding":
                eval_metric.update_state(true_preds, outputs)
            elif args.wsd_method == "multitask":
                eval_metric.update_state(true_preds[1], outputs[1])
        true_idxs = tf.boolean_mask(true_idxs, mask)
        # models.accuracy updates eval_accuracy (and eval_accuracy2) in place;
        # the returned per-batch values are not needed here.
        models.accuracy(outputs, possible_synsets, embeddings1, true_idxs,
                        eval_accuracy, args.wsd_method, eval_accuracy2)
    if eval_metric is not None:
        eval_cosine_sim = eval_metric.result()
        eval_metric.reset_states()
        print('%s cosine similarity metric: %s' %
              (mode, float(eval_cosine_sim)))
    eval_acc = eval_accuracy.result()
    eval_accuracy.reset_states()
    print('%s accuracy: %s' % (
        mode,
        float(eval_acc),
    ))
    if args.wsd_method == "multitask":
        eval_acc2 = eval_accuracy2.result()
        eval_accuracy2.reset_states()
        print('Alternative %s accuracy (cosine similarity): %s' %
              (mode, float(eval_acc2)))
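For the classification branch, models.accuracy restricts each token's scores to the candidate synsets of its lemma before taking the argmax. A minimal sketch of that restriction step, assuming outputs holds per-token scores over all synsets and true_idxs the gold synset ids (the helper name and plumbing are hypothetical, not the repo's code):

import tensorflow as tf

def classification_accuracy(outputs, possible_synsets, true_idxs):
    # Score only each lemma's candidate synsets and predict the best one.
    correct = 0
    for scores, candidates, gold in zip(outputs, possible_synsets, true_idxs):
        candidate_scores = tf.gather(scores, candidates)
        predicted = candidates[int(tf.argmax(candidate_scores))]
        correct += int(predicted == int(gold))
    return correct / len(possible_synsets)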
Example #2
def test_logistic_regression_predict(samples1, logistic_regression_model):
    x, y = samples1
    x = StandardScaler().fit_transform(x)
    iterations = 100
    logistic_regression_model.fit(x, y, 1, iterations, add_intercept=True)
    predictions = [
        logistic_regression_model.predict(x[i, :]) for i in range(x.shape[0])
    ]
    assert accuracy(predictions, y) > 80
Example #3
def test_cost_function(train_data, test_data):
    x, y, y_onehot = train_data
    x_test, y_test = test_data
    network = NeuralNetwork(layers=[784, 25, 10])
    network.fit(x, y_onehot, alpha=0.1, iterations=40)
    prediction_test = network.predict(x_test)
    accuracy_test = accuracy(prediction_test, y_test)
    assert accuracy_test > 90
Example #4
def test_full_accuracy():
    prediction = np.array([1, 2, 3, 4, 5, 6])
    y = np.array([1, 2, 3, 4, 5, 6])
    assert accuracy(prediction, y) == 100
Example #5
def test_no_accuracy():
    prediction = np.array([8, 8, 8, 8, 8, 8])
    y = np.array([1, 2, 3, 4, 5, 6])
    assert accuracy(prediction, y) == 0
Example #6
# Training counterpart of eval_loop (Example #1); train_dataset, loss_fn,
# optimizer, and the accuracy/metric objects are set up by the caller.
step = 0
for (x_batch_train, x_batch_lemmas, mask,
     true_idxs), y_batch_train in train_dataset:
    # possible_synsets is built from x_batch_lemmas per wsd_method,
    # exactly as in eval_loop above.
    with tf.GradientTape() as tape:
        outputs = model((x_batch_train, mask))
        if args.wsd_method in ("classification", "context_embedding"):
            outputs = tf.boolean_mask(outputs, mask)
            true_preds = tf.boolean_mask(y_batch_train, mask)
        elif args.wsd_method == "multitask":
            outputs = (tf.boolean_mask(outputs[0], mask),
                       tf.boolean_mask(outputs[1], mask))
            true_preds = (tf.boolean_mask(y_batch_train[0], mask),
                          tf.boolean_mask(y_batch_train[1], mask))
        loss_value = loss_fn(true_preds, outputs)
        if train_metric is not None:
            if args.wsd_method == "context_embedding":
                train_metric.update_state(true_preds, outputs)
            elif args.wsd_method == "multitask":
                train_metric.update_state(true_preds[1], outputs[1])
        if step % 50 == 0:
            true_idxs = tf.boolean_mask(true_idxs, mask)
            batch_accuracy = models.accuracy(outputs, possible_synsets,
                                             embeddings1, true_idxs,
                                             train_accuracy, args.wsd_method,
                                             train_accuracy2)
            train_accuracy.reset_states()
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    if step % 50 == 0:
        print('Training loss (for one batch) at step %s: %s' %
              (step, float(loss_value)))
        print('Accuracy on last batch is: %s' % batch_accuracy[0])
        if args.wsd_method == "multitask":
            print('Alternative accuracy (cosine similarity) on last batch '
                  'is: %s' % batch_accuracy[1])
        print('Seen so far: %s samples' %
              ((step + 1) * int(args.batch_size)))
    step += 1
Example #7
argument_parser = argparse.ArgumentParser()
argument_parser.add_argument('--model',
                             nargs="+",
                             dest="model_paths",
                             help="models to test the accuracy of")

args = argument_parser.parse_args()

if not args.model_paths:
    model_paths = glob("model/*")  # compute accuracy for all the found models
else:
    model_paths = args.model_paths

result = []
_, _, X_test, y_test = mnist()
for path in model_paths:
    if not os.path.exists(path):
        continue

    print(f"Computing for {path}...", end="")
    sys.stdout.flush()
    model = load_from_file(path)
    test_set_accuracy = accuracy(model, X_test, y_test)
    result.append((path, test_set_accuracy))
    print()

result.sort(key=lambda pair: pair[1], reverse=True)
for path, test_set_accuracy in result:
    print(f"{path} -> {test_set_accuracy}")
Example #8
def test_pca_filtered_keeping_10_components_accuracy(mnist):
    X_train, y_train, X_test, y_test = mnist
    model = pca_filtered_model(fc_100_100_10(), X_train, 10)

    train(model, X_train, y_train, epochs=2)
    assert isclose(accuracy(model, X_test, y_test), 0.44, abs_tol=0.01)
Example #9
def test_fc_100_100_10_accuracy(mnist):
    model = fc_100_100_10()
    X_train, y_train, X_test, y_test = mnist

    train(model, X_train, y_train, epochs=2)
    assert isclose(accuracy(model, X_test, y_test), 0.544, abs_tol=0.01)
Example #10
def traintestsvp(args):
    # extract dataset
    dataset = args.datapath.split("/")[-1]
    print("Start reading in data for {0}...".format(dataset))
    train_data_loader, val_data_loader, classes = GET_DATASETLOADER[dataset](
        args)
    print("Done!")
    print("Start training and testing model...")
    # model which obtains hidden representations
    phi = GET_PHI[dataset](args)
    if args.hmodel:
        hierarchy = "random" if args.randomh else "predefined"
        model = SVPNet(
            phi,
            args.hidden,
            classes,
            hierarchy=hierarchy,
            random_state=args.randomseed,
        )
    else:
        model = SVPNet(phi,
                       args.hidden,
                       classes,
                       hierarchy="none",
                       random_state=args.randomseed)
    if args.gpu:
        model = model.cuda()
    print(model.transformer.hstruct_)
    # optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learnrate,
                                momentum=args.momentum)
    # train
    for epoch in range(args.nepochs):
        train_loss, train_acc, train_time = 0.0, 0.0, 0.0
        for i, data in enumerate(train_data_loader, 1):
            inputs, labels = data
            labels = list(labels)
            if args.gpu:
                inputs = inputs.cuda()
            optimizer.zero_grad()
            start_time = time.time()
            loss = model(inputs, labels)
            loss.backward()
            optimizer.step()
            stop_time = time.time()
            train_time += (stop_time - start_time) / args.batchsize
            train_loss += loss.item()
            with torch.no_grad():
                preds = model.predict(inputs)
                train_acc += accuracy(preds, labels)
            if i % args.nitprint == 0:
                print(
                    "Epoch {0}: training loss={1}   training accuracy={2}    training time={3}s"
                    .format(
                        epoch,
                        train_loss / args.nitprint,
                        train_acc / args.nitprint,
                        train_time / args.nitprint,
                    ))
                train_loss, train_acc, train_time = 0.0, 0.0, 0.0
    # validate: top-1 accuracy
    model.eval()
    val_acc, val_time = 0.0, 0.0
    for i, data in enumerate(val_data_loader, 1):
        inputs, labels = data
        labels = list(labels)
        if args.gpu:
            inputs = inputs.cuda()
        with torch.no_grad():
            start_time = time.time()
            preds = model.predict(inputs)
            stop_time = time.time()
            val_time += (stop_time - start_time) / args.batchsize
            val_acc += accuracy(preds, labels)
    print("Test accuracy={0}   test time={1}s".format(val_acc / i,
                                                      val_time / i))
    # validate: svp performance
    params = paramparser(args)
    for param in params:
        preds_out, labels_out = [], []
        if not args.ilp:
            if not args.hmodel and param["c"] != len(np.unique(classes)):
                hstruct = get_hstruct_tensor(np.unique(classes), param)
                print(hstruct.shape)
                if args.gpu:
                    hstruct = hstruct.cuda()
                model.SVP.set_hstruct(hstruct)
            val_recall, val_setsize, val_time = 0.0, 0.0, 0.0
            for i, data in enumerate(val_data_loader, 1):
                inputs, labels = data
                labels = list(labels)
                if args.gpu:
                    inputs = inputs.cuda()
                with torch.no_grad():
                    start_time = time.time()
                    preds = model.predict_set(inputs, param)
                    stop_time = time.time()
                    val_time += (stop_time - start_time) / args.batchsize
                    val_recall += recall(preds, labels)
                    val_setsize += setsize(preds)
                    if args.out != "":
                        preds_out.extend(preds)
                        labels_out.extend(labels)
            if args.out != "":
                with open(
                        "./{0}_{1}_{2}.csv".format(args.out, param["c"],
                                                   param["size"]), "w") as f:
                    for (pi, lj) in zip(preds_out, labels_out):
                        f.write("{0},{1}\n".format(pi, lj))
                f.close()
            print("Test SVP for setting {0}: recall={1}, |Ÿ|={2}, time={3}s".
                  format(param, val_recall / i, val_setsize / i, val_time / i))
            print("Done!")
        else:
            # first extract hstruct
            hlt = HLabelTransformer(sep=";")
            hlt.fit(np.unique(classes))
            hstruct = hlt.hstruct_
            # get matrices for KCG problem
            A, b = pwk_ilp_get_ab(hstruct, param)
            val_recall, val_setsize, val_time = 0.0, 0.0, 0.0
            for i, data in enumerate(val_data_loader, 1):
                inputs, labels = data
                labels = list(labels)
                if args.gpu:
                    inputs = inputs.cuda()
                with torch.no_grad():
                    P = model.forward(inputs).cpu().detach().numpy()
                    preds, t = pwk_ilp(P, A, b, hstruct, args.solver,
                                       model.transformer)
                    val_time += t
                    val_recall += recall(preds, labels)
                    val_setsize += setsize(preds)

            print("Test SVP for setting {0}: recall={1}, |Ÿ|={2}, time={3}s".
                  format(param, val_recall / i, val_setsize / i, val_time / i))
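For reference, the set-valued metrics accumulated above could look as follows. This is a sketch assuming preds is a list of predicted label sets (one per sample); the actual definitions live in the SVP codebase:

def recall(preds, labels):
    # Per-batch fraction of samples whose gold label is in the predicted set.
    hits = sum(1 for pred_set, label in zip(preds, labels) if label in pred_set)
    return hits / len(labels)

def setsize(preds):
    # Average size of the predicted sets in the batch.
    return sum(len(pred_set) for pred_set in preds) / len(preds)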