Example #1
def main():
    x_train, y_train, x_test, y_test = load_features()
    x_train_conc = concate_x(x_train)
    x_test_conc = concate_x(x_test)

    # log_reg = LogisticRegressionCV(verbose=1, penalty='l2', n_jobs=1)
    # linear_svc = linear_svc_cv(x_train_conc, y_train)
    rand_for = random_forest_cv(x_train_conc, y_train)

    # y_hat = one_vs_rest(x_train_conc, y_train, x_test_conc, log_reg)

    y_hat = one_vs_rest(x_train_conc, y_train, x_test_conc, rand_for)
    # y_hat = one_vs_rest(x_train_conc, y_train, x_test_conc, linear_svc)
    # y_hat = one_for_each_class(x_train_conc, y_train, x_test_conc, rand_for)
    # y_hat = one_for_each_class(x_train_conc, y_train, x_test_conc, linear_svc)

    # np.savetxt('test.csv', y_test, delimiter=',')
    # np.savetxt('hat.csv', y_hat, delimiter=',')

    print('Test accuracy: ', score.accuracy(y_hat, y_test))
    print('F1 score: ', score.f1score(y_hat, y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(y_hat, y_test)
    for c, class_score in enumerate(score_byclass):
        print(c, ':', class_score)
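
Several of the examples on this page call accuracy, f1score and f1_by_class from a project-local score module that is not shown here. Purely as a hedged sketch of what those helpers might compute, assuming the +1/-1 multi-label encoding implied by the np.sign(...) predictions in the later snippets (this is an illustration, not the original implementation):

import numpy as np

def accuracy(y_hat, y_true):
    # fraction of label entries predicted correctly
    return float(np.mean(np.asarray(y_hat) == np.asarray(y_true)))

def f1_by_class(y_hat, y_true):
    # per-class F1 for +1/-1 encoded multi-label arrays (classes along axis 1)
    y_hat, y_true = np.asarray(y_hat), np.asarray(y_true)
    scores = []
    for c in range(y_true.shape[1]):
        tp = np.sum((y_hat[:, c] == 1) & (y_true[:, c] == 1))
        fp = np.sum((y_hat[:, c] == 1) & (y_true[:, c] != 1))
        fn = np.sum((y_hat[:, c] != 1) & (y_true[:, c] == 1))
        denom = 2 * tp + fp + fn
        scores.append(2 * tp / denom if denom else 0.0)
    return scores

def f1score(y_hat, y_true):
    # macro-averaged F1 over all classes
    return float(np.mean(f1_by_class(y_hat, y_true)))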
Example #2
    def learn(self):
        hypothesis = self.initial_hypothesis()
        current_score = 0.0

        while True:
            next_hypothesis = self.best_rule(hypothesis)
            new_score = accuracy(next_hypothesis)
            getLogger(self._logger).info('RULE LEARNED: %s %s' %
                                         (next_hypothesis, new_score))

            s = significance(next_hypothesis)
            if self._min_significance is not None and s < self._min_significance:
                break
            if new_score > current_score:
                hypothesis = next_hypothesis
                current_score = new_score
            else:
                break
            if self.interrupted:
                break
            if hypothesis.get_literal() and hypothesis.get_literal().functor == '_recursive':
                break  # can't extend after recursive

        return hypothesis
Example #3
def main():
    batch_size = 64
    nb_epoch = 20
    img_size = 256
    gen = read_data.read_data_photo_labels(
        2000, img_size, batch_size=batch_size)
    X_test, Y_test = next(gen)

    # model = get_image_model(img_size)

    with open(JSON_NAME, 'r') as jfile:
        model = models.model_from_json(jfile.read())
    model.load_weights(WEIGHTS_NAME)

    # model.fit_generator(gen,
    #         samples_per_epoch=10000, nb_epoch=nb_epoch, verbose=1)

    test_pred = np.sign(model.predict(X_test))
    test_loss = model.evaluate(X_test, Y_test)
    np.savetxt('pred.csv', test_pred, delimiter=',')

    # with open(JSON_NAME, 'w') as jfile:
    #     jfile.write(model.to_json())
    # model.save_weights(WEIGHTS_NAME, overwrite=True)

    print('Test loss: ', test_loss)
    print('Test accuracy: ', score.accuracy(test_pred, Y_test))
    print('F1 score: ', score.f1score(test_pred, Y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(test_pred, Y_test)
    for c, s in enumerate(score_byclass):
        print(c, ':', s)
Example #4
def main():
    batch_size = 64
    nb_epoch = 20
    img_size = 256
    gen = read_data.read_data_photo_labels(2000,
                                           img_size,
                                           batch_size=batch_size)
    X_test, Y_test = next(gen)

    # model = get_image_model(img_size)

    with open(JSON_NAME, 'r') as jfile:
        model = models.model_from_json(jfile.read())
    model.load_weights(WEIGHTS_NAME)

    # model.fit_generator(gen,
    #         samples_per_epoch=10000, nb_epoch=nb_epoch, verbose=1)

    test_pred = np.sign(model.predict(X_test))
    test_loss = model.evaluate(X_test, Y_test)
    np.savetxt('pred.csv', test_pred, delimiter=',')

    # with open(JSON_NAME, 'w') as jfile:
    #     jfile.write(model.to_json())
    # model.save_weights(WEIGHTS_NAME, overwrite=True)

    print('Test loss: ', test_loss)
    print('Test accuracy: ', score.accuracy(test_pred, Y_test))
    print('F1 score: ', score.f1score(test_pred, Y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(test_pred, Y_test)
    for c, s in enumerate(score_byclass):
        print(c, ':', s)
def test_accuracy_for_deliverable1b():
    expected = 0.729
    actual = accuracy(KEYFILES[ENGLISH], DELIVERABLE1b)
    assert_almost_equals(
        expected,
        actual,
        places=3,
        msg="Accuracy Incorrect for 1b: Expected %f, Actual %f" %
        (expected, actual))
def test_accuracy_for_deliverable2f():
    expected = {
        GERMAN: 0.612,
        SPANISH: 0.662,
        ITALIAN: 0.657,
        FRENCH: 0.582,
        PORTO: 0.624
    }
    SUFFIX = "deliverable2f.conll"
    lang, filename = getForeignLanguage(DIR, SUFFIX)
    actual = accuracy(KEYFILES[lang], os.path.join(DIR, filename))
    ok_(expected[lang] <= (actual + 0.002),
        msg="Accuracy Incorrect for 2f: Expected %f, Actual %f" %
        (expected[lang], actual))
def test_accuracy_for_deliverable2c():
    expected = {
        GERMAN: 0.432,
        SPANISH: 0.365,
        ITALIAN: 0.311,
        FRENCH: 0.372,
        PORTO: 0.305
    }
    SUFFIX = "deliverable2c.conll"
    lang, filename = getForeignLanguage(DIR, SUFFIX)
    actual = accuracy(KEYFILES[lang], os.path.join(DIR, filename))
    ok_(expected[lang] <= (actual + 0.002),
        msg="Accuracy Incorrect for 2c: Expected %f, Actual %f" %
        (expected[lang], actual))  # adding some tolerance.
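
The deliverable tests above compare a gold key file against a system output file and require a minimum tagging accuracy. The accuracy helper itself is not shown on this page; a minimal sketch, assuming aligned word-and-tag CoNLL files with blank lines between sentences (the file layout is an assumption, not taken from the original grader):

def accuracy(keyfile, responsefile):
    correct = 0
    total = 0
    with open(keyfile) as kf, open(responsefile) as rf:
        for key_line, resp_line in zip(kf, rf):
            key_line = key_line.strip()
            resp_line = resp_line.strip()
            if not key_line:  # sentence boundary
                continue
            total += 1
            if key_line.split()[-1] == resp_line.split()[-1]:
                correct += 1
    return correct / total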
Example #8
def scoreModel():
    try:
        testX = np.array(pd.read_csv('../data/testX.csv'))
        testY = np.array(pd.read_csv('../data/testY.csv'), dtype=int)
    except Exception:
        print("No valid datasets were found")
    try:
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos, unigramTgtPos,
         bigramTgtPos, trigramTgtPos) = corpf.loadNLP()
    except Exception:
        print('No ngram models were found, making new ones...')
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos, unigramTgtPos,
         bigramTgtPos, trigramTgtPos) = corpf.getNgramModels()
    method = input(
        'What model would you like to use?\nSee README for available options: '
    )
    if method == 'SVM':
        svm = lc.loadmodel('../data/svm.joblib')
        scores = lc.classCLF(svm, testX)
    elif method == 'LR':
        lr = lc.loadmodel('../data/lr.joblib')
        scores = lc.classCLF(lr, testX)
    elif method == 'MLP':
        mlp = lc.loadmodel('../data/mlp.joblib')
        scores = lc.classCLF(mlp, testX)
    elif method == 'NBC':
        NBC = lc.loadmodel('../data/NBC.joblib')
        scores = lc.NBC.classnb(NBC, testX)
    else:
        print(
            'No valid method was given, please see the README for instructions')
    fscore = sc.fscore(scores[0], testY)
    print('F1score:', fscore)
    accuracy = sc.accuracy(scores[0], testY)
    print('Accuracy:', accuracy)
    crossent = []
    for i in range(len(testY)):
        correctEst = 0
        if scores[0][i] == testY[i]:
            correctEst = 1
        crossent.append(sc.crossEntropy(scores[1][i], correctEst))
    print('CrossEnt:', np.mean(crossent))
    return fscore, method, scores, crossent
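
sc.fscore, sc.accuracy and sc.crossEntropy come from another project module that is not included on this page. The loop above pairs each predicted probability (scores[1][i]) with a 0/1 "prediction was correct" flag and averages sc.crossEntropy over the test set. As a hedged sketch only, that term could reduce to a standard binary cross-entropy (the eps clipping is my addition to keep log() finite, not part of the original code):

import numpy as np

def crossEntropy(prob, label, eps=1e-12):
    # binary cross-entropy of one probability against a 0/1 label
    prob = min(max(float(prob), eps), 1.0 - eps)
    return -(label * np.log(prob) + (1 - label) * np.log(1.0 - prob))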
Example #9
def scoreSentences():
    print("Warning this takes quite a while")
    try:
        cleandf = pd.read_csv('../data/cleandf.csv', dtype=object)
    except Exception:
        fullDf = pd.read_csv("../data/en-nl.tsv", sep="\t")
        cleandf = cd.cleandata(fullDf)
        cleandf.to_csv('../data/cleandf.csv')
    try:
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos, unigramTgtPos,
         bigramTgtPos, trigramTgtPos) = corpf.loadNLP()
    except Exception:
        print('No ngram models were found, making new ones...')
        (unigramSrc, bigramSrc, trigramSrc, unigramTgt, bigramTgt, trigramTgt,
         unigramSrcPos, bigramSrcPos, trigramSrcPos, unigramTgtPos,
         bigramTgtPos, trigramTgtPos) = corpf.getNgramModels()
    try:
        testX = np.array(pd.read_csv('../data/testX.csv'))
        testY = np.array(pd.read_csv('../data/testY.csv'), dtype=int)
    except Exception:
        print("No valid datasets were found")

    scores = scoreder(cleandf, testX)
    fscore = sc.fscore(scores[0], testY)
    print('F1-score:', fscore)
    accuracy = sc.accuracy(scores[0], testY)
    print('Accuracy:', accuracy)
    crossent = []
    for i in range(len(testY)):
        correctEst = 0
        if scores[0][i] == testY[i]:
            correctEst = 1
        crossent.append(sc.crossEntropy(scores[1][i], correctEst))
    print('Cross Entropy:', np.mean(crossent))
    return
Example #10
        x_whole[idx] /= len(photo_ids)
        y_whole[idx] = label

    train_frac = 0.9
    x_train, x_test = np.vsplit(x_whole, [int(num_biz * train_frac)])
    y_train, y_test = np.vsplit(y_whole, [int(num_biz * train_frac)])

    print('X_train shape:', x_train.shape)
    print('Y_train shape:', y_train.shape)
    print('Training on %s biz, testing on %s biz' %
          (x_train.shape[0], x_test.shape[0]))

    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              verbose=1,
              validation_split=0.0)

    test_pred = np.sign(model.predict(x_test))
    test_loss = model.evaluate(x_test, y_test)
    np.savetxt('pred.csv', test_pred, delimiter=',')

    print('Test loss: ', test_loss)
    print('Test accuracy: ', score.accuracy(test_pred, y_test))
    print('F1 score: ', score.f1score(test_pred, y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(test_pred, y_test)
    for c, class_score in enumerate(score_byclass):
        print(c, ':', class_score)
Example #11
    loss2 += link_constraints2
    optimizer1.zero_grad()  # clear gradients for the next step
    loss1.backward()  # backpropagation, compute gradients
    optimizer1.step()  # apply gradients
    optimizer2.zero_grad()  # clear gradients for the next step
    loss2.backward()  # backpropagation, compute gradients
    optimizer2.step()  # apply gradients
    if step % 1 == 0:
        if Args.cuda:
            pred1 = torch.max(output1, 1)[1].cuda().data.squeeze()
            pred2 = torch.max(output2, 1)[1].cuda().data.squeeze()
        else:
            pred1 = torch.max(output1, 1)[1].data.squeeze()
            pred2 = torch.max(output2, 1)[1].data.squeeze()
        # evaluate
        accuracy1 = score.accuracy(pred1, y)
        accuracy2 = score.accuracy(pred2, y)
        F1_1 = score.F1(pred1, y)
        F1_2 = score.F1(pred2, y)
        print('Epoch: %s | step: %s | accuracy1: %.2f | F1: %.4f | '
              'accuracy2: %.2f | F1: %.4f |' %
              (epoch, step, accuracy1, F1_1, accuracy2, F1_2))
#%% Testing
all_y = []
all_pred1 = []
all_pred2 = []
for step, (x, y) in enumerate(valid_loader):
    y = torch.squeeze(y)  # remove an axis
    if Args.cuda:
        x, y = x.cuda(), y.cuda()
    diagnosis1.eval()  # switch to evaluation mode
    diagnosis2.eval()
    output1, _ = diagnosis1(x)
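
score.accuracy and score.F1 in this PyTorch example are again project-local helpers that are not shown. A self-contained sketch of what they might compute for 1-D class-index tensors (treating class 1 as the positive class is my assumption, not the original project's):

import torch

def accuracy(pred, target):
    # fraction of positions where the predicted class index matches the target
    return (pred == target).float().mean().item()

def F1(pred, target, positive=1):
    # binary F1 with `positive` as the positive class index
    tp = ((pred == positive) & (target == positive)).sum().item()
    fp = ((pred == positive) & (target != positive)).sum().item()
    fn = ((pred != positive) & (target == positive)).sum().item()
    denom = 2 * tp + fp + fn
    return 2 * tp / denom if denom else 0.0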
Example #12
def probfoil(**kwargs):
    args = kwargs

    if 'seed' in args:
        seed = args['seed']
    else:
        seed = str(random.random())
        args['seed'] = seed
    random.seed(seed)

    logger = 'probfoil'

    if 'log' not in args:
        args['log'] = None
        logfile = None
    else:
        logfile = open(args['log'], 'w')

    if 'verbose' not in args:
        args['verbose'] = 0

    if 'm' not in args:
        args['m'] = 1

    if 'beam_size' not in args:
        args['beam_size'] = 5

    if 'p' not in args:
        args['p'] = None

    if 'l' not in args:
        args['l'] = None

    if 'target' not in args:
        args['target'] = None

    if 'symmetry_breaking' not in args:
        args['symmetry_breaking'] = True

    if 'settings' in args:
        settings = args['settings']
        del args['settings']
    else:
        settings = None

    if 'train' in args:
        train = args['train']
        del args['train']
    else:
        train = None

    if 'test' in args:
        test = args['test']
        del args['test']
    else:
        test = None

    #settings = args['settings']
    #train = args['train']

    log = init_logger(verbose=args['verbose'], name=logger, out=logfile)

    log.info('Random seed: %s' % seed)

    # Load input files
    #data = DataFile(*(PrologFile(source) for source in args['files']))
    data = DataFile(*(PrologString(source) for source in [settings, train]))

    if 'probfoil1' in args:
        learn_class = ProbFOIL
    else:
        learn_class = ProbFOIL2

    time_start = time.time()
    learn = learn_class(data,
                        logger=logger,
                        seed=seed,
                        log=args['log'],
                        verbose=args['verbose'],
                        m=args['m'],
                        beam_size=args['beam_size'],
                        p=args['p'],
                        l=args['l'])

    hypothesis = learn.learn()
    time_total = time.time() - time_start

    # Store scores
    train_accuracy = accuracy(hypothesis)
    train_precision = precision(hypothesis)
    train_recall = recall(hypothesis)

    # Load test data
    if test is not None:
        test_data = DataFile(*(PrologString(source)
                               for source in [settings, test]))
        test = learn_class(test_data,
                           logger=logger,
                           seed=seed,
                           log=args['log'],
                           verbose=args['verbose'],
                           m=args['m'],
                           beam_size=args['beam_size'],
                           p=args['p'],
                           l=args['l'])
        test_hypothesis = test.test_rule(hypothesis)

        # Store scores
        test_accuracy = accuracy(test_hypothesis)
        test_precision = precision(test_hypothesis)
        test_recall = recall(test_hypothesis)

    print('================ SETTINGS ================')
    #for kv in vars(args).items():
    for kv in args.items():
        print('%20s:\t%s' % kv)

    if learn.interrupted:
        print('================ PARTIAL THEORY ================')
    else:
        print('================= FINAL THEORY =================')
    rule = hypothesis
    rules = rule.to_clauses(rule.target.functor)

    # First rule is failing rule: don't print it if there are other rules.
    if len(rules) > 1:
        for rule in rules[1:]:
            print(rule)
    else:
        print(rules[0])

    print('==================== SCORES ====================')
    print('            Train Set')
    print('             Accuracy:\t', train_accuracy)
    print('            Precision:\t', train_precision)
    print('               Recall:\t', train_recall)
    if test is not None:
        print('             Test Set')
        print('             Accuracy:\t', test_accuracy)
        print('            Precision:\t', test_precision)
        print('               Recall:\t', test_recall)
    print('================== STATISTICS ==================')
    for name, value in learn.statistics():
        print('%20s:\t%s' % (name, value))
    print('          Total time:\t%.4fs' % time_total)

    if logfile:
        logfile.close()


#def main(argv=sys.argv[1:]):
#    args = argparser().parse_args(argv)
#
#    if args.seed:
#        seed = args.seed
#    else:
#        seed = str(random.random())
#    random.seed(seed)
#
#    logger = 'probfoil'
#
#    if args.log is None:
#        logfile = None
#    else:
#        logfile = open(args.log, 'w')
#
#    log = init_logger(verbose=args.verbose, name=logger, out=logfile)
#
#    log.info('Random seed: %s' % seed)
#
#    # Load input files
#    data = DataFile(*(PrologFile(source) for source in args.files))
#
#    if args.probfoil1:
#        learn_class = ProbFOIL
#    else:
#        learn_class = ProbFOIL2
#
#    time_start = time.time()
#    learn = learn_class(data, logger=logger, **vars(args))
#
#    hypothesis = learn.learn()
#    time_total = time.time() - time_start
#
#    print ('================ SETTINGS ================')
#    for kv in vars(args).items():
#        print('%20s:\t%s' % kv)
#
#    if learn.interrupted:
#        print('================ PARTIAL THEORY ================')
#    else:
#        print('================= FINAL THEORY =================')
#    rule = hypothesis
#    rules = rule.to_clauses(rule.target.functor)
#
#    # First rule is failing rule: don't print it if there are other rules.
#    if len(rules) > 1:
#        for rule in rules[1:]:
#            print (rule)
#    else:
#        print (rules[0])
#    print ('==================== SCORES ====================')
#    print ('            Accuracy:\t', accuracy(hypothesis))
#    print ('           Precision:\t', precision(hypothesis))
#    print ('              Recall:\t', recall(hypothesis))
#    print ('================== STATISTICS ==================')
#    for name, value in learn.statistics():
#        print ('%20s:\t%s' % (name, value))
#    print ('          Total time:\t%.4fs' % time_total)
#
#    if logfile:
#        logfile.close()
#
#def argparser():
#    parser = argparse.ArgumentParser()
#    parser.add_argument('files', nargs='+')
#    parser.add_argument('-1', '--det-rules', action='store_true', dest='probfoil1',
#                        help='learn deterministic rules')
#    parser.add_argument('-m', help='parameter m for m-estimate', type=float,
#                        default=argparse.SUPPRESS)
#    parser.add_argument('-b', '--beam-size', type=int, default=5,
#                        help='size of beam for beam search')
#    parser.add_argument('-p', '--significance', type=float, default=None,
#                        help='rule significance threshold', dest='p')
#    parser.add_argument('-l', '--length', dest='l', type=int, default=None,
#                        help='maximum rule length')
#    parser.add_argument('-v', action='count', dest='verbose', default=None,
#                        help='increase verbosity (repeat for more)')
#    parser.add_argument('--symmetry-breaking', action='store_true',
#                        help='avoid symmetries in refinement operator')
#    parser.add_argument('--target', '-t', type=str,
#                        help='specify predicate/arity to learn (overrides settings file)')
#    parser.add_argument('-s', '--seed', help='random seed', default=None)
#    parser.add_argument('--log', help='write log to file', default=None)
#
#    return parser
#
#
#if __name__ == '__main__':
#    main()
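
probfoil() above takes everything as keyword arguments: settings and train (and optionally test) are Prolog source strings handed to PrologString, and the remaining options fall back to the defaults filled in at the top of the function. A minimal, hypothetical call might look like the following; the file names are placeholders for illustration and are not part of the original module:

# Hypothetical driver: read the settings and training data the caller provides,
# then pass the Prolog source strings to probfoil().
with open('grandparent.settings') as f:
    settings_src = f.read()
with open('grandparent.data') as f:
    train_src = f.read()

probfoil(settings=settings_src, train=train_src,
         seed='42', verbose=1, beam_size=5)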
Example #13
    for idx, (biz_id, (label, photo_ids)) in enumerate(biz_csv.items()):
        for photo_id in photo_ids:
            image_idx = image_label_order[str(photo_id)]
            x_whole[idx] += image_labels[image_idx]
        x_whole[idx] /= len(photo_ids)
        y_whole[idx] = label

    train_frac = 0.9
    x_train, x_test = np.vsplit(x_whole, [int(num_biz*train_frac)])
    y_train, y_test = np.vsplit(y_whole, [int(num_biz*train_frac)])

    print('X_train shape:', x_train.shape)
    print('Y_train shape:', y_train.shape)
    print('Training on %s biz, testing on %s biz' %
          (x_train.shape[0], x_test.shape[0]))

    model.fit(x_train, y_train, batch_size=batch_size,
            nb_epoch=nb_epoch, verbose=1, validation_split=0.0)

    test_pred = np.sign(model.predict(x_test))
    test_loss = model.evaluate(x_test, y_test)
    np.savetxt('pred.csv', test_pred, delimiter=',')

    print('Test loss: ', test_loss)
    print('Test accuracy: ', score.accuracy(test_pred, y_test))
    print('F1 score: ', score.f1score(test_pred, y_test))
    print('F1 score by class:')
    score_byclass = score.f1_by_class(test_pred, y_test)
    for c, class_score in enumerate(score_byclass):
        print(c, ':', class_score)
def test_accuracy_for_deliverable2f():
    expected = {GERMAN: 0.612, SPANISH: 0.662, ITALIAN: 0.657, FRENCH: 0.582, PORTO: 0.624}
    SUFFIX = "deliverable2f.conll"
    lang, filename = getForeignLanguage(DIR, SUFFIX)
    actual = accuracy(KEYFILES[lang], os.path.join(DIR, filename))
    ok_(expected[lang] <= (actual + 0.002),
        msg="Accuracy Incorrect for 2f: Expected %f, Actual %f" % (expected[lang], actual))
def test_accuracy_for_deliverable2c():
    expected = {GERMAN: 0.432, SPANISH: 0.365, ITALIAN: 0.311, FRENCH: 0.372, PORTO: 0.305}
    SUFFIX = "deliverable2c.conll"
    lang, filename = getForeignLanguage(DIR, SUFFIX)
    actual = accuracy(KEYFILES[lang], os.path.join(DIR, filename))
    ok_(expected[lang] <= (actual + 0.002),
        msg="Accuracy Incorrect for 2c: Expected %f, Actual %f" % (expected[lang], actual))  # adding some tolerance.
def test_accuracy_for_deliverable1c():
    expected = 0.82
    actual = accuracy(KEYFILES[ENGLISH], DELIVERABLE1c)
    ok_(expected < (actual + 0.002),
        msg="Accuracy is lower than expected for 1c: Expected %f, Actual %f" % (expected, actual))
def test_accuracy_for_deliverable1b():
    expected = 0.729
    actual = accuracy(KEYFILES[ENGLISH], DELIVERABLE1b)
    assert_almost_equals(expected, actual, places=3,
                         msg="Accuracy Incorrect for 1b: Expected %f, Actual %f" % (expected, actual))
Example #18
    y = torch.squeeze(y)  # remove an axis
    if Args.cuda:
        x, y = x.cuda(), y.cuda()
    diagnosis.train()  # switch to training mode
    output = diagnosis(x)
    loss = loss_func(output, y)  # loss
    optimizer.zero_grad()  # clear gradients for the next step
    loss.backward()  # backpropagation, compute gradients
    optimizer.step()  # apply gradients
    if step % 1 == 0:
        if Args.cuda:
            pred = torch.max(output, 1)[1].cuda().data.squeeze()
        else:
            pred = torch.max(output, 1)[1].data.squeeze()
        # evaluate
        accuracy = score.accuracy(pred, y)
        F1 = score.F1(pred, y)
        print('Epoch: %s | step: %s | train loss: %.2f | accuracy: %.2f | F1: %.4f'
              % (epoch, step, loss.data, accuracy, F1))
#%% Testing
all_y = []
all_pred = []
for step, (x, y) in enumerate(valid_loader):
    y = torch.squeeze(y)  # remove an axis
    if Args.cuda:
        x, y = x.cuda(), y.cuda()
    diagnosis.eval()  # switch to evaluation mode
    output = diagnosis(x)
    if Args.cuda:
        pred = torch.max(output, 1)[1].cuda().data.squeeze()
def test_accuracy_for_deliverable1c():
    expected = 0.82
    actual = accuracy(KEYFILES[ENGLISH], DELIVERABLE1c)
    ok_(expected < (actual + 0.002),
        msg="Accuracy is lesser than expected for 1c: Expected %f, Actual %f" %
        (expected, actual))