Example #1
Cross-validation harness: trains the supplied algorithm on all but one fold, tests on the held-out fold, and reports per-fold accuracy plus the mean F-score.
def evalAlgo(data, algo, number_of_folds, *args):
    """Evaluate algo with k-fold cross-validation; returns the list of per-fold accuracies."""
    folds = crossValidator(data, number_of_folds)
    scores = list()
    fs = list()

    for fold in folds:
        trainingSet = list(folds)
        trainingSet.remove(fold)
        trainingSet = sum(trainingSet, [])  # flatten the remaining folds into a single training list
        testingSet = list()

        for row in fold:
            copyRow = list(row)
            testingSet.append(copyRow)
            copyRow[-1] = None  # hide the label so the algorithm cannot see it

        predicted = algo(trainingSet, testingSet, *args)
        real = [i[-1] for i in fold]
        accuracy = measureAccuracy(real, predicted)
        scores.append(accuracy)

        fs.append(fscore.fscore(real, predicted))
    print("F Score is: ", sum(fs) / len(fs))
    return scores
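The crossValidator and measureAccuracy helpers (and the fscore module) used by evalAlgo are not shown in this example. A minimal sketch of the first two, written only to match the call signatures above; these are illustrative stand-ins, not the original implementations:

import random

def crossValidator(data, number_of_folds):
    # Randomly partition the rows into number_of_folds equally sized folds.
    rows = list(data)
    random.shuffle(rows)
    fold_size = len(rows) // number_of_folds
    return [rows[i * fold_size:(i + 1) * fold_size] for i in range(number_of_folds)]

def measureAccuracy(real, predicted):
    # Percentage of predictions that match the true labels.
    correct = sum(1 for r, p in zip(real, predicted) if r == p)
    return correct / len(real) * 100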
Example #2
Chamfer-distance losses between a completed point cloud and the ground truth, with an optional point-cloud F1 score.
def calc_cd(output, gt, calc_f1=False):
    cham_loss = dist_chamfer_3D.chamfer_3DDist()
    dist1, dist2, _, _ = cham_loss(gt, output)
    # CD_p averages the square-rooted nearest-neighbour distances; CD_t averages the squared ones.
    cd_p = (torch.sqrt(dist1).mean(1) + torch.sqrt(dist2).mean(1)) / 2
    cd_t = dist1.mean(1) + dist2.mean(1)
    if calc_f1:
        f1, _, _ = fscore(dist1, dist2)
        return cd_p, cd_t, f1
    else:
        return cd_p, cd_t
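The fscore called in calc_cd (and again in Examples #4 and #7) is not defined in these snippets. A minimal sketch, assuming the common point-cloud definition in which precision and recall are the fractions of nearest-neighbour distances below a fixed threshold (0.0001 on the squared distances is a typical default):

import torch

def fscore(dist1, dist2, threshold=0.0001):
    # dist1, dist2: (B, N) squared nearest-neighbour distances from the Chamfer kernel.
    precision_1 = torch.mean((dist1 < threshold).float(), dim=1)
    precision_2 = torch.mean((dist2 < threshold).float(), dim=1)
    f = 2 * precision_1 * precision_2 / (precision_1 + precision_2)
    f[torch.isnan(f)] = 0  # guard against 0/0 when no points fall below the threshold
    return f, precision_1, precision_2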
Example #3
Per-class (one-vs-rest) confusion counts: for each output class, tallies true/false positives and negatives of the predictions p against the labels y.
def list_of_fscores(p, y, outputs):
    # One counter per class; a comprehension avoids the aliasing bug of
    # [fscore()] * outputs, which would make every class share a single counter.
    fscores = [fscore() for _ in range(int(outputs))]
    for i in range(outputs):
        for j in range(y.shape[0]):
            # predicted positive for class i+1
            if p[j] == (i + 1):
                if y[j] == (i + 1):
                    fscores[i].true_pos += 1
                else:
                    fscores[i].false_pos += 1
            # predicted negative for class i+1
            else:
                # a negative prediction is only false if the true label is class i+1
                if y[j] == (i + 1):
                    fscores[i].false_neg += 1
                else:
                    fscores[i].true_neg += 1
    return fscores
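The fscore class these counters live on is not shown. A hedged usage sketch, assuming it is a small counter with the four attributes used above plus an F1 helper (the class below is a stand-in, not the original):

import numpy as np

class fscore:
    # Assumed stand-in for the real counter class, which is not shown in the example.
    def __init__(self):
        self.true_pos = 0
        self.false_pos = 0
        self.true_neg = 0
        self.false_neg = 0

    def f1(self):
        denom = 2 * self.true_pos + self.false_pos + self.false_neg
        return 2 * self.true_pos / denom if denom else 0.0

p = np.array([1, 2, 2, 3])  # predicted labels (1-based, as in the loop above)
y = np.array([1, 2, 3, 3])  # true labels
per_class = list_of_fscores(p, y, outputs=3)
print([round(c.f1(), 3) for c in per_class])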
Example #4
Unit test that checks a CUDA Chamfer-distance implementation against a pure-Python reference and prints the F-score computed from the resulting distances.
def test_chamfer(distChamfer, dim):
    points1 = torch.rand(4, 100, dim).cuda()
    points2 = torch.rand(4, 200, dim, requires_grad=True).cuda()
    dist1, dist2, idx1, idx2 = distChamfer(points1, points2)

    loss = torch.sum(dist1)
    loss.backward()

    mydist1, mydist2, myidx1, myidx2 = chamfer_python.distChamfer(points1, points2)
    d1 = (dist1 - mydist1) ** 2
    d2 = (dist2 - mydist2) ** 2
    assert (
        torch.mean(d1) + torch.mean(d2) < 0.00000001
    ), "chamfer cuda and chamfer normal are not giving the same results"

    xd1 = idx1 - myidx1
    xd2 = idx2 - myidx2
    assert (
            torch.norm(xd1.float()) + torch.norm(xd2.float()) == 0
    ), "chamfer cuda and chamfer normal are not giving the same results"
    print(f"fscore :", fscore(dist1, dist2))
    print("Unit test passed")
Example #5
Command-line script that, for each user, sweeps candidate probability thresholds, compares the predictor's expected F-score against the actual F-score, and pickles the per-user results.
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('tags', metavar='tag', nargs='+')
    parser.add_argument(
        '--recordfile',
        default='test.tfrecords',
        help=
        'tfrecords file with the users to test on (default: test.tfrecords)')
    parser.add_argument(
        '--mc-trials',
        type=int,
        default=800,
        help=
        'Number of rounds of monte carlo sim to perform per product/threshold (default:800)'
    )
    parser.add_argument('--exhaustive',
                        action='store_true',
                        help='Exhaustively search candidate thresholds')
    parser.add_argument(
        '-n',
        '--n-users',
        type=int,
        help='Limit number of users tested on (default: no limit)')
    args = parser.parse_args()

    t0 = time.time()

    assert len(args.tags) == 1
    tag = args.tags[0]
    print "Loading pdict"
    pmap = helpers.pdict_for_tag(tag, args.recordfile)
    predictor = pred.HybridThresholdPredictor(pmap, ntrials=args.mc_trials)
    user_iterator = iterate_wrapped_users(args.recordfile)

    dfs = []
    print "Crunching data"
    for i, user in enumerate(user_iterator):
        if args.n_users and i >= args.n_users:
            break
        user_dat = []
        actual_set = user.last_order_predictable_prods()
        pid_to_prob = pmap[user.uid]
        # Sort (pid, probability) pairs by probability; dict views cannot be
        # sorted in place, so build a list first.
        items = sorted(pid_to_prob.items(), key=lambda kv: kv[1])
        pids = [pid for pid, _ in items]
        probs = np.array([prob for _, prob in items])
        p_none = np.prod(1 - probs)  # probability that no product is reordered
        actual_arr = np.zeros(len(pids))
        # Use a separate index name so the outer user counter i is not clobbered
        # (the n_users limit above depends on it).
        for k, pid in enumerate(pids):
            if pid in actual_set:
                actual_arr[k] = 1

        cands = predictor.get_candidate_thresholds(probs,
                                                   exhaustive=args.exhaustive)
        for cand in cands:
            if args.exhaustive:
                thresh, was_cand = cand
            else:
                thresh = cand
            e_f = predictor.evaluate_threshold(thresh, probs)
            predicted = (probs >= thresh).astype(np.int8)
            predict_none = p_none > thresh or predicted.sum() == 0
            actual_fs = fscore_helpers.fscore(predicted, actual_arr,
                                              predict_none)
            row = [thresh, e_f, actual_fs]
            if args.exhaustive:
                row.append(was_cand)
            user_dat.append(row)

        cols = ['thresh', 'e_f', 'fscore']
        if args.exhaustive:
            cols.append('cand')
        df = pd.DataFrame(user_dat, columns=cols)
        dfs.append(df)

    print "Saving"
    with open('threshdat.pickle', 'w') as f:
        pickle.dump(dfs, f)

    t1 = time.time()
    print "Finished poking in {:.1f}s".format(t1 - t0)
    return dfs
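The fscore_helpers.fscore call above takes a binary prediction vector, a binary actual vector, and a predict_none flag; the helper itself is not shown. A hedged sketch of what such a basket-level F1 might look like, treating the "none" outcome as one extra item; the function name and the exact none-handling are assumptions:

import numpy as np

def fscore(predicted, actual, predict_none=False):
    # Append one pseudo-item for the "no reorder" outcome.
    predicted = np.append(predicted, 1 if predict_none else 0)
    actual = np.append(actual, 1 if actual.sum() == 0 else 0)
    tp = float(np.sum((predicted == 1) & (actual == 1)))
    if predicted.sum() == 0 or actual.sum() == 0:
        return 0.0
    precision = tp / predicted.sum()
    recall = tp / actual.sum()
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0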
Example #6
Random-forest pipeline: plots learning curves, trains the model, calls an fscore plotting helper on the predicted probabilities, and pickles the classifier and preprocessing objects.
clf = RandomForestClassifier(class_weight='balanced', n_estimators=500, n_jobs=-1)

# plot learning curves
cv = ShuffleSplit(n_splits=40, test_size=0.2, random_state=0)  # 40 random 80/20 splits
title = "Learning Curves (Random Forests)"
plot_learning_curve(clf, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=1)

# train the model
clf.fit(X_train, y_train)

# predict the test classes
y_score = clf.predict_proba(X_test)

# fscore plot
fscore(y_test, y_score)

# saving the classifier
with open('classifier.pickle', 'wb') as handle:
    pickle.dump(clf, handle)

# saving the standard scaler
with open('scaler.pickle', 'wb') as handle:
    pickle.dump(std, handle)

# saving the polynomial transformer
with open('polynomial-transformer.pickle', 'wb') as handle:
    pickle.dump(poly, handle)

# saving the variance feature selector
with open('variance-threshholder.pickle', 'wb') as handle:
    pickle.dump(selector, handle)  # selector variable name assumed; the fitted object is not shown above
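The fscore helper called on y_test and the predicted probabilities is project-specific and not shown. A minimal stand-in that reports precision, recall, and F1 after thresholding the positive-class probability at 0.5; the threshold and the binary setting are assumptions:

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

def fscore(y_true, y_score, threshold=0.5):
    # y_score: output of predict_proba; column 1 holds the positive-class probability.
    y_pred = (np.asarray(y_score)[:, 1] >= threshold).astype(int)
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    print("precision={:.3f} recall={:.3f} f1={:.3f}".format(p, r, f))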
Example #7
Test loop for a point-cloud completion network (ECG): for each saved checkpoint it writes the predicted point clouds to HDF5 and logs EMD, Chamfer distances, and F1 overall and per category.
def test(args):
    model_dir = args.model_dir
    log_test = LogString(open(os.path.join(model_dir, 'log_text.txt'), 'w'))
    dataset_test = ShapeNetH5(train=False,
                              npoints=args.num_points,
                              use_mean_feature=args.use_mean_feature)
    dataloader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=int(
                                                      args.workers))
    dataset_length = len(dataset_test)
    epochs = [
        'best_emd_network.pth', 'best_cd_p_network.pth',
        'best_cd_t_network.pth'
    ]
    for epoch in epochs:
        load_path = os.path.join(args.model_dir, epoch)
        log_test.log_string(load_path)
        if args.model_name == 'ecg':
            net = ECG(num_coarse=1024, num_fine=args.num_points)
        else:
            raise NotImplementedError

        load_model(args, net, None, log_test, train=False)
        net.cuda()
        net.eval()
        log_test.log_string("Testing...")
        pcd_file = h5py.File(
            os.path.join(args.model_dir, '%s_pcds.h5' % epoch.split('.')[0]),
            'w')
        pcd_file.create_dataset('output_pcds', (1200, args.num_points, 3))
        test_loss_cd_p = AverageValueMeter()
        test_loss_cd_t = AverageValueMeter()
        test_loss_emd = AverageValueMeter()
        test_f1_score = AverageValueMeter()
        test_loss_cat = torch.zeros([8, 4], dtype=torch.float32).cuda()
        cat_num = torch.ones([8, 1], dtype=torch.float32).cuda() * 150
        with torch.no_grad():
            for i, data in enumerate(dataloader_test):
                if args.use_mean_feature == 0:
                    label, inputs, gt = data
                    mean_feature = None
                else:
                    label, inputs, gt, mean_feature = data
                    mean_feature = mean_feature.float().cuda()

                inputs = inputs.float().cuda()
                gt = gt.float().cuda()
                inputs = inputs.transpose(2, 1).contiguous()

                if args.model_name == 'ecg':
                    _, output = net(inputs)
                else:
                    raise NotImplementedError

                # save pcd
                pcd_file['output_pcds'][args.batch_size * i:args.batch_size *
                                        (i + 1), :, :] = output.cpu().numpy()

                # EMD
                dist, _ = EMD(output, gt, 0.004, 3000)
                emd = torch.sqrt(dist).mean(1)

                # CD
                dist1, dist2, _, _ = chamLoss(gt, output)
                cd_p = (torch.sqrt(dist1).mean(1) +
                        torch.sqrt(dist2).mean(1)) / 2
                cd_t = dist1.mean(1) + dist2.mean(1)

                # f1
                f1, _, _ = fscore(dist1, dist2)

                test_loss_cd_p.update(cd_p.mean().item())
                test_loss_cd_t.update(cd_t.mean().item())
                test_loss_emd.update(emd.mean().item())
                test_f1_score.update(f1.mean().item())

                for j, l in enumerate(label):
                    test_loss_cat[int(l), 0] += cd_p[int(j)]
                    test_loss_cat[int(l), 1] += cd_t[int(j)]
                    test_loss_cat[int(l), 2] += emd[int(j)]
                    test_loss_cat[int(l), 3] += f1[int(j)]

                if i % 100 == 0:
                    log_test.log_string('test [%d/%d]' %
                                        (i, dataset_length // args.batch_size))

            # Per cat loss:
            for i in range(8):
                log_test.log_string('CD_p: %f, CD_t: %f, EMD: %f F1: %f' %
                                    (test_loss_cat[i, 0] / cat_num[i] * 10000,
                                     test_loss_cat[i, 1] / cat_num[i] * 10000,
                                     test_loss_cat[i, 2] / cat_num[i] * 10000,
                                     test_loss_cat[i, 3] / cat_num[i]))

            log_test.log_string('Overview results:')
            log_test.log_string('CD_p: %f, CD_t: %f, EMD: %f F1: %f' %
                                (test_loss_cd_p.avg, test_loss_cd_t.avg,
                                 test_loss_emd.avg, test_f1_score.avg))
        # Close this checkpoint's HDF5 file before loading the next one.
        pcd_file.close()
    log_test.close()
Example #9
Hangman-style word game loop; fscore records the running streak of perfectly solved words.
from fafficher import fafficher as faff
from fproposition import fproposition as fprop
from fscore import fscore

choix = "oui"
suite = 0

while choix != "non":
    print("Nouveau Mot !!! ")
    #initialisation
    vie = 8
    L = []
    mot, mot_caché = ft()

    #suite
    while mot_caché != mot and vie > 0:
        faff(mot, mot_caché, vie)
        var = str(input("Proposition : "))
        mot_caché, vie = fprop(mot, mot_caché, vie, L, var)
    #fin
    if vie == 0:
        print("Vie restantes : 0")
        print("Game Over :)")

    if vie == 8:
        suite += 1
        fscore(suite)

    choix = str(input("voulez-vous rejouer (oui/non) :"))
    print("\n")
Example #10
Stacking (blending) ensemble: level-0 classifiers generate out-of-fold predictions, a logistic regression blends them, and fscore reports on the blended result.
def blender(X_train, y_train, X_test, y_test):
    """

    :param X_train: training set
    :param y_train: output vector for training set
    :param X_test: test set
    :param y_test: output vector for test set
    :return: classification accuracy score
    """

    # our level 0 classifiers

    clfs = [
        RandomForestClassifier(n_estimators=50, criterion='gini', n_jobs=-1),
        ExtraTreesClassifier(n_estimators=100, criterion='gini', n_jobs=-1),
        RandomForestClassifier(n_estimators=100,
                               criterion='entropy',
                               n_jobs=-1),
        ExtraTreesClassifier(n_estimators=50, criterion='entropy', n_jobs=-1),
        GradientBoostingClassifier(learning_rate=0.05)
    ]

    # Ready for cross validation

    n_folds = 5
    skf = list(StratifiedKFold(n_splits=n_folds).split(X_train, y_train))

    blend_train = np.zeros(
        (X_train.shape[0],
         len(clfs)))  # Number of training data x Number of classifiers
    blend_test = np.zeros(
        (X_test.shape[0],
         len(clfs)))  # Number of testing data x Number of classifiers

    print('X_test.shape = %s' % (str(X_test.shape)))
    print('blend_train.shape = %s' % (str(blend_train.shape)))
    print('blend_test.shape = %s' % (str(blend_test.shape)))

    # for each classifier, train once per fold (= len(skf) times)
    for j, clf in enumerate(clfs):

        print('Training classifier [%s]' % (j))
        blend_test_j = np.zeros(
            (X_test.shape[0],
             len(skf)))  # Number of testing data x Number of folds ,
        # we will take the mean of the predictions later
        for i, (train_index, cv_index) in enumerate(skf):
            print('Fold [%s]' % (i))
            # This is the training and validation set
            X_blend_train = X_train[train_index]
            y_blend_train = y_train[train_index]
            X_blend_cv = X_train[cv_index]
            y_blend_cv = y_train[cv_index]

            clf.fit(X_blend_train, y_blend_train)
            # This output will be the basis for our blended classifier to train against,
            # which is also the output of our classifiers
            blend_train[cv_index, j] = clf.predict(X_blend_cv)
            blend_test_j[:, i] = clf.predict(X_test)

        # Take the mean of the predictions of the cross validation set
        blend_test[:, j] = blend_test_j.mean(1)

    # blending the results using logistic regression
    bclf = LogisticRegression()
    bclf.fit(blend_train, y_train)

    # prediction
    y_test_predict = bclf.predict(blend_test)
    score = metrics.accuracy_score(y_test, y_test_predict)
    print('Accuracy = %s' % (score))

    fscore(bclf, blend_test, y_test)

    return score