def evalAlgo(data, algo, number_of_folds, *args):
    """Evaluate `algo` with k-fold cross validation; returns per-fold accuracies."""
    folds = crossValidator(data, number_of_folds)
    scores = list()
    fs = list()
    for fold in folds:
        # Train on every fold except the held-out one.
        trainingSet = list(folds)
        trainingSet.remove(fold)
        trainingSet = sum(trainingSet, [])
        # Copy the test rows and blank out the label column.
        testingSet = list()
        for row in fold:
            copyRow = list(row)
            testingSet.append(copyRow)
            copyRow[-1] = None
        predicted = algo(trainingSet, testingSet, *args)
        real = [row[-1] for row in fold]
        accuracy = measureAccuracy(real, predicted)
        scores.append(accuracy)
        fs.append(fscore.fscore(real, predicted))
    print("F Score is:", sum(fs) / len(fs))
    return scores
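# Hedged sketch of the two helpers evalAlgo assumes; their names and behaviour
# are inferred from how they are called above, not taken from the original source.
from random import randrange

def crossValidator(data, number_of_folds):
    """Split `data` into `number_of_folds` random folds (assumed behaviour)."""
    folds = list()
    pool = list(data)
    fold_size = len(data) // number_of_folds
    for _ in range(number_of_folds):
        fold = list()
        while len(fold) < fold_size:
            fold.append(pool.pop(randrange(len(pool))))
        folds.append(fold)
    return folds

def measureAccuracy(real, predicted):
    """Percentage of exact label matches (assumed behaviour)."""
    correct = sum(1 for r, p in zip(real, predicted) if r == p)
    return correct / len(real) * 100.0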
def calc_cd(output, gt, calc_f1=False):
    cham_loss = dist_chamfer_3D.chamfer_3DDist()
    dist1, dist2, _, _ = cham_loss(gt, output)
    cd_p = (torch.sqrt(dist1).mean(1) + torch.sqrt(dist2).mean(1)) / 2
    cd_t = dist1.mean(1) + dist2.mean(1)
    if calc_f1:
        f1, _, _ = fscore(dist1, dist2)
        return cd_p, cd_t, f1
    else:
        return cd_p, cd_t
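# `fscore` above is the point-cloud F1 over per-point Chamfer distances. A
# minimal sketch, assuming squared distances and a fixed inlier threshold
# (both the signature and the default threshold are assumptions, not code
# taken from the original project):
import torch

def fscore(dist1, dist2, threshold=0.0001):
    """F1 from two directed (squared) distance maps of shape (B, N)."""
    precision_1 = torch.mean((dist1 < threshold).float(), dim=1)  # pred -> gt
    precision_2 = torch.mean((dist2 < threshold).float(), dim=1)  # gt -> pred
    f1 = 2 * precision_1 * precision_2 / (precision_1 + precision_2)
    f1[torch.isnan(f1)] = 0  # guard against 0/0 when both precisions are 0
    return f1, precision_1, precision_2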
def list_of_fscores(p, y, outputs):
    # One accumulator per class; a comprehension is required here --
    # [fscore()] * outputs would alias a single shared object.
    fscores = [fscore() for _ in range(int(outputs))]
    for i in range(outputs):
        for j in range(y.shape[0]):
            if p[j] == (i + 1):  # predicted positive for class i+1
                if p[j] == y[j]:
                    fscores[i].true_pos += 1
                else:
                    fscores[i].false_pos += 1
            else:  # predicted negative for class i+1
                # A false negative means the true label was i+1; everything
                # else is a true negative for this class.
                if y[j] == (i + 1):
                    fscores[i].false_neg += 1
                else:
                    fscores[i].true_neg += 1
    return fscores
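# Minimal sketch of the `fscore` accumulator class the function above relies
# on. Only the four counter attributes are visible in the calls; the value()
# method is an assumption added for completeness.
class fscore:
    def __init__(self):
        self.true_pos = 0
        self.false_pos = 0
        self.true_neg = 0
        self.false_neg = 0

    def value(self):
        # Standard F1 from the accumulated counts; 0 when undefined.
        denom = 2 * self.true_pos + self.false_pos + self.false_neg
        return 2 * self.true_pos / denom if denom else 0.0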
def test_chamfer(distChamfer, dim):
    points1 = torch.rand(4, 100, dim).cuda()
    points2 = torch.rand(4, 200, dim, requires_grad=True).cuda()
    dist1, dist2, idx1, idx2 = distChamfer(points1, points2)

    # Make sure gradients flow through the CUDA kernel.
    loss = torch.sum(dist1)
    loss.backward()

    # Compare against the pure-python reference implementation.
    mydist1, mydist2, myidx1, myidx2 = chamfer_python.distChamfer(points1, points2)
    d1 = (dist1 - mydist1) ** 2
    d2 = (dist2 - mydist2) ** 2
    assert (
        torch.mean(d1) + torch.mean(d2) < 1e-8
    ), "chamfer cuda and chamfer normal are not giving the same results"

    xd1 = idx1 - myidx1
    xd2 = idx2 - myidx2
    assert (
        torch.norm(xd1.float()) + torch.norm(xd2.float()) == 0
    ), "chamfer cuda and chamfer normal are not giving the same results"
    print("fscore:", fscore(dist1, dist2))
    print("Unit test passed")
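# Example invocation; the import path for the compiled CUDA extension is an
# assumption (it matches how the module is referenced in calc_cd above):
if __name__ == "__main__":
    import dist_chamfer_3D  # assumed importable; provides chamfer_3DDist
    test_chamfer(dist_chamfer_3D.chamfer_3DDist(), dim=3)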
import argparse
import pickle
import time

import numpy as np
import pandas as pd

# Project-local imports (helpers, pred, fscore_helpers, iterate_wrapped_users)
# come from elsewhere in the original repository and are assumed available.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('tags', metavar='tag', nargs='+')
    parser.add_argument(
        '--recordfile', default='test.tfrecords',
        help='tfrecords file with the users to test on (default: test.tfrecords)')
    parser.add_argument(
        '--mc-trials', type=int, default=800,
        help='Number of rounds of monte carlo sim to perform per product/threshold (default: 800)')
    parser.add_argument('--exhaustive', action='store_true',
                        help='Exhaustively search candidate thresholds')
    parser.add_argument('-n', '--n-users', type=int,
                        help='Limit number of users tested on (default: no limit)')
    args = parser.parse_args()

    t0 = time.time()
    assert len(args.tags) == 1
    tag = args.tags[0]
    print("Loading pdict")
    pmap = helpers.pdict_for_tag(tag, args.recordfile)
    predictor = pred.HybridThresholdPredictor(pmap, ntrials=args.mc_trials)
    user_iterator = iterate_wrapped_users(args.recordfile)
    dfs = []
    print("Crunching data")
    for i, user in enumerate(user_iterator):
        if args.n_users and i >= args.n_users:
            break
        user_dat = []
        actual_set = user.last_order_predictable_prods()
        pid_to_prob = pmap[user.uid]
        # Sort (pid, prob) pairs on probability. dict.items() is a view in
        # Python 3, so sort a list copy rather than calling .sort() on it.
        items = sorted(pid_to_prob.items(), key=lambda kv: kv[1])
        pids = [kv[0] for kv in items]
        probs = np.array([kv[1] for kv in items])
        p_none = np.prod(1 - probs)
        actual_arr = np.zeros(len(pids))
        # Use a separate index variable to avoid shadowing the outer loop's i.
        for k, pid in enumerate(pids):
            if pid in actual_set:
                actual_arr[k] = 1
        cands = predictor.get_candidate_thresholds(probs, exhaustive=args.exhaustive)
        for cand in cands:
            if args.exhaustive:
                thresh, was_cand = cand
            else:
                thresh = cand
            e_f = predictor.evaluate_threshold(thresh, probs)
            predicted = (probs >= thresh).astype(np.int8)
            predict_none = p_none > thresh or predicted.sum() == 0
            actual_fs = fscore_helpers.fscore(predicted, actual_arr, predict_none)
            row = [thresh, e_f, actual_fs]
            if args.exhaustive:
                row.append(was_cand)
            user_dat.append(row)
        cols = ['thresh', 'e_f', 'fscore']
        if args.exhaustive:
            cols.append('cand')
        df = pd.DataFrame(user_dat, columns=cols)
        dfs.append(df)

    print("Saving")
    # Pickle files must be opened in binary mode.
    with open('threshdat.pickle', 'wb') as f:
        pickle.dump(dfs, f)
    t1 = time.time()
    print("Finished poking in {:.1f}s".format(t1 - t0))
    return dfs
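# Hedged sketch of fscore_helpers.fscore as it is called above: an F1 over the
# 0/1 item vectors in which "none" behaves as one extra pseudo-item (the
# Instacart-style convention; the original implementation is not shown here,
# so the exact none-handling is an assumption).
import numpy as np

def fscore(predicted, actual, predict_none):
    predicted = np.asarray(predicted).astype(bool)
    actual = np.asarray(actual).astype(bool)
    actual_none = not actual.any()
    tp = np.logical_and(predicted, actual).sum()
    if predict_none and actual_none:
        tp += 1  # the "none" pseudo-item was correctly predicted
    n_pred = predicted.sum() + int(predict_none)
    n_actual = actual.sum() + int(actual_none)
    if tp == 0 or n_pred == 0 or n_actual == 0:
        return 0.0
    precision = tp / n_pred
    recall = tp / n_actual
    return 2 * precision * recall / (precision + recall)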
clf = RandomForestClassifier(class_weight='balanced', n_estimators=500, n_jobs=-1)

# plot learning curves. ShuffleSplit now takes n_splits rather than the old
# (n, n_iter) positional form, so build the splitter with the current API.
cv = ShuffleSplit(n_splits=40, test_size=0.2, random_state=0)
title = "Learning Curves (Random Forests)"
plot_learning_curve(clf, title, X, y, ylim=(0.7, 1.01), cv=cv, n_jobs=1)

# train the model
clf.fit(X_train, y_train)

# predict the test classes
y_score = clf.predict_proba(X_test)

# fscore plot
fscore(y_test, y_score)

# saving the classifier
with open('classifier.pickle', 'wb') as handle:
    pickle.dump(clf, handle)

# saving the standard scaler
with open('scaler.pickle', 'wb') as handle:
    pickle.dump(std, handle)

# saving the polynomial transformer
with open('polynomial-transformer.pickle', 'wb') as handle:
    pickle.dump(poly, handle)

# saving the variance feature selector (the snippet was cut off here; the
# dump below follows the pattern of the saves above, with an assumed name)
with open('variance-threshholder.pickle', 'wb') as handle:
    pickle.dump(var_thresh, handle)
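# `plot_learning_curve` is not defined in this snippet; a minimal sketch in
# the spirit of the scikit-learn example it is usually adapted from (only the
# arguments used above are supported):
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, n_jobs=1):
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs)
    plt.plot(train_sizes, train_scores.mean(axis=1), 'o-', label="Training score")
    plt.plot(train_sizes, test_scores.mean(axis=1), 'o-', label="Cross-validation score")
    plt.legend(loc="best")
    return plt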
def test(args):
    model_dir = args.model_dir
    log_test = LogString(open(os.path.join(model_dir, 'log_text.txt'), 'w'))
    dataset_test = ShapeNetH5(train=False, npoints=args.num_points,
                              use_mean_feature=args.use_mean_feature)
    dataloader_test = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  num_workers=int(args.workers))
    dataset_length = len(dataset_test)
    epochs = ['best_emd_network.pth', 'best_cd_p_network.pth', 'best_cd_t_network.pth']
    for epoch in epochs:
        load_path = os.path.join(args.model_dir, epoch)
        log_test.log_string(load_path)
        if args.model_name == 'ecg':
            net = ECG(num_coarse=1024, num_fine=args.num_points)
        else:
            raise NotImplementedError
        load_model(args, net, None, log_test, train=False)
        net.cuda()
        net.eval()

        log_test.log_string("Testing...")
        pcd_file = h5py.File(os.path.join(args.model_dir,
                                          '%s_pcds.h5' % epoch.split('.')[0]), 'w')
        pcd_file.create_dataset('output_pcds', (1200, args.num_points, 3))

        test_loss_cd_p = AverageValueMeter()
        test_loss_cd_t = AverageValueMeter()
        test_loss_emd = AverageValueMeter()
        test_f1_score = AverageValueMeter()
        test_loss_cat = torch.zeros([8, 4], dtype=torch.float32).cuda()
        cat_num = torch.ones([8, 1], dtype=torch.float32).cuda() * 150

        with torch.no_grad():
            for i, data in enumerate(dataloader_test):
                if args.use_mean_feature == 0:
                    label, inputs, gt = data
                    mean_feature = None
                else:
                    label, inputs, gt, mean_feature = data
                    mean_feature = mean_feature.float().cuda()

                inputs = inputs.float().cuda()
                gt = gt.float().cuda()
                inputs = inputs.transpose(2, 1).contiguous()

                if args.model_name == 'ecg':
                    _, output = net(inputs)
                else:
                    raise NotImplementedError

                # save pcd
                pcd_file['output_pcds'][args.batch_size * i:args.batch_size * (i + 1), :, :] = \
                    output.cpu().numpy()

                # EMD
                dist, _ = EMD(output, gt, 0.004, 3000)
                emd = torch.sqrt(dist).mean(1)

                # CD
                dist1, dist2, _, _ = chamLoss(gt, output)
                cd_p = (torch.sqrt(dist1).mean(1) + torch.sqrt(dist2).mean(1)) / 2
                cd_t = dist1.mean(1) + dist2.mean(1)

                # f1
                f1, _, _ = fscore(dist1, dist2)

                test_loss_cd_p.update(cd_p.mean().item())
                test_loss_cd_t.update(cd_t.mean().item())
                test_loss_emd.update(emd.mean().item())
                test_f1_score.update(f1.mean().item())

                for j, l in enumerate(label):
                    test_loss_cat[int(l), 0] += cd_p[int(j)]
                    test_loss_cat[int(l), 1] += cd_t[int(j)]
                    test_loss_cat[int(l), 2] += emd[int(j)]
                    test_loss_cat[int(l), 3] += f1[int(j)]

                if i % 100 == 0:
                    log_test.log_string('test [%d/%d]' % (i, dataset_length // args.batch_size))

        # Per cat loss:
        for i in range(8):
            log_test.log_string('CD_p: %f, CD_t: %f, EMD: %f F1: %f' %
                                (test_loss_cat[i, 0] / cat_num[i] * 10000,
                                 test_loss_cat[i, 1] / cat_num[i] * 10000,
                                 test_loss_cat[i, 2] / cat_num[i] * 10000,
                                 test_loss_cat[i, 3] / cat_num[i]))

        log_test.log_string('Overview results:')
        log_test.log_string('CD_p: %f, CD_t: %f, EMD: %f F1: %f' %
                            (test_loss_cd_p.avg, test_loss_cd_t.avg,
                             test_loss_emd.avg, test_f1_score.avg))
        pcd_file.close()
    log_test.close()
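# Minimal sketch of the AverageValueMeter used above, assumed to be the usual
# running-average helper (the real project likely imports it from a utils
# module; only update() and .avg are exercised by test()):
class AverageValueMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, value, n=1):
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count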
from fafficher import fafficher as faff
from fproposition import fproposition as fprop
from fscore import fscore

# `ft` (the helper that draws a new word and its hidden mask) is assumed to be
# imported elsewhere in the original file.

choix = "yes"
suite = 0
while choix != "no":
    print("New word!!!")
    # initialisation
    vie = 8
    L = []
    mot, mot_caché = ft()
    # main loop
    while mot_caché != mot and vie > 0:
        faff(mot, mot_caché, vie)
        var = str(input("Guess: "))
        mot_caché, vie = fprop(mot, mot_caché, vie, L, var)
    # end of round
    if vie == 0:
        print("Lives left: 0")
        print("Game Over :)")
    if vie == 8:  # won without losing a single life
        suite += 1
        fscore(suite)
    choix = str(input("Do you want to play again (yes/no): "))
    print("\n")
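# Hedged sketch of the imported fscore helper: judging by the call above it
# records the current streak of perfect rounds; the file name and format
# below are pure assumptions for illustration.
def fscore(suite):
    with open("scores.txt", "a") as f:
        f.write("perfect rounds in a row: %d\n" % suite)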
def blender(X_train, y_train, X_test, y_test):
    """
    :param X_train: training set
    :param y_train: output vector for training set
    :param X_test: test set
    :param y_test: output vector for test set
    :return: classification accuracy score
    """
    # our level 0 classifiers
    clfs = [
        RandomForestClassifier(n_estimators=50, criterion='gini', n_jobs=-1),
        ExtraTreesClassifier(n_estimators=100, criterion='gini', n_jobs=-1),
        RandomForestClassifier(n_estimators=100, criterion='entropy', n_jobs=-1),
        ExtraTreesClassifier(n_estimators=50, criterion='entropy', n_jobs=-1),
        GradientBoostingClassifier(learning_rate=0.05)
    ]

    # Ready for cross validation. StratifiedKFold is now a splitter object,
    # so materialise the (train, cv) index pairs with .split().
    n_folds = 5
    skf = list(StratifiedKFold(n_splits=n_folds).split(X_train, y_train))

    blend_train = np.zeros((X_train.shape[0], len(clfs)))  # Number of training data x Number of classifiers
    blend_test = np.zeros((X_test.shape[0], len(clfs)))  # Number of testing data x Number of classifiers

    print('X_test.shape = %s' % (str(X_test.shape)))
    print('blend_train.shape = %s' % (str(blend_train.shape)))
    print('blend_test.shape = %s' % (str(blend_test.shape)))

    # For each classifier, train once per fold (= len(skf) times).
    for j, clf in enumerate(clfs):
        print('Training classifier [%s]' % (j))
        blend_test_j = np.zeros((X_test.shape[0], len(skf)))  # Number of testing data x Number of folds;
        # we will take the mean of the predictions later
        for i, (train_index, cv_index) in enumerate(skf):
            print('Fold [%s]' % (i))
            # This is the training and validation set
            X_blend_train = X_train[train_index]
            y_blend_train = y_train[train_index]
            X_blend_cv = X_train[cv_index]
            y_blend_cv = y_train[cv_index]
            clf.fit(X_blend_train, y_blend_train)
            # This output will be the basis for our blended classifier to train against,
            # which is also the output of our classifiers
            blend_train[cv_index, j] = clf.predict(X_blend_cv)
            blend_test_j[:, i] = clf.predict(X_test)
        # Take the mean of the predictions of the cross validation set
        blend_test[:, j] = blend_test_j.mean(1)

    # blending the results using logistic regression
    bclf = LogisticRegression()
    bclf.fit(blend_train, y_train)

    # prediction
    y_test_predict = bclf.predict(blend_test)
    score = metrics.accuracy_score(y_test, y_test_predict)
    print('Accuracy = %s' % (score))
    fscore(bclf, blend_test, y_test)
    return score
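# Example usage on a synthetic dataset (sizes are illustrative only; this
# assumes the external `fscore` plotting helper called inside blender is
# also defined in scope):
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    X, y = make_classification(n_samples=1000, n_features=20, random_state=0)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
    print('Blender accuracy: %s' % blender(X_tr, y_tr, X_te, y_te))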