def main():
    """Entry point: parse CLI options and run the LSP dataset generation.

    Builds an argument parser for image/crop sizes and input/output paths,
    then hands the parsed values to DatasetGenerator.
    """
    parser = argparse.ArgumentParser(
        description="Generating LSP dataset for comparison \
between chainer and pytorch about implementing DeepPose.")
    parser.add_argument("--image_size", "-S", type=int, default=256,
                        help="Size of output image.")
    parser.add_argument("--crop_size", "-C", type=int, default=227,
                        help="Size of cropping for DNN training.")
    parser.add_argument("--path", "-p", type=str, default="orig_data",
                        help="A path to download datasets.")
    parser.add_argument("--output", "-o", type=str, default="data",
                        help="An output path for generated datasets.")

    args = parser.parse_args()
    # Build the generator and run it in one go; no need to keep a reference.
    DatasetGenerator(args.image_size, args.crop_size,
                     args.path, args.output).generate()
def all_vs_all(model, dataset, args):
    """Compute similarities between every pair of videos in the dataset.

    Streams videos through a keras OrderedEnqueuer, extracts features for
    each video, and compares each new video against all previously seen
    ones, filling a symmetric (or directed) similarity matrix that is
    finally handed to ``dataset.evaluate``.

    Args:
        model: feature extractor exposing ``extract_features`` and
            ``calculate_video_similarity``.
        dataset: provides ``get_queries()`` and ``evaluate(...)``.
        args: namespace with ``video_file``, ``network``, ``threads``,
            ``batch_sz`` and ``similarity_function`` attributes.

    NOTE(review): ``features`` keeps every video's features in memory, so
    peak memory grows with dataset size — acceptable for the intended
    dataset sizes, presumably; verify for large corpora.
    """
    # Create a video generator for the dataset video
    enqueuer = tf.keras.utils.OrderedEnqueuer(
        DatasetGenerator(args.video_file, dataset.get_queries(),
                         all_frames='i3d' in args.network),
        use_multiprocessing=True, shuffle=False)
    enqueuer.start(workers=args.threads, max_queue_size=args.threads * 2)
    # FIX: fetch the output generator ONCE instead of calling
    # enqueuer.get() on every loop iteration (each call produces a fresh
    # generator). This also matches how query_vs_database() consumes the
    # enqueuer.
    generator = enqueuer.get()

    # Calculate similarities between all videos in the dataset
    all_db, similarities, features = set(), dict(), dict()
    pbar = tqdm(range(len(enqueuer.sequence)))
    for _ in pbar:
        frames, q = next(generator)
        if frames.shape[0] > 0:  # skip videos that yielded no frames
            all_db.add(q)
            similarities[q] = dict()
            # i3d consumes clips, hence the fixed batch size of 25.
            feat = model.extract_features(
                frames,
                batch_sz=25 if 'i3d' in args.network else args.batch_sz)
            # Compare the new video against every previously seen one.
            for k, v in features.items():
                if 'symmetric' in args.similarity_function:
                    # One computation fills both directions.
                    similarities[q][k] = similarities[k][q] = \
                        model.calculate_video_similarity(v, feat)
                else:
                    # Directed measure: compute each direction separately.
                    similarities[k][q] = model.calculate_video_similarity(v, feat)
                    similarities[q][k] = model.calculate_video_similarity(feat, v)
            features[q] = feat
            pbar.set_postfix(video_id=q, frames=frames.shape,
                             features=feat.shape)
    enqueuer.stop()
    dataset.evaluate(similarities, all_db=all_db)
def query_vs_database(model, dataset, args):
    """Score every database video against a fixed set of query videos.

    Two passes: (1) extract and cache features for all query videos,
    (2) stream the database videos and compute, per database video, its
    similarity to every query. Results are passed to ``dataset.evaluate``.

    Args:
        model: exposes ``extract_features``, ``set_queries`` and
            ``calculate_similarities_to_queries``.
        dataset: provides ``get_queries()``, ``get_database()`` and
            ``evaluate(...)``.
        args: namespace with ``video_file``, ``network``, ``threads`` and
            ``batch_sz`` attributes.
    """
    # Create a video generator for the queries
    enqueuer = tf.keras.utils.OrderedEnqueuer(
        DatasetGenerator(args.video_file, dataset.get_queries(),
                         all_frames='i3d' in args.network),
        use_multiprocessing=True, shuffle=False)
    enqueuer.start(workers=args.threads, max_queue_size=args.threads * 2)

    # Extract features of the queries
    all_db, queries, queries_ids = set(), [], []
    pbar = tqdm(range(len(enqueuer.sequence)))
    for _ in pbar:
        frames, query_id = next(enqueuer.get())
        if frames.shape[0] > 0:
            # i3d works on clips, hence the fixed batch size of 25.
            queries.append(model.extract_features(
                frames,
                batch_sz=25 if 'i3d' in args.network else args.batch_sz))
            queries_ids.append(query_id)
            all_db.add(query_id)
            pbar.set_postfix(query_id=query_id)
    enqueuer.stop()
    model.set_queries(queries)

    # Create a video generator for the database video
    enqueuer = tf.keras.utils.OrderedEnqueuer(
        DatasetGenerator(args.video_file, dataset.get_database(),
                         all_frames='i3d' in args.network),
        use_multiprocessing=True, shuffle=False)
    enqueuer.start(workers=args.threads, max_queue_size=args.threads * 2)
    generator = enqueuer.get()

    # Calculate similarities between the queries and the database videos
    similarities = dict({query: dict() for query in queries_ids})
    pbar = tqdm(range(len(enqueuer.sequence)))
    for _ in pbar:
        frames, video_id = next(generator)
        # NOTE(review): this pass requires > 1 frame while the query pass
        # above accepts > 0 — looks intentional (similarity may need at
        # least two frames) but worth confirming against the model code.
        if frames.shape[0] > 1:
            features = model.extract_features(
                frames,
                batch_sz=25 if 'i3d' in args.network else args.batch_sz)
            sims = model.calculate_similarities_to_queries(features)
            all_db.add(video_id)
            # sims is ordered like queries_ids (one score per query).
            for i, s in enumerate(sims):
                similarities[queries_ids[i]][video_id] = float(s)
            pbar.set_postfix(video_id=video_id)
    enqueuer.stop()
    dataset.evaluate(similarities, all_db)
def main():
    """Plot the noise-classifier test error as a function of the noise
    percentage, with a Random Forest baseline as a horizontal line."""
    model = "ringnorm"

    # Fixed test set shared by every configuration.
    a, b, y_test = data.create_dataset(5000, model)
    X_test = np.array(np.c_[a, b])
    y_test = np.array(y_test)[:, 0]

    #########################################
    #####       DATA CLASSIFICATION     #####
    #########################################

    n_trees = 100
    times = 100

    scores_clf = []
    perc_grid = np.arange(0.1, 0.9, 0.01)
    for perc in tqdm(perc_grid):
        ### Classifier generation ###
        clf = ClasificadorRuido(n_trees=n_trees, perc=perc)

        # Average the error over `times` independent training sets.
        accumulated_error = 0
        for _ in range(times):
            ### Training data generation ###
            a, b, y_train = data.create_dataset(300, model)
            X_train = np.array(np.c_[a, b])
            y_train = np.array(y_train)[:, 0]

            ### Classifiers training and classification ###
            clf.fit(X_train, y_train, random_perc=False)
            accumulated_error += (1 - clf.score(X_test, y_test))
        scores_clf.append(accumulated_error / times)

    ### Random Forest ###
    rfclf = RandomForestClassifier(n_estimators=n_trees)
    rfscore = 0
    for _ in range(times):
        a, b, y_train = data.create_dataset(300, model)
        X_train = np.array(np.c_[a, b])
        y_train = np.array(y_train)[:, 0]
        rfclf.fit(X_train, y_train)
        rfscore += 1 - rfclf.score(X_test, y_test)

    # Baseline as a horizontal line, noise classifier as a curve.
    plt.axhline(y=rfscore / times, color='m', linestyle='-')
    plt.plot(np.arange(0.1, 0.9, 0.01), scores_clf, linestyle='-.')
    plt.savefig("../plots/noise-variation_" + model + ".png")
def main():
    """Run the ALFREDO vs Random Forest experiment and save raw scores.

    For each of `times` repetitions: train a Random Forest baseline, then
    sweep the noise percentage and record the per-tree test error of the
    ALFREDO classifier. Results are dumped as .npy files.
    """
    model = "ringnorm"

    # Shared test set for all repetitions.
    X_test, y_test = data.create_full_dataset(5000, 20, model)
    # X_test = np.array(np.c_[a, b])
    y_test = y_test.transpose()[0]

    #########################################
    #####       DATA CLASSIFICATION     #####
    #########################################

    n_trees = 100
    times = 10

    perc_grid = np.arange(0.01, 0.99, 0.01)
    rf_scores = []
    clf_scores = np.empty((times, len(perc_grid), n_trees))

    for i in tqdm(range(times)):
        ### Training data generation ###
        X_train, y_train = data.create_full_dataset(300, 20, model)
        # X_train = np.array(np.c_[a, b])
        y_train = y_train.transpose()[0]

        ### Classifiers training and classification ###

        # RANDOM FOREST
        rfclf = RandomForestClassifier(n_estimators=n_trees)
        rfclf.fit(X_train, y_train)
        rf_scores.append(1 - rfclf.score(X_test, y_test))

        # NOISE BASED
        for perci, perc in enumerate(np.arange(0.01, 0.99, 0.01)):
            clf = Alfredo(n_trees=n_trees, perc=perc, bagg=True)
            clf.fit(X_train, y_train, random_perc=False)
            clf.predict_proba_error(X_test)
            # Error as a function of the ensemble size (1..n_trees).
            clf_scores[i, perci] = np.array(
                [1 - clf.score_error(X_test, y_test, n_classifiers=n_tree)
                 for n_tree in range(1, n_trees + 1)])

    print(np.array(rf_scores))
    print()
    print(clf_scores)
    print(clf_scores.shape)
    print()
    print(clf_scores.mean(axis=0))
    print(clf_scores.mean(axis=0).shape)

    np.save("../data/" + model + "_data_random-forest_ALFREDO",
            np.array(rf_scores))
    np.save("../data/" + model + "_data_ALFREDO", clf_scores)
def main():
    """Plot the noise-classifier error vs ensemble size for several noise
    percentages, plus a single Random Forest baseline line."""
    model = "threenorm"

    # Test set shared by every configuration.
    a, b, y_test = data.create_dataset(100, model)
    X_test = np.array(np.c_[a, b])
    y_test = np.array(y_test)[:, 0]

    #########################################
    #####       DATA CLASSIFICATION     #####
    #########################################

    n_trees = 100
    n_repeats = 100

    for perc in tqdm(np.arange(0.1, 0.91, 0.1)):
        ### Classifier generation ###
        clf = ClasificadorRuido(n_trees=n_trees, perc=perc)
        clf_scores = np.zeros((n_repeats, n_trees))

        for i in range(n_repeats):
            ### Training data generation ###
            a, b, y_train = data.create_dataset(300, model)
            X_train = np.array(np.c_[a, b])
            y_train = np.array(y_train)[:, 0]

            ### Classifiers training and classification ###
            clf.fit(X_train, y_train, random_perc=False)
            clf.predict_proba_error(X_test)
            # Error for every ensemble prefix of size 1..n_trees.
            for n_tree in range(1, n_trees + 1):
                clf_scores[i, n_tree - 1] += 1 - clf.score_error(
                    X_test, y_test, n_classifiers=n_tree)

        plt.plot(range(1, n_trees + 1), clf_scores.mean(axis=0),
                 linestyle='-.')
        print()
        print("Perc: ", np.round(perc, 1), " - ",
              clf_scores.mean(axis=0)[-1])

    ### Random Forest ###
    rfclf = RandomForestClassifier(n_estimators=n_trees)
    a, b, y_train = data.create_dataset(300, model)
    X_train = np.array(np.c_[a, b])
    y_train = np.array(y_train)[:, 0]
    rfclf.fit(X_train, y_train)
    rfscore = 1 - rfclf.score(X_test, y_test)

    plt.axhline(y=rfscore, color='m', linestyle='-')
    plt.legend(('perc=0.1', 'perc=0.2', 'perc=0.3', 'perc=0.4', 'perc=0.5',
                'perc=0.6', 'perc=0.7', 'perc=0.8', 'perc=0.9', 'RF'),
               loc='upper right')
    plt.savefig("../plots/noise-error_" + model + "_100trees.png")
def main():
    """Render the two summary plots for a saved ALFREDO experiment.

    Loads the per-run error tensor (shape: runs x perc-grid x n_trees) and
    the Random Forest baseline scores produced by the experiment script,
    then draws (1) error vs. noise percentage for selected ensemble sizes
    and (2) error vs. ensemble size for selected noise percentages.
    """
    model = "threenorm"

    # data: (runs, len(arange(0.01, 0.99, 0.01)) == 98, 100 trees)
    data = np.load("../data/" + model + "_data_ALFREDO.npy")
    rfscore = np.load("../data/" + model + "_data_random-forest_ALFREDO.npy")
    print(data.shape)

    model_title = str.upper(model[0]) + model[1:]

    plt.figure(figsize=(20, 10))

    plt.subplot(1, 2, 1)
    plt.title(model_title + " perc. random")
    # Ensemble sizes to show: trees 1, 5, 10, 50, 100 (0-based indices).
    lst = [0, 4, 9, 49, 99]
    for i in lst:
        plt.plot(np.arange(0.01, 0.99, 0.01), data.mean(axis=0)[:, i],
                 linestyle='-')
    plt.axhline(y=rfscore.mean(), color='m', linestyle='-')
    # Labels derived from the plotted indices (tree count = index + 1).
    legend = [str(i + 1) for i in lst]
    legend.append('RF')
    plt.legend(legend, loc='upper right', ncol=2)
    plt.ylabel("Err")
    plt.xlabel("Data randomization")
    plt.grid()

    ########################################################################

    plt.subplot(1, 2, 2)
    plt.title(model_title + " n. trees - err.")
    # Noise-percentage rows to show; arange(0.01, 0.99, 0.01)[i] == (i+1)/100.
    lst = [0, 4, 9, 48, 97]
    print(data.mean(axis=0).shape)
    for i in lst:
        plt.plot(range(1, 101, 1), data.mean(axis=0)[i, :], linestyle='-')
    plt.axhline(y=rfscore.mean(), color='m', linestyle='-')
    # FIX: derive the legend from the indices actually plotted. The old
    # hard-coded list ([1, 5, 10, 50, 99]/100) labelled the 0.49 and 0.98
    # curves as 0.5 and 0.99.
    legend = [str((i + 1) / 100) for i in lst]
    legend.append('RF')
    plt.legend(legend, loc='upper right', ncol=2)
    plt.ylabel("Err")
    plt.xlabel("N. trees")
    plt.grid()

    plt.tight_layout()
    # plt.show()
    plt.savefig("../plots/ALFREDO/PNG/2-plots_" + model + ".png")
    plt.savefig("../plots/ALFREDO/EPS/2-plots_" + model + ".eps")