def main(args):
    """Sweep t-SNE learning rates and print statistics of each embedding.

    Reads ``args.i_path``, ``args.threshold`` and ``args.mode``. The artists
    pickle is loaded, turned into a dataset for the requested mode, stripped
    of outliers and normalized. t-SNE is then run once per learning rate and
    the min, max, mean and variance of each embedding (reduced along axis 0)
    are printed.

    Nothing is returned or saved; the function only reports to stdout.
    """
    input_folder = args.i_path
    threshold = args.threshold
    mode = args.mode

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    # `d` is a module-level lookup indexed by mode
    # (presumably a readable label for the mode -- TODO confirm).
    print('PREPROCESSING ', d[mode])
    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=threshold)
    X = normalize(X=X)

    # Each report is a label line followed by the reduction along axis 0.
    reports = (
        ('min values', np.amin),
        ('max values', np.amax),
        ('mean values', np.mean),
        ('variance values', np.var),
    )

    for lr in [10, 100, 500, 1000]:
        print('TSNE with learning rate =', lr)
        # The embedding is kept separate from X so that every learning rate
        # starts from the same normalized features.
        X_emb = tsne(X, lr=lr)

        print('[TSNE-1 - TSNE-2]')
        for label, reduce_fn in reports:
            print(label)
            print(reduce_fn(X_emb, axis=0))
def main(args):
    """Embed the dataset with t-SNE, attach it to the artists and report stats.

    Reads ``args.i_path``, ``args.threshold`` and ``args.mode``. The artists
    pickle is loaded, converted to a dataset for the requested mode, stripped
    of outliers, normalized and embedded with t-SNE (learning rate 1000). The
    embedding is attached to the artists dictionary, its min, max, mean and
    variance (reduced along axis 0) are printed, and finally
    ``clean_similar_artists`` is applied.

    Nothing is returned; the cleaned dictionary stays local to this function.
    """
    input_folder = args.i_path
    threshold = args.threshold
    mode = args.mode

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    # `d` is a module-level lookup indexed by mode
    # (presumably a readable label for the mode -- TODO confirm).
    print('PREPROCESSING ', d[mode])
    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=threshold)
    X = normalize(X=X)

    print('TSNE')
    X = tsne(X=X, lr=1000)

    artists = optimize_artists_dictionary(artists)
    artists = attach_tsne_to_art_dict(artists=artists, X=X, y=y)

    # Compute the bounds once and reuse them for the report below.
    tsne_min = np.amin(X, axis=0)
    tsne_max = np.amax(X, axis=0)

    print('[TSNE-1 - TSNE-2]')
    print('min values')
    print(tsne_min)
    print('max values')
    print(tsne_max)
    print('mean values')
    print(np.mean(X, axis=0))
    print('variance values')
    print(np.var(X, axis=0))

    artists = clean_similar_artists(artists=artists)
def main(args):
    """Run outlier removal on the mode-3 dataset over a range of thresholds.

    Reads ``args.i_path`` and ``args.o_path``. The output directory and the
    loaded artists are published through the module-level globals
    ``output_path`` and ``artists``. ``remove_outlier`` is then called once
    for every threshold in ``np.arange(1, 3, 0.2)``; its return value is
    discarded, so the call is presumably made for its side effects
    (TODO confirm against ``remove_outlier``).
    """
    global output_path
    global artists

    input_folder = args.i_path

    # Normalize the output directory to end with '/'. An empty path is left
    # untouched instead of raising IndexError on `output_path[-1]`.
    output_path = args.o_path
    if output_path and not output_path.endswith('/'):
        output_path += '/'

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    print('PREPROCESSING')
    X, y = gen_dataset(artists=artists, mode=3)

    # Every threshold is applied to the same unfiltered X, y.
    for t in np.arange(1, 3, 0.2):
        remove_outlier(X=X, y=y, thresh=t)
def main(args):
    """Run the full pipeline: embed with t-SNE, build, save and plot heatmaps.

    Reads ``args.i_path``, ``args.threshold``, ``args.output_pkl``,
    ``args.o_path`` and ``args.mode``. The output directory and the artists
    dictionary are published through the module-level globals ``output_path``
    and ``artists``; the heatmap helpers are called without them, so they
    presumably read those globals (TODO confirm).

    Steps: load the pickle, build the dataset for the requested mode, remove
    outliers, normalize, embed with t-SNE (learning rate 1000), attach the
    embedding to the artists, print embedding statistics, clean similar
    artists, generate heatmaps, save the artists to ``args.output_pkl`` and
    plot the heatmaps.
    """
    global output_path
    global artists

    input_folder = args.i_path
    threshold = args.threshold
    output_pkl = args.output_pkl

    # Normalize the output directory to end with '/'. An empty path is left
    # untouched instead of raising IndexError on `output_path[-1]`.
    output_path = args.o_path
    if output_path and not output_path.endswith('/'):
        output_path += '/'

    mode = args.mode

    print('LOADING PKL...')
    artists = load_data(filename=input_folder)

    # `d` is a module-level lookup indexed by mode
    # (presumably a readable label for the mode -- TODO confirm).
    print('PREPROCESSING ', d[mode])
    X, y = gen_dataset(artists=artists, mode=mode)
    X, y = remove_outlier(X=X, y=y, thresh=threshold)
    X = normalize(X=X)

    print('TSNE')
    X = tsne(X=X, lr=1000)

    artists = optimize_artists_dictionary(artists)
    artists = attach_tsne_to_art_dict(artists=artists, X=X, y=y)

    # Bounds of the embedding along axis 0. Named so they do not shadow the
    # `min` / `max` builtins; the helpers still receive them as `min=`/`max=`.
    tsne_min = np.amin(X, axis=0)
    tsne_max = np.amax(X, axis=0)
    dimension = 20

    print('[TSNE-1 - TSNE-2]')
    print('min values')
    print(tsne_min)
    print('max values')
    print(tsne_max)
    print('mean values')
    print(np.mean(X, axis=0))
    print('variance values')
    print(np.var(X, axis=0))

    artists = clean_similar_artists(artists=artists)

    print('GENERATE HEATMAPS')
    gen_heatmaps_master(dimension=dimension, min=tsne_min, max=tsne_max)

    print('SAVING DATA')
    save_data(artists, filename=output_pkl)

    print('PLOT HEATMAPS in ', output_path)
    plot_heatmaps_master(dimension=dimension, min=tsne_min, max=tsne_max)
def _save_feature_histograms(X, feat_names, out_root, stage):
    """Save one 200-bin histogram PNG per column of ``X``.

    Files are written to ``out_root + '/' + stage + '/' + <name> + '.png'``,
    where ``<name>`` is ``feat_names[i]`` for column ``i``. The target
    directory is not created here and must already exist.
    """
    x = np.array(X)
    for i in range(x.shape[1]):
        plt.hist(x[:, i], bins=200)
        filename = out_root + '/' + stage + '/' + feat_names[i] + '.png'
        title = feat_names[i] + stage + ' outlier remotion'
        plt.title(title)
        plt.savefig(filename)
        # Close the figure so the next feature starts from a fresh one.
        plt.close('all')


def main(args):
    """Remove outliers from the dataset, optionally plotting distributions.

    Reads ``args.i_path`` and ``args.threshold`` (and ``args.o_path`` when
    plotting). When the module-level flag ``PRINT_DISTRIBUTION`` is set, a
    histogram of every feature column is saved under ``<o_path>/BEFORE/``
    before outlier removal and under ``<o_path>/AFTER/`` afterwards.
    """
    input_folder = args.i_path
    threshold = args.threshold

    artists = load_data(filename=input_folder)
    X, y = gen_dataset(artists=artists)

    if PRINT_DISTRIBUTION:
        # Feature values before outlier removal.
        feat_names = get_features_dict()
        _save_feature_histograms(X, feat_names, args.o_path, 'BEFORE')

    X, y = remove_outlier(X=X, y=y, thresh=threshold, verbose=False,
                          save_histogram=True)
    # Normalization (`normalize`) and LOF-based outlier removal
    # (`remove_outliers_lof`) are currently disabled in this script.

    if PRINT_DISTRIBUTION:
        # Feature values after outlier removal; `feat_names` was fetched in
        # the first PRINT_DISTRIBUTION branch above.
        _save_feature_histograms(X, feat_names, args.o_path, 'AFTER')