def generate_blogcatalog_121_embedding():
    """Train, plot and persist a 2-d embedding of the blogcatalog 121 sample.

    Steps, in order:

    1. load the labels pickle ``y.p``;
    2. build a ``Params`` object pointing at ``walks.csv`` and run ``HE.main``;
    3. plot both returned embedding matrices with
       ``visualisation.plot_poincare_embedding``;
    4. write each matrix to its own timestamped CSV;
    5. print the elapsed wall-clock time;
    6. pass the input-embedding CSV to ``MLD.blogcatalog_121_scenario``.

    :return: path of the CSV that holds the input ("Win") embedding
    """
    import visualisation

    started_at = datetime.datetime.now()

    labels = utils.read_pickle('../../local_resources/blogcatalog_121_sample/y.p')

    tf_log_dir = '../../local_resources/tf_logs/run1/'
    walks_file = '../../local_resources/blogcatalog_121_sample/walks.csv'
    embedding_dim = 2  # dimensionality of the embedding
    params = Params(
        walks_file,
        batch_size=4,
        embedding_size=embedding_dim,
        neg_samples=5,
        skip_window=5,
        num_pairs=1500,
        statistics_interval=10.0,
        initial_learning_rate=1.0,
        save_path=tf_log_dir,
        epochs=1,
        concurrent_steps=4,
    )

    # The returned path is fixed before training starts.
    # NOTE(review): every artefact below asks utils.get_timestamp() for its
    # own suffix, so files from one run may carry different timestamps -
    # confirm whether that is intended.
    path = '../../local_resources/blogcatalog_121_sample/embeddings/Win' + '_' + utils.get_timestamp() + '.csv'

    embedding_in, embedding_out = HE.main(params)

    win_figure = '../../results/blogcatalog_121_sample/figs/poincare_polar_Win' + '_' + utils.get_timestamp() + '.pdf'
    visualisation.plot_poincare_embedding(embedding_in, labels, win_figure)

    wout_figure = '../../results/blogcatalog_121_sample/figs/poincare_polar_Wout' + '_' + utils.get_timestamp() + '.pdf'
    visualisation.plot_poincare_embedding(embedding_out, labels, wout_figure)

    # One row per embedding row, indexed 0..n-1.
    win_rows = np.arange(embedding_in.shape[0])
    pd.DataFrame(data=embedding_in, index=win_rows).to_csv(path, sep=',')

    wout_rows = np.arange(embedding_out.shape[0])
    wout_frame = pd.DataFrame(data=embedding_out, index=wout_rows)
    wout_csv = '../../local_resources/blogcatalog_121_sample/embeddings/Wout' + '_' + utils.get_timestamp() + '.csv'
    wout_frame.to_csv(wout_csv, sep=',')

    elapsed = datetime.datetime.now() - started_at
    print('blogcatalog embedding 121 sample generated in: ', elapsed)

    MLD.blogcatalog_121_scenario(path)
    return path
def generate_karate_embedding():
    """Train, plot and persist a 2-d embedding of the karate data set.

    Loads the targets pickle and uses its ``'cat'`` entry as labels, runs
    ``HE.main`` on the walks in ``walks_n1_l10.csv``, plots both returned
    embedding matrices with ``visualisation.plot_poincare_embedding`` and
    writes each matrix to its own timestamped CSV.

    :return: path of the CSV that holds the input ("Win") embedding
    """
    import visualisation

    targets = utils.read_pickle('../../local_resources/karate/y.p')
    labels = np.array(targets['cat'])

    tf_log_dir = '../../local_resources/tf_logs/run4/'
    walks_file = '../../local_resources/karate/walks_n1_l10.csv'
    embedding_dim = 2  # dimensionality of the embedding
    params = Params(
        walks_file,
        batch_size=4,
        embedding_size=embedding_dim,
        neg_samples=5,
        skip_window=5,
        num_pairs=1500,
        statistics_interval=0.1,
        initial_learning_rate=1.0,
        save_path=tf_log_dir,
        epochs=10,
        concurrent_steps=1,
    )

    # The returned path is fixed before training starts; the later artefacts
    # each request their own timestamp suffix.
    path = '../../local_resources/karate/embeddings/tf_Win_polar' + '_' + utils.get_timestamp() + '.csv'

    embedding_in, embedding_out = HE.main(params)

    win_figure = '../../results/karate/figs/poincare_polar_Win' + '_' + utils.get_timestamp() + '.pdf'
    visualisation.plot_poincare_embedding(embedding_in, labels, win_figure)

    wout_figure = '../../results/karate/figs/poincare_polar_Wout' + '_' + utils.get_timestamp() + '.pdf'
    visualisation.plot_poincare_embedding(embedding_out, labels, wout_figure)

    # One row per embedding row, indexed 0..n-1.
    win_frame = pd.DataFrame(data=embedding_in, index=range(embedding_in.shape[0]))
    win_frame.to_csv(path, sep=',')

    wout_frame = pd.DataFrame(data=embedding_out, index=range(embedding_out.shape[0]))
    wout_csv = '../../local_resources/karate/embeddings/tf_Wout_polar' + '_' + utils.get_timestamp() + '.csv'
    wout_frame.to_csv(wout_csv, sep=',')

    return path
def simulated_tree_scenario(branching_factor, levels):
    """Embed a simulated tree and plot it next to a deepwalk embedding.

    Steps, in order:

    1. ``create_adj_mat`` is called for the given tree shape and its result
       is handed to ``generate_simulated_tree`` together with the walk and
       deepwalk file paths;
    2. the deepwalk embedding file is read back (first line skipped, first
       column used as index, space separated);
    3. labels are produced by ``generate_y``;
    4. ``HE.main`` is run on the walks file with a 2-d embedding size;
    5. the deepwalk embedding and the input embedding returned by
       ``HE.main`` are plotted to timestamped PDFs;
    6. the input embedding is written to a timestamped CSV.

    :param branching_factor: branching factor of the tree; also used in the
        ``z{}`` part of the file names
    :param levels: number of levels of the tree; also used in the ``l{}``
        part of the file names
    :return: path of the CSV that holds the input ("Win") embedding
    """
    import visualisation

    folder = '../../local_resources/simulated_trees'
    deepwalk_path = '../../local_resources/simulated_trees/deepwalk_z{}_l{}.emd'.format(
        branching_factor, levels)
    walk_path = '../../local_resources/simulated_trees/walks_long_z{}_l{}.emd'.format(
        branching_factor, levels)

    emb_path = create_adj_mat(folder, branching_factor, levels)
    generate_simulated_tree(emb_path, walk_path, deepwalk_path)

    # skiprows=1 drops the first line of the .emd file; column 0 becomes the
    # index so that .values holds only the remaining columns.
    deepwalk_emd = pd.read_csv(deepwalk_path, header=None, index_col=0, skiprows=1, sep=" ")

    y = generate_y(branching_factor, levels)

    log_path = '../../local_resources/tf_logs/sim_tree/'
    size = 2  # dimensionality of the embedding
    params = Params(
        walk_path,
        batch_size=4,
        embedding_size=size,
        neg_samples=5,
        skip_window=5,
        num_pairs=1500,
        statistics_interval=0.1,
        initial_learning_rate=1.0,
        save_path=log_path,
        epochs=20,
        concurrent_steps=4,
    )

    # The returned path is fixed before training starts.
    path = '../../local_resources/simulated_trees/embeddings/Win' + '_' + utils.get_timestamp() + '.csv'

    # Only the input embedding is used by this scenario; the second matrix
    # returned by HE.main is discarded.
    embedding_in, _ = HE.main(params)

    visualisation.plot_deepwalk_embedding(
        deepwalk_emd.values,
        y,
        '../../results/simulated_trees/figs/deepwalk_z{}_l{}_{}.pdf'.format(
            branching_factor, levels, utils.get_timestamp()))
    visualisation.plot_poincare_embedding(
        embedding_in,
        y,
        '../../results/simulated_trees/figs/hyp_z{}_l{}_{}.pdf'.format(
            branching_factor, levels, utils.get_timestamp()))

    # One row per embedding row, indexed 0..n-1.
    df_in = pd.DataFrame(data=embedding_in, index=np.arange(embedding_in.shape[0]))
    df_in.to_csv(path, sep=',')
    return path