print("Creating dataset") # index of examples dataset dataset_index = corpora_tools.from_indexable( torch.arange(0, len(D), 1).unsqueeze(-1)) D.set_path(False) # random walk dataset d_rw = D.light_copy() d_rw.set_walk(parameters.walk_length, 1.0) d_rw.set_path(True) # neigbhor dataset d_v = D.light_copy() d_v.set_walk(1, 1.0) print("Merging dataset") embedding_dataset = corpora_tools.zip_datasets( dataset_index, corpora_tools.select_from_index(d_rw, element_index=0), corpora_tools.select_from_index(d_v, element_index=0)) training_dataloader = DataLoader(embedding_dataset, batch_size=128, shuffle=True, num_workers=4, collate_fn=data_tools.PadCollate(dim=0), drop_last=False) print("Initialize embeddings") hyperbolic_embeddings_graph.learn_init(embeddings, training_dataloader, embedding_dataset, optimizer_method, modules.hyperbolicDistance, max_iter=150,
    # branch taken when the flattened random-walk corpus is precomputed:
    # build it, save it to disk and record its metadata in rw_log
    d_rw.set_path(True)
    d_rw = corpora.FlatContextCorpus(d_rw, context_size=args.context_size,
                                     precompute=args.precompute_rw)
    torch.save(d_rw, "/local/gerald/KMEANS_RESULTS/" + key + ".t7")
    rw_log[key] = {"file": "/local/gerald/KMEANS_RESULTS/" + key + ".t7",
                   "context_size": args.context_size,
                   "walk_lenght": args.walk_lenght,
                   "precompute_rw": args.precompute_rw}
else:
    d_rw.set_walk(args.walk_lenght, 1.0)
    d_rw.set_path(True)
    d_rw = corpora.ContextCorpus(d_rw, context_size=args.context_size,
                                 precompute=args.precompute_rw)

# neighbor dataset
d_v = D.light_copy()
d_v.set_walk(1, 1.0)

dataset_repeated = corpora_tools.zip_datasets(
    dataset_index,
    corpora_tools.select_from_index(d_v, element_index=0)
)
dataset_repeated = corpora_tools.repeat_dataset(dataset_repeated, len(d_rw))
# print(d_rw[1][0].size())

print("Merging dataset")
embedding_dataset = corpora_tools.zip_datasets(dataset_repeated, d_rw)
training_dataloader = DataLoader(
    embedding_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=4,
    drop_last=False
)

representation_d = []
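# The context corpus d_rw is longer than the (index, neighborhood) dataset, so
# repeat_dataset is used above to stretch the latter to len(d_rw) before the
# two are zipped one-to-one. corpora_tools.repeat_dataset is not defined in
# this snippet; a hypothetical implementation could cycle through the base
# dataset modulo its length:
from torch.utils.data import Dataset

class RepeatDataset(Dataset):
    def __init__(self, base, target_len):
        self.base = base
        self.target_len = target_len

    def __len__(self):
        return self.target_len

    def __getitem__(self, index):
        # Wrap around so every index below target_len maps to a base item.
        return self.base[index % len(self.base)]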
d_rw.set_walk(args.walk_lenght, 1.0)
d_rw.set_path(True)
d_rw = corpora.ContextCorpus(d_rw, context_size=args.context_size,
                             precompute=args.precompute_rw)

if args.save:
    os.makedirs("/local/gerald/AISTAT_RESULTS/" + args.id + "/", exist_ok=True)
    logger_object = logger.JSONLogger("/local/gerald/AISTAT_RESULTS/" + args.id + "/log.json")
    logger_object.append(vars(args))

# neighbor dataset
d_v = D.light_copy()
d_v.set_walk(1, 1.0)

print(d_rw[1][0].size())
print("Merging dataset")
embedding_dataset = corpora_tools.zip_datasets(
    dataset_index,
    corpora_tools.select_from_index(d_v, element_index=0),
    d_rw
)
print(embedding_dataset[29][-1][20:25])
training_dataloader = DataLoader(
    embedding_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=8,
    collate_fn=data_tools.PadCollate(dim=0),
    drop_last=False
)

representation_d = []
pi_d = []
mu_d = []
sigma_d = []
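# zip_datasets is used in all three pipelines above to merge per-example
# fields (index, neighborhood, context) into a single dataset. Its real code
# is not shown here; a minimal sketch, assuming every zipped dataset returns
# a tuple of tensors per index and that all datasets have equal length:
from torch.utils.data import Dataset

class ZipDatasets(Dataset):
    def __init__(self, *datasets):
        # All zipped datasets must agree on length.
        assert len({len(d) for d in datasets}) == 1
        self.datasets = datasets

    def __len__(self):
        return len(self.datasets[0])

    def __getitem__(self, index):
        # Concatenate the item tuples of every dataset at `index`, so that
        # embedding_dataset[i][-1] is the last field (here, the context).
        items = []
        for d in self.datasets:
            item = d[index]
            items.extend(item if isinstance(item, tuple) else (item,))
        return tuple(items)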