Example #1
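# Assumed context for this snippet: torch and DataLoader are imported below;
# the remaining names (corpora_tools, data_tools, modules,
# hyperbolic_embeddings_graph, D, parameters, embeddings, optimizer_method)
# come from the surrounding repository.
import torch
from torch.utils.data import DataLoader
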
print("Creating dataset")
# dataset of example indices
dataset_index = corpora_tools.from_indexable(
    torch.arange(0, len(D), 1).unsqueeze(-1))
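# Note: torch.arange(0, len(D), 1).unsqueeze(-1) yields a column tensor of
# shape (len(D), 1), i.e. one row per example: tensor([[0], [1], ..., [len(D)-1]]).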
D.set_path(False)
# random walk dataset
d_rw = D.light_copy()
d_rw.set_walk(parameters.walk_length, 1.0)
d_rw.set_path(True)
# neighbor dataset
d_v = D.light_copy()
d_v.set_walk(1, 1.0)

print("Merging dataset")
embedding_dataset = corpora_tools.zip_datasets(
    dataset_index, corpora_tools.select_from_index(d_rw, element_index=0),
    corpora_tools.select_from_index(d_v, element_index=0))
training_dataloader = DataLoader(embedding_dataset,
                                 batch_size=128,
                                 shuffle=True,
                                 num_workers=4,
                                 collate_fn=data_tools.PadCollate(dim=0),
                                 drop_last=False)
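
# A quick shape check -- a sketch assuming each padded batch is the
# (index, walk, neighbour) triple zipped above:
for index_batch, walk_batch, neighbour_batch in training_dataloader:
    print(index_batch.size(), walk_batch.size(), neighbour_batch.size())
    break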

print("Initialize embeddings")
hyperbolic_embeddings_graph.learn_init(embeddings,
                                       training_dataloader,
                                       embedding_dataset,
                                       optimizer_method,
                                       modules.hyperbolicDistance,
                                       max_iter=150)

Example #2
        d_rw.set_path(True)
        d_rw = corpora.FlatContextCorpus(d_rw, context_size=args.context_size, precompute=args.precompute_rw)
        torch.save(d_rw, "/local/gerald/KMEANS_RESULTS/"+key+".t7")
        rw_log[key] = {"file":"/local/gerald/KMEANS_RESULTS/"+key+".t7", 
                       "context_size":args.context_size, "walk_lenght": args.walk_lenght,
                       "precompute_rw": args.precompute_rw}
else:
    d_rw.set_walk(args.walk_lenght, 1.0)
    d_rw.set_path(True)
    d_rw = corpora.ContextCorpus(d_rw, context_size=args.context_size, precompute=args.precompute_rw)   

# neighbor dataset
d_v = D.light_copy()
d_v.set_walk(1, 1.0)

dataset_repeated = corpora_tools.zip_datasets(dataset_index, corpora_tools.select_from_index(d_v, element_index=0))
dataset_repeated = corpora_tools.repeat_dataset(dataset_repeated, len(d_rw))
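# repeat_dataset (judging from its name) tiles the (index, neighbour) pairs
# up to len(d_rw) items so they can be zipped one-to-one with the context
# corpus below.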
# print(d_rw[1][0].size())

print("Merging dataset")
embedding_dataset = corpora_tools.zip_datasets(dataset_repeated, d_rw)


training_dataloader = DataLoader(embedding_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=4,
                                 drop_last=False)

representation_d = []
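# A sketch of how the list could be filled, assuming an `embeddings` module
# mapping node indices to vectors as in Example #1:
# with torch.no_grad():
#     for batch in training_dataloader:
#         representation_d.append(embeddings(batch[0].squeeze(-1)))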
Example #3
    d_rw.set_walk(args.walk_lenght, 1.0)
    d_rw.set_path(True)
    d_rw = corpora.ContextCorpus(d_rw, context_size=args.context_size, precompute=args.precompute_rw)   
if args.save:
    os.makedirs("/local/gerald/AISTAT_RESULTS/"+args.id+"/", exist_ok=True)
    logger_object = logger.JSONLogger("/local/gerald/AISTAT_RESULTS/"+args.id+"/log.json")
    logger_object.append(vars(args))
# neighbor dataset
d_v = D.light_copy()
d_v.set_walk(1, 1.0)

print(d_rw[1][0].size())

print("Merging dataset")
embedding_dataset = corpora_tools.zip_datasets(dataset_index,
                                               corpora_tools.select_from_index(d_v, element_index=0),
                                               d_rw)
print(embedding_dataset[29][-1][20:25])
training_dataloader = DataLoader(embedding_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=8,
                                 collate_fn=data_tools.PadCollate(dim=0),
                                 drop_last=False)

representation_d = []
pi_d = []
mu_d = []
sigma_d = []
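# These lists presumably collect, per training run, the learned
# representations and the Gaussian-mixture parameters: weights (pi_d),
# means (mu_d) and dispersions (sigma_d).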