# NOTE(review): collapsed script fragment (newlines stripped). It appears to:
#   1. finish an argparse `--n` style option ("number of times to perform kmeans")
#      and add an `--init` store_true flag, then parse args;
#   2. build a name -> loader registry over `corpora` datasets
#      (karate, football, flickr, dblp, books, blogCatalog, polblog, adjnoun);
#   3. reopen a previous run's log (`<args.file>/log.json`, mod="continue") and read
#      back `dataset` and `n_gaussian` from it — presumably to resume/evaluate an
#      earlier experiment; verify `args.file` is the run folder against the caller;
#   4. validate the logged dataset name against the registry and `quit()` on miss;
#   5. load the corpus triple `D, X, Y` and init result accumulators.
# The fragment is truncated at both ends (starts mid-add_argument, ends on a
# dangling `if (args.init):`) — do not reformat without the surrounding lines.
# TODO(review): `quit()` in library-adjacent code — consider `sys.exit(1)` upstream.
default=1, help="number of times to perform kmeans") parser.add_argument('--init', dest="init", action="store_true") args = parser.parse_args() dataset_dict = { "karate": corpora.load_karate, "football": corpora.load_football, "flickr": corpora.load_flickr, "dblp": corpora.load_dblp, "books": corpora.load_books, "blogCatalog": corpora.load_blogCatalog, "polblog": corpora.load_polblogs, "adjnoun": corpora.load_adjnoun } log_in = logger.JSONLogger(os.path.join(args.file, "log.json"), mod="continue") dataset_name = log_in["dataset"] print(dataset_name) n_gaussian = log_in["n_gaussian"] if (dataset_name not in dataset_dict): print("Dataset " + dataset_name + " does not exist, please select one of the following : ") print(list(dataset_dict.keys())) quit() print("Loading Corpus ") D, X, Y = dataset_dict[dataset_name]() results = [] std_kmeans = [] if (args.init):
# NOTE(review): collapsed script fragment (newlines stripped). A variant of the
# fragment above with a slightly different dataset registry (adds "wikipedia",
# drops "flickr"/"blogCatalog"). It appears to:
#   1. finish an argparse option and parse args;
#   2. load a global config ("data/config/general.conf") to locate the log root,
#      then join `log_path` with `args.id` to find this experiment's folder
#      (uses a bare `join` — presumably `os.path.join`; confirm the import);
#   3. reopen that experiment's `log.json` (mod="continue") and read back
#      `dataset` and `n_centroid` (note: the sibling fragment calls the same
#      quantity `n_gaussian` — naming is inconsistent across scripts);
#   4. validate the dataset name against the registry and `quit()` on miss.
# Truncated at the start (mid-add_argument) — do not reformat in isolation.
default=1, help="number of times to perform kmeans") args = parser.parse_args() dataset_dict = { "karate": corpora.load_karate, "football": corpora.load_football, "dblp": corpora.load_dblp, "books": corpora.load_books, "polblog": corpora.load_polblogs, "adjnoun": corpora.load_adjnoun, "wikipedia": corpora.load_wikipedia } # loading the configuration file general_conf = logger.JSONLogger("data/config/general.conf", mod="continue") # get the folder of the experiment folder_xp = join(general_conf["log_path"], args.id) # load the log file of the experiment log_xp = logger.JSONLogger(join(folder_xp, "log.json"), mod="continue") # reading configuration of the xp dataset_name = log_xp["dataset"] n_centroid = log_xp["n_centroid"] if (dataset_name not in dataset_dict): print("Dataset " + dataset_name + " does not exist, please select one of the following : ") print(list(dataset_dict.keys())) quit()
# NOTE(review): collapsed script fragment (newlines stripped). It appears to:
#   1. wrap a 0..len(D)-1 index tensor as an indexable dataset
#      (`corpora_tools.from_indexable` over `torch.arange(...).unsqueeze(-1)`);
#   2. disable path mode on the corpus (`D.set_path(False)`) and build a
#      unigram^(3/4) negative-sampling distribution from `D.getFrequency()` —
#      column 1 presumably holds the counts; column 0 the token ids — TODO confirm;
#   3. light-copy `D` as the random-walk dataset `d_rw`;
#   4. read "data/config/general.conf" (mod="fill") for `path`/`log_path`, and a
#      random-walk cache index "data/config/random_walk.conf" keyed by
#      dataset/context_size/walk_lenght/seed (sic: "lenght" typo is part of the
#      runtime key — do NOT fix the spelling without migrating the cache);
#   5. on cache hit, `torch.load` the cached walks; the bare `except:` falls back
#      to regenerating — broad, but deliberate best-effort; the handler is
#      truncated here (`os.makedirs(general_path, ...)` is its first line).
# Truncated at the end — do not reformat in isolation.
# index of examples dataset dataset_index = corpora_tools.from_indexable( torch.arange(0, len(D), 1).unsqueeze(-1)) print("Dataset Size -> ", len(D)) D.set_path(False) # negative sampling distribution frequency = D.getFrequency()**(3 / 4) frequency[:, 1] /= frequency[:, 1].sum() frequency = pytorch_categorical.Categorical(frequency[:, 1]) # random walk dataset d_rw = D.light_copy() general_conf = logger.JSONLogger("data/config/general.conf", mod="fill") general_path = general_conf["path"] log_path = general_conf["log_path"] rw_log = logger.JSONLogger("data/config/random_walk.conf", mod="continue") if (args.force_rw): key = args.dataset + "_" + str(args.context_size) + "_" + str( args.walk_lenght) + "_" + str(args.seed) if (key in rw_log): try: print('Loading random walks from files') d_rw = torch.load(rw_log[key]["file"]) print('Loaded') except: os.makedirs(general_path, exist_ok=True)
# NOTE(review): collapsed script fragment (newlines stripped). Training-script
# preamble; it appears to:
#   1. default `init_beta` to `beta` when negative (sentinel meaning "unset"),
#      then bind `alpha, beta` to the *init* values — note this shadows/overwrites
#      the configured `args.beta` for the rest of the script; verify intended;
#   2. load corpus `D, X, Y` via the dataset registry and index it as in the
#      sibling fragment;
#   3. if `args.save`, create `<saving_folder>/<args.id>/`, open its log.json and
#      persist the full CLI args (`vars(args)`) for reproducibility;
#   4. build the unigram^(3/4) negative-sampling distribution and light-copy the
#      corpus for random walks;
#   5. open a walk cache "ressources/random_walk.conf" (path differs from the
#      sibling fragment's "data/config/random_walk.conf" — inconsistent caches)
#      and start building the cache key; the key expression is cut mid-line
#      (trailing `+ "_" +`), including the same "walk_lenght" runtime typo.
# Truncated at the end — do not reformat in isolation.
if (args.init_beta < 0): args.init_beta = args.beta alpha, beta = args.init_alpha, args.init_beta print("Loading Corpus ") D, X, Y = dataset_dict[args.dataset]() print("Creating dataset") # index of examples dataset dataset_index = corpora_tools.from_indexable( torch.arange(0, len(D), 1).unsqueeze(-1)) print("Dataset Size : ", len(D)) print("log will be saved at : ", os.path.join(saving_folder, args.id + "/")) if (args.save): os.makedirs(os.path.join(saving_folder, args.id + "/"), exist_ok=True) logger_object = logger.JSONLogger( os.path.join(saving_folder, args.id + "/log.json")) logger_object.append(vars(args)) D.set_path(False) # negative sampling distribution frequency = D.getFrequency()**(3 / 4) frequency[:, 1] /= frequency[:, 1].sum() frequency = pytorch_categorical.Categorical(frequency[:, 1]) # random walk dataset d_rw = D.light_copy() rw_log = logger.JSONLogger("ressources/random_walk.conf", mod="continue") if (args.force_rw): key = (args.dataset + "_" + str(args.context_size) + "_" + str(args.walk_lenght) + "_" + str(args.seed) + "_" +
# NOTE(review): collapsed script fragment (newlines stripped). Near-duplicate of
# the preceding training preamble (same alpha/beta defaulting, corpus loading,
# save-folder + args logging, negative-sampling distribution, `d_rw` copy) —
# strong candidate for factoring into a shared helper once the full files are in
# view. It diverges at the tail: instead of the random-walk cache it builds
#   - a first-order neighbor corpus  `corpora.NeigbhorFlatCorpus(X, Y)`
#     (sic: "Neigbhor" misspelling is the real project class name — keep as-is);
#   - a context corpus `corpora.RandomContextSizeFlat(X, ...)`, cut mid-call.
# Truncated at the end — do not reformat in isolation.
if (args.init_beta < 0): args.init_beta = args.beta alpha, beta = args.init_alpha, args.init_beta print("Loading Corpus ") D, X, Y = dataset_dict[args.dataset]() print("Creating dataset") # index of examples dataset dataset_index = corpora_tools.from_indexable( torch.arange(0, len(D), 1).unsqueeze(-1)) print("Dataset Size : ", len(D)) print("log will be saved at : ", os.path.join(saving_folder, args.id + "/")) if (args.save): os.makedirs(os.path.join(saving_folder, args.id + "/"), exist_ok=True) logger_object = logger.JSONLogger( os.path.join(saving_folder, args.id + "/log.json")) logger_object.append(vars(args)) D.set_path(False) # negative sampling distribution frequency = D.getFrequency()**(3 / 4) frequency[:, 1] /= frequency[:, 1].sum() frequency = pytorch_categorical.Categorical(frequency[:, 1]) # random walk dataset d_rw = D.light_copy() print("Loading Neighbor corpus") dataset_o1 = corpora.NeigbhorFlatCorpus(X, Y) print("Loading Context corpus") dataset_o2 = corpora.RandomContextSizeFlat(X,