default=1,
                    help="number of times to perform kmeans")
# Boolean flag (defaults to False): request model (re)initialisation.
parser.add_argument('--init', dest="init", action="store_true")
# Parse the command-line arguments declared above.
args = parser.parse_args()

# Map dataset names to their loader functions in the `corpora` module.
dataset_dict = {
    "karate": corpora.load_karate,
    "football": corpora.load_football,
    "flickr": corpora.load_flickr,
    "dblp": corpora.load_dblp,
    "books": corpora.load_books,
    "blogCatalog": corpora.load_blogCatalog,
    "polblog": corpora.load_polblogs,
    "adjnoun": corpora.load_adjnoun
}
# Re-open the experiment's JSON log to recover its configuration.
log_in = logger.JSONLogger(os.path.join(args.file, "log.json"), mod="continue")
dataset_name = log_in["dataset"]
print(dataset_name)
n_gaussian = log_in["n_gaussian"]
if dataset_name not in dataset_dict:
    print("Dataset " + dataset_name +
          " does not exist, please select one of the following : ")
    print(list(dataset_dict.keys()))
    # Exit with a non-zero status on error. The original used quit(),
    # a site-module convenience meant for the interactive shell that
    # exits with status 0 and may be absent under `python -S`.
    raise SystemExit(1)

print("Loading Corpus ")
# Loader returns the corpus object plus node/label tensors.
D, X, Y = dataset_dict[dataset_name]()

# Accumulators for per-run evaluation results and k-means spread.
results = []
std_kmeans = []
if (args.init):
                    default=1,
                    help="number of times to perform kmeans")
# Parse the command-line arguments declared above.
args = parser.parse_args()

# Map dataset names to their loader functions in the `corpora` module.
dataset_dict = {
    "karate": corpora.load_karate,
    "football": corpora.load_football,
    "dblp": corpora.load_dblp,
    "books": corpora.load_books,
    "polblog": corpora.load_polblogs,
    "adjnoun": corpora.load_adjnoun,
    "wikipedia": corpora.load_wikipedia
}

# loading the configuration file
general_conf = logger.JSONLogger("data/config/general.conf", mod="continue")
# get the folder of the experiment
folder_xp = join(general_conf["log_path"], args.id)
# load the log file of the experiment
log_xp = logger.JSONLogger(join(folder_xp, "log.json"), mod="continue")

# reading configuration of the xp
dataset_name = log_xp["dataset"]
n_centroid = log_xp["n_centroid"]

if dataset_name not in dataset_dict:
    print("Dataset " + dataset_name +
          " does not exist, please select one of the following : ")
    print(list(dataset_dict.keys()))
    # Exit non-zero on error; quit() is an interactive-shell helper
    # that exits with status 0 and may be absent under `python -S`.
    raise SystemExit(1)
Exemple #3
0
# index of examples dataset
# Wrap 0..len(D)-1 as an indexable column tensor of example indices.
dataset_index = corpora_tools.from_indexable(
    torch.arange(0, len(D), 1).unsqueeze(-1))
print("Dataset Size -> ", len(D))

D.set_path(False)

# negative sampling distribution
# Raise raw counts to the 3/4 power (word2vec-style smoothing), then
# normalise column 1 into a probability table for categorical sampling.
frequency = D.getFrequency()**(3 / 4)
frequency[:, 1] /= frequency[:, 1].sum()
frequency = pytorch_categorical.Categorical(frequency[:, 1])

# random walk dataset
# NOTE(review): presumably a shallow copy of D reused for walks —
# confirm light_copy semantics in the corpora module.
d_rw = D.light_copy()

# Global paths and the random-walk cache index come from config files.
general_conf = logger.JSONLogger("data/config/general.conf", mod="fill")
general_path = general_conf["path"]
log_path = general_conf["log_path"]
rw_log = logger.JSONLogger("data/config/random_walk.conf", mod="continue")

if (args.force_rw):
    # Cache key identifying a precomputed random-walk file for this
    # dataset / context size / walk length / seed combination.
    # (`walk_lenght` is a pre-existing attribute-name typo; keep it.)
    key = args.dataset + "_" + str(args.context_size) + "_" + str(
        args.walk_lenght) + "_" + str(args.seed)
    if (key in rw_log):

        try:
            print('Loading random walks from files')
            d_rw = torch.load(rw_log[key]["file"])
            print('Loaded')
        except Exception:
            # Was a bare `except:`, which also swallows SystemExit and
            # KeyboardInterrupt; catch Exception and fall back to
            # regenerating the walks (output directory created here).
            os.makedirs(general_path, exist_ok=True)
Exemple #4
0
# A negative --init_beta means "mirror the main beta value".
if args.init_beta < 0:
    args.init_beta = args.beta

alpha, beta = args.init_alpha, args.init_beta

print("Loading Corpus ")
D, X, Y = dataset_dict[args.dataset]()
print("Creating dataset")
# index of examples dataset
dataset_index = corpora_tools.from_indexable(
    torch.arange(0, len(D), 1).unsqueeze(-1))
print("Dataset Size : ", len(D))
# Build the experiment output directory path once (it was previously
# recomputed three times below).
xp_dir = os.path.join(saving_folder, args.id + "/")
print("log will be saved at : ", xp_dir)
if args.save:
    os.makedirs(xp_dir, exist_ok=True)
    logger_object = logger.JSONLogger(os.path.join(xp_dir, "log.json"))
    logger_object.append(vars(args))

D.set_path(False)

# negative sampling distribution: counts**(3/4), normalised into a
# probability table (word2vec-style smoothing).
frequency = D.getFrequency()**(3 / 4)
frequency[:, 1] /= frequency[:, 1].sum()
frequency = pytorch_categorical.Categorical(frequency[:, 1])
# random walk dataset
d_rw = D.light_copy()

# Random-walk cache index. NOTE(review): "ressources" looks like a typo
# for "resources", but it must match the on-disk directory — keep as is.
rw_log = logger.JSONLogger("ressources/random_walk.conf", mod="continue")
if (args.force_rw):
    key = (args.dataset + "_" + str(args.context_size) + "_" +
           str(args.walk_lenght) + "_" + str(args.seed) + "_" +
# A negative --init_beta means "mirror the main beta value".
if args.init_beta < 0:
    args.init_beta = args.beta

alpha, beta = args.init_alpha, args.init_beta

print("Loading Corpus ")
D, X, Y = dataset_dict[args.dataset]()
print("Creating dataset")
# index of examples dataset
dataset_index = corpora_tools.from_indexable(
    torch.arange(0, len(D), 1).unsqueeze(-1))
print("Dataset Size : ", len(D))
# Build the experiment output directory path once (it was previously
# recomputed three times below).
xp_dir = os.path.join(saving_folder, args.id + "/")
print("log will be saved at : ", xp_dir)
if args.save:
    os.makedirs(xp_dir, exist_ok=True)
    logger_object = logger.JSONLogger(os.path.join(xp_dir, "log.json"))
    logger_object.append(vars(args))

D.set_path(False)

# negative sampling distribution: counts**(3/4), normalised into a
# probability table (word2vec-style smoothing).
frequency = D.getFrequency()**(3 / 4)
frequency[:, 1] /= frequency[:, 1].sum()
frequency = pytorch_categorical.Categorical(frequency[:, 1])
# random walk dataset
d_rw = D.light_copy()

# First-order (direct neighbours) and second-order (context window)
# corpora. (`NeigbhorFlatCorpus` is a pre-existing class-name typo.)
print("Loading Neighbor corpus")
dataset_o1 = corpora.NeigbhorFlatCorpus(X, Y)
print("Loading Context corpus")
dataset_o2 = corpora.RandomContextSizeFlat(X,