# Example #1
# 0
# Locate the trained-embedding directory and recover the run's parameters
# from its JSON log before reloading the corpus and the embeddings.
parser.add_argument('--file', dest="file", type=str, default="RESULTS/football-5D-KMEANS-1/",
                    help="embeddings location file")
args = parser.parse_args()

# Registry mapping a corpus name to the callable that loads it.
dataset_dict = {
    "karate": corpora.load_karate,
    "football": corpora.load_football,
    "flickr": corpora.load_flickr,
    "dblp": corpora.load_dblp,
    "books": corpora.load_books,
    "blogCatalog": corpora.load_blogCatalog,
    "polblog": corpora.load_polblogs,
    "adjnoun": corpora.load_adjnoun,
}

# Reopen the experiment log in "continue" mode to read back dataset name
# and number of Gaussians used during training.
log_in = logger.JSONLogger(os.path.join(args.file, "log.json"), mod="continue")
dataset_name = log_in["dataset"]
print(dataset_name)
n_gaussian = log_in["n_gaussian"]

# Fail fast when the logged dataset name is not in the registry.
if dataset_name not in dataset_dict:
    print("Dataset " + dataset_name + " does not exist, please select one of the following : ")
    print(list(dataset_dict.keys()))
    quit()

print("Loading Corpus ")
D, X, Y = dataset_dict[dataset_name]()

results = []
std_kmeans = []
# The checkpoint is a tuple; element 0 holds the embedding tensor.
representations = torch.load(os.path.join(args.file, "embeddings.t7"))[0]
# Example #2
# 0
# A negative init_beta is the "unset" sentinel: fall back to args.beta.
if args.init_beta < 0:
    args.init_beta = args.beta
# set the seed for random sampling
alpha, beta = args.init_alpha, args.init_beta

print("Loading Corpus ")
D, X, Y = dataset_dict[args.dataset]()
print("Creating dataset")
# Wrap the example indices [0, len(D)) as an indexable dataset.
dataset_index = corpora_tools.from_indexable(
    torch.arange(0, len(D), 1).unsqueeze(-1))
print("Dataset Size -> ", len(D))

# When saving is requested, create the run folder and persist the CLI options.
if args.save:
    os.makedirs(os.path.join(saving_folder, args.id + "/"), exist_ok=True)
    logger_object = logger.JSONLogger(
        os.path.join(saving_folder, args.id + "/log.json"))
    logger_object.append(vars(args))
D.set_path(False)

# Negative-sampling distribution: word frequencies raised to the classic
# 3/4 smoothing exponent, then renormalized into a categorical sampler.
frequency = D.getFrequency() ** (3 / 4)
frequency[:, 1] /= frequency[:, 1].sum()
frequency = pytorch_categorical.Categorical(frequency[:, 1])
# Light copy of the corpus dedicated to random-walk generation.
d_rw = D.light_copy()
rw_log = logger.JSONLogger("ressources/random_walk.conf", mod="continue")
if args.force_rw:
    # Cache key identifying this random-walk configuration
    # (dataset, context size, walk length, seed).
    key = args.dataset + "_" + str(args.context_size) + "_" + str(
        args.walk_lenght) + "_" + str(args.seed)
    if (key in rw_log):
# NOTE(review): this span duplicates the dataset-construction section above —
# likely an accidental paste during extraction; confirm which copy is intended.
print("Creating dataset")
# index of examples dataset
dataset_index = corpora_tools.from_indexable(
    torch.arange(0, len(D), 1).unsqueeze(-1))
print("Dataset Size -> ", len(D))

D.set_path(False)

# negative sampling distribution
# frequencies are raised to the 3/4 smoothing exponent, then normalized in-place
frequency = D.getFrequency()**(3 / 4)
frequency[:, 1] /= frequency[:, 1].sum()
frequency = pytorch_categorical.Categorical(frequency[:, 1])
# random walk dataset
d_rw = D.light_copy()

# reopen the random-walk cache index in "continue" mode
rw_log = logger.JSONLogger("ressources/random_walk.conf", mod="continue")
if (args.force_rw):
    # cache key: dataset name + context size + walk length + seed
    key = args.dataset + "_" + str(args.context_size) + "_" + str(
        args.walk_lenght) + "_" + str(args.seed)
    if (key in rw_log):

        # Try to reuse cached random walks; on any failure, regenerate them.
        try:
            print('Loading random walks from files')
            d_rw = torch.load(rw_log[key]["file"])
            print('Loaded')
        # NOTE(review): bare `except:` swallows everything, including
        # KeyboardInterrupt/SystemExit — narrowing to (OSError, KeyError)
        # is presumably what was intended; confirm before changing.
        except:
            # NOTE(review): hard-coded absolute path — machine-specific
            # scratch directory; should come from configuration.
            os.makedirs("/local/gerald/KMEANS_RESULTS/", exist_ok=True)
            d_rw.set_walk(args.walk_lenght, 1.0)
            d_rw.set_path(True)
            # Call continues past this view — remaining arguments not visible here.
            d_rw = corpora.ContextCorpus(d_rw,
                                         context_size=args.context_size,
    "dblp": corpora.load_dblp,
    "books": corpora.load_books,
    "blogCatalog": corpora.load_blogCatalog
}

# Riemannian optimizer registry: CLI name -> Poincaré-ball optimizer class.
optimizer_dict = {
    "addhsgd": optimizer.PoincareBallSGDAdd,
    "exphsgd": optimizer.PoincareBallSGDExp,
    "hsgd": optimizer.PoincareBallSGD,
    "exphsga": optimizer.PoincareBallSGAExp,
}

# Persist the CLI options under RESULTS/<id>/ when saving is enabled.
if args.save:
    print("The following options are use for the current experiment ", args)
    os.makedirs("RESULTS/" + args.id + "/", exist_ok=True)
    logger_object = logger.JSONLogger("RESULTS/" + args.id + "/log.json")
    logger_object.append(vars(args))


def _check_known(label, name, registry):
    # Exit with a listing of valid keys when `name` is not in `registry`.
    if name not in registry:
        print(label + " " + name + " does not exist, please select one of the following : ")
        print(list(registry.keys()))
        quit()


# check if dataset exists
_check_known("Dataset", args.dataset, dataset_dict)
_check_known("Optimizer", args.embedding_optimizer, optimizer_dict)