Beispiel #1
0
def train_TransE(target_folder):
    """Train TransE on the data in ``target_folder`` and split the embeddings.

    A ``Config`` is set up with fixed hyper-parameters, training is run, and
    the exported ``embedding.vec.json`` is then read back so that its
    ``ent_embeddings`` and ``rel_embeddings`` entries can be written to
    ``entity_vector.json`` and ``relation_vector.json`` respectively.

    Args:
        target_folder: Directory path (without trailing slash) that holds
            the training input; every output file is written there as well.
    """
    trainer = Config()

    # All artefacts live next to the training data.
    checkpoint_file = target_folder + "/model.vec.tf"
    embedding_file = target_folder + "/embedding.vec.json"

    trainer.set_in_path(target_folder + '/')
    trainer.set_log_on(1)  # 1 => print the loss while training

    # Fixed hyper-parameters.
    trainer.set_work_threads(30)
    trainer.set_train_times(2000)
    trainer.set_nbatches(50)
    trainer.set_alpha(0.001)
    trainer.set_margin(1.0)
    trainer.set_bern(0)
    trainer.set_dimension(50)
    trainer.set_ent_neg_rate(1)
    trainer.set_rel_neg_rate(0)
    trainer.set_opt_method("SGD")

    # Export targets: a checkpoint every 50 steps plus a JSON parameter dump.
    trainer.set_export_files(checkpoint_file, steps=50)
    trainer.set_out_files(embedding_file)

    # The settings must be initialised before the model is attached.
    trainer.init()
    trainer.set_model(TransE)
    trainer.run()

    # Read the JSON dump back and split it into one file per embedding table.
    with open(embedding_file, "r") as source:
        exported = json.load(source)

    # Both tables are looked up before anything is written, so a missing key
    # fails before any output file is created.
    split_outputs = (
        ('/entity_vector.json', exported['ent_embeddings']),
        ('/relation_vector.json', exported['rel_embeddings']),
    )
    for suffix, vectors in split_outputs:
        with open(target_folder + suffix, 'w') as sink:
            json.dump(vectors, sink)
Beispiel #2
0
def run():
    """Train TransE on ``./openke_data/`` using the module-level ``args``.

    ``args.opt_method`` selects the optimiser and ``args.pretrain`` (0 or 1)
    is forwarded to ``Config.set_pretrain`` as a boolean. The pretrain flag
    also decides the output location: ``glove_initialized/glove.*`` when set,
    ``xavier_initialized/*`` otherwise.

    Raises:
        ValueError: If ``args.pretrain`` is neither 0 nor 1.
    """
    optimizer = args.opt_method
    pretrain_flag = args.pretrain
    if pretrain_flag not in (0, 1):
        raise ValueError('arg "pretrain" must be 0 or 1')
    use_pretrained = pretrain_flag == 1

    config = Config()
    config.set_in_path("./openke_data/")
    config.set_log_on(1)  # 1 => print the loss while training

    config.set_work_threads(30)
    config.set_train_times(1000)
    # NOTE(review): the original comment called this the batch size, but the
    # setter name suggests a number of batches -- confirm against Config.
    config.set_nbatches(512)
    config.set_alpha(0.001)  # learning rate

    config.set_bern(0)
    config.set_dimension(100)
    config.set_margin(1.0)
    config.set_ent_neg_rate(1)
    config.set_rel_neg_rate(0)
    config.set_opt_method(optimizer)
    config.set_pretrain(use_pretrained)

    # The pretrained variant uses a "glove." file-name prefix inside its own
    # directory; the other variant writes straight into its directory.
    output_prefix = (
        "./openke_data/embs/glove_initialized/glove."
        if use_pretrained
        else "./openke_data/embs/xavier_initialized/"
    )
    file_stem = output_prefix + "transe." + optimizer

    # Checkpoint export every 500 steps.
    config.set_export_files(file_stem + ".tf", steps=500)
    # JSON parameter dump (the original author warns that this might cause an
    # IOError if the file is too large).
    config.set_out_files(file_stem + ".vec.json")

    print("Opt-method: %s" % optimizer)
    print("Pretrain: %d" % use_pretrained)
    config.init()
    config.set_model(models.TransE)

    print("Begin training TransE")

    config.run()
Beispiel #3
0
def init_predict(hs, ts, rs):
    """Load exported TransE parameters from JSON and run ``Config.test()``.

    A ``Config`` is prepared for triple classification on
    ``./openke_data/``, the TransE model is attached, and the parameters
    stored in ``glove.transe.SGD.vec.json`` are loaded manually via
    ``set_parameters`` before the test is run.

    Args:
        hs: Unused by the active code path.
        ts: Unused by the active code path.
        rs: Unused by the active code path.

    NOTE(review): a previously disabled variant imported a saved checkpoint
    with ``con.set_import_files(...)`` and then called
    ``con.predict_triple(hs, ts, rs)``; the three arguments are presumably
    kept for that purpose -- confirm with callers.
    """
    # Read model parameters from the JSON dump and load them manually.
    con = Config()
    con.set_in_path("./openke_data/")
    con.set_test_triple_classification(True)
    con.set_work_threads(8)
    con.set_dimension(100)
    con.init()
    con.set_model(models.TransE)

    # A context manager guarantees the file is closed even when the JSON
    # payload is malformed and parsing raises.
    with open("./openke_data/embs/glove_initialized/glove.transe.SGD.vec.json",
              "r") as f:
        content = json.load(f)

    con.set_parameters(content)
    con.test()
def run():
    """Download ConceptNet, convert it to tuples and train TransH on it.

    The tuples directory returned by ``csv_to_tuples`` is used as the
    training input. The checkpoint (``transh.pt``) and the JSON parameter
    dump (``transh_embedding.vec.json``) are written below
    ``APP_ROOT / "../data/embeddings/conceptnet/"``, which is created when it
    does not exist yet.
    """
    # Download and preprocess ConceptNet.
    csv_path = download_concepnet()
    tuples_directory = csv_to_tuples(csv_path)

    config = Config()
    config.set_in_path(f'{tuples_directory}/')
    print(tuples_directory)
    config.set_log_on(1)  # set to 1 to print the loss

    config.set_work_threads(8)
    config.set_train_times(500)
    # NOTE(review): the original comment called this the batch size, but the
    # setter name suggests a number of batches -- confirm against Config.
    config.set_nbatches(300)
    config.set_alpha(0.001)  # learning rate

    config.set_bern(0)
    config.set_dimension(100)
    config.set_margin(1.0)
    config.set_ent_neg_rate(1)
    config.set_rel_neg_rate(0)
    config.set_opt_method("SGD")

    OUTPUT_PATH = APP_ROOT / "../data/embeddings/conceptnet/"
    # The target is nested several levels deep, so missing parents are
    # created too; exist_ok avoids the race between an exists() check and
    # the mkdir() call.
    OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

    OUTPUT_PATH = str(OUTPUT_PATH)
    # Checkpoint export path.
    config.set_export_files(OUTPUT_PATH + "/transh.pt")
    # JSON parameter dump (the original author warns that this might cause an
    # IOError if the file is too large).
    config.set_out_files(OUTPUT_PATH + "/transh_embedding.vec.json")

    config.init()
    # Save the graph embedding every 20 iterations.
    config.set_export_steps(20)

    config.set_model(models.TransH)

    logger.info("Begin training with {}".format(config.__dict__))

    config.run()
Beispiel #5
0
def run():
    """Train TransE on ``../dataset/`` using the module-level ``args``.

    ``args.opt_method`` selects the optimiser and ``args.pretrain`` (0 or 1)
    is forwarded to ``Config.set_pretrain`` as a boolean. The checkpoint and
    the JSON parameter dump are both written below ``../dataset/emb_init/``.

    Raises:
        ValueError: If ``args.pretrain`` is neither 0 nor 1.
    """
    optimizer = args.opt_method
    pretrain_flag = args.pretrain
    if pretrain_flag not in (0, 1):
        raise ValueError('arg "pretrain" must be 0 or 1')
    use_pretrained = pretrain_flag == 1

    config = Config()
    config.set_in_path("../dataset/")
    config.set_log_on(1)  # 1 => print the loss while training

    config.set_work_threads(30)
    config.set_train_times(10000)
    # NOTE(review): the original comment called this the batch size, but the
    # setter name suggests a number of batches -- confirm against Config.
    config.set_nbatches(512)
    config.set_alpha(0.001)  # learning rate

    config.set_bern(0)
    config.set_dimension(100)
    config.set_margin(1.0)
    config.set_ent_neg_rate(1)
    config.set_rel_neg_rate(0)
    config.set_opt_method(optimizer)
    config.set_pretrain(use_pretrained)

    # Both output files share the same directory and file-name stem.
    file_stem = "../dataset/emb_init/" + 'transe.' + optimizer

    # Checkpoint export every 500 steps.
    config.set_export_files(file_stem + '.tf', steps=500)
    # JSON parameter dump.
    config.set_out_files(file_stem + ".vec.json")

    print("Opt-method: %s" % optimizer)
    print("Pretrain: %d" % use_pretrained)
    config.init()
    config.set_model(models.TransE)

    print("Begin training TransE")

    config.run()
def train(config_data):
    """Configure and train a TransE model from a settings mapping.

    Args:
        config_data: Mapping read for the keys ``"input_dir"``, ``"epochs"``,
            ``"batch_size"``, ``"learning_rate"``,
            ``"embedding_dimension"``, ``"optimizer"`` and ``"output_dir"``.
            The optional ``"use_gpu"`` entry enables the GPU only when it
            equals the string ``"True"``.
    """
    trainer = Config()

    # The GPU is opt-in: a missing key or any value other than "True"
    # disables it.
    gpu_requested = "use_gpu" in config_data and config_data["use_gpu"] == "True"
    trainer.set_use_gpu(gpu_requested)

    trainer.set_in_path(config_data["input_dir"] + "/")
    trainer.set_work_threads(20)

    # Values taken from the mapping are converted to the numeric type each
    # setter is given in the original code.
    trainer.set_train_times(int(config_data["epochs"]))
    trainer.set_nbatches(int(config_data["batch_size"]))
    trainer.set_alpha(float(config_data["learning_rate"]))
    trainer.set_bern(0)
    trainer.set_dimension(int(config_data["embedding_dimension"]))
    trainer.set_margin(1.0)
    trainer.set_ent_neg_rate(1)
    trainer.set_rel_neg_rate(0)
    trainer.set_opt_method(config_data["optimizer"])

    # Checkpointing, validation and early-stopping settings.
    trainer.set_save_steps(100)
    trainer.set_valid_steps(100)
    trainer.set_early_stopping_patience(10)
    trainer.set_checkpoint_dir("checkpoint")
    trainer.set_result_dir(config_data["output_dir"])

    # Enable both test modes.
    trainer.set_test_link(True)
    trainer.set_test_triple(True)

    trainer.init()
    trainer.set_train_model(TransE)
    trainer.train()