def train_TransE(target_folder):
    """Train a TransE model on the data in ``target_folder`` and split its embeddings.

    The training input is read from ``target_folder + '/'``. After training,
    the exported ``embedding.vec.json`` is read back and its
    ``'ent_embeddings'`` and ``'rel_embeddings'`` entries are written to
    ``entity_vector.json`` and ``relation_vector.json`` in the same folder.

    Args:
        target_folder: Folder path (without trailing slash) that holds the
            training files and receives every output file.
    """
    trainer = Config()
    embedding_json = target_folder + "/embedding.vec.json"

    # Input training files are read from the target folder.
    trainer.set_in_path(target_folder+'/')
    trainer.set_log_on(1)  # set to 1 to print the loss
    trainer.set_work_threads(30)
    trainer.set_train_times(2000)
    trainer.set_nbatches(50)
    trainer.set_alpha(0.001)
    trainer.set_margin(1.0)
    trainer.set_bern(0)
    trainer.set_dimension(50)
    trainer.set_ent_neg_rate(1)
    trainer.set_rel_neg_rate(0)
    trainer.set_opt_method("SGD")

    # Models will be exported via tf.Saver() automatically.
    trainer.set_export_files(target_folder+"/model.vec.tf", steps=50)
    # Model parameters will be exported to json files automatically.
    trainer.set_out_files(embedding_json)

    # Initialize experimental settings, pick the embedding model, then train.
    trainer.init()
    trainer.set_model(TransE)
    trainer.run()

    # Read the exported parameters back so the entity and relation
    # embeddings can each be written to their own file.
    with open(embedding_json, "r") as source:
        exported = json.load(source)

    # Look up both entries before writing anything, so a missing key fails
    # before any output file is created.
    outputs = (
        ('/entity_vector.json', exported['ent_embeddings']),
        ('/relation_vector.json', exported['rel_embeddings']),
    )
    for suffix, vectors in outputs:
        with open(target_folder + suffix, 'w') as sink:
            json.dump(vectors, sink)
def run():
    """Train a TransE model on ``./openke_data/`` using the parsed arguments.

    Reads ``args.opt_method`` and ``args.pretrain`` from the enclosing scope.
    The export location depends on the pretrain flag: files go under
    ``./openke_data/embs/glove_initialized/`` (with a ``glove.`` file-name
    prefix) when it is 1, and under ``./openke_data/embs/xavier_initialized/``
    when it is 0.

    Raises:
        ValueError: If ``args.pretrain`` is neither 0 nor 1.
    """
    opt_method = args.opt_method
    pretrain_flag = args.pretrain

    # Reject anything other than 0 / 1 up front.
    if pretrain_flag not in (0, 1):
        raise ValueError('arg "pretrain" must be 0 or 1')
    pretrain = pretrain_flag == 1

    cfg = Config()
    cfg.set_in_path("./openke_data/")
    cfg.set_log_on(1)  # set to 1 to print the loss
    cfg.set_work_threads(30)
    cfg.set_train_times(1000)  # number of iterations
    # NOTE(review): the original comment called this "batch size"; the setter
    # name suggests a number of batches -- confirm against OpenKE.
    cfg.set_nbatches(512)
    cfg.set_alpha(0.001)  # learning rate
    cfg.set_bern(0)
    cfg.set_dimension(100)
    cfg.set_margin(1.0)
    cfg.set_ent_neg_rate(1)
    cfg.set_rel_neg_rate(0)
    cfg.set_opt_method(opt_method)

    # --- revision starts ---
    cfg.set_pretrain(pretrain)
    # This is a prefix rather than a directory: in the pretrain case it
    # already contains the leading "glove." part of the file name.
    output_prefix = (
        "./openke_data/embs/glove_initialized/glove."
        if pretrain
        else "./openke_data/embs/xavier_initialized/"
    )
    # --- revision ends ---

    export_stem = output_prefix + "transe." + opt_method
    # Model parameters will be exported via torch.save() automatically,
    # every 500 steps.
    cfg.set_export_files(export_stem + ".tf", steps=500)
    # Model parameters will be exported to json files automatically.
    # (Might cause IOError if the file is too large)
    cfg.set_out_files(export_stem + ".vec.json")

    print("Opt-method: %s" % opt_method)
    print("Pretrain: %d" % pretrain)

    cfg.init()
    cfg.set_model(models.TransE)
    print("Begin training TransE")
    cfg.run()
def init_predict(hs, ts, rs):
    """Load exported TransE parameters from JSON into a ``Config`` and run its test.

    The parameters are read from
    ``./openke_data/embs/glove_initialized/glove.transe.SGD.vec.json`` and
    handed to ``Config.set_parameters`` before ``Config.test()`` is called.

    Args:
        hs: Unused by the active code path; only referenced by the disabled
            ``con.predict_triple(hs, ts, rs)`` call kept below for reference.
        ts: Unused by the active code path (see ``hs``).
        rs: Unused by the active code path (see ``hs``).
    """
    # (1) Disabled alternative: set import files and OpenKE will automatically
    #     load models via tf.Saver().
    # con = Config()
    # # con.set_in_path("OpenKE/benchmarks/FB15K/")
    # con.set_in_path("openke_data/")
    # # con.set_test_link_prediction(True)
    # con.set_test_triple_classification(True)
    # con.set_work_threads(8)
    # con.set_dimension(100)
    # # con.set_import_files("OpenKE/res/model.vec.tf")
    # con.set_import_files("openke_data/embs/glove_initialized/glove.transe.SGD.pt")
    # con.init()
    # con.set_model(models.TransE)
    # con.test()
    # con.predict_triple(hs, ts, rs)
    # # con.show_link_prediction(2,1)
    # # con.show_triple_classification(2,1,3)

    # (2) Read model parameters from json files and manually load parameters.
    con = Config()
    con.set_in_path("./openke_data/")
    con.set_test_triple_classification(True)
    con.set_work_threads(8)
    con.set_dimension(100)
    con.init()
    con.set_model(models.TransE)

    # Use a context manager so the file handle is closed even when the JSON
    # is malformed and parsing raises.
    with open("./openke_data/embs/glove_initialized/glove.transe.SGD.vec.json", "r") as f:
        content = json.load(f)

    con.set_parameters(content)
    con.test()
def run():
    """Download ConceptNet, convert it to tuples, and train TransH embeddings.

    The tuple directory produced by ``csv_to_tuples`` is used as the training
    input. The model file (``transh.pt``) and the JSON parameters
    (``transh_embedding.vec.json``) are written to
    ``APP_ROOT / "../data/embeddings/conceptnet/"``, which is created first
    (including any missing parent directories) if it does not exist.
    """
    # Download and preprocess ConceptNet
    csv_path = download_concepnet()
    tuples_directory = csv_to_tuples(csv_path)

    config = Config()
    config.set_in_path(f'{tuples_directory}/')
    #config.set_in_path(r'/Users/ashishnagar/code/knowledge-enabled-textual-entailment/kg-embeddings/OpenKE/benchmarks/FB15K/')
    print(tuples_directory)
    config.set_log_on(1)  # set to 1 to print the loss

    config.set_work_threads(8)
    config.set_train_times(500)  # number of iterations
    # NOTE(review): the original comment called this "batch size"; the setter
    # name suggests a number of batches -- confirm against OpenKE.
    config.set_nbatches(300)
    config.set_alpha(0.001)  # learning rate

    config.set_bern(0)
    config.set_dimension(100)
    config.set_margin(1.0)
    config.set_ent_neg_rate(1)
    config.set_rel_neg_rate(0)
    config.set_opt_method("SGD")

    OUTPUT_PATH = APP_ROOT / "../data/embeddings/conceptnet/"
    # parents=True: a bare mkdir() fails with FileNotFoundError when the
    # intermediate "data/embeddings" directories are missing.
    # exist_ok=True: avoids the check-then-create race of exists() + mkdir().
    OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
    OUTPUT_PATH = str(OUTPUT_PATH)

    # Model parameters will be exported via torch.save() automatically.
    config.set_export_files(OUTPUT_PATH + "/transh.pt")
    # Model parameters will be exported to json files automatically.
    # (Might cause IOError if the file is too large)
    config.set_out_files(OUTPUT_PATH + "/transh_embedding.vec.json")

    config.init()
    # Save the graph embedding every {number} iterations
    config.set_export_steps(20)
    config.set_model(models.TransH)

    logger.info("Begin training with {}".format(config.__dict__))
    config.run()
def run():
    """Train a TransE model on ``../dataset/`` using the parsed arguments.

    Reads ``args.opt_method`` and ``args.pretrain`` from the enclosing scope.
    The pretrain flag is passed to ``Config.set_pretrain``. The model export
    and the JSON parameters are written under ``../dataset/emb_init/`` with
    the optimizer name embedded in the file names.

    Raises:
        ValueError: If ``args.pretrain`` is neither 0 nor 1.
    """
    opt_method = args.opt_method
    pretrain_flag = args.pretrain

    # Reject anything other than 0 / 1 up front.
    if pretrain_flag not in (0, 1):
        raise ValueError('arg "pretrain" must be 0 or 1')
    pretrain = pretrain_flag == 1

    cfg = Config()
    cfg.set_in_path("../dataset/")
    cfg.set_log_on(1)  # set to 1 to print the loss
    cfg.set_work_threads(30)
    cfg.set_train_times(10000)  # number of iterations
    # NOTE(review): the original comment called this "batch size"; the setter
    # name suggests a number of batches -- confirm against OpenKE.
    cfg.set_nbatches(512)
    cfg.set_alpha(0.001)  # learning rate
    cfg.set_bern(0)
    cfg.set_dimension(100)
    cfg.set_margin(1.0)
    cfg.set_ent_neg_rate(1)
    cfg.set_rel_neg_rate(0)
    cfg.set_opt_method(opt_method)

    # --- revision starts ---
    cfg.set_pretrain(pretrain)
    output_dir = "../dataset/emb_init/"
    # --- revision ends ---

    export_stem = output_dir + 'transe.' + opt_method
    # Model parameters will be exported via torch.save() automatically,
    # every 500 steps.
    cfg.set_export_files(export_stem + '.tf', steps=500)
    # Model parameters will be exported to json files automatically.
    cfg.set_out_files(export_stem + ".vec.json")

    print("Opt-method: %s" % opt_method)
    print("Pretrain: %d" % pretrain)

    cfg.init()
    cfg.set_model(models.TransE)
    print("Begin training TransE")
    cfg.run()
def train(config_data):
    """Configure and train a TransE model from a settings mapping.

    Args:
        config_data: Mapping with the keys read below:
            ``"use_gpu"`` (optional; the GPU is enabled only when the value
            is exactly the string ``"True"``), ``"input_dir"``, ``"epochs"``,
            ``"batch_size"``, ``"learning_rate"``, ``"embedding_dimension"``,
            ``"optimizer"`` and ``"output_dir"``. The numeric entries are
            converted with ``int`` / ``float`` before use.
    """
    # os.environ['CUDA_VISIBLE_DEVICES'] = config_data["gpu_number"]
    trainer = Config()

    # A missing key or any value other than the string "True" disables the GPU.
    trainer.set_use_gpu(config_data.get("use_gpu") == "True")

    trainer.set_in_path(config_data["input_dir"] + "/")
    trainer.set_work_threads(20)

    # Hyper-parameters taken from the settings mapping.
    trainer.set_train_times(int(config_data["epochs"]))  # epoch
    trainer.set_nbatches(int(config_data["batch_size"]))  # batch size
    trainer.set_alpha(float(config_data["learning_rate"]))  # learning rate
    trainer.set_bern(0)
    trainer.set_dimension(int(config_data["embedding_dimension"]))  # embedding dimension
    trainer.set_margin(1.0)
    trainer.set_ent_neg_rate(1)
    trainer.set_rel_neg_rate(0)
    trainer.set_opt_method(config_data["optimizer"])

    # Checkpointing, validation and early-stopping settings.
    trainer.set_save_steps(100)
    trainer.set_valid_steps(100)
    trainer.set_early_stopping_patience(10)
    trainer.set_checkpoint_dir("checkpoint")
    trainer.set_result_dir(config_data["output_dir"])

    trainer.set_test_link(True)
    trainer.set_test_triple(True)

    trainer.init()
    trainer.set_train_model(TransE)
    trainer.train()