def start(config: Optional[TextIO], connection: str, rebuild: bool, no_prompt_bio2bel: bool):
    """Launch a BioKEEN training run.

    :param config: Open JSON file holding the run configuration, or ``None``
        to gather the configuration through the interactive prompts instead.
    :param connection: Forwarded to the prompt helper; only used when
        ``config`` is ``None``.
    :param rebuild: Forwarded to the prompt helper; only used when ``config``
        is ``None``.
    :param no_prompt_bio2bel: Inverted and forwarded to the prompt helper as
        ``do_prompt_bio2bel``.
    """
    import pykeen

    if config is None:
        # No configuration file was supplied, so ask the user for the settings.
        from .prompts import prompt_biokeen_config

        settings = prompt_biokeen_config(
            connection=connection,
            rebuild=rebuild,
            do_prompt_bio2bel=not no_prompt_bio2bel,
        )
    else:
        settings = json.load(config)

    # Record which package versions produced this run.
    settings['pykeen-version'] = PYKEEN_VERSION
    settings['biokeen-version'] = VERSION

    pykeen.run(settings)
def execute_pipeline(self, config):
    """Run the PyKEEN pipeline for ``config`` and return whatever it produces.

    The output directory handed to PyKEEN is ``self.dir.name``.
    """
    return pykeen.run(config=config, output_directory=self.dir.name)
def handle(self, *args, **options):
    """Train a TransE embedding with PyKEEN using the command options.

    Reads ``learning_rate``, ``num_epochs``, ``embedding_dim``,
    ``batch_size``, ``device`` and ``testset_name`` from ``options``. A falsy
    value for any of the first five selects the built-in default shown in
    the configuration below.

    The training set is every file in ``TRAINING_SET_DIR`` whose extension
    contains ``nt`` or ``tsv``; the test set is
    ``TEST_SET_DIR/<testset_name>.tsv``. Results go to a directory under
    ``KGE_DIR`` named after the model, the test set and today's date.

    Side effect: the process working directory is changed to that output
    directory.

    Any exception raised while building the configuration, training, or
    exporting is logged with its traceback and swallowed; the method always
    returns ``None``.
    """
    learning_rate = options['learning_rate']
    num_epochs = options['num_epochs']
    embedding_dim = options['embedding_dim']
    batch_size = options['batch_size']
    device = options['device']
    testset_name = options['testset_name']

    training_files = []
    for file_name in os.listdir(TRAINING_SET_DIR + '/.'):
        extension = splitext(file_name)[1]
        # Substring match on the extension, so e.g. ".nt" and ".tsv" qualify.
        if 'nt' in extension or 'tsv' in extension:
            training_files.append(join(TRAINING_SET_DIR, file_name))

    try:
        config = {
            pkc.TRAINING_SET_PATH: training_files,
            pkc.TEST_SET_PATH: join(TEST_SET_DIR, testset_name + '.tsv'),
            pkc.EXECUTION_MODE: pkc.TRAINING_MODE,
            pkc.KG_EMBEDDING_MODEL_NAME: pkc.TRANS_E_NAME,
            pkc.SEED: 0,
            pkc.LEARNING_RATE: float(learning_rate) if learning_rate else 0.01,
            pkc.NUM_EPOCHS: int(num_epochs) if num_epochs else 10,
            pkc.BATCH_SIZE: int(batch_size) if batch_size else 64,
            pkc.PREFERRED_DEVICE: device if device else pkc.GPU,
            pkc.EMBEDDING_DIM: int(embedding_dim) if embedding_dim else 50,
            pkc.SCORING_FUNCTION_NORM: 1,  # corresponds to L1
            pkc.NORM_FOR_NORMALIZATION_OF_ENTITIES: 2,  # corresponds to L2
            # NOTE(review): the original comment here also said "corresponds
            # to L1", which looks copy-pasted -- confirm what 1 means here.
            pkc.MARGIN_LOSS: 1,
            pkc.FILTER_NEG_TRIPLES: True,
        }
        logger.info("Starting training dataset with settings:%s", config)

        out_dir = join(
            KGE_DIR,
            pkc.TRANS_E_NAME + '-' + testset_name + '-' + str(datetime.date.today()),
        )
        os.makedirs(out_dir, exist_ok=True)
        os.chdir(out_dir)

        results = pykeen.run(
            config=config,
            output_directory=out_dir,
        )

        print('Keys:', *sorted(results.results.keys()), sep='\n ')
        logger.info(results.trained_model)
        logger.info(results.losses)

        self.generate_bio2vec_frmt(out_dir)
    except Exception:
        # Best-effort command: report the failure with its traceback instead
        # of propagating. A separate RuntimeError handler would be unreachable
        # because RuntimeError is a subclass of Exception.
        logger.exception(
            "Knowledge graph embedding training failed for test set %r",
            testset_name,
        )
def rm_main(): config = dict( training_set_path = '%{FolderPath}/%{DatasetFinalName}.tsv', execution_mode = '%{execution_mode}', kg_embedding_model_name = '%{kg_embedding_model_name}', embedding_dim = %{embedding_dim}, normalization_of_entities = %{normalization_of_entities}, # corresponds to L2 scoring_function = %{scoring_function}, # corresponds to L1 margin_loss = %{margin_loss}, learning_rate = %{learning_rate}, batch_size = %{batch_size}, num_epochs = %{num_epochs}, test_set_ratio = %{test_set_ratio}, filter_negative_triples = %{filter_negative_triples}, random_seed = %{random_seed}, preferred_device = '%{preferred_device}', maximum_number_of_hpo_iters = %{maximum_number_of_hpo_iters}, ) results = pykeen.run( config=config, output_directory="%{ResultDirectory}", ) # Create output files with open(r"%{ResultDirectory}/%{DatasetFinalName}_trained_model.txt", "w+") as res: res.write(str(results.trained_model)+"\n") # Create output files with open(r"%{ResultDirectory}/%{DatasetFinalName}_losses.txt", "w+") as res: res.write(str(results.losses)+"\n") # Create output files with open(r"%{ResultDirectory}/%{DatasetFinalName}_evaluation_summary.txt", "w+") as res: res.write(str(results.evaluation_summary)+"\n")
normalization_of_entities), # corresponds to L2 scoring_function=float(scoring_function), # corresponds to L1 margin_loss=float(margin_loss), learning_rate=float(learning_rate), batch_size=int(batch_size), num_epochs=int(num_epochs), test_set_ratio=float(test_set_ratio), filter_negative_triples=bool(filter_negative_triples), random_seed=int(random_seed), preferred_device='gpu', maximum_number_of_hpo_iters=int(maximum_number_of_hpo_iters), ) # Execute kg_embedding results = pykeen.run( config=config, output_directory=path, ) """ # Create output files with open(path+"_trained_model.txt", "w+") as res: res.write(str(results.trained_model)+"\n") # Create output files with open(path+"_losses.txt", "w+") as res: res.write(str(results.losses)+"\n") # Create output files with open(path+"_evaluation_summary.txt", "w+") as res: res.write(str(results.evaluation_summary)+"\n") """
# Assemble the PyKEEN training configuration from the parsed arguments.
config = {
    'training_set_path': args.rdf_file,
    'execution_mode': 'Training_mode',
    'random_seed': int(args.random_seed),
    'kg_embedding_model_name': args.embedding_model_name,
    'embedding_dim': int(args.embedding_dim),
    'scoring_function': int(args.scoring_function),  # corresponds to L1
    'normalization_of_entities': int(args.norm_of_entities),  # corresponds to L2
    'margin_loss': float(args.margin_loss),
    'learning_rate': float(args.learning_rate),
    'num_epochs': int(args.num_epochs),
    'batch_size': int(args.batch_size),
    'filter_negative_triples': True,
    'preferred_device': args.preferred_device,
}

# Some models take one extra hyper-parameter on top of the shared settings.
model_name = config['kg_embedding_model_name']
if model_name == 'TransH':
    config['weighting_soft_constraint'] = 0.015625
elif model_name in ('TransD', 'TransR'):
    config['relation_embedding_dim'] = 20

print(args.out_folder)
print(config)

results = pykeen.run(config=config, output_directory=args.out_folder)

print("Embedding learning is complete")