Esempio n. 1
0
def start(config: Optional[TextIO], connection: str, rebuild: bool, no_prompt_bio2bel: bool):
    """Run the BioKEEN training pipeline through ``pykeen.run``.

    :param config: An open JSON file holding the configuration. When ``None``,
        the configuration is obtained from ``prompt_biokeen_config`` instead.
    :param connection: Forwarded to ``prompt_biokeen_config``.
    :param rebuild: Forwarded to ``prompt_biokeen_config``.
    :param no_prompt_bio2bel: Negated and forwarded as ``do_prompt_bio2bel``.
    """
    import pykeen

    if config is None:
        # No configuration file was supplied: fall back to the prompt helper.
        from .prompts import prompt_biokeen_config
        settings = prompt_biokeen_config(
            connection=connection,
            rebuild=rebuild,
            do_prompt_bio2bel=(not no_prompt_bio2bel),
        )
    else:
        settings = json.load(config)

    # Stamp the configuration with the version constants before running.
    settings['pykeen-version'] = PYKEEN_VERSION
    settings['biokeen-version'] = VERSION
    pykeen.run(settings)
Esempio n. 2
0
    def execute_pipeline(self, config):
        """Run ``pykeen.run`` with ``config`` and return whatever it returns.

        ``self.dir.name`` is passed as the output directory.
        """
        return pykeen.run(config=config, output_directory=self.dir.name)
Esempio n. 3
0
    def handle(self, *args, **options):
        """Train the model named by ``pkc.TRANS_E_NAME`` with pykeen.

        The hyper-parameters are read from ``options`` (``learning_rate``,
        ``num_epochs``, ``embedding_dim``, ``batch_size``, ``device`` and
        ``testset_name``); falsy values fall back to the defaults hard-coded
        below. Training input is every file in ``TRAINING_SET_DIR`` whose
        extension contains ``nt`` or ``tsv``.

        Side effects: creates the output directory under ``KGE_DIR``, changes
        the process working directory to it, prints the result keys and calls
        ``self.generate_bio2vec_frmt`` on the output directory.

        Any exception raised while configuring, training or post-processing is
        logged with its traceback and is not re-raised. ``args`` is unused.
        """
        learning_rate = options['learning_rate']
        num_epochs = options['num_epochs']
        embedding_dim = options['embedding_dim']
        batch_size = options['batch_size']
        device = options['device']
        testset_name = options['testset_name']

        # Substring match on the extension, so e.g. '.nt' and '.tsv' qualify.
        training_files = []
        for file_name in os.listdir(TRAINING_SET_DIR + '/.'):
            extension = splitext(file_name)[1]
            if 'nt' in extension or 'tsv' in extension:
                training_files.append(join(TRAINING_SET_DIR, file_name))

        try:
            # The numeric conversions stay inside the try block so that a
            # malformed option is logged like any other failure.
            config = {
                pkc.TRAINING_SET_PATH: training_files,
                pkc.TEST_SET_PATH: join(TEST_SET_DIR, testset_name + '.tsv'),
                pkc.EXECUTION_MODE: pkc.TRAINING_MODE,
                pkc.KG_EMBEDDING_MODEL_NAME: pkc.TRANS_E_NAME,
                pkc.SEED: 0,
                pkc.LEARNING_RATE: float(learning_rate) if learning_rate else 0.01,
                pkc.NUM_EPOCHS: int(num_epochs) if num_epochs else 10,
                pkc.BATCH_SIZE: int(batch_size) if batch_size else 64,
                pkc.PREFERRED_DEVICE: device if device else pkc.GPU,
                pkc.EMBEDDING_DIM: int(embedding_dim) if embedding_dim else 50,
                pkc.SCORING_FUNCTION_NORM: 1,  # corresponds to L1
                pkc.NORM_FOR_NORMALIZATION_OF_ENTITIES: 2,  # corresponds to L2
                pkc.MARGIN_LOSS: 1,
                pkc.FILTER_NEG_TRIPLES: True,
            }

            logger.info("Starting training dataset with settings:%s", config)

            # One output directory per model / test set / calendar day.
            out_dir = join(KGE_DIR, pkc.TRANS_E_NAME + '-' + testset_name + '-' + str(datetime.date.today()))
            os.makedirs(out_dir, exist_ok=True)
            os.chdir(out_dir)
            results = pykeen.run(
                config=config,
                output_directory=out_dir,
            )

            print('Keys:', *sorted(results.results.keys()), sep='\n  ')
            logger.info(results.trained_model)
            logger.info(results.losses)

            self.generate_bio2vec_frmt(out_dir)

        except Exception:
            # Exception already covers RuntimeError, so one handler suffices.
            logger.exception("message")
Esempio n. 4
0
def rm_main():
	"""Run pykeen with a templated configuration and save the results as text.

	Builds a configuration dict, passes it to ``pykeen.run`` and writes the
	string forms of ``trained_model``, ``losses`` and ``evaluation_summary``
	to three separate text files.

	NOTE(review): the ``%{...}`` placeholders are not valid Python on their
	own; presumably a templating step substitutes them before this script
	runs -- confirm against whatever invokes it.
	"""

	# Settings handed to pykeen.run; every value comes from a %{...} placeholder.
	config = dict(
	    training_set_path           = '%{FolderPath}/%{DatasetFinalName}.tsv',
	    execution_mode              = '%{execution_mode}',
	    kg_embedding_model_name     = '%{kg_embedding_model_name}',
	    embedding_dim               = %{embedding_dim},
	    normalization_of_entities   = %{normalization_of_entities},  # corresponds to L2
	    scoring_function            = %{scoring_function},  # corresponds to L1
	    margin_loss                 = %{margin_loss},
	    learning_rate               = %{learning_rate},
	    batch_size                  = %{batch_size},
	    num_epochs                  = %{num_epochs},
	    test_set_ratio              = %{test_set_ratio},
	    filter_negative_triples     = %{filter_negative_triples},
	    random_seed                 = %{random_seed},
	    preferred_device            = '%{preferred_device}',
	    maximum_number_of_hpo_iters = %{maximum_number_of_hpo_iters},
	)

	# Run pykeen, passing the result directory placeholder as the output directory.
	results = pykeen.run(
	    config=config,
	    output_directory="%{ResultDirectory}",
	)

	# Write the string form of the trained model to its own text file.
	with open(r"%{ResultDirectory}/%{DatasetFinalName}_trained_model.txt", "w+") as res:
		res.write(str(results.trained_model)+"\n")

	# Write the string form of the losses to its own text file.
	with  open(r"%{ResultDirectory}/%{DatasetFinalName}_losses.txt", "w+") as res:
		res.write(str(results.losses)+"\n") 	

	# Write the string form of the evaluation summary to its own text file.
	with open(r"%{ResultDirectory}/%{DatasetFinalName}_evaluation_summary.txt", "w+") as res:
		res.write(str(results.evaluation_summary)+"\n") 	
Esempio n. 5
0
        normalization_of_entities),  # corresponds to L2
    scoring_function=float(scoring_function),  # corresponds to L1
    margin_loss=float(margin_loss),
    learning_rate=float(learning_rate),
    batch_size=int(batch_size),
    num_epochs=int(num_epochs),
    test_set_ratio=float(test_set_ratio),
    filter_negative_triples=bool(filter_negative_triples),
    random_seed=int(random_seed),
    preferred_device='gpu',
    maximum_number_of_hpo_iters=int(maximum_number_of_hpo_iters),
)

# Run pykeen on the assembled configuration, passing `path` as the output directory.
results = pykeen.run(config=config, output_directory=path)
"""
# Create output files
with open(path+"_trained_model.txt", "w+") as res:
	res.write(str(results.trained_model)+"\n")

# Create output files
with  open(path+"_losses.txt", "w+") as res:
	res.write(str(results.losses)+"\n") 	

# Create output files
with open(path+"_evaluation_summary.txt", "w+") as res:
	res.write(str(results.evaluation_summary)+"\n")
"""
Esempio n. 6
0
    # Assemble the pykeen settings from the attributes of ``args``.
    config = {
        'training_set_path': args.rdf_file,
        'execution_mode': 'Training_mode',
        'random_seed': int(args.random_seed),
        'kg_embedding_model_name': args.embedding_model_name,
        'embedding_dim': int(args.embedding_dim),
        'scoring_function': int(args.scoring_function),  # corresponds to L1
        'normalization_of_entities': int(args.norm_of_entities),  # corresponds to L2
        'margin_loss': float(args.margin_loss),
        'learning_rate': float(args.learning_rate),
        'num_epochs': int(args.num_epochs),
        'batch_size': int(args.batch_size),
        'filter_negative_triples': True,
        'preferred_device': args.preferred_device,
    }

    # Some models get one extra, hard-coded setting.
    if config['kg_embedding_model_name'] in ('TransD', 'TransR'):
        config['relation_embedding_dim'] = 20
    if config['kg_embedding_model_name'] == 'TransH':
        config['weighting_soft_constraint'] = 0.015625

    # Echo the output folder and the final settings before training.
    print(args.out_folder)
    print(config)

    results = pykeen.run(config=config, output_directory=args.out_folder)
    print("Embedding learning is complete")