Esempio n. 1
0
    def test_custom_tf(self):
        """Check that HPO runs on user-supplied triples factories and can be saved.

        The Nations training triples are loaded from ``NATIONS_TRAIN_PATH`` and
        split three ways; the resulting factories are handed directly to
        :func:`hpo_pipeline` instead of naming a dataset.

        .. seealso:: https://github.com/pykeen/pykeen/issues/230
        """
        factory = TriplesFactory.from_path(path=NATIONS_TRAIN_PATH)
        # Ratios are 80/10/10 and ``random_state`` is pinned to 0.
        train_tf, test_tf, valid_tf = factory.split([0.8, 0.1, 0.1], random_state=0)

        # Two trials of two epochs each keep the test short.
        result = hpo_pipeline(
            training=train_tf,
            testing=test_tf,
            validation=valid_tf,
            model='TransE',
            n_trials=2,
            training_kwargs={'num_epochs': 2},
        )

        # Saving must succeed; the directory is discarded when the block exits.
        with tempfile.TemporaryDirectory() as output_directory:
            result.save_to_directory(output_directory)
Esempio n. 2
0
    def test_custom_tf_object(self):
        """Check the study's user attributes when triples factory objects are passed.

        The factories come from splitting the Nations training triples and are
        given to the HPO helper as objects, so neither a dataset name nor any
        split entry is expected among the study's user attributes.

        .. seealso:: https://github.com/pykeen/pykeen/issues/230
        """
        factory = TriplesFactory.from_path(path=NATIONS_TRAIN_PATH)
        train_tf, test_tf, valid_tf = factory.split([0.8, 0.1, 0.1], random_state=0)

        result = self._help_test_hpo(
            study_name='HPO with custom triples factories',
            training=train_tf,
            testing=test_tf,
            validation=valid_tf,
        )

        # 'dataset' must be absent because no dataset was named. The three
        # split keys must be absent too: the factories carry no source path
        # information, so nothing should be recorded for them, even if a path
        # could perhaps be inferred from the triples factories themselves.
        for key in ('dataset', 'training', 'testing', 'validation'):
            self.assertNotIn(key, result.study.user_attrs)
Esempio n. 3
0
from pykeen.triples import TriplesFactory
from pykeen.evaluation import RankBasedEvaluator
from pykeen.pipeline import pipeline
import json

# Both values are interpolated into the input file name below
# (rare_<minimum>_<n_tokeep>.csv).
n_tokeep = 300
minimum = 500

tf = TriplesFactory.from_path(f'data/rare/rare_{minimum}_{n_tokeep}.csv')

# 80/20 split into training and testing triples. No random_state is passed.
training, testing = tf.split([0.8, 0.2])

result_pipeline = pipeline(
    training=training,
    testing=testing,
    model='RESCAL',
    model_kwargs={'embedding_dim': 300},
    # Currently disabled training options, kept for reference:
    #   sampler="schlichtkrull", checkpoint_name='RGCN_checkpointt.pt',
    #   checkpoint_frequency=5, batch_size=128
    training_kwargs={'num_epochs': 200},
    evaluator=RankBasedEvaluator,
    evaluator_kwargs={'ks': [50]},
)

# Plot the finished run: first the losses, then the pipeline result's own plot.
result_pipeline.plot_losses()
result_pipeline.plot()