Example No. 1
0
 def test_hash_update(self):
     """Digests are deterministic: the same update sequence hashed twice
     produces identical hex digests, while a different sequence produces
     a different digest.
     """
     def digest_of(value):
         # Hash the raw string followed by its Foo wrapper, as one sequence.
         h = Hasher()
         for item in [value, Foo(value)]:
             h.update(item)
         return h.hexdigest()

     first = digest_of("hello")
     second = digest_of("hello")
     other = digest_of("there")
     self.assertEqual(first, second)
     self.assertNotEqual(first, other)
Example No. 2
0
def test_dependency_on_dill():
    """Regression check: hashing a lambda must not raise.

    Older dill versions failed here with:
    AttributeError: module 'dill._dill' has no attribute 'stack'
    """
    Hasher().update(lambda x: x)
Example No. 3
0
    def _create_fingerprint_for_instance_list(self, pipeline: "Pipeline") -> str:
        """Create a fingerprint for the instance list

        The fingerprint mixes together, in order:
        - the fingerprint of the previous dataset
        - the tokenizer config
        - the indexer config of each feature
        - the biome__version__, allennlp__version__ and spacy__version__,
          just to be completely sure!

        Parameters
        ----------
        pipeline
            Pipeline providing the tokenizer and feature indexer configs

        Returns
        -------
        fingerprint
            String of hexadecimal digits
        """
        # Reading the dataset's private fingerprint is a necessary evil here.
        components = [
            self.dataset._fingerprint,
            vars(pipeline.backbone.tokenizer.config),
        ]
        components.extend(
            feature.config["indexer"] for feature in pipeline.config.features
        )
        components.extend([biome__version__, allennlp__version__, spacy__version__])

        hasher = Hasher()
        for component in components:
            hasher.update(component)
        return hasher.hexdigest()