예제 #1
0
def test_filename_friendly_hash():
    """Check the basic contract of ``filename_friendly_hash``.

    Verifies that the hash of a dict containing strings, a datetime, a date
    and a float:

    * is a non-empty ``str`` made only of word characters,
    * is unchanged when the same keys are supplied in a different order,
    * differs when the data itself differs.
    """
    data = {
        'stuff': 'stuff',
        'other_stuff': 'more_stuff',
        'a_datetime': datetime.datetime(2015, 1, 1),
        'a_date': datetime.date(2016, 1, 1),
        'a_number': 5.0,
    }
    output = filename_friendly_hash(data)
    assert isinstance(output, str)
    # Raw string: "\w" in a plain literal is an invalid escape sequence.
    # fullmatch (unlike match + "$") also rejects a trailing newline.
    assert re.fullmatch(r'\w+', output) is not None

    # make sure ordering keys differently doesn't change the hash
    new_output = filename_friendly_hash({
        'other_stuff': 'more_stuff',
        'stuff': 'stuff',
        'a_datetime': datetime.datetime(2015, 1, 1),
        'a_date': datetime.date(2016, 1, 1),
        'a_number': 5.0,
    })
    assert new_output == output

    # make sure new data hashes to something different
    new_output = filename_friendly_hash({'stuff': 'stuff', 'a_number': 5.0})
    assert new_output != output
예제 #2
0
def test_filename_friendly_hash_stability():
    """Check that hashing nested data is stable and order-independent.

    The expected digest is hardcoded so that any change in the hash across
    different runs is caught, and the same nested dict with its inner keys
    reordered must produce the identical hash.
    """
    # Pinned value: the hash has to stay the same from one run to the next.
    expected_digest = '9a844a7ebbfd821010b1c2c13f7391e6'

    inner = {'four': 'five', 'six': 'seven'}
    output = filename_friendly_hash({'one': 'two', 'three': inner})
    assert output == expected_digest

    # Same content, inner keys inserted in the opposite order.
    inner_reordered = {'six': 'seven', 'four': 'five'}
    new_output = filename_friendly_hash(
        {'one': 'two', 'three': inner_reordered}
    )
    assert output == new_output
예제 #3
0
    def _model_hash(self, matrix_metadata, class_path, parameters):
        """Build an identifier for a trained model from its defining attributes.

        The identifier is the ``filename_friendly_hash`` of the classifier's
        class path, its hyperparameters, this object's ``project_path`` and
        the training matrix metadata. Two models that share all of these are
        considered equivalent, so training a second one would leave no
        reason to keep the first.

        Args:
            matrix_metadata: metadata describing the training matrix
                (presumably a dict -- confirm against callers)
            class_path (string): a full class path for the classifier
            parameters (dict): all hyperparameters to be passed to the
                classifier

        Returns: (string) a unique identifier
        """
        identity = dict(
            className=class_path,
            parameters=parameters,
            project_path=self.project_path,
            training_metadata=matrix_metadata,
        )
        return filename_friendly_hash(identity)