Esempio n. 1
0
 def row_feature_hasher(row):
     """Build the hashed-feature mapping for a single row.

     Every column named in ``self.columns`` is read from ``row`` and passed
     to ``simple_hash(value, self.num_features)``; the occurrences of each
     returned hash value are tallied.

     ``self`` and ``joined_columns`` are free variables supplied by the
     enclosing scope.

     Returns:
         A dict with one ``hash_{joined_columns}_{i}`` key for every ``i``
         in ``range(self.num_features)``. Indices that no column hashed to
         map to 0.
     """
     # NOTE(review): presumably simple_hash yields ints in
     # range(self.num_features) -- any other value is tallied but never
     # appears in the returned dict; confirm against simple_hash.
     bucket_tally = collections.Counter(
         simple_hash(row[name], self.num_features) for name in self.columns
     )
     features = {}
     for bucket in range(self.num_features):
         features[f"hash_{joined_columns}_{bucket}"] = bucket_tally[bucket]
     return features
Esempio n. 2
0
def test_simple_hash():
    """Pin ``simple_hash`` results for fixed inputs to guard its determinism."""
    # (input value, expected hash) pairs; each is hashed with 100 as the
    # second argument.
    expectations = (
        (1, 83),
        ("a", 52),
        ("banana", 16),
        ([1, 2, "apple"], 37),
    )
    for value, expected in expectations:
        assert simple_hash(value, 100) == expected
Esempio n. 3
0
 def hash_count(tokens: List[str]) -> Counter:
     """Tally how often each hash value occurs among ``tokens``.

     Each token is passed to ``simple_hash(token, self.num_features)``;
     ``self`` is a free variable supplied by the enclosing scope.

     Returns:
         A ``Counter`` keyed by the values ``simple_hash`` returned, with
         the number of tokens that produced each value.
     """
     # Feed the Counter lazily; no intermediate list of hashes is needed.
     return Counter(simple_hash(tok, self.num_features) for tok in tokens)