Esempio n. 1
0
def hash_columns(ds, hashed_columns):
    """Replace the cells of the selected columns of ``ds`` with their hashes.

    Every row in ``ds.rows`` is modified in place: for each index in
    ``hashed_columns`` the cell ``row.values[index]`` is replaced by
    ``Value(iVal=murmurhash2(cell))``. Cells whose ``getType()`` is
    ``Value.NVAL`` or ``Value.__EMPTY__`` are left untouched.

    Args:
        ds: Object exposing ``column_names`` and ``rows``; each row exposes
            an indexable, assignable ``values``.
        hashed_columns: Column indexes to hash. Each one must be smaller
            than ``len(ds.column_names)``. Only this upper bound is checked,
            so negative indexes are not rejected here.

    Returns:
        The same ``ds`` object that was passed in.

    Raises:
        AssertionError: If any index is not smaller than the column count.
    """
    if not hashed_columns:
        return ds

    column_count = len(ds.column_names)
    # Raise explicitly rather than via an ``assert`` statement: ``assert`` is
    # removed under ``python -O``, which would silently disable this check.
    # AssertionError is kept so existing callers observe the same exception.
    if not all(col < column_count for col in hashed_columns):
        raise AssertionError(
            "hashed column index out of range for "
            f"{column_count} column(s): {list(hashed_columns)!r}"
        )

    for row in ds.rows:
        cells = row.values
        for col in hashed_columns:
            cell = cells[col]
            # Null and empty cells keep their original value.
            if cell.getType() not in (Value.NVAL, Value.__EMPTY__):
                cells[col] = Value(iVal=murmurhash2(cell))
    return ds
Esempio n. 2
0
def hash_columns(ds, hashed_columns):
    """Hash, in place, the requested columns of every row in ``ds``.

    For each row of ``ds.rows`` and each index in ``hashed_columns``, the
    cell ``row.values[index]`` is replaced by
    ``Value(iVal=murmurhash2(cell))`` unless its ``getType()`` equals
    ``Value.NVAL`` or ``Value.__EMPTY__``; such cells are kept as they are.

    Args:
        ds: Object providing ``column_names`` and ``rows``; every row
            provides an indexable, assignable ``values``.
        hashed_columns: Indexes of the columns to hash. Each index must be
            smaller than ``len(ds.column_names)``; no lower bound is
            enforced by this function.

    Returns:
        ``ds`` itself, after the in-place replacement.

    Raises:
        AssertionError: If an index is not smaller than the column count.
    """
    if not hashed_columns:
        return ds

    total_columns = len(ds.column_names)
    for col in hashed_columns:
        # An explicit raise survives ``python -O``, which removes ``assert``
        # statements; the exception type and message stay the same as before.
        if not col < total_columns:
            raise AssertionError("The hashed column should in range.")

    for row in ds.rows:
        for col in hashed_columns:
            current = row.values[col]
            kind = current.getType()
            # Skip null and empty cells; they are not hashed.
            if kind != Value.NVAL and kind != Value.__EMPTY__:
                row.values[col] = Value(iVal=murmurhash2(current))
    return ds