Beispiel #1
0
def hasher_spherical(row_cols, params):
    """Train a spherical hasher from msgpack-encoded feature columns.

    Args:
        row_cols: Iterable of ``(row, columns)`` pairs. ``columns['feature']``
            is decoded with msgpack and element ``0`` of the decoded value is
            used as the feature (presumably the feature vector itself --
            confirm against whatever writes this column).
        params: Mapping providing ``num_pivots``, ``eps_m``, ``eps_s`` and
            ``max_iters``; they are forwarded positionally to
            ``picarus_takeout.spherical_hasher_train``.

    Returns:
        The tuple ``('feature', 'hash', model_link)`` where ``model_link`` is
        ``{'name': 'picarus.SphericalHasher', 'kw': {...}}`` and ``kw`` holds
        the flattened ``pivots`` and the ``threshs`` as plain lists.

    Raises:
        ValueError: If ``row_cols`` yields no rows, since there is nothing to
            train on.
    """
    features = [np.array(msgpack.loads(columns['feature'])[0])
                for _, columns in row_cols]
    if not features:
        raise ValueError('hasher_spherical needs at least one feature to train on')
    # np.asfarray was removed in NumPy 2.0; asarray with an explicit float64
    # dtype performs the same conversion on every NumPy version.
    features = np.asarray(features, dtype=np.float64)
    trained = picarus_takeout.spherical_hasher_train(features,
                                                     params['num_pivots'],
                                                     params['eps_m'],
                                                     params['eps_s'],
                                                     params['max_iters'])
    # Convert the arrays to plain lists before storing them in the model link.
    model_kw = {'pivots': trained['pivots'].ravel().tolist(),
                'threshs': trained['threshs'].tolist()}
    model_link = {'name': 'picarus.SphericalHasher', 'kw': model_kw}
    return 'feature', 'hash', model_link
Beispiel #2
0
def hasher_spherical(row_cols, params):
    """Fit a spherical hasher on the features carried by ``row_cols``.

    Args:
        row_cols: Iterable of ``(row, columns)`` pairs. The ``'feature'``
            entry of each ``columns`` mapping is msgpack-decoded and its
            first element is collected as one training sample (assumed to be
            the feature vector -- TODO confirm with the column's producer).
        params: Mapping with the keys ``num_pivots``, ``eps_m``, ``eps_s``
            and ``max_iters``, passed through to
            ``picarus_takeout.spherical_hasher_train``.

    Returns:
        ``('feature', 'hash', model_link)``; ``model_link`` names
        ``'picarus.SphericalHasher'`` and carries the trained ``pivots``
        (flattened) and ``threshs`` as lists under ``'kw'``.

    Raises:
        ValueError: If no rows are supplied.
    """
    samples = []
    for _, columns in row_cols:
        decoded = msgpack.loads(columns['feature'])
        samples.append(np.array(decoded[0]))
    if not samples:
        raise ValueError('hasher_spherical needs at least one feature to train on')

    # np.asfarray no longer exists in NumPy >= 2.0; this is the documented
    # replacement and yields the same float64 array.
    features = np.asarray(samples, dtype=np.float64)
    trained = picarus_takeout.spherical_hasher_train(
        features,
        params['num_pivots'],
        params['eps_m'],
        params['eps_s'],
        params['max_iters'],
    )
    model_link = {
        'name': 'picarus.SphericalHasher',
        'kw': {
            'pivots': trained['pivots'].ravel().tolist(),
            'threshs': trained['threshs'].tolist(),
        },
    }
    return 'feature', 'hash', model_link
Beispiel #3
0
def hasher_spherical(queue, params, inputs, schema, start_stop_rows, table, owner):
    """Train a spherical hasher from HBase rows and enqueue the model key.

    Scans ``table`` over every ``(start_row, stop_row)`` range, collects the
    msgpack-decoded features, trains the hasher and puts the key returned by
    ``manager.input_model_param_to_key`` on ``queue``.

    Args:
        queue: Object with a ``put`` method that receives the model key.
        params: Mapping providing ``num_pivots``, ``eps_m``, ``eps_s`` and
            ``max_iters`` for ``picarus_takeout.spherical_hasher_train``; it
            is also recorded in the factory info.
        inputs: Mapping whose ``'feature'`` entry names the column to scan.
        schema: Unused by this function.
        start_stop_rows: Iterable of ``(start_row, stop_row)`` scan ranges.
        table: Table name passed to ``hadoopy_hbase.scanner``.
        owner: Stored as the model's ``email``.

    Raises:
        ValueError: If the scans return no rows, since there is nothing to
            train on.
    """
    thrift, manager, slices, inputsb64 = _setup(start_stop_rows, inputs)
    feature_column = inputs['feature']
    features = []
    for start_row, stop_row in start_stop_rows:
        row_cols = hadoopy_hbase.scanner(thrift, table,
                                         columns=[feature_column],
                                         start_row=start_row, stop_row=stop_row)
        # Element 0 of the decoded value is the sample used for training
        # (presumably the feature vector -- confirm with the column's writer).
        features.extend(np.array(msgpack.loads(cols[feature_column])[0])
                        for _, cols in row_cols)
    num_features = len(features)
    print('num_features[%d]' % num_features)
    if not features:
        raise ValueError('hasher_spherical needs at least one feature to train on')
    # np.asfarray was removed in NumPy 2.0; asarray with an explicit float64
    # dtype is the equivalent conversion.
    features = np.asarray(features, dtype=np.float64)
    trained = picarus_takeout.spherical_hasher_train(features,
                                                     params['num_pivots'],
                                                     params['eps_m'],
                                                     params['eps_s'],
                                                     params['max_iters'])
    model_kw = {'pivots': trained['pivots'].ravel().tolist(),
                'threshs': trained['threshs'].tolist()}
    factory_info = {'slices': slices, 'num_features': num_features, 'data': 'slices',
                    'params': params, 'inputs': inputsb64}
    model_link = {'name': 'picarus.SphericalHasher', 'kw': model_kw}
    # The new link is appended to the chain looked up for the input feature.
    model_chain = tables._takeout_model_chain_from_key(manager, feature_column) + [model_link]
    queue.put(manager.input_model_param_to_key(input=feature_column,
                                               model_link=model_link,
                                               model_chain=model_chain,
                                               input_type='feature',
                                               output_type='hash',
                                               email=owner,
                                               name=manager.model_to_name(model_link),
                                               factory_info=json.dumps(factory_info)))