def test_predict(root, rows_csv, encoding, **unused):
    """Check PreQL predictions cell-by-cell against the input rows.

    Runs ``preql.predict`` on the first file listed in ``rows_csv`` and
    writes the result to ``predictions_out.csv`` inside a temporary
    directory.  For every input row, ``COUNT`` rows are then read from the
    prediction file and compared column by column:

    * a non-blank input cell must encode to (almost) the same value as the
      corresponding predicted cell;
    * a blank input cell must have a non-blank predicted cell.

    Args:
        root: passed to ``loom.query.get_server``.
        rows_csv: directory whose first listed entry is the input rows file.
        encoding: passed to ``json_load`` to obtain the encoder specs, and
            to ``loom.preql.PreQL``.
        **unused: extra keyword arguments are accepted and ignored.
    """
    COUNT = 10
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.query.get_server(root, debug=True) as query_server:
            result_out = 'predictions_out.csv'
            first_entry = os.listdir(rows_csv)[0]
            rows_in = os.path.join(rows_csv, first_entry)

            # Map each feature name to the encoder built from its spec.
            name_to_encoder = {}
            for spec in json_load(encoding):
                name_to_encoder[spec['name']] = load_encoder(spec)

            preql = loom.preql.PreQL(query_server, encoding)
            # NOTE(review): id_offset=False presumably keeps the output
            # columns aligned with the input header -- confirm in PreQL.
            preql.predict(rows_in, COUNT, result_out, id_offset=False)

            with open_compressed(rows_in, 'rb') as fin, \
                    open(result_out, 'r') as fout:
                in_reader = csv.reader(fin)
                out_reader = csv.reader(fout)
                # First input row supplies the column names.
                fnames = next(in_reader)
                # First output row is skipped (presumably its header).
                next(out_reader)
                for in_row in in_reader:
                    # Each input row is followed by COUNT predicted rows.
                    for _ in range(COUNT):
                        out_row = next(out_reader)
                        cells = zip(fnames, in_row, out_row)
                        for name, in_val, out_val in cells:
                            encode = name_to_encoder[name]
                            if not in_val.strip():
                                # Unobserved input: prediction must fill it.
                                assert_true(bool(out_val.strip()))
                                continue
                            # Observed input: prediction must reproduce it.
                            assert_almost_equal(
                                encode(in_val),
                                encode(out_val))
def test_predict(root, rows_csv, encoding, **unused):
    """Run ``preql.predict`` on the first rows file and check its output.

    Inside a temporary directory, predicts ``COUNT`` (a name read from the
    enclosing module scope) samples for the first file listed in
    ``rows_csv``, writes them to ``predictions_out.csv``, dumps both files
    to stdout for debugging, and hands them to ``_check_predictions``.

    Args:
        root: passed to ``loom.preql.get_server``.
        rows_csv: directory whose first listed entry is the input rows file.
        encoding: forwarded to ``_check_predictions``.
        **unused: extra keyword arguments are accepted and ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.preql.get_server(root, debug=True) as preql:
            result_out = 'predictions_out.csv'
            rows_in = os.path.join(rows_csv, os.listdir(rows_csv)[0])
            preql.predict(rows_in, COUNT, result_out, id_offset=True)
            # Debug dump of input then output.  Each file is read inside a
            # ``with`` so its handle is closed deterministically rather
            # than left for the garbage collector.
            for debug_path in (rows_in, result_out):
                with open_compressed(debug_path) as debug_file:
                    debug_text = debug_file.read()
                # Single-argument print(...) emits the same bytes whether
                # print is a statement or a function.
                print('DEBUG ' + debug_text)
            _check_predictions(rows_in, result_out, encoding)
def test_predict_pandas(root, rows_csv, schema, **unused):
    """Round-trip rows through pandas and ``preql.predict``; check shapes.

    Loads the first file listed in ``rows_csv`` into a DataFrame indexed by
    ``_id``, feeds its CSV form to ``preql.predict`` through an in-memory
    buffer, parses the returned string back into a DataFrame, and asserts:

    * the input has one column per entry of the loaded schema;
    * the result is 2-dimensional with ``COUNT`` rows per input row and one
      more column than the schema has entries.

    ``COUNT`` is read from the enclosing module scope.

    Args:
        root: passed to ``loom.preql.get_server``.
        rows_csv: directory whose first listed entry is the input rows file.
        schema: passed to ``json_load``; its length is the feature count.
        **unused: extra keyword arguments are accepted and ignored.
    """
    feature_count = len(json_load(schema))
    with loom.preql.get_server(root, debug=True) as preql:
        first_entry = os.listdir(rows_csv)[0]
        rows_filename = os.path.join(rows_csv, first_entry)
        with open_compressed(rows_filename) as f:
            rows_df = pandas.read_csv(
                f,
                converters=preql.converters,
                index_col='_id')
        print('rows_df =')
        print(rows_df)

        row_count, input_columns = rows_df.shape
        assert_equal(input_columns, feature_count)

        # Serialize the frame back to CSV text and predict from memory.
        rows_io = StringIO(rows_df.to_csv())
        result_string = preql.predict(rows_io, COUNT, id_offset=True)
        result_df = pandas.read_csv(
            StringIO(result_string),
            index_col=False)
        print('result_df =')
        print(result_df)

        assert_equal(result_df.ndim, 2)
        result_rows, result_columns = result_df.shape
        assert_equal(result_rows, row_count * COUNT)
        # One extra column beyond the features (presumably the row id).
        assert_equal(result_columns, 1 + feature_count)
def test_predict_pandas(root, rows_csv, schema, **unused):
    """Verify the shape of in-memory predictions made from a DataFrame.

    Steps performed:

    1. Count the entries of the schema loaded via ``json_load``.
    2. Read the first file listed in ``rows_csv`` into a DataFrame, using
       ``preql.converters`` and ``_id`` as the index column, and assert it
       has one column per schema entry.
    3. Pass the DataFrame's CSV text to ``preql.predict`` via ``StringIO``
       and parse the returned string with pandas.
    4. Assert the result is 2-D, has ``COUNT`` rows per input row, and has
       ``1 + feature_count`` columns.

    ``COUNT`` is read from the enclosing module scope.

    Args:
        root: passed to ``loom.preql.get_server``.
        rows_csv: directory whose first listed entry is the input rows file.
        schema: passed to ``json_load``; its length is the feature count.
        **unused: extra keyword arguments are accepted and ignored.
    """
    feature_count = len(json_load(schema))
    with loom.preql.get_server(root, debug=True) as preql:
        listing = os.listdir(rows_csv)
        rows_filename = os.path.join(rows_csv, listing[0])
        with open_compressed(rows_filename) as f:
            read_options = {
                'converters': preql.converters,
                'index_col': '_id',
            }
            rows_df = pandas.read_csv(f, **read_options)
        print('rows_df =')
        print(rows_df)

        input_shape = rows_df.shape
        row_count = input_shape[0]
        assert_equal(input_shape[1], feature_count)

        csv_text = rows_df.to_csv()
        rows_io = StringIO(csv_text)
        result_string = preql.predict(rows_io, COUNT, id_offset=True)

        result_io = StringIO(result_string)
        result_df = pandas.read_csv(result_io, index_col=False)
        print('result_df =')
        print(result_df)

        assert_equal(result_df.ndim, 2)
        output_shape = result_df.shape
        # COUNT predicted rows are expected for every input row.
        assert_equal(output_shape[0], row_count * COUNT)
        assert_equal(output_shape[1], 1 + feature_count)