Example #1
def test_predict(root, rows_csv, encoding, **unused):
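    # Generate COUNT predictions per input row through the low-level
    # loom.query server, then verify each predicted cell against the input.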
    COUNT = 10
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.query.get_server(root, debug=True) as query_server:
            result_out = 'predictions_out.csv'
            rows_in = os.listdir(rows_csv)[0]
            rows_in = os.path.join(rows_csv, rows_in)
            encoders = json_load(encoding)
            name_to_encoder = {e['name']: load_encoder(e) for e in encoders}
            preql = loom.preql.PreQL(query_server, encoding)
            preql.predict(rows_in, COUNT, result_out, id_offset=False)
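            # Each input row should yield COUNT prediction rows: observed
            # cells must encode to (almost) the same value as the input,
            # and unobserved (blank) cells must come back non-empty.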
            with open_compressed(rows_in, 'rb') as fin:
                with open(result_out, 'r') as fout:
                    in_reader = csv.reader(fin)
                    out_reader = csv.reader(fout)
                    fnames = in_reader.next()
                    out_reader.next()
                    for in_row in in_reader:
                        for i in range(COUNT):
                            out_row = out_reader.next()
                            bundle = zip(fnames, in_row, out_row)
                            for name, in_val, out_val in bundle:
                                encode = name_to_encoder[name]
                                observed = bool(in_val.strip())
                                if observed:
                                    assert_almost_equal(
                                        encode(in_val),
                                        encode(out_val))
                                else:
                                    assert_true(bool(out_val.strip()))
Example #2
def test_predict(root, rows_csv, encoding, **unused):
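    # Same check as above, but through the higher-level loom.preql server;
    # COUNT, CLEANUP_ON_ERROR, and _check_predictions are defined elsewhere
    # in the test module.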
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.preql.get_server(root, debug=True) as preql:
            result_out = 'predictions_out.csv'
            rows_in = os.listdir(rows_csv)[0]
            rows_in = os.path.join(rows_csv, rows_in)
            preql.predict(rows_in, COUNT, result_out, id_offset=True)
            print 'DEBUG', open_compressed(rows_in).read()
            print 'DEBUG', open_compressed(result_out).read()
            _check_predictions(rows_in, result_out, encoding)
Example #3
def test_predict_pandas(root, rows_csv, schema, **unused):
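    # Round-trip rows through pandas: load the first file in rows_csv into a
    # DataFrame, predict COUNT samples per row, and check the result's shape.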
    feature_count = len(json_load(schema))
    with loom.preql.get_server(root, debug=True) as preql:
        rows_filename = os.path.join(rows_csv, os.listdir(rows_csv)[0])
        with open_compressed(rows_filename) as f:
            rows_df = pandas.read_csv(f,
                                      converters=preql.converters,
                                      index_col='_id')
        print 'rows_df ='
        print rows_df
        row_count = rows_df.shape[0]
        assert_equal(rows_df.shape[1], feature_count)
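        # Serialize the DataFrame back to an in-memory CSV buffer so that
        # predict() can consume it like a file object.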
        rows_io = StringIO(rows_df.to_csv())
        result_string = preql.predict(rows_io, COUNT, id_offset=True)
        result_df = pandas.read_csv(StringIO(result_string), index_col=False)
        print 'result_df ='
        print result_df
        assert_equal(result_df.ndim, 2)
        assert_equal(result_df.shape[0], row_count * COUNT)
        assert_equal(result_df.shape[1], 1 + feature_count)