Esempio n. 1
0
def test_generate(model, **unused):
    """Exercise ``loom.runner.generate`` over a grid of generation settings.

    For each combination of ``row_count`` in (0, 1, 100) and ``density`` in
    (0.0, 0.5, 1.0), a config is dumped, the generator is invoked with
    ``debug=True``, and the config plus the rows/model/groups outputs are
    passed to ``assert_found``. The group counts read back from the groups
    output are printed for inspection; they are not asserted on.

    Args:
        model: forwarded to ``loom.runner.generate`` as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    for row_count in [0, 1, 100]:
        for density in [0.0, 0.5, 1.0]:
            with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
                # Paths are made absolute only once inside the tempdir
                # context (presumably it switches the working directory --
                # confirm against its definition).
                config_in = os.path.abspath('config.pb.gz')
                config = {
                    'generate': {
                        'row_count': row_count,
                        'density': density,
                    },
                }
                loom.config.config_dump(config, config_in)
                assert_found(config_in)

                rows_out = os.path.abspath('rows.pbs.gz')
                model_out = os.path.abspath('model.pb.gz')
                groups_out = os.path.abspath('groups')
                loom.runner.generate(
                    config_in=config_in,
                    model_in=model,
                    rows_out=rows_out,
                    model_out=model_out,
                    groups_out=groups_out,
                    debug=True)
                assert_found(rows_out, model_out, groups_out)

                group_counts = get_group_counts(groups_out)
                # Single-argument parenthesised form: prints the same text
                # under Python 2 and is valid syntax under Python 3.
                print('group_counts: {}'.format(
                    ' '.join(map(str, group_counts))))
Esempio n. 2
0
def test_tare(rows, schema_row, **unused):
    """Invoke ``loom.runner.tare`` and pass its output path to ``assert_found``.

    Args:
        rows: forwarded as ``rows_in``.
        schema_row: forwarded as ``schema_row_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_path = os.path.abspath("tares.pbs.gz")
        loom.runner.tare(
            schema_row_in=schema_row,
            rows_in=rows,
            tares_out=tares_path,
        )
        assert_found(tares_path)
Esempio n. 3
0
def test_make_schema(model, **unused):
    """Invoke ``loom.format.make_schema`` and pass its output to ``assert_found``.

    Args:
        model: forwarded as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        schema_path = os.path.abspath("schema.json.gz")
        loom.format.make_schema(model_in=model, schema_out=schema_path)
        assert_found(schema_path)
Esempio n. 4
0
def test_posterior_enum(name, tares, diffs, init, **unused):
    """Run ``loom.runner.posterior_enum`` and count the samples it emits.

    A config requesting 7 samples (with ``row_queue_capacity`` 0 and
    ``score_parallel`` False for the ``kind`` kernel) is dumped, the runner is
    invoked with ``debug=True``, and the number of records loaded from the
    samples file is compared with the configured ``sample_count``.

    Args:
        name: not used by this test.
        tares: forwarded as ``tares_in``.
        diffs: forwarded as ``rows_in``.
        init: forwarded as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        kind_kernel = {"row_queue_capacity": 0, "score_parallel": False}
        config = {
            "posterior_enum": {"sample_count": 7},
            "kernels": {"kind": kind_kernel},
        }
        config_path = os.path.abspath("config.pb.gz")
        loom.config.config_dump(config, config_path)
        assert_found(config_path)

        samples_path = os.path.abspath("samples.pbs.gz")
        loom.runner.posterior_enum(
            config_in=config_path,
            model_in=init,
            tares_in=tares,
            rows_in=diffs,
            samples_out=samples_path,
            debug=True,
        )
        assert_found(samples_path)

        # Tally records one at a time rather than materialising the stream.
        actual_count = 0
        for _ in protobuf_stream_load(samples_path):
            actual_count += 1
        expected_count = config["posterior_enum"]["sample_count"]
        assert_equal(actual_count, expected_count)
Esempio n. 5
0
def test_generate_init(encoding, **unused):
    """Invoke ``loom.generate.generate_init`` and pass its output to ``assert_found``.

    Args:
        encoding: forwarded as ``encoding_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        init_path = os.path.abspath("init.pb.gz")
        loom.generate.generate_init(encoding_in=encoding, model_out=init_path)
        assert_found(init_path)
Esempio n. 6
0
def test_make_fake_encoding(schema, model, **unused):
    """Invoke ``loom.format.make_fake_encoding`` and check its output path.

    Args:
        schema: forwarded as ``schema_in``.
        model: forwarded as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        encoding_path = os.path.abspath("encoding.json.gz")
        loom.format.make_fake_encoding(
            schema_in=schema,
            model_in=model,
            encoding_out=encoding_path,
        )
        assert_found(encoding_path)
Esempio n. 7
0
def test_generate_init(encoding, **unused):
    """Generate an initial model from ``encoding`` and check the output path.

    Args:
        encoding: forwarded to ``loom.generate.generate_init`` as
            ``encoding_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        model_path = os.path.abspath("init.pb.gz")
        loom.generate.generate_init(
            encoding_in=encoding,
            model_out=model_path,
        )
        assert_found(model_path)
Esempio n. 8
0
def test_generate(model, **unused):
    """Run ``loom.runner.generate`` for several row counts and densities.

    Every pairing of ``row_count`` in (0, 1, 100) with ``density`` in
    (0.0, 0.5, 1.0) gets its own ``tempdir`` context: the config is dumped,
    the generator is called with ``debug=True``, and the config, rows, model
    and groups paths are handed to ``assert_found``. Group counts are printed
    only; nothing is asserted about them.

    Args:
        model: forwarded to ``loom.runner.generate`` as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    for row_count in [0, 1, 100]:
        for density in [0.0, 0.5, 1.0]:
            with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
                config_in = os.path.abspath('config.pb.gz')
                config = {
                    'generate': {
                        'row_count': row_count,
                        'density': density,
                    },
                }
                loom.config.config_dump(config, config_in)
                assert_found(config_in)

                rows_out = os.path.abspath('rows.pbs.gz')
                model_out = os.path.abspath('model.pb.gz')
                groups_out = os.path.abspath('groups')
                loom.runner.generate(config_in=config_in,
                                     model_in=model,
                                     rows_out=rows_out,
                                     model_out=model_out,
                                     groups_out=groups_out,
                                     debug=True)
                assert_found(rows_out, model_out, groups_out)

                group_counts = get_group_counts(groups_out)
                # Parenthesised single-argument print behaves the same on
                # Python 2 and no longer breaks parsing on Python 3.
                counts_text = ' '.join(map(str, group_counts))
                print('group_counts: {}'.format(counts_text))
Esempio n. 9
0
def test_posterior_enum(name, tares, diffs, init, **unused):
    """Check that ``loom.runner.posterior_enum`` writes the configured sample count.

    Args:
        name: not used by this test.
        tares: forwarded as ``tares_in``.
        diffs: forwarded as ``rows_in``.
        init: forwarded as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        config_file = os.path.abspath('config.pb.gz')
        config = {
            'posterior_enum': {'sample_count': 7},
            'kernels': {
                'kind': {'row_queue_capacity': 0, 'score_parallel': False},
            },
        }
        loom.config.config_dump(config, config_file)
        assert_found(config_file)

        samples_file = os.path.abspath('samples.pbs.gz')
        loom.runner.posterior_enum(config_in=config_file,
                                   model_in=init,
                                   tares_in=tares,
                                   rows_in=diffs,
                                   samples_out=samples_file,
                                   debug=True)
        assert_found(samples_file)

        # Compare the number of streamed records with the requested count.
        wanted = config['posterior_enum']['sample_count']
        found = sum(1 for _ in protobuf_stream_load(samples_file))
        assert_equal(found, wanted)
Esempio n. 10
0
def test_make_schema(model, **unused):
    """Build a schema file from ``model`` and check the output path.

    Args:
        model: forwarded to ``loom.format.make_schema`` as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        target = os.path.abspath('schema.json.gz')
        loom.format.make_schema(model_in=model, schema_out=target)
        assert_found(target)
Esempio n. 11
0
def test_tare(rows, schema_row, **unused):
    """Call ``loom.runner.tare`` on the given rows and check the output path.

    Args:
        rows: forwarded as ``rows_in``.
        schema_row: forwarded as ``schema_row_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_file = os.path.abspath('tares.pbs.gz')
        loom.runner.tare(schema_row_in=schema_row,
                         rows_in=rows,
                         tares_out=tares_file)
        assert_found(tares_file)
Esempio n. 12
0
def test_sparsify(rows, schema_row, **unused):
    """Run ``tare`` then ``sparsify`` and check both output paths.

    The tares written by ``loom.runner.tare`` are fed to
    ``loom.runner.sparsify`` (called with ``debug=True``) together with the
    original rows.

    Args:
        rows: forwarded as ``rows_in`` to both runner calls.
        schema_row: forwarded as ``schema_row_in`` to both runner calls.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_path = os.path.abspath("tares.pbs.gz")
        diffs_path = os.path.abspath("diffs.pbs.gz")

        loom.runner.tare(
            schema_row_in=schema_row,
            rows_in=rows,
            tares_out=tares_path,
        )
        assert_found(tares_path)

        loom.runner.sparsify(
            schema_row_in=schema_row,
            tares_in=tares_path,
            rows_in=rows,
            rows_out=diffs_path,
            debug=True,
        )
        assert_found(diffs_path)
Esempio n. 13
0
def test_make_fake_encoding(schema, model, **unused):
    """Build a fake encoding from ``schema`` and ``model``; check the output path.

    Args:
        schema: forwarded to ``loom.format.make_fake_encoding`` as
            ``schema_in``.
        model: forwarded as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        target = os.path.abspath('encoding.json.gz')
        loom.format.make_fake_encoding(schema_in=schema,
                                       model_in=model,
                                       encoding_out=target)
        assert_found(target)
Esempio n. 14
0
def test_shuffle(diffs, **unused):
    """Call ``loom.runner.shuffle`` with seed 12345 and check the output path.

    Args:
        diffs: forwarded as ``rows_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        shuffled_path = os.path.abspath('shuffled.pbs.gz')
        loom.runner.shuffle(rows_in=diffs, rows_out=shuffled_path, seed=12345)
        assert_found(shuffled_path)
Esempio n. 15
0
def test_import_rows(encoding, rows, rows_csv, **unused):
    """Import CSV rows and compare the record count with the reference rows.

    Args:
        encoding: forwarded to ``loom.format.import_rows`` as ``encoding_in``.
        rows: reference rows file, read with ``protobuf_stream_load``.
        rows_csv: forwarded as ``rows_csv_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        imported_path = os.path.abspath("rows.pbs.gz")
        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=rows_csv,
            rows_out=imported_path,
        )
        assert_found(imported_path)

        # Count the reference stream first, then the freshly imported one.
        expected_count = 0
        for _ in protobuf_stream_load(rows):
            expected_count += 1
        actual_count = 0
        for _ in protobuf_stream_load(imported_path):
            actual_count += 1
        assert_equal(actual_count, expected_count)
Esempio n. 16
0
def test_import_rows(encoding, rows, rows_csv, **unused):
    """Check that ``loom.format.import_rows`` yields as many records as ``rows``.

    Args:
        encoding: forwarded as ``encoding_in``.
        rows: reference rows file whose record count is the expectation.
        rows_csv: forwarded as ``rows_csv_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('rows.pbs.gz')
        loom.format.import_rows(encoding_in=encoding,
                                rows_csv_in=rows_csv,
                                rows_out=out_path)
        assert_found(out_path)

        wanted = sum(1 for _ in protobuf_stream_load(rows))
        found = sum(1 for _ in protobuf_stream_load(out_path))
        assert_equal(found, wanted)
Esempio n. 17
0
def test_make_encoding(schema, rows_csv, **unused):
    """Build an encoding from CSV rows, then import those rows with it.

    Both output paths (the encoding and the imported rows) are passed to
    ``assert_found`` right after the call that produces them.

    Args:
        schema: forwarded to ``loom.format.make_encoding`` as ``schema_in``.
        rows_csv: forwarded as ``rows_in`` to ``make_encoding`` and as
            ``rows_csv_in`` to ``import_rows``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        encoding_path = os.path.abspath("encoding.json.gz")
        rows_path = os.path.abspath("rows.pbs.gz")

        loom.format.make_encoding(
            schema_in=schema,
            rows_in=rows_csv,
            encoding_out=encoding_path,
        )
        assert_found(encoding_path)

        loom.format.import_rows(
            encoding_in=encoding_path,
            rows_csv_in=rows_csv,
            rows_out=rows_path,
        )
        assert_found(rows_path)
Esempio n. 18
0
def test_make_encoding(schema, rows_csv, **unused):
    """Chain ``make_encoding`` into ``import_rows`` and check both outputs.

    Args:
        schema: forwarded as ``schema_in``.
        rows_csv: CSV rows input used by both ``loom.format`` calls.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        encoding_file = os.path.abspath('encoding.json.gz')
        rows_file = os.path.abspath('rows.pbs.gz')

        # Step 1: derive the encoding from the schema and the CSV rows.
        loom.format.make_encoding(schema_in=schema,
                                  rows_in=rows_csv,
                                  encoding_out=encoding_file)
        assert_found(encoding_file)

        # Step 2: import the same CSV rows using that encoding.
        loom.format.import_rows(encoding_in=encoding_file,
                                rows_csv_in=rows_csv,
                                rows_out=rows_file)
        assert_found(rows_file)
Esempio n. 19
0
def test_sparsify(rows, schema_row, **unused):
    """Compute tares for ``rows`` and then sparsify the rows against them.

    Args:
        rows: forwarded as ``rows_in`` to ``loom.runner.tare`` and
            ``loom.runner.sparsify``.
        schema_row: forwarded as ``schema_row_in`` to both calls.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        tares_file = os.path.abspath("tares.pbs.gz")
        diffs_file = os.path.abspath("diffs.pbs.gz")

        loom.runner.tare(
            schema_row_in=schema_row, rows_in=rows, tares_out=tares_file)
        assert_found(tares_file)

        # ``sparsify`` is the only call here that runs with debug enabled.
        loom.runner.sparsify(
            schema_row_in=schema_row,
            tares_in=tares_file,
            rows_in=rows,
            rows_out=diffs_file,
            debug=True)
        assert_found(diffs_file)
Esempio n. 20
0
def test_one_to_one(rows, **unused):
    """Check that shuffling reorders rows without adding or dropping any.

    Rows are shuffled with seed 12345. The shuffled rows must have the same
    length as the originals, must not compare equal to them as loaded, and
    must compare equal once both are sorted by ``id``.

    Args:
        rows: forwarded to ``loom.runner.shuffle`` as ``rows_in`` and loaded
            with ``load_rows`` as the reference.
        **unused: extra fixture arguments; ignored.
    """
    def row_id(row):
        return row.id

    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        shuffled_path = os.path.abspath("rows_out.pbs.gz")
        loom.runner.shuffle(
            rows_in=rows,
            rows_out=shuffled_path,
            seed=12345,
        )
        assert_found(shuffled_path)

        original = load_rows(rows)
        shuffled = load_rows(shuffled_path)
        assert_equal(len(shuffled), len(original))
        assert_not_equal(shuffled, original)

        actual = sorted(shuffled, key=row_id)
        expected = sorted(original, key=row_id)
        assert_list_equal(expected, actual)
Esempio n. 21
0
def test_posterior_enum(name, tares, diffs, init, **unused):
    """Run posterior enumeration and compare sample count with the config.

    Args:
        name: not used by this test.
        tares: forwarded to ``loom.runner.posterior_enum`` as ``tares_in``.
        diffs: forwarded as ``rows_in``.
        init: forwarded as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        config_in = os.path.abspath('config.pb.gz')
        config = {
            'posterior_enum': {
                'sample_count': 7,
            },
            'kernels': {
                'kind': {
                    'row_queue_capacity': 0,
                    'score_parallel': False,
                },
            },
        }
        loom.config.config_dump(config, config_in)
        assert_found(config_in)

        samples_out = os.path.abspath('samples.pbs.gz')
        runner_args = dict(
            config_in=config_in,
            model_in=init,
            tares_in=tares,
            rows_in=diffs,
            samples_out=samples_out,
            debug=True,
        )
        loom.runner.posterior_enum(**runner_args)
        assert_found(samples_out)

        sample_total = sum(1 for _ in protobuf_stream_load(samples_out))
        assert_equal(sample_total, config['posterior_enum']['sample_count'])
Esempio n. 22
0
def test_one_to_one(rows, **unused):
    """Shuffle ``rows`` with a fixed seed and check the result is a reordering.

    Args:
        rows: rows file given to ``loom.runner.shuffle`` and also loaded as
            the reference via ``load_rows``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        out_path = os.path.abspath('rows_out.pbs.gz')
        loom.runner.shuffle(rows_in=rows, rows_out=out_path, seed=12345)
        assert_found(out_path)

        before = load_rows(rows)
        after = load_rows(out_path)

        # Same number of rows, but not equal as loaded.
        assert_equal(len(after), len(before))
        assert_not_equal(after, before)

        # Once ordered by id, both lists must match element for element.
        by_id = lambda row: row.id
        after_sorted = sorted(after, key=by_id)
        before_sorted = sorted(before, key=by_id)
        assert_list_equal(before_sorted, after_sorted)
Esempio n. 23
0
def test_generate(model, **unused):
    """Smoke-test ``loom.runner.generate`` across row counts and densities.

    Each of the nine (row_count, density) combinations runs in its own
    ``tempdir`` context: a config is dumped, ``generate`` is called with
    ``debug=True``, and every produced path is passed to ``assert_found``.
    The group counts are printed for the test log and not asserted on.

    Args:
        model: forwarded to ``loom.runner.generate`` as ``model_in``.
        **unused: extra fixture arguments; ignored.
    """
    for row_count in [0, 1, 100]:
        for density in [0.0, 0.5, 1.0]:
            with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
                config_in = os.path.abspath("config.pb.gz")
                config = {"generate": {"row_count": row_count, "density": density}}
                loom.config.config_dump(config, config_in)
                assert_found(config_in)

                rows_out = os.path.abspath("rows.pbs.gz")
                model_out = os.path.abspath("model.pb.gz")
                groups_out = os.path.abspath("groups")
                loom.runner.generate(
                    config_in=config_in,
                    model_in=model,
                    rows_out=rows_out,
                    model_out=model_out,
                    groups_out=groups_out,
                    debug=True,
                )
                assert_found(rows_out, model_out, groups_out)

                group_counts = get_group_counts(groups_out)
                # One parenthesised argument keeps the output identical on
                # Python 2 while also being valid Python 3 syntax.
                print("group_counts: {}".format(" ".join(map(str, group_counts))))
Esempio n. 24
0
def test_export_rows(encoding, rows, **unused):
    """Round-trip rows through CSV export and re-import, then compare data.

    Rows are exported with ``chunk_size=51``; the export directory and its
    ``rows.0.csv.gz`` member are passed to ``assert_found``. The CSV is then
    imported back, and the ``diff`` fields of the original and re-imported
    rows (both ordered by ``id``) are compared with ``assert_close``.

    Args:
        encoding: forwarded as ``encoding_in`` to both ``loom.format`` calls.
        rows: forwarded to ``export_rows`` as ``rows_in`` and loaded as the
            reference.
        **unused: extra fixture arguments; ignored.
    """
    def row_id(row):
        return row.id

    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        csv_dir = os.path.abspath("rows_csv")
        reimported_path = os.path.abspath("rows.pbs.gz")

        loom.format.export_rows(
            encoding_in=encoding,
            rows_in=rows,
            rows_csv_out=csv_dir,
            chunk_size=51,
        )
        assert_found(csv_dir)
        first_chunk = os.path.join(csv_dir, "rows.0.csv.gz")
        assert_found(first_chunk)

        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=csv_dir,
            rows_out=reimported_path,
        )
        assert_found(reimported_path)

        expected = load_rows(rows)
        actual = load_rows(reimported_path)
        assert_equal(len(actual), len(expected))

        # Order both sides by id so the diffs line up pairwise.
        actual.sort(key=row_id)
        expected.sort(key=row_id)
        expected_data = [row.diff for row in expected]
        actual_data = [row.diff for row in actual]
        assert_close(actual_data, expected_data)
Esempio n. 25
0
def test_export_rows(encoding, rows, **unused):
    """Export rows to CSV, import them back, and compare the ``diff`` fields.

    Args:
        encoding: forwarded as ``encoding_in`` to ``export_rows`` and
            ``import_rows``.
        rows: rows file to export; also loaded as the expected data.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        export_dir = os.path.abspath('rows_csv')
        import_path = os.path.abspath('rows.pbs.gz')

        loom.format.export_rows(encoding_in=encoding,
                                rows_in=rows,
                                rows_csv_out=export_dir,
                                chunk_size=51)
        assert_found(export_dir)
        assert_found(os.path.join(export_dir, 'rows.0.csv.gz'))

        loom.format.import_rows(encoding_in=encoding,
                                rows_csv_in=export_dir,
                                rows_out=import_path)
        assert_found(import_path)

        expected = load_rows(rows)
        actual = load_rows(import_path)
        assert_equal(len(actual), len(expected))

        # Sort each list in place by id before extracting the diffs.
        for loaded in (actual, expected):
            loaded.sort(key=lambda row: row.id)
        expected_data = [row.diff for row in expected]
        actual_data = [row.diff for row in actual]
        assert_close(actual_data, expected_data)
Esempio n. 26
0
def test_shuffle(diffs, **unused):
    """Shuffle ``diffs`` with a fixed seed and check the output path.

    Args:
        diffs: forwarded to ``loom.runner.shuffle`` as ``rows_in``.
        **unused: extra fixture arguments; ignored.
    """
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        fixed_seed = 12345
        out_path = os.path.abspath('shuffled.pbs.gz')
        loom.runner.shuffle(
            rows_in=diffs,
            rows_out=out_path,
            seed=fixed_seed)
        assert_found(out_path)