コード例 #1
0
ファイル: test_schema.py プロジェクト: sauravsrijan/flytekit
def _assert_reader_yields_values(schema_obj, column_name, values):
    """Assert that reading ``schema_obj`` yields ``values`` in ``column_name``.

    :param schema_obj: schema object usable as a context manager that yields
        a reader exposing ``iter_chunks``, ``read`` and ``seek``.
    :param column_name: name of the single column to inspect in each frame.
    :param values: list of 1-tuples holding the expected cell values, in the
        order they were written to each file.
    """
    with schema_obj as reader:
        # Every chunk is compared element-wise against the expected values.
        for chunk in reader.iter_chunks():
            for expected, actual in _six_moves.zip(
                    values, chunk[column_name].tolist()):
                assert expected[0] == actual

        # Once all chunks have been consumed there is nothing left to read.
        assert reader.read() is None

        # Rewind and read everything as one concatenated frame; the expected
        # values repeat once per written file, hence the modulo.
        reader.seek(0)
        combined = reader.read(concat=True)
        for position, actual in enumerate(combined[column_name].tolist()):
            assert values[position % len(values)][0] == actual


def test_download(value_type_pair):
    """Check that a schema can be downloaded and read back unchanged.

    Three parquet files holding the same single-column records are written
    to a temporary directory. The schema pointing at that directory is then
    downloaded:

    * to an explicit local path, which must succeed;
    * with no path and outside ``LocalTestFileSystem``, which must raise;
    * with no path inside ``LocalTestFileSystem``, which must succeed.

    :param value_type_pair: ``(column_name, flyte_type, values)`` triple
        (presumably supplied by a pytest fixture -- defined outside this
        block).
    """
    column_name, flyte_type, values = value_type_pair
    # DataFrame.from_records expects one record (tuple) per row.
    values = [(value,) for value in values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    with _utils.AutoDeletingTempDir("test") as tmpdir:
        frame = _pd.DataFrame.from_records(values, columns=[column_name])
        for i in _six_moves.range(3):
            # Zero-padded names: 000000, 000001, 000002.
            frame.to_parquet(tmpdir.get_named_tempfile(str(i).zfill(6)),
                             coerce_timestamps='us')

        # Download to an explicitly provided local destination.
        with _utils.AutoDeletingTempDir("test2") as local_dir:
            schema_obj = _schema_impl.Schema(tmpdir.name,
                                             schema_type=schema_type)
            schema_obj.download(local_dir.get_named_tempfile(
                _uuid.uuid4().hex))
            _assert_reader_yields_values(schema_obj, column_name, values)

        # Construct outside the ``raises`` block so that only the
        # destination-less download() call can satisfy the expectation.
        schema_obj = _schema_impl.Schema(tmpdir.name,
                                         schema_type=schema_type)
        with _pytest.raises(Exception):
            schema_obj.download()

        # The same destination-less download is expected to work inside the
        # local test file system context.
        with _test_utils.LocalTestFileSystem():
            schema_obj = _schema_impl.Schema(tmpdir.name,
                                             schema_type=schema_type)
            schema_obj.download()
            _assert_reader_yields_values(schema_obj, column_name, values)
コード例 #2
0
                "e", types.SchemaType.SchemaColumn.SchemaColumnType.FLOAT),
            types.SchemaType.SchemaColumn(
                "f", types.SchemaType.SchemaColumn.SchemaColumnType.STRING),
        ]))),
     _schema_impl.Schema(
         "s3://some/where/",
         _schema_impl.SchemaType.promote_from_model(
             types.SchemaType([
                 types.SchemaType.SchemaColumn(
                     "a",
                     types.SchemaType.SchemaColumn.SchemaColumnType.INTEGER),
                 types.SchemaType.SchemaColumn(
                     "b",
                     types.SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN),
                 types.SchemaType.SchemaColumn(
                     "c",
                     types.SchemaType.SchemaColumn.SchemaColumnType.DATETIME),
                 types.SchemaType.SchemaColumn(
                     "d",
                     types.SchemaType.SchemaColumn.SchemaColumnType.DURATION),
                 types.SchemaType.SchemaColumn(
                     "e",
                     types.SchemaType.SchemaColumn.SchemaColumnType.FLOAT),
                 types.SchemaType.SchemaColumn(
                     "f",
                     types.SchemaType.SchemaColumn.SchemaColumnType.STRING),
             ]))))
]

LIST_OF_SCALAR_LITERALS_AND_PYTHON_VALUE = [
    (literals.Literal(scalar=s), v)