def test_download(value_type_pair):
    """Check that a Schema can be downloaded and then read back unchanged.

    Three parquet files holding the same single-column records are written to
    a temporary directory. A ``Schema`` rooted at that directory is then
    downloaded and verified twice: once to an explicit local path, and once
    with no path inside a ``LocalTestFileSystem`` context. Calling
    ``download()`` with no path outside such a context is expected to raise.

    :param value_type_pair: ``(column_name, flyte_type, values)`` triple
        describing the single column under test.
    """
    column_name, flyte_type, values = value_type_pair
    # Each row handed to DataFrame.from_records is a 1-tuple holding the
    # column's value; the checks below index it back out with [0].
    values = [(value,) for value in values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    with _utils.AutoDeletingTempDir("test") as tmpdir:
        # The frame is identical for every file, so build it once.
        frame = _pd.DataFrame.from_records(values, columns=[column_name])
        for i in _six_moves.range(3):
            # Zero-padded names: 000000, 000001, 000002.
            frame.to_parquet(
                tmpdir.get_named_tempfile(str(i).zfill(6)),
                coerce_timestamps='us',
            )

        # Case 1: download to an explicitly supplied local path.
        with _utils.AutoDeletingTempDir("test2") as local_dir:
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download(local_dir.get_named_tempfile(_uuid.uuid4().hex))
            _assert_schema_matches(schema_obj, column_name, values)

        # Case 2: no path and no filesystem context -> download() must raise.
        # The Schema is constructed outside the ``raises`` block so that only
        # the download() call can satisfy the expectation.
        schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
        with _pytest.raises(Exception):
            schema_obj.download()

        # Case 3: no path, but inside a LocalTestFileSystem context.
        with _test_utils.LocalTestFileSystem():
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download()
            _assert_schema_matches(schema_obj, column_name, values)


# NOTE: kept below test_download on purpose, so that a decorator placed
# directly above the test (if any) keeps applying to the test function.
def _assert_schema_matches(schema_obj, column_name, values):
    """Assert that reading ``schema_obj`` yields ``values`` in ``column_name``.

    :param schema_obj: downloaded schema object, usable as a context manager
        that yields a reader.
    :param column_name: name of the single column to compare.
    :param values: list of 1-tuples with the expected value of each row.
    """
    with schema_obj as reader:
        # Chunk by chunk: every chunk is compared against the expected rows.
        for df in reader.iter_chunks():
            for check, actual in _six_moves.zip(values, df[column_name].tolist()):
                assert check[0] == actual

        # Once all chunks are consumed, a further read is expected to give None.
        assert reader.read() is None

        # Rewind and read everything as one frame. The modulo lets the expected
        # rows repeat cyclically (presumably once per parquet file written).
        reader.seek(0)
        df = reader.read(concat=True)
        for iter_count, actual in enumerate(df[column_name].tolist()):
            assert values[iter_count % len(values)][0] == actual
"e", types.SchemaType.SchemaColumn.SchemaColumnType.FLOAT), types.SchemaType.SchemaColumn( "f", types.SchemaType.SchemaColumn.SchemaColumnType.STRING), ]))), _schema_impl.Schema( "s3://some/where/", _schema_impl.SchemaType.promote_from_model( types.SchemaType([ types.SchemaType.SchemaColumn( "a", types.SchemaType.SchemaColumn.SchemaColumnType.INTEGER), types.SchemaType.SchemaColumn( "b", types.SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN), types.SchemaType.SchemaColumn( "c", types.SchemaType.SchemaColumn.SchemaColumnType.DATETIME), types.SchemaType.SchemaColumn( "d", types.SchemaType.SchemaColumn.SchemaColumnType.DURATION), types.SchemaType.SchemaColumn( "e", types.SchemaType.SchemaColumn.SchemaColumnType.FLOAT), types.SchemaType.SchemaColumn( "f", types.SchemaType.SchemaColumn.SchemaColumnType.STRING), ])))) ] LIST_OF_SCALAR_LITERALS_AND_PYTHON_VALUE = [ (literals.Literal(scalar=s), v)