def test_type_promoted_schema_read_with_fastparquet():
    """Read back a boolean column containing ``None`` with PARQUET_ENGINE=fastparquet.

    The column is expected to come back with an object dtype and with the
    ``None`` entries preserved.

    The ``PARQUET_ENGINE`` environment variable is overridden for the duration
    of the read and always restored afterwards, even when an assertion fails,
    so a failure here cannot leak the override into other tests.
    """
    import os as _os

    with _test_utils.LocalTestFileSystem():
        a = _schema_impl.Schema.create_at_any_location(
            schema_type=_schema_impl.SchemaType([('a', _primitives.Integer), ('b', _primitives.Boolean)])
        )
        with a as writer:
            writer.write(_pd.DataFrame.from_dict({'a': [1, 2, 3, 4], 'b': [None, True, None, False]}))

        original_engine = _os.getenv('PARQUET_ENGINE')
        # NOTE(review): presumably the schema reader consults this variable to pick its engine.
        _os.environ['PARQUET_ENGINE'] = 'fastparquet'
        try:
            b = _schema_impl.Schema.fetch(a.remote_prefix, schema_type=_schema_impl.SchemaType([]))
            with b as reader:
                df = reader.read()
                assert df['a'].tolist() == [1, 2, 3, 4]
                assert _pd.api.types.is_object_dtype(df.dtypes['b'])
                assert df['b'].tolist() == [None, True, None, False]
        finally:
            # Put the process environment back exactly as we found it.
            if original_engine is None:
                _os.environ.pop('PARQUET_ENGINE', None)
            else:
                _os.environ['PARQUET_ENGINE'] = original_engine
def test_multipart_blob_fetch_managed():
    """Fetch a multipart blob into the managed sandbox, then re-fetch it.

    Checks the fetched blob's attributes and part contents, that fetching onto
    an existing local path raises ``FlyteAssertion`` unless ``overwrite=True``,
    and that ``Blob.fetch`` raises once the sandbox context has been left.
    """
    with AutoDeletingTempDir('test') as work_dir:
        with test_utils.LocalTestFileSystem() as sandbox:
            _generate_multipart_blob_data(work_dir)

            fetched = blobs.MultiPartBlob.fetch(work_dir.name)
            assert fetched.local_path.startswith(sandbox.name)
            assert fetched.remote_location == work_dir.name + "/"
            assert fetched.mode == 'rb'
            assert fetched.metadata.type.format == ""
            assert fetched.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.MULTIPART
            with fetched as parts:
                for index, text in enumerate(("part0", "part1", "part2")):
                    assert parts[index].read() == text.encode('utf-8')

            # The local copy already exists, so a plain re-fetch must be rejected.
            with pytest.raises(_user_exceptions.FlyteAssertion):
                blobs.MultiPartBlob.fetch(work_dir.name, local_path=fetched.local_path)

            # Change the first part at the source and overwrite the local copy.
            with open(os.path.join(work_dir.name, "0"), 'wb') as sink:
                sink.write("bye".encode('utf-8'))

            refetched = blobs.MultiPartBlob.fetch(work_dir.name, local_path=fetched.local_path, overwrite=True)
            with refetched as parts:
                for index, text in enumerate(("bye", "part1", "part2")):
                    assert parts[index].read() == text.encode('utf-8')

        # Outside the sandbox context a fetch without a local path is expected to fail.
        with pytest.raises(_user_exceptions.FlyteAssertion):
            blobs.Blob.fetch(work_dir.name)
def test_blob_fetch_managed():
    """Fetch a single blob into the managed sandbox, then re-fetch it.

    Checks the fetched blob's attributes and contents, that fetching onto an
    existing local path raises ``FlyteAssertion`` unless ``overwrite=True``,
    and that ``Blob.fetch`` raises once the sandbox context has been left.
    """
    with AutoDeletingTempDir('test') as work_dir:
        with test_utils.LocalTestFileSystem() as sandbox:
            source_path = work_dir.get_named_tempfile('tmp')
            with open(source_path, 'wb') as sink:
                sink.write("hello".encode('utf-8'))

            fetched = blobs.Blob.fetch(source_path)
            assert fetched.local_path.startswith(sandbox.name)
            assert fetched.remote_location == source_path
            assert fetched.mode == 'rb'
            assert fetched.metadata.type.format == ""
            assert fetched.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE
            with fetched as stream:
                assert stream.read() == "hello".encode('utf-8')

            # The local copy already exists, so a plain re-fetch must be rejected.
            with pytest.raises(_user_exceptions.FlyteAssertion):
                blobs.Blob.fetch(source_path, local_path=fetched.local_path)

            # Rewrite the source and overwrite the local copy.
            with open(source_path, 'wb') as sink:
                sink.write("bye".encode('utf-8'))

            refetched = blobs.Blob.fetch(source_path, local_path=fetched.local_path, overwrite=True)
            with refetched as stream:
                assert stream.read() == "bye".encode('utf-8')

        # Outside the sandbox context a fetch without a local path is expected to fail.
        with pytest.raises(_user_exceptions.FlyteAssertion):
            blobs.Blob.fetch(source_path)
def test_schema_read_consistency_between_two_engines():
    """Read the same schema with PARQUET_ENGINE set to fastparquet, then pyarrow.

    The two resulting data frames must compare equal.

    The ``PARQUET_ENGINE`` environment variable is overridden for each read and
    always restored afterwards, even when the comparison fails, so a failure
    here cannot leak the override into other tests.
    """
    import os as _os

    with _test_utils.LocalTestFileSystem():
        a = _schema_impl.Schema.create_at_any_location(
            schema_type=_schema_impl.SchemaType([('a', _primitives.Integer), ('b', _primitives.Boolean)])
        )
        with a as writer:
            writer.write(_pd.DataFrame.from_dict({'a': [1, 2, 3, 4], 'b': [None, True, None, False]}))

        original_engine = _os.getenv('PARQUET_ENGINE')
        try:
            # NOTE(review): presumably the schema reader consults this variable to pick its engine.
            _os.environ['PARQUET_ENGINE'] = 'fastparquet'
            b = _schema_impl.Schema.fetch(a.remote_prefix, schema_type=_schema_impl.SchemaType([]))
            with b as b_reader:
                b_df = b_reader.read()

            _os.environ['PARQUET_ENGINE'] = 'pyarrow'
            c = _schema_impl.Schema.fetch(a.remote_prefix, schema_type=_schema_impl.SchemaType([]))
            with c as c_reader:
                c_df = c_reader.read()

            assert b_df.equals(c_df)
        finally:
            # Put the process environment back exactly as we found it.
            if original_engine is None:
                _os.environ.pop('PARQUET_ENGINE', None)
            else:
                _os.environ['PARQUET_ENGINE'] = original_engine
def test_datetime_coercion():
    """Write microsecond-precision datetimes with and without ms coercion.

    The test has no explicit assertions; it passes as long as both writes
    complete without raising.
    """
    newest = _datetime.datetime(day=1, month=1, year=2017, hour=1, minute=1, second=1, microsecond=1)
    # Five single-column records, each one day earlier than the previous.
    values = [(newest - _datetime.timedelta(days=x),) for x in _six_moves.range(5)]
    schema_type = _schema_impl.SchemaType(columns=[("testname", _primitives.Datetime)])

    with _test_utils.LocalTestFileSystem():
        with _utils.AutoDeletingTempDir("test") as t:
            coerced = _schema_impl.Schema.create_at_known_location(t.name, mode="wb", schema_type=schema_type)
            with coerced as writer:
                for _ in _six_moves.range(5):
                    # us to ms coercion segfaults unless we explicitly allow truncation.
                    frame = _pd.DataFrame.from_records(values, columns=["testname"])
                    writer.write(frame, coerce_timestamps="ms", allow_truncated_timestamps=True)

                # TODO: Uncomment when segfault bug is resolved
                # with _pytest.raises(Exception):
                #     writer.write(
                #         _pd.DataFrame.from_records(values, columns=['testname']),
                #         coerce_timestamps='ms')

            uncoerced = _schema_impl.Schema.create_at_known_location(t.name, mode="wb", schema_type=schema_type)
            with uncoerced as writer:
                for _ in _six_moves.range(5):
                    frame = _pd.DataFrame.from_records(values, columns=["testname"])
                    writer.write(frame)
def test_simple_read_and_write_with_different_types(value_type_pair):
    """Round-trip one typed column through a schema at a known location.

    :param value_type_pair: ``(column_name, flyte_type, values)`` triple,
        unpacked on the first line.

    Five copies of the values are written. They are then read back chunk by
    chunk and again as one concatenated frame. ``local_path`` is expected to
    sit inside the sandbox while a schema is open and to be ``None`` otherwise.
    """
    column_name, flyte_type, raw_values = value_type_pair
    rows = [(value,) for value in raw_values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    with _test_utils.LocalTestFileSystem() as sandbox:
        with _utils.AutoDeletingTempDir("test") as t:
            written = _schema_impl.Schema.create_at_known_location(t.name, mode='wb', schema_type=schema_type)
            assert written.local_path is None
            with written as writer:
                for _ in _six_moves.range(5):
                    writer.write(_pd.DataFrame.from_records(rows, columns=[column_name]))
                assert written.local_path.startswith(sandbox.name)
            assert written.local_path is None

            loaded = _schema_impl.Schema.create_at_known_location(t.name, mode='rb', schema_type=schema_type)
            assert loaded.local_path is None
            with loaded as reader:
                # Each chunk should reproduce the rows in order.
                for chunk in reader.iter_chunks():
                    for expected_row, actual in _six_moves.zip(rows, chunk[column_name].tolist()):
                        assert expected_row[0] == actual
                # The reader is exhausted until it is rewound.
                assert reader.read() is None
                reader.seek(0)
                combined = reader.read(concat=True)
                # The concatenated frame repeats the rows, hence the modulo.
                for position, actual in enumerate(combined[column_name].tolist()):
                    assert rows[position % len(rows)][0] == actual
                assert loaded.local_path.startswith(sandbox.name)
            assert loaded.local_path is None
def test_normal_schema_read_with_fastparquet():
    """Read back a boolean column without nulls with PARQUET_ENGINE=fastparquet.

    The column is expected to keep a bool dtype and its original values.

    The ``PARQUET_ENGINE`` environment variable is overridden for the duration
    of the read and always restored afterwards, even when an assertion fails,
    so a failure here cannot leak the override into other tests.
    """
    import os as _os

    with _test_utils.LocalTestFileSystem():
        a = _schema_impl.Schema.create_at_any_location(
            schema_type=_schema_impl.SchemaType([("a", _primitives.Integer), ("b", _primitives.Boolean)])
        )
        with a as writer:
            writer.write(_pd.DataFrame.from_dict({"a": [1, 2, 3, 4], "b": [False, True, True, False]}))

        original_engine = _os.getenv("PARQUET_ENGINE")
        # NOTE(review): presumably the schema reader consults this variable to pick its engine.
        _os.environ["PARQUET_ENGINE"] = "fastparquet"
        try:
            b = _schema_impl.Schema.fetch(a.remote_prefix, schema_type=_schema_impl.SchemaType([]))
            with b as reader:
                df = reader.read()
                assert df["a"].tolist() == [1, 2, 3, 4]
                assert _pd.api.types.is_bool_dtype(df.dtypes["b"])
                assert df["b"].tolist() == [False, True, True, False]
        finally:
            # Put the process environment back exactly as we found it.
            if original_engine is None:
                _os.environ.pop("PARQUET_ENGINE", None)
            else:
                _os.environ["PARQUET_ENGINE"] = original_engine
def test_extra_schema_read():
    """Read a two-column schema through a type that only declares column ``a``.

    With ``truncate_extra_columns=False`` both columns are returned; with the
    default arguments only the declared column is.
    """
    with _test_utils.LocalTestFileSystem():
        full_type = _schema_impl.SchemaType([('a', _primitives.Integer), ('b', _primitives.Integer)])
        source = _schema_impl.Schema.create_at_any_location(schema_type=full_type)
        with source as writer:
            writer.write(_pd.DataFrame.from_dict({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}))

        narrow_type = _schema_impl.SchemaType([('a', _primitives.Integer)])
        narrowed = _schema_impl.Schema.fetch(source.remote_prefix, schema_type=narrow_type)

        # Extra columns kept on request.
        with narrowed as reader:
            everything = reader.read(concat=True, truncate_extra_columns=False)
            assert everything.columns.values.tolist() == ['a', 'b']
            assert everything['a'].tolist() == [1, 2, 3, 4]
            assert everything['b'].tolist() == [5, 6, 7, 8]

        # Default read drops the undeclared column.
        with narrowed as reader:
            declared_only = reader.read(concat=True)
            assert declared_only.columns.values.tolist() == ['a']
            assert declared_only['a'].tolist() == [1, 2, 3, 4]
def test_blob_download_managed():
    """Download a single blob into the managed sandbox, then download it again.

    Checks the blob's attributes and contents after ``download()``, that
    downloading onto an existing local path raises ``FlyteAssertion`` unless
    ``overwrite=True``, and that ``download()`` raises once the sandbox context
    has been left.
    """
    with AutoDeletingTempDir("test") as work_dir:
        with test_utils.LocalTestFileSystem() as sandbox:
            source_path = work_dir.get_named_tempfile("tmp")
            with open(source_path, "wb") as sink:
                sink.write("hello".encode("utf-8"))

            first = blobs.Blob(source_path)
            first.download()
            assert first.local_path.startswith(sandbox.name)
            assert first.remote_location == source_path
            assert first.mode == "rb"
            assert first.metadata.type.format == ""
            assert first.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE
            with first as stream:
                assert stream.read() == "hello".encode("utf-8")

            # The local copy already exists, so a plain download onto it must be rejected.
            second = blobs.Blob(source_path)
            with pytest.raises(_user_exceptions.FlyteAssertion):
                second.download(first.local_path)

            # Rewrite the source and overwrite the local copy.
            with open(source_path, "wb") as sink:
                sink.write("bye".encode("utf-8"))

            second = blobs.Blob(source_path)
            second.download(local_path=first.local_path, overwrite=True)
            with second as stream:
                assert stream.read() == "bye".encode("utf-8")

        # Outside the sandbox context a download without a local path is expected to fail.
        unmanaged = blobs.Blob(source_path)
        with pytest.raises(_user_exceptions.FlyteAssertion):
            unmanaged.download()
def execution_data_locations():
    """Yield a pair of ``UrlBlob`` objects for serialized inputs and outputs.

    ``_INPUT_MAP`` and ``_OUTPUT_MAP`` are written as protobuf files inside a
    ``LocalTestFileSystem``. The yield happens inside that context, so the
    files are available while the consumer is running.
    """
    with test_utils.LocalTestFileSystem() as fs:
        input_filename, output_filename = (
            fs.get_named_tempfile(file_name) for file_name in ("inputs.pb", "outputs.pb")
        )
        for literal_map, destination in ((_INPUT_MAP, input_filename), (_OUTPUT_MAP, output_filename)):
            utils.write_proto_to_file(literal_map.to_flyte_idl(), destination)

        inputs_blob = _common_models.UrlBlob(input_filename, 100)
        outputs_blob = _common_models.UrlBlob(output_filename, 100)
        yield (inputs_blob, outputs_blob)
def test_generic_schema():
    """Instantiate a column-less schema and check its default attributes."""
    with test_utils.LocalTestFileSystem() as sandbox:
        make_schema = schema.schema_instantiator()
        created = make_schema()

        assert isinstance(created, schema_impl.Schema)
        assert created.mode == "wb"
        assert len(created.type.columns) == 0
        # The remote location is expected to live under the sandbox root.
        assert created.remote_location.startswith(sandbox.name)
def test_blob_double_enter():
    """Entering a blob that is already open must raise ``FlyteAssertion``."""
    with test_utils.LocalTestFileSystem(), AutoDeletingTempDir('test') as work_dir:
        sink_blob = blobs.Blob(work_dir.get_named_tempfile("sink"), mode='wb')
        with sink_blob:
            # Second, nested entry of the same blob.
            with pytest.raises(_user_exceptions.FlyteAssertion):
                with sink_blob:
                    pass
def test_typed_schema():
    """Instantiate a schema typed with ``_ALL_COLUMN_TYPES`` and check its columns."""
    with test_utils.LocalTestFileSystem() as sandbox:
        make_schema = schema.schema_instantiator(_ALL_COLUMN_TYPES)
        created = make_schema()

        assert isinstance(created, schema_impl.Schema)
        assert created.mode == "wb"
        assert len(created.type.columns) == len(_ALL_COLUMN_TYPES)
        # Column order and types must match the declaration.
        assert list(created.type.sdk_columns.items()) == _ALL_COLUMN_TYPES
        assert created.remote_location.startswith(sandbox.name)
def test_generic_schema_read():
    """Read a typed two-column schema through an empty schema type.

    All written columns are expected to come back unchanged.
    """
    with _test_utils.LocalTestFileSystem():
        typed = _schema_impl.SchemaType([("a", _primitives.Integer), ("b", _primitives.Integer)])
        source = _schema_impl.Schema.create_at_any_location(schema_type=typed)
        with source as writer:
            writer.write(_pd.DataFrame.from_dict({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}))

        generic = _schema_impl.Schema.fetch(source.remote_prefix, schema_type=_schema_impl.SchemaType([]))
        with generic as reader:
            frame = reader.read()
            assert frame.columns.values.tolist() == ["a", "b"]
            assert frame["a"].tolist() == [1, 2, 3, 4]
            assert frame["b"].tolist() == [5, 6, 7, 8]
def test_create_at_known_location():
    """Create a schema at a chosen directory and write one frame into it.

    After the schema context exits, the frame is read directly from the file
    ``000000`` in that directory with ``pandas.read_parquet``.
    """
    with _test_utils.LocalTestFileSystem(), _utils.AutoDeletingTempDir("test") as work_dir:
        created = _schema_impl.Schema.create_at_known_location(
            work_dir.name, schema_type=_schema_impl.SchemaType()
        )
        assert created.local_path is None
        assert created.remote_location == work_dir.name + "/"
        assert created.mode == "wb"

        with created as writer:
            writer.write(_pd.DataFrame.from_dict({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}))

        first_chunk = _os.path.join(work_dir.name, "000000")
        frame = _pd.read_parquet(first_chunk)
        assert list(frame["a"]) == [1, 2, 3, 4]
        assert list(frame["b"]) == [5, 6, 7, 8]
def test_casting():
    """Cast a schema whose column uses a ``Datetime`` subclass to plain ``Datetime``.

    There are no explicit assertions; the test passes when ``cast_to`` does not
    raise.
    """

    class MyDateTime(primitives.Datetime):
        pass

    with test_utils.LocalTestFileSystem():
        subclass_columns = [('altered', MyDateTime)]
        base_columns = [('altered', primitives.Datetime)]

        source = schema.schema_instantiator(subclass_columns)()
        target_instantiator = schema.schema_instantiator(base_columns)
        source.cast_to(target_instantiator._schema_type)
def test_blob_create_at():
    """Create a blob at a known location, write to it, and read the file back.

    Before the blob is opened its ``local_path`` is ``None``; after writing it
    points inside the sandbox and the bytes are present at the chosen location.
    """
    payload = "hello hello".encode('utf-8')
    with test_utils.LocalTestFileSystem() as sandbox, AutoDeletingTempDir('test') as work_dir:
        target_path = work_dir.get_named_tempfile('tmp')
        created = blobs.Blob.create_at_known_location(target_path)
        assert created.local_path is None
        assert created.remote_location == target_path
        assert created.mode == 'wb'
        assert created.metadata.type.format == ""
        assert created.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE

        with created as sink:
            sink.write(payload)

        assert created.local_path.startswith(sandbox.name)
        with open(target_path, 'rb') as stream:
            assert stream.read() == payload
def _assert_reader_yields_values(reader, values, column_name):
    """Check that ``reader`` returns ``values`` both chunk-wise and concatenated.

    :param reader: An opened schema reader, positioned at the start.
    :param values: List of single-element tuples holding the expected values.
    :param column_name: Name of the column to compare.
    """
    for df in reader.iter_chunks():
        for check, actual in _six_moves.zip(values, df[column_name].tolist()):
            assert check[0] == actual
    # Once every chunk has been consumed, a plain read returns nothing.
    assert reader.read() is None
    reader.seek(0)
    df = reader.read(concat=True)
    # The concatenated frame repeats the values once per chunk, hence the modulo.
    for iter_count, actual in enumerate(df[column_name].tolist()):
        assert values[iter_count % len(values)][0] == actual


def test_download(value_type_pair):
    """Download a three-file parquet schema, explicitly and into the sandbox.

    :param value_type_pair: ``(column_name, flyte_type, values)`` triple,
        unpacked on the first line.

    Three scenarios are covered: downloading to an explicit local path,
    downloading without a path outside any sandbox (expected to raise), and
    downloading without a path inside a ``LocalTestFileSystem``.
    """
    column_name, flyte_type, values = value_type_pair
    values = [tuple([value]) for value in values]
    schema_type = _schema_impl.SchemaType(columns=[(column_name, flyte_type)])

    with _utils.AutoDeletingTempDir("test") as tmpdir:
        # Files are named with zero-padded six-digit indices.
        for i in _six_moves.range(3):
            _pd.DataFrame.from_records(values, columns=[column_name]).to_parquet(
                tmpdir.get_named_tempfile(str(i).zfill(6)), coerce_timestamps='us'
            )

        with _utils.AutoDeletingTempDir("test2") as local_dir:
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download(local_dir.get_named_tempfile(_uuid.uuid4().hex))
            with schema_obj as reader:
                _assert_reader_yields_values(reader, values, column_name)

        # Only download() is expected to raise here; the same constructor call
        # succeeded above, so it stays outside the raises block.
        schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
        with _pytest.raises(Exception):
            schema_obj.download()

        with _test_utils.LocalTestFileSystem():
            schema_obj = _schema_impl.Schema(tmpdir.name, schema_type=schema_type)
            schema_obj.download()
            with schema_obj as reader:
                _assert_reader_yields_values(reader, values, column_name)
def test_multipartblob():
    """Exercise the three ways of building a ``blobs.MultiPartBlob`` used here.

    Covers the no-argument call inside a sandbox, wrapping an existing blob,
    and ``from_string`` (which yields a URI with a trailing slash).
    """
    with test_utils.LocalTestFileSystem() as sandbox:
        created = blobs.MultiPartBlob()
        assert isinstance(created, blob_impl.MultiPartBlob)
        assert created.remote_location.startswith(sandbox.name)
        assert created.mode == "wb"
        assert created.metadata.type.format == ""

    wrapped = blobs.MultiPartBlob(created)
    assert isinstance(wrapped, blobs.MultiPartBlob)
    assert wrapped.scalar.blob.uri == created.remote_location
    assert wrapped.scalar.blob.metadata == created.metadata

    parsed = blobs.MultiPartBlob.from_string("/a/b/c")
    assert isinstance(parsed, blobs.MultiPartBlob)
    assert parsed.scalar.blob.uri == "/a/b/c/"
    assert parsed.scalar.blob.metadata.type.format == ""
def test_csv():
    """Exercise the three ways of building a ``blobs.CSV`` used here.

    Covers the no-argument call inside a sandbox, wrapping an existing blob,
    and ``from_string``. The blob format is expected to be ``"csv"`` and the
    mode of a newly created blob ``"w"``.
    """
    with test_utils.LocalTestFileSystem() as sandbox:
        created = blobs.CSV()
        assert isinstance(created, blob_impl.Blob)
        assert created.remote_location.startswith(sandbox.name)
        assert created.mode == "w"
        assert created.metadata.type.format == "csv"

    wrapped = blobs.CSV(created)
    assert isinstance(wrapped, blobs.Blob)
    assert wrapped.scalar.blob.uri == created.remote_location
    assert wrapped.scalar.blob.metadata == created.metadata

    parsed = blobs.CSV.from_string("/a/b/c")
    assert isinstance(parsed, blobs.Blob)
    assert parsed.scalar.blob.uri == "/a/b/c"
    assert parsed.scalar.blob.metadata.type.format == "csv"
def test_blob_from_python_std():
    """Build blobs with ``Blob.from_python_std`` from a path, a blob and an int.

    A path yields a blob whose remote location lies in the sandbox and holds
    the file's bytes. An existing blob is returned as an equal object. An int
    raises ``FlyteTypeException``.
    """
    payload = "hello hello".encode('utf-8')
    with test_utils.LocalTestFileSystem() as sandbox, AutoDeletingTempDir('test') as work_dir:
        local_file = work_dir.get_named_tempfile("from_python_std")
        with open(local_file, 'wb') as sink:
            sink.write(payload)

        converted = blobs.Blob.from_python_std(local_file)
        assert converted.mode == "wb"
        assert converted.metadata.type.format == ""
        assert converted.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.SINGLE
        assert converted.remote_location.startswith(sandbox.name)
        assert converted.local_path == local_file
        with open(converted.remote_location, 'rb') as stream:
            assert stream.read() == payload

    existing = blobs.Blob("/tmp/fake")
    passthrough = blobs.Blob.from_python_std(existing)
    assert existing == passthrough

    with pytest.raises(_user_exceptions.FlyteTypeException):
        blobs.Blob.from_python_std(3)
def test_partial_column_read():
    """Read only column ``b`` from a two-column schema via ``columns=['b']``.

    The column names are compared as a plain list. Asserting on an
    ndarray-to-list comparison would raise an ambiguous-truth-value
    ``ValueError`` instead of a readable assertion failure whenever more than
    one column came back.
    """
    with _test_utils.LocalTestFileSystem():
        a = _schema_impl.Schema.create_at_any_location(
            schema_type=_schema_impl.SchemaType([('a', _primitives.Integer), ('b', _primitives.Integer)])
        )
        with a as writer:
            writer.write(_pd.DataFrame.from_dict({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}))

        b = _schema_impl.Schema.fetch(
            a.uri,
            schema_type=_schema_impl.SchemaType([('a', _primitives.Integer), ('b', _primitives.Integer)]),
        )
        with b as reader:
            df = reader.read(columns=['b'])
            assert df.columns.values.tolist() == ['b']
            assert df['b'].tolist() == [5, 6, 7, 8]
def test_multipart_blob_create_at():
    """Create a multipart blob at a known directory and write three parts.

    Each part is then read straight from the directory to confirm its bytes.
    """
    parts = (('0', "part0"), ('1', "part1"), ('2', "part2"))
    with test_utils.LocalTestFileSystem(), AutoDeletingTempDir('test') as work_dir:
        created = blobs.MultiPartBlob.create_at_known_location(work_dir.name)
        assert created.local_path is None
        assert created.remote_location == work_dir.name + "/"
        assert created.mode == 'wb'
        assert created.metadata.type.format == ""
        assert created.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.MULTIPART

        # Write every part first, then verify them all, as two separate passes.
        for part_name, text in parts:
            with created.create_part(part_name) as sink:
                sink.write(text.encode('utf-8'))

        for part_name, text in parts:
            with open(os.path.join(work_dir.name, part_name), 'rb') as stream:
                assert stream.read() == text.encode('utf-8')
def test_multipart_blob_from_python_std():
    """Build multipart blobs with ``from_python_std`` from a path, a blob and an int.

    A directory path yields a blob whose remote location lies in the sandbox
    and contains the three parts. An existing blob is returned as an equal
    object. An int raises ``FlyteTypeException``.
    """
    with test_utils.LocalTestFileSystem() as sandbox, AutoDeletingTempDir('test') as work_dir:
        _generate_multipart_blob_data(work_dir)

        converted = blobs.MultiPartBlob.from_python_std(work_dir.name)
        assert converted.mode == "wb"
        assert converted.metadata.type.format == ""
        assert converted.metadata.type.dimensionality == _core_types.BlobType.BlobDimensionality.MULTIPART
        assert converted.remote_location.startswith(sandbox.name)
        assert converted.local_path == work_dir.name

        for part_name, text in (('0', "part0"), ('1', "part1"), ('2', "part2")):
            with open(os.path.join(converted.remote_location, part_name), 'rb') as stream:
                assert stream.read() == text.encode('utf-8')

    existing = blobs.MultiPartBlob("/tmp/fake/")
    passthrough = blobs.MultiPartBlob.from_python_std(existing)
    assert existing == passthrough

    with pytest.raises(_user_exceptions.FlyteTypeException):
        blobs.MultiPartBlob.from_python_std(3)
def test_from_python_std():
    """Build schemas with ``Schema.from_python_std`` from several Python values.

    Covers a single data frame, a list of data frames, a list mixing a data
    frame with a plain list (expected to raise ``FlyteTypeException``) and an
    empty list (expected to read back as ``None``).

    Column names are compared as lists. Comparing the ``.all()`` reductions of
    the two name arrays does not compare the names themselves and so cannot
    catch a column mismatch.
    """
    with _test_utils.LocalTestFileSystem():

        def ab_schema_type():
            # A fresh SchemaType per use, as each call site builds its own.
            return _schema_impl.SchemaType([("a", _primitives.Integer), ("b", _primitives.Integer)])

        def single_dataframe():
            df1 = _pd.DataFrame.from_dict({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
            s = _schema_impl.Schema.from_python_std(t_value=df1, schema_type=ab_schema_type())
            assert s is not None
            n = _schema_impl.Schema.fetch(s.uri, schema_type=ab_schema_type())
            with n as reader:
                df2 = reader.read()
                assert df2.columns.values.tolist() == df1.columns.values.tolist()
                assert df2["b"].tolist() == df1["b"].tolist()

        def list_of_dataframes():
            df1 = _pd.DataFrame.from_dict({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
            df2 = _pd.DataFrame.from_dict({"a": [9, 10, 11, 12], "b": [13, 14, 15, 16]})
            s = _schema_impl.Schema.from_python_std(t_value=[df1, df2], schema_type=ab_schema_type())
            assert s is not None
            n = _schema_impl.Schema.fetch(s.uri, schema_type=ab_schema_type())
            with n as reader:
                actual = []
                for df in reader.iter_chunks():
                    assert df.columns.values.tolist() == df1.columns.values.tolist()
                    actual.extend(df["b"].tolist())
                # Chunks must come back in the order the frames were supplied.
                b_val = df1["b"].tolist() + df2["b"].tolist()
                assert actual == b_val

        def mixed_list():
            df1 = _pd.DataFrame.from_dict({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
            df2 = [1, 2, 3]
            with _pytest.raises(_user_exceptions.FlyteTypeException):
                _schema_impl.Schema.from_python_std(t_value=[df1, df2], schema_type=ab_schema_type())

        def empty_list():
            s = _schema_impl.Schema.from_python_std(t_value=[], schema_type=ab_schema_type())
            assert s is not None
            n = _schema_impl.Schema.fetch(s.uri, schema_type=ab_schema_type())
            with n as reader:
                df = reader.read()
                assert df is None

        single_dataframe()
        mixed_list()
        empty_list()
        list_of_dataframes()
def test_multipart_blob_no_enter_on_write():
    """Entering a multipart blob from ``create_at_any_location`` raises ``FlyteAssertion``."""
    with test_utils.LocalTestFileSystem():
        write_blob = blobs.MultiPartBlob.create_at_any_location()
        with pytest.raises(_user_exceptions.FlyteAssertion):
            with write_blob:
                pass
def test_hive_queries(monkeypatch):
    """Compare generated Hive queries against expected text.

    ``_schema_impl._uuid`` is replaced with a stub whose ``uuid4().hex`` is
    always ``'test_uuid'``, so the generated table names are predictable.
    Queries are compared after collapsing every run of whitespace to a single
    space.
    """

    class FakeUUID4(object):
        def __init__(self):
            self.hex = 'test_uuid'

    class Uuid(object):
        def uuid4(self):
            return FakeUUID4()

    def collapse_whitespace(text):
        return " ".join(text.split())

    monkeypatch.setattr(_schema_impl, '_uuid', Uuid())

    all_types = _schema_impl.SchemaType([
        ('a', _primitives.Integer),
        ('b', _primitives.String),
        ('c', _primitives.Float),
        ('d', _primitives.Boolean),
        ('e', _primitives.Datetime),
    ])

    with _test_utils.LocalTestFileSystem():
        df, query = _schema_impl.Schema.create_from_hive_query(
            "SELECT a, b, c, d, e FROM some_place WHERE i = 0",
            stage_query="CREATE TEMPORARY TABLE some_place AS SELECT * FROM some_place_original",
            known_location="s3://my_fixed_path/",
            schema_type=all_types,
        )

        expected_select = """
        CREATE TEMPORARY TABLE some_place AS SELECT * FROM some_place_original;
        CREATE TEMPORARY TABLE test_uuid_tmp AS SELECT a, b, c, d, e FROM some_place WHERE i = 0;
        CREATE EXTERNAL TABLE test_uuid LIKE test_uuid_tmp STORED AS PARQUET;
        ALTER TABLE test_uuid SET LOCATION 's3://my_fixed_path/';
        INSERT OVERWRITE TABLE test_uuid
            SELECT
                a as a,
                b as b,
                CAST(c as double) c,
                d as d,
                e as e
            FROM test_uuid_tmp;
        DROP TABLE test_uuid;
        """
        assert collapse_whitespace(query) == collapse_whitespace(expected_select)

        # Test adding partition
        expected_partition = """
        ALTER TABLE some_table ADD IF NOT EXISTS PARTITION (
            region = 'SEA',
            ds = '2017-01-01'
        ) LOCATION 's3://my_fixed_path/';
        ALTER TABLE some_table PARTITION (
            region = 'SEA',
            ds = '2017-01-01'
        ) SET LOCATION 's3://my_fixed_path/';
        """
        partition_query = df.get_write_partition_to_hive_table_query(
            'some_table',
            partitions=_collections.OrderedDict([('region', 'SEA'), ('ds', '2017-01-01')]),
        )
        assert collapse_whitespace(partition_query) == collapse_whitespace(expected_partition)