def copy_records_to_db(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Bulk-insert the records object stored under ``from_name`` into ``to_name``.

    The source must be a ``PythonStorageApi`` and the destination a
    ``DatabaseStorageApi``; both are checked with assertions. ``conversion``
    is accepted but not used by this function.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, DatabaseStorageApi)
    source_records = from_storage_api.get(from_name).records_object
    to_storage_api.bulk_insert_records(to_name, source_records, schema)
def copy_delim_file_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Parse the delimited file ``from_name`` and store it as records.

    The file is opened through a ``FileSystemStorageApi``, all of its lines
    are passed to ``read_csv``, and the resulting list is wrapped with
    ``RecordsFormat``, conformed to ``schema`` and put into the
    ``PythonStorageApi`` under ``to_name``. ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, FileSystemStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    with from_storage_api.open(from_name) as src_file:
        lines = src_file.readlines()
        # Materialize the parsed rows while the file is still open.
        parsed = list(read_csv(lines))
        to_mdr = as_records(
            parsed, data_format=RecordsFormat, schema=schema
        ).conform_to_schema()
        to_storage_api.put(to_name, to_mdr)
Example #3
0
def copy_file_object_iterator_to_records_iterator(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert a stored iterator of delimited chunks into a records iterator.

    The stored records object is passed through ``with_header`` and each
    resulting chunk is parsed lazily with ``read_csv``. The generator is
    wrapped with ``RecordsIteratorFormat``, conformed to ``schema`` and stored
    under ``to_name``. Both storage APIs must be ``PythonStorageApi``;
    ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    headed_chunks = with_header(source.records_object)
    parsed_chunks = (read_csv(piece) for piece in headed_chunks)
    result = as_records(
        parsed_chunks, data_format=RecordsIteratorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)
Example #4
0
def copy_df_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert the stored dataframe ``from_name`` to records under ``to_name``.

    The stored object is run through ``dataframe_to_records`` with ``schema``,
    wrapped with ``RecordsFormat``, conformed to ``schema`` and put into the
    destination. Both storage APIs must be ``PythonStorageApi``;
    ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    records = dataframe_to_records(source.records_object, schema)
    result = as_records(
        records, data_format=RecordsFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)
Example #5
0
def copy_file_object_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Parse the stored file object ``from_name`` into records at ``to_name``.

    The stored records object is handed to ``read_csv``; the result is wrapped
    with ``RecordsFormat``, conformed to ``schema`` and stored. Both storage
    APIs must be ``PythonStorageApi``; ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    parsed = read_csv(source.records_object)
    result = as_records(
        parsed, data_format=RecordsFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)
Example #6
0
def copy_df_iterator_to_records_iterator(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert a stored iterator of dataframes into a records iterator.

    Each item of the stored records object is lazily converted with
    ``dataframe_to_records``; the generator is wrapped with
    ``RecordsIteratorFormat``, conformed to ``schema`` and stored under
    ``to_name``. Both storage APIs must be ``PythonStorageApi``;
    ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    frames = from_storage_api.get(from_name).records_object
    converted = (dataframe_to_records(frame, schema) for frame in frames)
    result = as_records(
        converted, data_format=RecordsIteratorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)
def copy_delim_file_to_file_object(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Store the opened delimited file ``from_name`` as a file-object record.

    The file handle returned by the ``FileSystemStorageApi`` is wrapped with
    ``DelimitedFileObjectFormat``, conformed to ``schema`` and put into the
    ``PythonStorageApi`` under ``to_name``. ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, FileSystemStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    with from_storage_api.open(from_name) as handle:
        # NOTE(review): the handle itself (not its contents) is wrapped and
        # stored while still inside this `with`; whether it stays readable
        # after the block exits depends on `open`/`put` -- confirm.
        wrapped = as_records(
            handle, data_format=DelimitedFileObjectFormat, schema=schema
        )
        conformed = wrapped.conform_to_schema()
        to_storage_api.put(to_name, conformed)
def copy_db_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Select every row of table ``from_name`` and store it as records.

    The query result is converted with ``result_proxy_to_records``, wrapped
    with ``RecordsFormat``, conformed to ``schema`` and put into the
    ``PythonStorageApi`` under ``to_name``. ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, DatabaseStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    query = f"select * from {from_name}"
    with from_storage_api.execute_sql_result(query) as result_proxy:
        rows = result_proxy_to_records(result_proxy)
        to_mdr = as_records(
            rows, data_format=RecordsFormat, schema=schema
        ).conform_to_schema()
        to_storage_api.put(to_name, to_mdr)
def copy_file_object_to_delim_file(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Write the stored file object(s) ``from_name`` to the file ``to_name``.

    The stored records object may be a single file object (an ``IOBase``) or
    an iterable of items. A single file object is treated as a one-element
    sequence. Each item is then written to the destination file, which is
    opened in ``"w"`` mode through the ``FileSystemStorageApi``.

    File-like items are copied by content with ``shutil.copyfileobj``; passing
    a file object directly to ``write()`` raises ``TypeError`` on standard
    text files. Items that are not ``IOBase`` instances are written with
    ``write()`` as before. ``conversion`` and ``schema`` are not used.
    """
    import shutil  # local import: only this function needs it

    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, FileSystemStorageApi)
    mdr = from_storage_api.get(from_name)
    source = mdr.records_object
    file_obj_iterator = [source] if isinstance(source, IOBase) else source
    with to_storage_api.open(to_name, "w") as to_file:
        for file_obj in file_obj_iterator:
            if isinstance(file_obj, IOBase):
                # Stream the contents instead of handing the object to write().
                shutil.copyfileobj(file_obj, to_file)
            else:
                to_file.write(file_obj)
Example #10
0
def copy_records_iterator_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Flatten a stored iterator of record batches into one records list.

    Every batch yielded by the stored records object is consumed and its
    items collected, in order, into a single list. The list is wrapped with
    ``RecordsFormat``, conformed to ``schema`` and stored under ``to_name``.
    Both storage APIs must be ``PythonStorageApi``; ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    batches = from_storage_api.get(from_name).records_object
    flattened = [record for batch in batches for record in batch]
    result = as_records(
        flattened, data_format=RecordsFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)
def copy_db_to_cursor(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Store the result of ``select *`` on table ``from_name`` as a cursor.

    A connection is obtained from the ``DatabaseStorageApi`` engine, the
    query is executed on it, and the execution result is wrapped with
    ``DatabaseCursorFormat``, conformed to ``schema`` and put into the
    ``PythonStorageApi`` under ``to_name``. ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, DatabaseStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    query = f"select * from {from_name}"
    engine = from_storage_api.get_engine()
    # The connection is intentionally never closed here.
    # TODO: add "closeable" to the MDR and handle?
    connection = engine.connect()
    cursor = connection.execute(query)
    to_mdr = as_records(
        cursor, data_format=DatabaseCursorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, to_mdr)
Example #12
0
def copy_file_object_to_records_iterator(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert a stored delimited file object into a records iterator.

    The stored records object is split with ``iterate_chunks`` (chunk
    argument 1000), passed through ``with_header``, and each chunk is parsed
    lazily with ``read_csv``. The generator is wrapped with
    ``RecordsIteratorFormat``, conformed to ``schema`` and stored under
    ``to_name``. Both storage APIs must be ``PythonStorageApi``;
    ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    # TODO: hard-coded chunk size; there is no obvious place to configure it.
    chunk_size = 1000
    source = from_storage_api.get(from_name)
    # Each chunk must keep the header when iterating a delimited file object.
    headed_chunks = with_header(iterate_chunks(source.records_object, chunk_size))
    parsed_chunks = (read_csv(piece) for piece in headed_chunks)
    result = as_records(
        parsed_chunks, data_format=RecordsIteratorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)
def copy_records_to_delim_file(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Write stored records ``from_name`` to the delimited file ``to_name``.

    If the stored records object is not an ``Iterator`` it is treated as a
    single batch. Each batch is written with ``write_csv`` to the destination
    file opened in ``"w"`` mode; the first call passes ``append=False`` and
    every later call passes ``append=True``. ``conversion`` and ``schema``
    are not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, FileSystemStorageApi)
    payload = from_storage_api.get(from_name).records_object
    batches = payload if isinstance(payload, Iterator) else [payload]
    with to_storage_api.open(to_name, "w") as out_file:
        for position, batch in enumerate(batches):
            # Only the first batch is written with append=False.
            write_csv(batch, out_file, append=position > 0)
Example #14
0
def copy_dataframe_iterator_to_dataframe(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Concatenate a stored iterator of dataframes into a single dataframe.

    All items of the stored records object are collected and joined with
    ``pd.concat``; the result is wrapped with ``DataFrameFormat``, conformed
    to ``schema`` and stored under ``to_name``. Both storage APIs must be
    ``PythonStorageApi``; ``conversion`` is not used.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    frames = list(from_storage_api.get(from_name).records_object)
    combined = pd.concat(frames)
    result = as_records(
        combined, data_format=DataFrameFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, result)