def copy_records_to_db(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Bulk-insert the records stored under ``from_name`` into a database.

    Looks up ``from_name`` in the Python storage and passes the retrieved
    object's ``records_object`` to ``to_storage_api.bulk_insert_records``
    along with ``to_name`` and ``schema``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Destination name handed to ``bulk_insert_records``.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``DatabaseStorageApi``.
        schema: Forwarded unchanged to ``bulk_insert_records``.

    Raises:
        AssertionError: If either storage API is not of the required type.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, DatabaseStorageApi)
    source = from_storage_api.get(from_name)
    payload = source.records_object
    to_storage_api.bulk_insert_records(to_name, payload, schema)
Exemple #2
0
def copy_file_object_iterator_to_records_iterator(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert an iterator of delimited chunks into an iterator of records.

    Each chunk produced by ``with_header(records_object)`` is parsed lazily
    with ``read_csv``; the resulting generator is wrapped via ``as_records``
    (``RecordsIteratorFormat``), conformed to ``schema`` and stored under
    ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    chunks = with_header(source.records_object)
    # Generator keeps parsing lazy: a chunk is only read when it is consumed.
    parsed_chunks = (read_csv(piece) for piece in chunks)
    converted = as_records(
        parsed_chunks, data_format=RecordsIteratorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)
Exemple #3
0
def copy_df_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert the dataframe stored under ``from_name`` into records.

    The stored ``records_object`` is run through ``dataframe_to_records``
    with ``schema``; the result is wrapped via ``as_records``
    (``RecordsFormat``), conformed to the schema and stored under
    ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``dataframe_to_records`` and ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    records = dataframe_to_records(source.records_object, schema)
    converted = as_records(
        records, data_format=RecordsFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)
Exemple #4
0
def copy_file_object_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Parse the file object stored under ``from_name`` into records.

    The stored ``records_object`` is parsed in one go with ``read_csv``; the
    result is wrapped via ``as_records`` (``RecordsFormat``), conformed to
    ``schema`` and stored under ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    parsed = read_csv(source.records_object)
    converted = as_records(
        parsed, data_format=RecordsFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)
Exemple #5
0
def copy_df_iterator_to_records_iterator(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert an iterator of dataframes into an iterator of records.

    Each item of the stored ``records_object`` is converted lazily with
    ``dataframe_to_records``; the resulting generator is wrapped via
    ``as_records`` (``RecordsIteratorFormat``), conformed to ``schema`` and
    stored under ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``dataframe_to_records`` and ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    frames = source.records_object
    # Generator keeps conversion lazy: one dataframe is converted per step.
    record_batches = (dataframe_to_records(frame, schema) for frame in frames)
    converted = as_records(
        record_batches, data_format=RecordsIteratorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)
def copy_file_object_to_delim_file(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Write the file object(s) stored under ``from_name`` to a file.

    The stored ``records_object`` is either a single file object (an
    ``IOBase`` instance) or an iterable of items. Every item is written, in
    order, to the destination opened with ``to_storage_api.open(to_name, "w")``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name of the destination file to open for writing.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``FileSystemStorageApi``.
        schema: Not read by this function.

    Raises:
        AssertionError: If either storage API is not of the required type.
    """
    # Local import so this block does not depend on a file-level import.
    import shutil

    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, FileSystemStorageApi)
    mdr = from_storage_api.get(from_name)
    file_obj_iterator = mdr.records_object
    # A lone file object is itself iterable (line by line), so wrap it
    # explicitly to treat it as a single item.
    if isinstance(mdr.records_object, IOBase):
        file_obj_iterator = [file_obj_iterator]
    with to_storage_api.open(to_name, "w") as to_file:
        for file_obj in file_obj_iterator:
            if hasattr(file_obj, "read"):
                # Passing a file object straight to write() raises TypeError
                # on standard file handles; stream its contents instead.
                shutil.copyfileobj(file_obj, to_file)
            else:
                # Plain string chunks are written as they are.
                to_file.write(file_obj)
Exemple #7
0
def copy_records_iterator_to_records(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Flatten an iterator of record batches into a single list of records.

    Every batch yielded by the stored ``records_object`` is consumed and its
    items collected, in order, into one list. The list is wrapped via
    ``as_records`` (``RecordsFormat``), conformed to ``schema`` and stored
    under ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    # Materializes the whole iterator in memory.
    flattened = [record for batch in source.records_object for record in batch]
    converted = as_records(
        flattened, data_format=RecordsFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)
Exemple #8
0
def copy_file_object_to_records_iterator(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Convert a delimited file object into an iterator of records.

    The stored ``records_object`` is split with ``iterate_chunks(..., 1000)``,
    passed through ``with_header`` and each resulting chunk is parsed lazily
    with ``read_csv``. The generator is wrapped via ``as_records``
    (``RecordsIteratorFormat``), conformed to ``schema`` and stored under
    ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    # TODO: the chunk size of 1000 is hard-coded; there is currently no way
    # to configure it through this signature.
    raw_chunks = iterate_chunks(source.records_object, 1000)
    # Note: the header must be kept on each chunk when iterating a delimited
    # file object, hence with_header.
    headed_chunks = with_header(raw_chunks)
    parsed_chunks = (read_csv(piece) for piece in headed_chunks)
    converted = as_records(
        parsed_chunks, data_format=RecordsIteratorFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)
def copy_records_to_delim_file(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Write the records stored under ``from_name`` to a delimited file.

    The stored ``records_object`` is treated as a sequence of batches when it
    is an ``Iterator`` and as a single batch otherwise. Each batch is written
    with ``write_csv`` to the file opened via
    ``to_storage_api.open(to_name, "w")``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name of the destination file to open for writing.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``FileSystemStorageApi``.
        schema: Not read by this function.

    Raises:
        AssertionError: If either storage API is not of the required type.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, FileSystemStorageApi)
    source = from_storage_api.get(from_name)
    if isinstance(source.records_object, Iterator):
        batches = source.records_object
    else:
        batches = [source.records_object]
    with to_storage_api.open(to_name, "w") as out_file:
        for position, batch in enumerate(batches):
            # First batch is written with append=False, all later ones with
            # append=True (presumably so the header is emitted only once;
            # confirm against write_csv).
            write_csv(batch, out_file, append=position > 0)
Exemple #10
0
def copy_dataframe_iterator_to_dataframe(
    from_name: str,
    to_name: str,
    conversion: Conversion,
    from_storage_api: StorageApi,
    to_storage_api: StorageApi,
    schema: Schema,
):
    """Concatenate an iterator of dataframes into a single dataframe.

    All items yielded by the stored ``records_object`` are collected and
    joined with ``pd.concat``. The result is wrapped via ``as_records``
    (``DataFrameFormat``), conformed to ``schema`` and stored under
    ``to_name``.

    Args:
        from_name: Name to fetch from ``from_storage_api``.
        to_name: Name under which the converted object is stored.
        conversion: Not read by this function; present because all copy
            functions in this file share the same signature.
        from_storage_api: Must be a ``PythonStorageApi``.
        to_storage_api: Must be a ``PythonStorageApi``.
        schema: Schema passed to ``as_records``.

    Raises:
        AssertionError: If either storage API is not a ``PythonStorageApi``.
        ValueError: Raised by ``pd.concat`` when the iterator yields nothing.
    """
    assert isinstance(from_storage_api, PythonStorageApi)
    assert isinstance(to_storage_api, PythonStorageApi)
    source = from_storage_api.get(from_name)
    # Materializes every dataframe in memory before concatenating.
    frames = list(source.records_object)
    combined = pd.concat(frames)
    converted = as_records(
        combined, data_format=DataFrameFormat, schema=schema
    ).conform_to_schema()
    to_storage_api.put(to_name, converted)