def _import_helper(repo: Dolt, table_name: str,
                   write_import_file: Callable[[str], None],
                   primary_keys: List[str], import_mode: str) -> None:
    """Write data to a temporary CSV and import it into *table_name* via
    ``dolt table import``.

    :param repo: Dolt repository to import into.
    :param table_name: name of the target table.
    :param write_import_file: callback that writes the CSV payload to the
        path it is given.
    :param primary_keys: column names that form the table's primary key.
    :param import_mode: a key of IMPORT_MODES_TO_FLAGS, or None to infer the
        mode (UPDATE when the table already exists, CREATE otherwise).
    :raises ValueError: if an explicit *import_mode* is not recognized.
    """
    import_modes = IMPORT_MODES_TO_FLAGS.keys()
    if import_mode is not None:
        # Raise instead of assert: asserts are stripped under `python -O`,
        # and the previous message wrongly referred to "update_mode".
        if import_mode not in import_modes:
            raise ValueError(
                'import_mode must be one of: {}'.format(import_modes))
    else:
        if table_name in [table.name for table in repo.ls()]:
            logger.info(
                'No import mode specified, table exists, using "{}"'.format(
                    UPDATE))
            import_mode = UPDATE
        else:
            # Fixed message: this branch runs when the table does NOT exist.
            logger.info(
                'No import mode specified, table does not exist, using "{}"'
                .format(CREATE))
            import_mode = CREATE

    import_flags = IMPORT_MODES_TO_FLAGS[import_mode]
    logger.info(
        'Importing to table {} in dolt directory located in {}, import mode {}'
        .format(table_name, repo.repo_dir(), import_mode))

    # NamedTemporaryFile keeps the file on disk while the handle is open;
    # close it explicitly (even on failure) rather than relying on GC.
    fp = tempfile.NamedTemporaryFile(suffix='.csv')
    try:
        write_import_file(fp.name)
        args = [
            'table', 'import', table_name,
            '--pk={}'.format(','.join(primary_keys))
        ] + import_flags
        repo.execute(args + [fp.name])
    finally:
        fp.close()
def inner(repo: Dolt):
    """Transform the fetched data and bulk-import it into *table*.

    Infers the import mode from the repo's existing tables when none was
    supplied, then returns the table name.
    """
    if import_mode:
        mode = import_mode
    else:
        existing_tables = {t.name for t in repo.ls()}
        mode = 'update' if table in existing_tables else 'create'
    transformed = _apply_file_transformers(get_data(), transformers)
    bulk_import(repo, table, transformed, pk_cols, import_mode=mode)
    return table
def inner(repo: Dolt):
    """Incrementally sync *table*: drop rows whose unique key is no longer
    present in the newly fetched data, then import only the new rows.

    :param repo: Dolt repository holding *table*.
    :raises ValueError: if *table* does not exist in the repository.
    :return: the table name.
    """
    # Always append the unique-key transformer so every row carries
    # INSERTED_ROW_HASH_COL.
    _transformers = transformers + [insert_unique_key
                                    ] if transformers else [insert_unique_key]
    data = _apply_df_transformers(get_data(), _transformers)
    if table not in [t.name for t in repo.ls()]:
        raise ValueError('Missing table')

    # Get existing PKs
    existing = read_table(repo, table)
    existing_pks = existing[INSERTED_ROW_HASH_COL].to_list()

    # Get proposed PKs; a set makes the membership tests below O(1)
    # instead of the original O(n*m) list scan.
    proposed_pks = set(data[INSERTED_ROW_HASH_COL].to_list())

    # Renamed loop variable: it previously shadowed the `existing` DataFrame.
    to_drop = [pk for pk in existing_pks if pk not in proposed_pks]

    if to_drop:
        iterator = iter(to_drop)
        # `while iterator:` was always truthy; make the loop exit explicit.
        while True:
            batch = list(itertools.islice(iterator, 30000))
            if not batch:
                break
            logger.info('Dropping batch of {} IDs from table {}'.format(
                len(batch), table))
            # NOTE(review): values are interpolated into the SQL string. The
            # PKs are row hashes produced by insert_unique_key, but a
            # parameterized query would be safer if that ever changes.
            drop_statement = '''
                DELETE FROM {table} WHERE {pk} in ("{pks_to_drop}")
            '''.format(table=table,
                       pk=INSERTED_ROW_HASH_COL,
                       pks_to_drop='","'.join(batch))
            repo.sql(query=drop_statement)

    # Import only rows whose key is not already in the table.
    new_data = data[~(data[INSERTED_ROW_HASH_COL].isin(existing_pks))]
    if not new_data.empty:
        logger.info('Importing {} records'.format(len(new_data)))
        import_df(repo, table, new_data, [INSERTED_ROW_HASH_COL], 'update')

    return table
def _import_helper(repo: Dolt, table_name: str,
                   write_import_file: Callable[[str], None],
                   primary_keys: List[str], import_mode: str) -> None:
    """Write data to a temporary CSV and import it into *table_name* via
    ``dolt table import``.

    :param repo: Dolt repository to import into.
    :param table_name: name of the target table.
    :param write_import_file: callback that writes the CSV payload to the
        path it is given.
    :param primary_keys: column names forming the primary key; required when
        the effective mode is CREATE.
    :param import_mode: a key of IMPORT_MODES_TO_FLAGS, or None to infer the
        mode (UPDATE when the table already exists, CREATE otherwise).
    :raises ValueError: if an explicit *import_mode* is unrecognized, or if
        mode CREATE is used without primary keys.
    """
    import_modes = IMPORT_MODES_TO_FLAGS.keys()
    if import_mode is not None:
        # Raise instead of assert: asserts are stripped under `python -O`,
        # and the previous message wrongly referred to "update_mode".
        if import_mode not in import_modes:
            raise ValueError(
                'import_mode must be one of: {}'.format(import_modes))
    else:
        if table_name in [table.name for table in repo.ls()]:
            logger.info(
                'No import mode specified, table exists, using "{}"'.format(
                    UPDATE))
            import_mode = UPDATE
        else:
            # Fixed message: this branch runs when the table does NOT exist.
            logger.info(
                'No import mode specified, table does not exist, using "{}"'
                .format(CREATE))
            import_mode = CREATE

    if import_mode == CREATE and primary_keys is None:
        raise ValueError(
            'Import mode CREATE requires a primary key to be specified')

    import_flags = IMPORT_MODES_TO_FLAGS[import_mode]
    logger.info(
        'Importing to table {} in dolt directory located in {}, import mode {}'
        .format(table_name, repo.repo_dir(), import_mode))

    # mkstemp (not the deprecated, race-prone mktemp) actually creates the
    # file; close the fd at once since write_import_file opens by path.
    fd, fname = tempfile.mkstemp(suffix='.csv')
    os.close(fd)
    try:
        write_import_file(fname)
        args = ['table', 'import', table_name] + import_flags
        if import_mode == CREATE:
            args += ['--pk={}'.format(','.join(primary_keys))]
        repo.execute(args + [fname])
    finally:
        # Always clean up the temp file, even if the import fails.
        if os.path.exists(fname):
            os.remove(fname)