Example no. 1
    def inner(table_name: str, repo: Dolt) -> DoltTableUpdate:
        current_branch, _ = repo.branch()
        if branch and branch != current_branch:
            repo.checkout(branch)

        query_commit = commit_ref or list(repo.log().keys())[0]
        table = get_table_metadata(repo.get_engine(), table_name)
        from_commit, to_commit = get_from_commit_to_commit(repo, query_commit)
        pks_to_drop = get_dropped_pks(repo.get_engine(), table, from_commit,
                                      to_commit)
        result = _read_from_dolt_history(repo.get_engine(), table,
                                         query_commit)
        return pks_to_drop, result
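For context, inner here is returned by an enclosing factory function that supplies branch and commit_ref through its closure; neither the factory's name nor its exact signature appears in the example, so the sketch below is an assumption about that pattern rather than the library's actual API.

from typing import Callable

def get_table_reader(commit_ref: str = None,
                     branch: str = None) -> Callable[[str, Dolt], DoltTableUpdate]:
    # Hypothetical factory: binds commit_ref and branch into the closure
    # used by inner(table_name, repo) shown above.
    def inner(table_name: str, repo: Dolt) -> DoltTableUpdate:
        ...  # body as in Example no. 1

    return inner

# Usage: build a reader pinned to a branch, then call it once per table.
reader = get_table_reader(branch='master')
pks_to_drop, rows = reader('players', repo)  # repo: an existing Dolt instance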
Example no. 2
def write_to_table(repo: Dolt,
                   table: Table,
                   data: List[dict],
                   commit: bool = False,
                   message: str = None):
    """
    Given a repo, table, and data, will try and use the repo's MySQL Server instance to write the provided data to the
    table. Since Dolt does not yet support ON DUPLICATE KEY clause to INSERT statements we also have to separate
    updates from inserts and run sets of statements.
    :param repo:
    :param table:
    :param data:
    :param commit:
    :param message:
    :return:
    """
    coerced_data = list(clean_types(data))
    inserts, updates = get_inserts_and_updates(repo.get_engine(), table,
                                               coerced_data)
    if inserts:
        logger.info('Inserting {} rows'.format(len(inserts)))
        with repo.get_engine().connect() as conn:
            conn.execute(table.insert(), inserts)

    # We need to prefix the columns with "_" in order to use bindparam properly
    _updates = deepcopy(updates)
    for dic in _updates:
        for col in list(dic.keys()):
            dic['_{}'.format(col)] = dic.pop(col)

    if _updates:
        logger.info('Updating {} rows'.format(len(_updates)))
        with repo.get_engine().connect() as conn:
            statement = table.update()
            for pk_col in [
                    col.name for col in table.columns if col.primary_key
            ]:
                statement = statement.where(
                    table.c[pk_col] == bindparam('_{}'.format(pk_col)))
            non_pk_cols = [
                col.name for col in table.columns if not col.primary_key
            ]
            statement = statement.values(
                {col: bindparam('_{}'.format(col))
                 for col in non_pk_cols})
            conn.execute(statement, _updates)

    if commit:
        repo.add(str(table.name))
        message = message or 'Inserting {} records at {}'.format(
            len(data), datetime.now())
        repo.commit(message)
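A minimal usage sketch for write_to_table, assuming a running Dolt SQL server and a players table that already exists; the table name and rows below are illustrative:

from sqlalchemy import MetaData

# repo: an existing Dolt instance with its SQL server running
metadata = MetaData(bind=repo.get_engine())
metadata.reflect()
players = metadata.tables['players']  # hypothetical, pre-existing table

rows = [{'id': 1, 'name': 'Roger'}, {'id': 2, 'name': 'Rafael'}]
write_to_table(repo, players, rows, commit=True, message='Add two players')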
Example no. 3
    def inner(table_name: str, repo: Dolt) -> DoltTableUpdate:
        current_branch, _ = repo.branch()
        if branch and branch != current_branch:
            repo.checkout(branch)

        from_commit, to_commit = get_from_commit_to_commit(repo, commit_ref)
        metadata = MetaData(bind=repo.get_engine())
        metadata.reflect()
        table = metadata.tables[table_name]
        pks_to_drop = get_dropped_pks(repo.get_engine(), table, from_commit,
                                      to_commit)
        result = _read_from_dolt_diff(repo.get_engine(), table, from_commit,
                                      to_commit)
        return pks_to_drop, result
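For context, the (pks_to_drop, result) pair returned by this reader is typically consumed by a writer on the target side of the sync; the helper names in the sketch below are hypothetical and only illustrate that split of deletes and upserts:

pks_to_drop, rows = reader('players', repo)            # reader built as in the factory sketch above
drop_primary_keys(target_engine, table, pks_to_drop)   # hypothetical: delete rows dropped between commits
upsert_rows(target_engine, table, rows)                # hypothetical: write the changed rows to the target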
Example no. 4
def sync_schema_to_dolt(source_engine: Engine, repo: Dolt, table_map: Mapping[str, str], type_mapping: dict):
    """

    :param source_engine:
    :param repo:
    :param table_map:
    :param type_mapping:
    :return:
    """
    source_metadata = MetaData(bind=source_engine)
    source_metadata.reflect()
    target_metadata = MetaData(bind=repo.get_engine())
    target_metadata.reflect()
    for source_table_name, target_table_name in table_map.items():
        source_table = source_metadata.tables[source_table_name]
        target_table = coerce_schema_to_dolt(target_table_name, source_table, type_mapping)
        if target_table_name in target_metadata.tables.keys():
            target_table.drop(repo.get_engine())

        target_table.create(repo.get_engine())
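A minimal usage sketch for sync_schema_to_dolt; the connection string, table names, and the single type override below are assumptions for illustration, since the semantics of type_mapping are defined by coerce_schema_to_dolt, which is not shown here:

from sqlalchemy import create_engine
from sqlalchemy.dialects import mysql

# repo: an existing Dolt instance with its SQL server running
source_engine = create_engine('mysql+pymysql://user:***@source-host:3306/app')  # hypothetical source DB
table_map = {'players': 'players'}            # copy the source table under the same name
type_mapping = {mysql.JSON: mysql.LONGTEXT}   # assumed: translate a source type Dolt cannot store

sync_schema_to_dolt(source_engine, repo, table_map, type_mapping)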
Example no. 5
def _create_table_inferred(repo: Dolt,
                           table_name: str,
                           data: Mapping[str, List[Any]],
                           primary_keys: List[str]):
    # generate and execute a create table statement
    cols_to_types = {}
    for col_name, list_of_values in data.items():
        # Just take the first non-null value to infer the type
        first_non_null = None
        for val in list_of_values:
            if val is not None:
                first_non_null = val
                break
        if first_non_null is None:
            raise ValueError(
                'Cannot provide an empty list, types cannot be inferred')
        cols_to_types[col_name] = _get_col_type(first_non_null, list_of_values)

    metadata = MetaData(bind=repo.get_engine())
    table = _get_table_def(metadata, table_name, cols_to_types, primary_keys)
    table.create()
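A usage sketch for _create_table_inferred: each column's type is taken from its first non-null value, so the call below would create a players table with an integer id and a string name column (the exact SQL types depend on _get_col_type, which is not shown here):

# repo: an existing Dolt instance with its SQL server running
_create_table_inferred(repo,
                       'players',
                       {'id': [1, 2], 'name': ['Roger', 'Rafael']},
                       primary_keys=['id'])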
Example no. 6
def _dolt_table_read_helper(repo: Dolt, table_name: str):
    table = get_table_metadata(repo.get_engine(), table_name)
    with repo.get_engine().connect() as conn:
        result = conn.execute(table.select())
        return [dict(row) for row in result]
Example no. 7
def import_dict(repo: Dolt,
                table_name: str,
                data: Mapping[str, List[Any]],
                primary_keys: List[str] = None,
                import_mode: str = None,
                batch_size: int = DEFAULT_BATCH_SIZE):
    """
    Provides a column major interface for writing Python data structures to Dolt. Specifically, data should be a dict
    whose keys are column names and whose values are equal length lists of the values to be written to Dolt. The
    lists must consist of:
        - values that match the type of the corresponding column in the schema of the table being written to
        - values of the same type that can be coalesced to a Python type by the (very limited) type inference logic
          for generating a schema from a data structure

    Note it is necessary for all lists to be of the same length, since we must coalesce the lists into rows, and that
    does not make sense when the lists differ in length.

    Let's proceed with the example of creating a simple table and showing how to write some data structures:
        CREATE TABLE players (id INT, name VARCHAR(16), PRIMARY KEY (id))

    Now write in update mode:
    >>> dict_of_lists = {'id': [1, 2], 'name': ['Roger', 'Rafael']}
    >>> import_dict(repo, 'players', dict_of_lists, import_mode='update')

    Alternatively we can let the Python code infer a schema:
    >>> import_dict(repo, 'players', dict_of_lists, ['id'], import_mode='create')

    Assertions:
        - all list values are of equal length
        - when inferring a schema, each list value has elements of a type that can be mapped to a SQL type; the logic
          is currently very limited
        - when inferring a schema, primary_keys must be provided

    This function requires the Dolt SQL server to be running on the host and port provided, defaulting to
    127.0.0.1:3306.

    :param repo: Dolt repo whose SQL server the data is written to
    :param table_name: name of the table to write to (or create, when import_mode is create)
    :param data: dict mapping column names to equal length lists of values
    :param primary_keys: primary key column names, required when inferring a schema
    :param import_mode: either update or create
    :param batch_size: maximum number of rows written per INSERT batch
    :return:
    """
    assert import_mode in [UPDATE, CREATE]

    # Grab some basic information about the data
    assert data, 'Cannot provide an empty dictionary'
    row_count = len(list(data.values())[0])
    assert row_count > 0, 'Must provide at least a single row'
    assert all(len(val_list) == row_count for val_list in
               data.values()), 'Must provide value lists of uniform length'

    # If the table does not exist, create it using type inference to build a create statement
    if import_mode == CREATE:
        assert primary_keys, 'primary_keys need to be provided when inferring a schema'
        _create_table_inferred(repo, table_name, data, primary_keys)

    rows = []
    for i in range(row_count):
        rows.append({col: data[col][i] for col in data.keys()})

    clean_rows = coerce_dates(rows)

    logger.info('Inserting {row_count} rows into table {table_name}'.format(
        row_count=row_count, table_name=table_name))

    metadata = MetaData(bind=repo.get_engine())
    metadata.reflect()
    table = metadata.tables[table_name]

    for i in range(max(1, math.ceil(len(clean_rows) / batch_size))):
        batch_start = i * batch_size
        batch_end = min((i + 1) * batch_size, len(clean_rows))
        batch = clean_rows[batch_start:batch_end]
        logger.info('Writing records {} through {} of {} rows to Dolt'.format(
            batch_start, batch_end, len(clean_rows)))
        with repo.get_engine().connect() as conn:
            conn.execute(table.insert(), batch)
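The batching loop above issues ceil(len(clean_rows) / batch_size) INSERTs over half-open slices; with 2,500 rows and a batch size of 1,000 that means rows 0-1000, 1000-2000, and 2000-2500. A usage sketch, assuming the players table from the docstring already exists and passing batch_size explicitly rather than relying on DEFAULT_BATCH_SIZE (whose value is not shown here):

# repo: an existing Dolt instance with its SQL server running
dict_of_lists = {
    'id': list(range(2500)),
    'name': ['player_{}'.format(i) for i in range(2500)],
}
# Writes the 2,500 rows in three batches of at most 1,000 rows each.
import_dict(repo, 'players', dict_of_lists, import_mode='update', batch_size=1000)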