コード例 #1
0
ファイル: migratetable.py プロジェクト: cash2one/dataMining-1
def migrate_table_by_tsmp(source_config,
                          dest_config,
                          full_table,
                          tsmp_field,
                          from_tsmp,
                          to_tsmp,
                          block=2000,
                          mode='REPLACE'):
    """Copy the rows of a table whose timestamp column lies in a range.

    Rows are selected from the source host with
    ``tsmp_field BETWEEN from_tsmp AND to_tsmp`` (both ends inclusive) and
    written to the table of the same name on the destination host.

    Args:
        source_config: keyword arguments for ``ibbdlib.get_db_pool`` that
            describe the source host; must contain ``'host'``.
        dest_config: same, for the destination host.
        full_table: name of the table to copy, used on both hosts.
        tsmp_field: column the range filter is applied to.
        from_tsmp: lower bound of the range.
        to_tsmp: upper bound of the range.
        block: forwarded to ``_yield_table_rows`` as ``block``.
        mode: SQL verb placed in front of ``INTO`` (default ``REPLACE``).
    """
    log.info('migrate %s by tsmp from %s to %s', full_table,
             source_config['host'], dest_config['host'])

    # NOTE(review): identifiers and bounds are interpolated straight into
    # the SQL text below, so they must come from a trusted caller.
    conn_pool_source = ibbdlib.get_db_pool(**source_config)
    try:
        conn_pool_dest = ibbdlib.get_db_pool(**dest_config)
        try:
            table_info = _describe_table(conn_pool_source, full_table)
            sql_query = "SELECT * FROM %s WHERE %s BETWEEN '%s' AND '%s'" % (
                full_table, tsmp_field, from_tsmp, to_tsmp)
            # One placeholder per entry returned by _describe_table.
            insert_query = '%s INTO %s values(%s)' % (
                mode, full_table, ','.join(['%s'] * len(table_info)))

            log.info('block %d', block)
            rows_count = 0
            row_blocks = _yield_table_rows(conn_pool_source, sql_query,
                                           block=block)
            for block_cursor, rows in enumerate(row_blocks, 1):
                log.info('block cursor %d', block_cursor)
                rows_count += len(rows)
                _execute_sql_stms(conn_pool_dest, insert_query, rows)
            log.info('migrate rows %d', rows_count)
        finally:
            # Release the pools even when a query fails part-way through.
            conn_pool_dest.disconnect()
    finally:
        conn_pool_source.disconnect()
コード例 #2
0
ファイル: migratetable.py プロジェクト: cash2one/dataMining-1
def migrate_table_by_query(source_config,
                           dest_config,
                           source_full_table,
                           query,
                           block=3000,
                           dest_full_table=None,
                           mode='REPLACE'):
    """Copy the result of an arbitrary query into a destination table.

    Args:
        source_config: keyword arguments for ``ibbdlib.get_db_pool`` that
            describe the source host.
        dest_config: same, for the destination host.
        source_full_table: source table; its description determines how
            many value placeholders the insert statement gets.
        query: SELECT statement executed on the source host.
        block: forwarded to ``_yield_table_rows`` as ``block``.
        dest_full_table: table written to on the destination host; falls
            back to ``source_full_table`` when falsy.
        mode: SQL verb placed in front of ``INTO`` (default ``REPLACE``).
    """
    conn_pool_source = ibbdlib.get_db_pool(**source_config)
    try:
        conn_pool_dest = ibbdlib.get_db_pool(**dest_config)
        try:
            table_info = _describe_table(conn_pool_source, source_full_table)
            # NOTE(review): the placeholder count comes from the source
            # table, so ``query`` is expected to return that many columns.
            insert_query = '%s INTO %s values(%s)' % (
                mode,
                dest_full_table or source_full_table,
                ','.join(['%s'] * len(table_info)))

            log.info('block %d', block)
            rows_count = 0
            row_blocks = _yield_table_rows(conn_pool_source, query,
                                           block=block)
            for block_cursor, rows in enumerate(row_blocks, 1):
                log.info('block cursor %d', block_cursor)
                rows_count += len(rows)
                _execute_sql_stms(conn_pool_dest, insert_query, rows)
            log.info('migrate rows %d', rows_count)
        finally:
            # Release the pools even when a query fails part-way through.
            conn_pool_dest.disconnect()
    finally:
        conn_pool_source.disconnect()
コード例 #3
0
def migrate_table_by_query(
    source_config,
    dest_config,
    source_full_table,
    query,
    block=3000,
    dest_full_table=None,
    mode='REPLACE'
):
    """Copy the result of an arbitrary query into a destination table.

    Args:
        source_config: keyword arguments for ``ibbdlib.get_db_pool`` that
            describe the source host.
        dest_config: same, for the destination host.
        source_full_table: source table; its description determines how
            many value placeholders the insert statement gets.
        query: SELECT statement executed on the source host.
        block: forwarded to ``_yield_table_rows`` as ``block``.
        dest_full_table: table written to on the destination host; falls
            back to ``source_full_table`` when falsy.
        mode: SQL verb placed in front of ``INTO`` (default ``REPLACE``).
    """
    conn_pool_source = ibbdlib.get_db_pool(**source_config)
    try:
        conn_pool_dest = ibbdlib.get_db_pool(**dest_config)
        try:
            table_info = _describe_table(conn_pool_source, source_full_table)
            # NOTE(review): the placeholder count comes from the source
            # table, so ``query`` is expected to return that many columns.
            insert_query = '%s INTO %s values(%s)' % (
                mode,
                dest_full_table or source_full_table,
                ','.join(['%s'] * len(table_info)))

            log.info('block %d', block)
            rows_count = 0
            row_blocks = _yield_table_rows(conn_pool_source, query, block=block)
            for block_cursor, rows in enumerate(row_blocks, 1):
                log.info('block cursor %d', block_cursor)
                rows_count += len(rows)
                _execute_sql_stms(conn_pool_dest, insert_query, rows)
            log.info('migrate rows %d', rows_count)
        finally:
            # Release the pools even when a query fails part-way through.
            conn_pool_dest.disconnect()
    finally:
        conn_pool_source.disconnect()
コード例 #4
0
ファイル: etlBySQL.py プロジェクト: cash2one/dataMining-1
def _get_db_pool(host='223.4.246.146',
                 user='******',
                 passwd='huangj',
                 max_connections=500):
    """Return a connection pool created by ``ibbdlib.get_db_pool``.

    Called without arguments it connects exactly as before; the keyword
    parameters let a caller point the pool at another server without
    editing this function.

    NOTE(review): the defaults embed a host address and credentials in
    source control; consider moving them to configuration.

    Args:
        host: database server address.
        user: login name.
        passwd: login password.
        max_connections: forwarded to ``ibbdlib.get_db_pool``.
    """
    return ibbdlib.get_db_pool(max_connections=max_connections,
                               host=host,
                               user=user,
                               passwd=passwd)
コード例 #5
0
def migrate_table_by_tsmp(
    source_config,
    dest_config,
    full_table,
    tsmp_field,
    from_tsmp,
    to_tsmp,
    block=2000,
    mode='REPLACE'
):
    """Copy the rows of a table whose timestamp column lies in a range.

    Rows are selected from the source host with
    ``tsmp_field BETWEEN from_tsmp AND to_tsmp`` (both ends inclusive) and
    written to the table of the same name on the destination host.

    Args:
        source_config: keyword arguments for ``ibbdlib.get_db_pool`` that
            describe the source host; must contain ``'host'``.
        dest_config: same, for the destination host.
        full_table: name of the table to copy, used on both hosts.
        tsmp_field: column the range filter is applied to.
        from_tsmp: lower bound of the range.
        to_tsmp: upper bound of the range.
        block: forwarded to ``_yield_table_rows`` as ``block``.
        mode: SQL verb placed in front of ``INTO`` (default ``REPLACE``).
    """
    log.info('migrate %s by tsmp from %s to %s',
             full_table,
             source_config['host'],
             dest_config['host'])

    # NOTE(review): identifiers and bounds are interpolated straight into
    # the SQL text below, so they must come from a trusted caller.
    conn_pool_source = ibbdlib.get_db_pool(**source_config)
    try:
        conn_pool_dest = ibbdlib.get_db_pool(**dest_config)
        try:
            table_info = _describe_table(conn_pool_source, full_table)
            sql_query = "SELECT * FROM %s WHERE %s BETWEEN '%s' AND '%s'" % (
                full_table, tsmp_field, from_tsmp, to_tsmp)
            # One placeholder per entry returned by _describe_table.
            insert_query = '%s INTO %s values(%s)' % (
                mode,
                full_table,
                ','.join(['%s'] * len(table_info)))

            log.info('block %d', block)
            rows_count = 0
            row_blocks = _yield_table_rows(conn_pool_source, sql_query, block=block)
            for block_cursor, rows in enumerate(row_blocks, 1):
                log.info('block cursor %d', block_cursor)
                rows_count += len(rows)
                _execute_sql_stms(conn_pool_dest, insert_query, rows)
            log.info('migrate rows %d', rows_count)
        finally:
            # Release the pools even when a query fails part-way through.
            conn_pool_dest.disconnect()
    finally:
        conn_pool_source.disconnect()
コード例 #6
0
ファイル: etl.py プロジェクト: cash2one/dataMining-1
def _get_db_conn_pool():
    """Return a connection pool built by ``ibbdlib.get_db_pool`` with no arguments."""
    pool = ibbdlib.get_db_pool()
    return pool
コード例 #7
0
ファイル: etlBySQL.py プロジェクト: ruige123456/dataMining
def _get_db_pool(host="223.4.246.146", user="******", passwd="huangj", max_connections=500):
    """Return a connection pool created by ``ibbdlib.get_db_pool``.

    Called without arguments it connects exactly as before; the keyword
    parameters let a caller point the pool at another server without
    editing this function.

    NOTE(review): the defaults embed a host address and credentials in
    source control; consider moving them to configuration.

    Args:
        host: database server address.
        user: login name.
        passwd: login password.
        max_connections: forwarded to ``ibbdlib.get_db_pool``.
    """
    return ibbdlib.get_db_pool(max_connections=max_connections, host=host, user=user, passwd=passwd)
コード例 #8
0
ファイル: etl.py プロジェクト: ruige123456/dataMining
def _get_db_conn_pool():
    """Return a connection pool built by ``ibbdlib.get_db_pool`` with no arguments."""
    pool = ibbdlib.get_db_pool()
    return pool
コード例 #9
0
ファイル: etl_dispatch.py プロジェクト: cash2one/dataMining-1
def _get_db_pool():
    """Return a pool of up to 1000 connections for the ``db_server`` settings.

    ``db_server`` is a mapping defined outside this function; its items are
    passed to ``ibbdlib.get_db_pool`` as keyword arguments.
    """
    pool = ibbdlib.get_db_pool(max_connections=1000, **db_server)
    return pool
コード例 #10
0
ファイル: etlByHJ.py プロジェクト: ruige123456/dataMining
def _get_db_pool(host='223.4.246.146', user='******', passwd='huangj', max_connections=500):
    """Return a connection pool created by ``ibbdlib.get_db_pool``.

    The defaults are the connection parameters that have to be adjusted
    per deployment; called without arguments the function connects exactly
    as before, and a caller can now override them without editing it.

    NOTE(review): the defaults embed a host address and credentials in
    source control; consider moving them to configuration.

    Args:
        host: database server address.
        user: login name.
        passwd: login password.
        max_connections: forwarded to ``ibbdlib.get_db_pool``.
    """
    return ibbdlib.get_db_pool(max_connections=max_connections, host=host, user=user, passwd=passwd)
コード例 #11
0
ファイル: migratetable.py プロジェクト: cash2one/dataMining-1
def migrate_table(source_config,
                  dest_config,
                  full_table,
                  incre_field=None,
                  incre_from=None,
                  incre_to=None,
                  block=2000,
                  mode='REPLACE'):
    """Migrate a table from the source host to the destination host.

    Args:
        source_config: source host settings, passed as keyword arguments to
            ``ibbdlib.get_db_pool``, e.g. ``{'host': '', 'user': '',
            'passwd': ''}``.
        dest_config: destination host settings, same shape.
        full_table: qualified table name such as ``mysql.user``.
        incre_field: column driving incremental migration, such as
            ``population_tsmp``. When falsy the whole table is copied.
        incre_from: first day to migrate (date, datetime or date string).
            When falsy it is derived from the tables, see step 4.
        incre_to: day at which the migration stops, such as ``2012-12-12``
            (date, datetime or date string). Defaults to today.
        block: forwarded to ``_yield_table_rows`` as ``block``.
        mode: insert verb: REPLACE / INSERT IGNORE / INSERT.

    Raises:
        ValueError: ``full_table`` contains no ``.``.

    Steps:
        1. Check that the table exists on the destination host; if it is
           missing, or its column count differs from the source, drop it
           and recreate it from the source table's DDL.
        2. Decide between full and incremental migration (incremental when
           ``incre_field`` is given).
        3. Full migration simply selects every row.
        4. Incremental migration first works out the start day; the end
           defaults to midnight today.
           (1) If the destination has no timestamp yet, start at the
               earliest timestamp of the source table.
           (2) Otherwise start the day after the destination's latest
               timestamp (data completeness is not checked).
        5. Clean up: both pools are disconnected, also on early exit or
           error.
    """
    log.info('migrate table %s', full_table)
    log.info('source %s  destination %s',
             source_config['host'], dest_config['host'])

    conn_pool_source = ibbdlib.get_db_pool(**source_config)
    try:
        conn_pool_dest = ibbdlib.get_db_pool(**dest_config)
        try:
            _migrate_with_pools(conn_pool_source, conn_pool_dest, full_table,
                                incre_field, incre_from, incre_to, block,
                                mode)
        finally:
            conn_pool_dest.disconnect()
    finally:
        conn_pool_source.disconnect()


def _coerce_to_date(value):
    """Turn a date, datetime or date string into a plain ``date``."""
    if not isinstance(value, date):
        value = parser.parse(value)
    # datetime subclasses date; only datetime offers .date().
    return value.date() if hasattr(value, 'date') else value


def _copy_query_rows(conn_pool_source, conn_pool_dest, query_stm,
                     insert_query, block):
    """Copy the rows of one SELECT block by block; return the row count."""
    log.info('block %d', block)
    rows_count = 0
    row_blocks = _yield_table_rows(conn_pool_source, query_stm, block=block)
    for block_cursor, rows in enumerate(row_blocks, 1):
        log.info('block cursor %d', block_cursor)
        rows_count += len(rows)
        _execute_sql_stms(conn_pool_dest, insert_query, rows)
    return rows_count


def _migrate_with_pools(conn_pool_source, conn_pool_dest, full_table,
                        incre_field, incre_from, incre_to, block, mode):
    """Run the migration on open pools; the caller disconnects them."""
    dot = full_table.index('.')  # ValueError for an unqualified name
    db_name, tb_name = full_table[:dot], full_table[dot + 1:]

    if not _check_table_existence(conn_pool_source, db_name, tb_name):
        log.error('source table not exists')
        return

    # Create the destination table when it is missing or its column count
    # no longer matches the source.
    table_info = _describe_table(conn_pool_source, full_table)
    dest_usable = (
        _check_table_existence(conn_pool_dest, db_name, tb_name)
        and len(_describe_table(conn_pool_dest, full_table)) == len(table_info))
    if not dest_usable:
        log.warning('dest table not exists')
        # NOTE(review): a column-count mismatch discards existing
        # destination data here.
        _execute_sql_stm(conn_pool_dest,
                         'DROP TABLE IF EXISTS %s' % full_table)
        table_ddl = _get_table_ddl(conn_pool_source, full_table)
        log.info('create dest table')
        _execute_sql_stm(conn_pool_dest, table_ddl)

    # One placeholder per entry returned by _describe_table.
    insert_query = '%s INTO %s values(%s)' % (
        mode, full_table, ','.join(['%s'] * len(table_info)))

    if not incre_field:
        # No incremental column given: copy the whole table.
        log.info('migrate full table rows')
        rows_count = _copy_query_rows(conn_pool_source, conn_pool_dest,
                                      'SELECT * FROM %s' % full_table,
                                      insert_query, block)
        log.info('migrate complete within %d rows', rows_count)
        return

    # Incremental migration; the end day defaults to today.
    incre_to = _coerce_to_date(incre_to) if incre_to else date.today()

    if incre_from:
        migrate_date = _coerce_to_date(incre_from)
    else:
        # Timestamp range already present on the destination host.
        # NOTE(review): _get_table_tsmp_range is assumed to return a
        # (min, max) pair of datetimes or None values; confirm.
        dest_tsmp_range = _get_table_tsmp_range(conn_pool_dest, full_table,
                                                incre_field)
        if dest_tsmp_range[1]:
            migrate_date = dest_tsmp_range[1].date() + timedelta(1)
        else:
            # Destination is empty: start at the source's first timestamp.
            source_tsmp_range = _get_table_tsmp_range(
                conn_pool_source, full_table, incre_field)
            if not source_tsmp_range[0]:
                return
            migrate_date = source_tsmp_range[0].date()

    log.info('migrate from %s' % migrate_date)
    # "<" instead of "==": a start day past the end must not loop forever.
    while migrate_date < incre_to:
        next_date = migrate_date + timedelta(1)
        log.info('migrate from %s' % migrate_date)
        # NOTE(review): BETWEEN is inclusive at both ends, so a row stamped
        # exactly at the next midnight is selected on two consecutive days.
        query_stm = "SELECT * FROM %s WHERE %s BETWEEN '%s' AND '%s'" % (
            full_table, incre_field, migrate_date, next_date)
        rows_count = _copy_query_rows(conn_pool_source, conn_pool_dest,
                                      query_stm, insert_query, block)
        log.info('migrate complete within %d rows' % rows_count)
        migrate_date = next_date
コード例 #12
0
def migrate_table(
    source_config,
    dest_config,
    full_table,
    incre_field=None,
    incre_from=None,
    incre_to=None,
    block=2000,
    mode='REPLACE'
):
    """Migrate a table from the source host to the destination host.

    Args:
        source_config: source host settings, passed as keyword arguments to
            ``ibbdlib.get_db_pool``, e.g. ``{'host': '', 'user': '',
            'passwd': ''}``.
        dest_config: destination host settings, same shape.
        full_table: qualified table name such as ``mysql.user``.
        incre_field: column driving incremental migration, such as
            ``population_tsmp``. When falsy the whole table is copied.
        incre_from: first day to migrate (date, datetime or date string).
            When falsy it is derived from the tables, see step 4.
        incre_to: day at which the migration stops, such as ``2012-12-12``
            (date, datetime or date string). Defaults to today.
        block: forwarded to ``_yield_table_rows`` as ``block``.
        mode: insert verb: REPLACE / INSERT IGNORE / INSERT.

    Raises:
        ValueError: ``full_table`` contains no ``.``.

    Steps:
        1. Check that the table exists on the destination host; if it is
           missing, or its column count differs from the source, drop it
           and recreate it from the source table's DDL.
        2. Decide between full and incremental migration (incremental when
           ``incre_field`` is given).
        3. Full migration simply selects every row.
        4. Incremental migration first works out the start day; the end
           defaults to midnight today.
           (1) If the destination has no timestamp yet, start at the
               earliest timestamp of the source table.
           (2) Otherwise start the day after the destination's latest
               timestamp (data completeness is not checked).
        5. Clean up: both pools are disconnected, also on early exit or
           error.
    """
    log.info('migrate table %s', full_table)
    log.info('source %s  destination %s', source_config['host'], dest_config['host'])

    conn_pool_source = ibbdlib.get_db_pool(**source_config)
    try:
        conn_pool_dest = ibbdlib.get_db_pool(**dest_config)
        try:
            _migrate_with_pools(
                conn_pool_source,
                conn_pool_dest,
                full_table,
                incre_field,
                incre_from,
                incre_to,
                block,
                mode)
        finally:
            conn_pool_dest.disconnect()
    finally:
        conn_pool_source.disconnect()


def _coerce_to_date(value):
    """Turn a date, datetime or date string into a plain ``date``."""
    if not isinstance(value, date):
        value = parser.parse(value)
    # datetime subclasses date; only datetime offers .date().
    return value.date() if hasattr(value, 'date') else value


def _copy_query_rows(conn_pool_source, conn_pool_dest, query_stm, insert_query, block):
    """Copy the rows of one SELECT block by block; return the row count."""
    log.info('block %d', block)
    rows_count = 0
    row_blocks = _yield_table_rows(conn_pool_source, query_stm, block=block)
    for block_cursor, rows in enumerate(row_blocks, 1):
        log.info('block cursor %d', block_cursor)
        rows_count += len(rows)
        _execute_sql_stms(conn_pool_dest, insert_query, rows)
    return rows_count


def _migrate_with_pools(
    conn_pool_source,
    conn_pool_dest,
    full_table,
    incre_field,
    incre_from,
    incre_to,
    block,
    mode
):
    """Run the migration on open pools; the caller disconnects them."""
    dot = full_table.index('.')  # ValueError for an unqualified name
    db_name, tb_name = full_table[:dot], full_table[dot + 1:]

    if not _check_table_existence(conn_pool_source, db_name, tb_name):
        log.error('source table not exists')
        return

    # Create the destination table when it is missing or its column count
    # no longer matches the source.
    table_info = _describe_table(conn_pool_source, full_table)
    dest_usable = (
        _check_table_existence(conn_pool_dest, db_name, tb_name)
        and len(_describe_table(conn_pool_dest, full_table)) == len(table_info))
    if not dest_usable:
        log.warning('dest table not exists')
        # NOTE(review): a column-count mismatch discards existing
        # destination data here.
        _execute_sql_stm(conn_pool_dest, 'DROP TABLE IF EXISTS %s' % full_table)
        table_ddl = _get_table_ddl(conn_pool_source, full_table)
        log.info('create dest table')
        _execute_sql_stm(conn_pool_dest, table_ddl)

    # One placeholder per entry returned by _describe_table.
    insert_query = '%s INTO %s values(%s)' % (
        mode,
        full_table,
        ','.join(['%s'] * len(table_info)))

    if not incre_field:
        # No incremental column given: copy the whole table.
        log.info('migrate full table rows')
        rows_count = _copy_query_rows(
            conn_pool_source,
            conn_pool_dest,
            'SELECT * FROM %s' % full_table,
            insert_query,
            block)
        log.info('migrate complete within %d rows', rows_count)
        return

    # Incremental migration; the end day defaults to today.
    incre_to = _coerce_to_date(incre_to) if incre_to else date.today()

    if incre_from:
        migrate_date = _coerce_to_date(incre_from)
    else:
        # Timestamp range already present on the destination host.
        # NOTE(review): _get_table_tsmp_range is assumed to return a
        # (min, max) pair of datetimes or None values; confirm.
        dest_tsmp_range = _get_table_tsmp_range(conn_pool_dest, full_table, incre_field)
        if dest_tsmp_range[1]:
            migrate_date = dest_tsmp_range[1].date() + timedelta(1)
        else:
            # Destination is empty: start at the source's first timestamp.
            source_tsmp_range = _get_table_tsmp_range(conn_pool_source, full_table, incre_field)
            if not source_tsmp_range[0]:
                return
            migrate_date = source_tsmp_range[0].date()

    log.info('migrate from %s' % migrate_date)
    # "<" instead of "==": a start day past the end must not loop forever.
    while migrate_date < incre_to:
        next_date = migrate_date + timedelta(1)
        log.info('migrate from %s' % migrate_date)
        # NOTE(review): BETWEEN is inclusive at both ends, so a row stamped
        # exactly at the next midnight is selected on two consecutive days.
        query_stm = "SELECT * FROM %s WHERE %s BETWEEN '%s' AND '%s'" % (
            full_table,
            incre_field,
            migrate_date,
            next_date)
        rows_count = _copy_query_rows(
            conn_pool_source,
            conn_pool_dest,
            query_stm,
            insert_query,
            block)
        log.info('migrate complete within %d rows' % rows_count)
        migrate_date = next_date