Example #1
def arpriori_trade():
    while 1:
        full_table_name = tasks_queue.get()
        log.info(full_table_name)
        query = \
            """SELECT T.*
            FROM(
              SELECT shop_id, GROUP_CONCAT(DISTINCT(item_id)) AS list
              FROM %s
              GROUP BY shop_id, nick)T
            WHERE LOCATE(',', T.list)<>0""" \
            % full_table_name
        db_conn = ibbdlib.get_db_conn(**db_server)
        metadata = dict()
        try:
            log.info('%s query data...', full_table_name)
            for row in db_conn.iter(query):
                if not metadata.get(row.shop_id):
                    metadata[row.shop_id] = [row.list.split(',')]
                else:
                    metadata[row.shop_id].append(row.list.split(','))
            log.info('%s run apriori...', full_table_name)
            for (shop_id, t) in metadata.items():
                t = Apriori.apriori(t, 0.1)
                query = 'INSERT IGNORE INTO ibbd2.ststc_shop_apriori values(%%s, CURDATE(), %s, NOW())' % ','.join(['%s'] * 4)
                db_conn.executemany(query, [[shop_id, pair.split(',')[0], pair.split(',')[1], data['sup'], data['num']]
                                    for (pair, data) in t.items() if len(pair.split(',')) == 2])
            log.info('%s finished', full_table_name)
        except:
            log.error(traceback.format_exc())
        finally:
            db_conn.close()
            tasks_queue.task_done()
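
A note on context: the worker above reads table names from a module-level tasks_queue and relies on db_server, log, Apriori and ibbdlib being defined elsewhere in the script. As a rough sketch of how such a worker might be driven (the Queue wiring, thread count and table names below are assumptions, not part of the original script):

import threading
from Queue import Queue  # Python 2 module name

tasks_queue = Queue()

def start_workers(num_workers=4):
    # each thread runs the arpriori_trade() loop defined above
    for _ in range(num_workers):
        t = threading.Thread(target=arpriori_trade)
        t.daemon = True
        t.start()

if __name__ == '__main__':
    start_workers()
    for i in range(26):
        # hypothetical split-table names; the real ones come from the splitting scripts
        tasks_queue.put('topdata.top_item_trade_his_%d' % (i + 1))
    tasks_queue.join()  # returns once the worker has called task_done() for every item
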
Example #2
def main():
    sids = list(sorted(_get_shop_ids()))
    step = len(sids) / split_tb_num
    interval = []
    for i in range(split_tb_num):
        interval.append(sids[(i + 1) * step])

    db_conn = ibbdlib.get_db_conn(host='223.4.155.172', user='******', passwd='spider')
    sql_dml_list = []
    for i in range(len(interval)):
        ddl_table = '%s.%s_%d' % (db, his_table, i + 1)
        ddl = "DROP TABLE IF EXISTS %s" % ddl_table
        db_conn.execute(ddl)
        ddl = 'CREATE TABLE %s LIKE topdata.top_item_trade' % ddl_table
        db_conn.execute(ddl)
        sql = \
            """INSERT IGNORE INTO %s
            SELECT *, MD5(CONCAT_WS(',', item_id, nick, rank, trade_time)) 
            FROM %s.%s 
            WHERE shop_id %s"""
        if i == 0:
            sql = sql % (ddl_table, his_db, his_table, "< '%s'" % interval[i])
        elif i == len(interval) - 1:
            sql = sql % (ddl_table, his_db, his_table, ">= '%s'" % interval[i])
        else:
            sql = sql % (ddl_table, his_db, his_table, ">='%s' AND shop_id <'%s'" % (interval[i], interval[i + 1]))
        sql_dml_list.append(sql)
        print '%s;' % sql

    db_conn.close()
def arpriori_trade(full_table_name):
    query = \
        """SELECT T.*
        FROM(
          SELECT shop_id, GROUP_CONCAT(DISTINCT(item_id)) AS list
          FROM %s
          GROUP BY shop_id, nick)T
        WHERE LOCATE(',', T.list)<>0""" \
        % full_table_name
    db_conn = ibbdlib.get_db_conn(**db_server)
    metadata = dict()
    try:
        for row in db_conn.iter(query):
            if not metadata.get(row.shop_id):
                metadata[row.shop_id] = [row.list.split(',')]
            else:
                metadata[row.shop_id].append(row.list.split(','))

        for (shop_id, t) in metadata.items():
            print shop_id
            t = Apriori.apriori(t, 0.1)
            print json.dumps(t, indent=4)
            query = 'INSERT IGNORE INTO ibbd2.ststc_shop_apriori values(%%s, CURDATE(), %s, NOW())' % ','.join(['%s'] * 4)
            db_conn.executemany(query, [[shop_id, pair.split(',')[0], pair.split(',')[1], data['sup'], data['num']]
                                for (pair, data) in t.items() if len(pair.split(',')) == 2])
    except Exception, e:
        print e
Example #4
def split_trade_data():
    sids = list(sorted(_get_shop_ids()))
    step = len(sids) / (split_tb_num - 1)
    interval = [sids[0]]
    for i in range(split_tb_num - 1):
        interval.append(sids[(i + 1) * step - 1])
    interval.append(sids[-1])
    return interval  # NOTE: the migration code below never runs because of this early return

    db_conn = ibbdlib.get_db_conn(host='223.4.246.146', user='******', passwd='huangj')
    for i in range(len(interval) - 1):
        ddl_table = '%s.%s_%d' % (db, his_table, i + 1)
        ddl = 'DROP TABLE IF EXISTS %s' % ddl_table
        db_conn.execute(ddl)
        ddl = 'CREATE TABLE %s LIKE topdata.top_item_trade' % ddl_table
        print 'create table', ddl_table
        db_conn.execute(ddl)
        sql = \
            """INSERT IGNORE INTO %s
            SELECT *, MD5(CONCAT_WS(',', item_id, nick, rank, trade_time)) 
            FROM %s.%s 
            WHERE shop_id %s"""
        sql = sql % (ddl_table, his_db, his_table, ">='%s' AND shop_id <'%s'" % (interval[i], interval[i + 1]))
        if i == len(interval) - 2:
            sql = sql.replace('<', '<=')
        print 'migrate metadata to', ddl_table,
        print db_conn.execute_rowcount(sql)

    db_conn.close()
Example #5
def split_topspider_table(full_table):
    db_conn = ibbdlib.get_db_conn(host='223.4.155.172',
                                  user='******',
                                  passwd='spider')
    cmd_str = "SELECT DISTINCT(shop_id) AS shop_id FROM %s" % (full_table)
    sids = []
    for row in db_conn.iter(cmd_str):
        sids.append(row.shop_id)
    length = len(sids) / 100
    i = 1
    sid_dict = dict()
    for sid in sids:
        if sid_dict.get(i):
            if len(sid_dict[i]) < length:
                sid_dict[i].append(sid)
            else:
                i += 1
                sid_dict[i] = [sid]
        else:
            sid_dict[i] = [sid]
    for k in sid_dict.keys():
        print k, len(sid_dict[k])
        db_conn.executemany(
            "INSERT IGNORE INTO topdata.top_trade_dist(table_name, shop_id) values(%s, %s)",
            [['top_item_trade_%d' % k, sid] for sid in sid_dict[k]])
    db_conn.close()
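
Because length = len(sids) / 100 is Python 2 integer division, the loop above fills buckets of length ids each and lets whatever remainder is left spill into a few extra buckets beyond the hundredth. A standalone sketch of the same chunking logic (the helper name and the toy input are assumptions for illustration):

def chunk_ids(sids, num_buckets=100):
    # mirror the bucketing above: fill each bucket to `length` ids, then start the next
    length = len(sids) / num_buckets  # integer division
    buckets, i = {}, 1
    for sid in sids:
        if buckets.get(i) and len(buckets[i]) >= length:
            i += 1
        buckets.setdefault(i, []).append(sid)
    return buckets

if __name__ == '__main__':
    demo = chunk_ids(range(1050))
    print len(demo), sorted(set(len(v) for v in demo.values()))  # 105 buckets, 10 ids each
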
Example #6
def main():
    sids = list(sorted(_get_shop_ids()))
    step = len(sids) / split_tb_num
    interval = []
    for i in range(split_tb_num):
        interval.append(sids[(i + 1) * step])

    db_conn = ibbdlib.get_db_conn(host='223.4.155.172',
                                  user='******',
                                  passwd='spider')
    sql_dml_list = []
    for i in range(len(interval)):
        ddl_table = '%s.%s_%d' % (db, his_table, i + 1)
        ddl = "DROP TABLE IF EXISTS %s" % ddl_table
        db_conn.execute(ddl)
        ddl = 'CREATE TABLE %s LIKE topdata.top_item_trade' % ddl_table
        db_conn.execute(ddl)
        sql = \
            """INSERT IGNORE INTO %s
            SELECT *, MD5(CONCAT_WS(',', item_id, nick, rank, trade_time)) 
            FROM %s.%s 
            WHERE shop_id %s"""
        if i == 0:
            sql = sql % (ddl_table, his_db, his_table, "< '%s'" % interval[i])
        elif i == len(interval) - 1:
            sql = sql % (ddl_table, his_db, his_table, ">= '%s'" % interval[i])
        else:
            sql = sql % (ddl_table, his_db, his_table,
                         ">='%s' AND shop_id <'%s'" %
                         (interval[i], interval[i + 1]))
        sql_dml_list.append(sql)
        print '%s;' % sql

    db_conn.close()
Example #7
def _get_shop_ids():
    db_conn = ibbdlib.get_db_conn(**db_server)
    cmd_str = 'SELECT DISTINCT(shop_id) AS shop_id FROM %s.%s' % (
        his_db, his_table)
    sids = list(db_conn.iter(cmd_str))
    db_conn.close()
    return [row.shop_id for row in sids]
Example #8
def split_trade_data():
    sids = list(sorted(_get_shop_ids()))
    step = len(sids) / (split_tb_num - 1)
    interval = [sids[0]]
    for i in range(split_tb_num - 1):
        interval.append(sids[(i + 1) * step - 1])
    interval.append(sids[-1])
    return interval  # NOTE: the migration code below never runs because of this early return

    db_conn = ibbdlib.get_db_conn(host='223.4.246.146',
                                  user='******',
                                  passwd='huangj')
    for i in range(len(interval) - 1):
        ddl_table = '%s.%s_%d' % (db, his_table, i + 1)
        ddl = 'DROP TABLE IF EXISTS %s' % ddl_table
        db_conn.execute(ddl)
        ddl = 'CREATE TABLE %s LIKE topdata.top_item_trade' % ddl_table
        print 'create table', ddl_table
        db_conn.execute(ddl)
        sql = \
            """INSERT IGNORE INTO %s
            SELECT *, MD5(CONCAT_WS(',', item_id, nick, rank, trade_time)) 
            FROM %s.%s 
            WHERE shop_id %s"""
        sql = sql % (ddl_table, his_db, his_table, ">='%s' AND shop_id <'%s'" %
                     (interval[i], interval[i + 1]))
        if i == len(interval) - 2:
            sql = sql.replace('<', '<=')
        print 'migrate metadata to', ddl_table,
        print db_conn.execute_rowcount(sql)

    db_conn.close()
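
The interval list returned above holds boundary shop_ids; each consecutive pair becomes a half-open shop_id range in the WHERE clause, and the replace('<', '<=') on the last iteration closes the final range so the largest shop_id is included. A small standalone illustration of that boundary logic (the helper name and the shop_ids are made up):

def make_ranges(interval):
    # consecutive boundaries -> half-open ranges, with the last range closed on the right
    ranges = []
    for i in range(len(interval) - 1):
        cond = ">='%s' AND shop_id <'%s'" % (interval[i], interval[i + 1])
        if i == len(interval) - 2:
            cond = cond.replace('<', '<=')
        ranges.append(cond)
    return ranges

if __name__ == '__main__':
    for cond in make_ranges(['100', '250', '400', '999']):  # hypothetical shop_ids
        print 'WHERE shop_id %s' % cond
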
Example #9
def _get_shop_ids():
    db_conn = ibbdlib.get_db_conn(host='223.4.155.172', user='******', passwd='spider')
    cmd_str = 'SELECT DISTINCT(shop_id) AS shop_id FROM %s.%s' % (his_db, his_table)
    sids = []
    for row in db_conn.iter(cmd_str):
        sids.append(row.shop_id)
    db_conn.close()
    return sids
def updatePrice():
    db_conn = ibbdlib.get_db_conn(**db_server)
    cmd_str = 'SELECT DISTINCT(item_id) AS item_id FROM ibbd2.user_item_monitor'
    iids = [row.item_id for row in list(db_conn.iter(cmd_str))]
    db_conn.close()
    re_conn = ibbdlib.get_redis_conn(**redis_server)
    re_conn.sadd('Schedule_itempricemonitor', *iids)
    del re_conn
Example #11
def _get_shop_ids():
    db_conn = ibbdlib.get_db_conn(host='223.4.155.172',
                                  user='******',
                                  passwd='spider')
    cmd_str = 'SELECT DISTINCT(shop_id) AS shop_id FROM %s.%s' % (his_db,
                                                                  his_table)
    sids = list(db_conn.iter(cmd_str))
    db_conn.close()
    return [row.shop_id for row in sids]
Example #12
def main():
    dbConn = ibbdlib.get_db_conn()
    cats = []
    for t in re.findall(r'\n(.+?)\n\{(.+?)\}', s.decode('utf-8'), re.S):
        print t[0].encode('GBK')
        print ','.join(t[1].replace(' ', '').split()).encode('GBK')
        cats.extend([[t[0].encode('utf-8'), t2.encode('utf-8')] for t2 in t[1].replace(' ', '').split()])
    dbConn.executemany("INSERT IGNORE INTO ibbd2.lib_industry_2 VALUES(%s,%s)", cats)
    dbConn.close()
Example #13
def master():
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_conn.execute("TRUNCATE TABLE ibbd2.lib_cat_industry_keyword_2")
    db_conn.close()
    ibbdlib.migratetable.migrate_table(db_server['223.4.155.152'],
                                       db_server['223.4.155.172'],
                                       'ibbd2.user2')
    ibbdlib.migratetable.migrate_table(db_server['223.4.155.152'],
                                       db_server['223.4.155.172'],
                                       'ibbd2.user_keywords')
    ibbdlib.migratetable.migrate_table(db_server['223.4.155.152'],
                                       db_server['223.4.155.172'],
                                       'ibbd2.lib_cat_industry_keyword_2')
    ibbdlib.migratetable.migrate_table_by_query(
        db_server['223.4.155.152'], db_server['223.4.155.172'],
        'ibbd2.user_shop_monitor_items', """SELECT user, shop_type,
        (CASE WHEN shop_id='' THEN 0 ELSE shop_id END) shop_id,
        user_num_id, item_id, status,
        population_tsmp
        FROM ibbd2.user_shop_monitor_items""")

    # keep connection

    dbConnPool = None
    reConn = None
    while True:
        try:
            dbConnPool = ConnectionPool(max_connections=1000,
                                        host='localhost',
                                        user='******',
                                        passwd='ibbd_etl_secure',
                                        db='topspider',
                                        charset='utf8')
            # reConn = getRedisConn2()
            reConn = ibbdlib.get_redis_conn(**redis_server)
            break
        except Exception as e:
            print e
    runKeywordsSchedule(reConn, dbConnPool)
    _ensure_schedule_complete(reConn)
    dbConnPool.disconnect()
    dbConnPool = ConnectionPool(max_connections=1000,
                                host='localhost',
                                user='******',
                                passwd='ibbd_etl_secure',
                                db='topspider',
                                charset='utf8')
    while True:
        try:
            # migrateScheduleToRedis(reConn, dbConnPool)
            break
        except:
            traceback.print_exc()
    dbConnPool.disconnect()
    del reConn
Example #14
def rollbackNullSiteIdItemSchedule():
    print 'init...'
    server = get_re_conn()
    iids = []
    db_conn = ibbdlib.get_db_conn()
    for row in db_conn.iter("SELECT item_id FROM ibbd2.top_cat_items WHERE site_id=''"):
        iids.append(row.item_id)
    db_conn.close()
    print 'Schedule_itemFullInfo', len(iids)
    server.sadd('Schedule_itemFullInfo', *iids)
    del server
Example #15
def main():
    dbConn = ibbdlib.get_db_conn()
    cats = []
    for t in re.findall(r'\n(.+?)\n\{(.+?)\}', s.decode('utf-8'), re.S):
        print t[0].encode('GBK')
        print ','.join(t[1].replace(' ', '').split()).encode('GBK')
        cats.extend([[t[0].encode('utf-8'),
                      t2.encode('utf-8')]
                     for t2 in t[1].replace(' ', '').split()])
    dbConn.executemany("INSERT IGNORE INTO ibbd2.lib_industry_2 VALUES(%s,%s)",
                       cats)
    dbConn.close()
Example #16
def get_tables(db):
    db_conn = ibbdlib.get_db_conn(**db_server)
    cmdStr = \
        """SELECT CONCAT(TABLE_SCHEMA, '.', TABLE_NAME) AS TABLE_NAME, TABLE_TYPE, TABLE_ROWS, DATA_LENGTH, INDEX_LENGTH, TABLE_COMMENT
        FROM information_schema.TABLES
        WHERE TABLE_SCHEMA='%s'
        ORDER BY TABLE_NAME""" \
        % db
    info_dict = dict(
        (row.TABLE_NAME,
         dict((k, ((v.encode('utf-8') if isinstance(v, unicode) else v) if v else ''))
              for (k, v) in row.items()))
        for row in db_conn.query(cmdStr))
    db_conn.close()
    return info_dict
Example #17
def get_tables(db):
    db_conn = ibbdlib.get_db_conn(**db_server)
    cmdStr = \
        """SELECT CONCAT(TABLE_SCHEMA, '.', TABLE_NAME) AS TABLE_NAME, TABLE_TYPE, TABLE_ROWS, DATA_LENGTH, INDEX_LENGTH, TABLE_COMMENT
        FROM information_schema.TABLES
        WHERE TABLE_SCHEMA='%s'
        ORDER BY TABLE_NAME""" \
        % db
    info_dict = dict(
        (row.TABLE_NAME,
         dict((k, ((
             v.encode('utf-8') if isinstance(v, unicode) else v) if v else ''))
              for (k, v) in row.items())) for row in db_conn.query(cmdStr))
    db_conn.close()
    return info_dict
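
get_tables() returns a dict keyed by schema-qualified table name, whose values map the selected information_schema columns (TABLE_ROWS, DATA_LENGTH, TABLE_COMMENT, ...) to encoded strings or ''. A possible usage sketch, assuming db_server is configured as in the other examples and with 'topdata' as a placeholder schema name:

def print_table_summary(db):
    # one line per table: row estimate, data size and comment from information_schema
    info = get_tables(db)
    for table_name in sorted(info):
        meta = info[table_name]
        print '%-50s rows=%-12s data=%-12s %s' % (
            table_name, meta['TABLE_ROWS'], meta['DATA_LENGTH'], meta['TABLE_COMMENT'])

if __name__ == '__main__':
    print_table_summary('topdata')
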
Example #18
def _prepar_trade_date():
    # rename schedule
    redis_conn = ibbdlib.get_redis_conn(**redis_server)
    if redis_conn.scard('Schedule_itemTrade2') > 0:
        redis_conn.rename('Schedule_itemTrade2', 'Schedule_itemTrade2_bak')

    # insert lock
    db_conn = ibbdlib.get_db_conn(**db_server)
    # while True:
    #     try:
    #         is_inserting = False
    #         for row in db_conn.iter("SHOW PROCESSLIST"):
    #             if row.Info:
    #                 if row.Info.encode('utf-8', 'ignore').find('top_item_trade') > -1:
    #                     is_inserting = True
    #         if is_inserting:
    #             print 'processing insert lock, wait for 10 seconds'
    #             time.sleep(10)
    #         else:
    #             print 'no insert lock'
    #             break
    #     except:
    #         traceback.print_exc()
    try:
        # db_conn.execute(
        #     "ALTER TABLE topspider.top_item_trade RENAME topspider.top_item_trade_his_%s" % fetch_date_short)
        # db_conn.execute(
        #     "CREATE TABLE topspider.top_item_trade LIKE topspider.top_item_trade_his_%s" % fetch_date_short)
        for i in range(split_tb_num):
            table_name = 'top_item_trade_his_%s_%d' % (
                datetime.now().strftime('%Y%m%d'), i + 1)
            db_conn.execute(
                "CREATE TABLE topspider.%s LIKE topspider.top_item_trade" %
                table_name)
        db_conn.execute("TRUNCATE TABLE topspider.top_itemsearchresult")
    except:
        traceback.print_exc()
    db_conn.close()

    # rename schedule name
    if redis_conn.scard('Schedule_itemTrade2_bak') > 0:
        redis_conn.rename('Schedule_itemTrade2_bak', 'Schedule_itemTrade2')
        pass
    del redis_conn
Example #19
def get_columns(db):
    db_conn = ibbdlib.get_db_conn(**db_server)
    cmdStr = \
        """SELECT CONCAT(TABLE_SCHEMA, '.', TABLE_NAME) AS TABLE_NAME,ORDINAL_POSITION, COLUMN_NAME, COLUMN_TYPE, COLUMN_KEY, COLUMN_COMMENT
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA='%s'""" \
        % db
    info_dict = dict()
    for row in db_conn.query(cmdStr):
        if row.TABLE_NAME in info_dict:
            info_dict[row.TABLE_NAME].append(
                dict((k, ((v.encode('utf-8') if isinstance(v, unicode) else v) if v else ''))
                     for (k, v) in row.items()))
        else:
            info_dict[row.TABLE_NAME] = [
                dict((k, ((v.encode('utf-8') if isinstance(v, unicode) else v) if v else ''))
                     for (k, v) in row.items())]
    db_conn.close()
    return info_dict
Example #20
def get_columns(db):
    db_conn = ibbdlib.get_db_conn(**db_server)
    cmdStr = \
        """SELECT CONCAT(TABLE_SCHEMA, '.', TABLE_NAME) AS TABLE_NAME,ORDINAL_POSITION, COLUMN_NAME, COLUMN_TYPE, COLUMN_KEY, COLUMN_COMMENT
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA='%s'""" \
        % db
    info_dict = dict()
    for row in db_conn.query(cmdStr):
        if row.TABLE_NAME in info_dict:
            info_dict[row.TABLE_NAME].append(
                dict((k, ((v.encode('utf-8') if isinstance(v, unicode) else v
                           ) if v else '')) for (k, v) in row.items()))
        else:
            info_dict[row.TABLE_NAME] = [
                dict((k, ((v.encode('utf-8') if isinstance(v, unicode) else v
                           ) if v else '')) for (k, v) in row.items())
            ]
    db_conn.close()
    return info_dict
def _prepar_trade_date():
    # rename schedule
    redis_conn = ibbdlib.get_redis_conn(**redis_server)
    if redis_conn.scard("Schedule_itemTrade2") > 0:
        redis_conn.rename("Schedule_itemTrade2", "Schedule_itemTrade2_bak")

    # insert lock
    db_conn = ibbdlib.get_db_conn(**db_server)
    # while True:
    #     try:
    #         is_inserting = False
    #         for row in db_conn.iter("SHOW PROCESSLIST"):
    #             if row.Info:
    #                 if row.Info.encode('utf-8', 'ignore').find('top_item_trade') > -1:
    #                     is_inserting = True
    #         if is_inserting:
    #             print 'processing insert lock, wait for 10 seconds'
    #             time.sleep(10)
    #         else:
    #             print 'no insert lock'
    #             break
    #     except:
    #         traceback.print_exc()
    try:
        # db_conn.execute(
        #     "ALTER TABLE topspider.top_item_trade RENAME topspider.top_item_trade_his_%s" % fetch_date_short)
        # db_conn.execute(
        #     "CREATE TABLE topspider.top_item_trade LIKE topspider.top_item_trade_his_%s" % fetch_date_short)
        for i in range(split_tb_num):
            table_name = "top_item_trade_his_%s_%d" % (datetime.now().strftime("%Y%m%d"), i + 1)
            db_conn.execute("CREATE TABLE topspider.%s LIKE topspider.top_item_trade" % table_name)
        db_conn.execute("TRUNCATE TABLE topspider.top_itemsearchresult")
    except:
        traceback.print_exc()
    db_conn.close()

    # rename schedule name
    if redis_conn.scard("Schedule_itemTrade2_bak") > 0:
        redis_conn.rename("Schedule_itemTrade2_bak", "Schedule_itemTrade2")
        pass
    del redis_conn
Example #22
def split_topspider_table(full_table):
    db_conn = ibbdlib.get_db_conn(host='223.4.155.172', user='******', passwd='spider')
    cmd_str = "SELECT DISTINCT(shop_id) AS shop_id FROM %s" % (full_table)
    sids = []
    for row in db_conn.iter(cmd_str):
        sids.append(row.shop_id)
    length = len(sids) / 100
    i = 1
    sid_dict = dict()
    for sid in sids:
        if sid_dict.get(i):
            if len(sid_dict[i]) < length:
                sid_dict[i].append(sid)
            else:
                i += 1
                sid_dict[i] = [sid]
        else:
            sid_dict[i] = [sid]
    for k in sid_dict.keys():
        print k, len(sid_dict[k])
        db_conn.executemany("INSERT IGNORE INTO topdata.top_trade_dist(table_name, shop_id) values(%s, %s)", [['top_item_trade_%d'%k, sid] for sid in sid_dict[k]])
    db_conn.close()
Example #23
def _prepar_trade_date():
    # rename schedule
    redis_conn = ibbdlib.get_redis_conn()
    if redis_conn.scard('Schedule_itemTrade2') > 0:
        redis_conn.rename('Schedule_itemTrade2', 'Schedule_itemTrade2_bak')

    # insert lock
    db_conn = ibbdlib.get_db_conn(**db_server)
    while 1:
        try:
            is_inserting = False
            for row in db_conn.iter("SHOW PROCESSLIST"):
                if row.Info:
                    if row.Info.encode('utf-8', 'ignore').find('top_item_trade') > -1:
                        is_inserting = True
            if is_inserting:
                print 'processing insert lock, wait for 10 seconds'
                time.sleep(10)
            else:
                print 'no insert lock'
                break
        except:
            traceback.print_exc()
    try:
        db_conn.execute(
            "ALTER TABLE topspider.top_item_trade RENAME topspider.top_item_trade_his_%s" % fetch_date_short)
        db_conn.execute(
            "CREATE TABLE topspider.top_item_trade LIKE topspider.top_item_trade_his_%s" % fetch_date_short)
    except:
        traceback.print_exc()
    db_conn.close()

    # rename schedule name
    if redis_conn.scard('Schedule_itemTrade2_bak') > 0:
        redis_conn.rename('Schedule_itemTrade2_bak', 'Schedule_itemTrade2')
    del redis_conn
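
The rename/rename-back of Schedule_itemTrade2 above presumably keeps the spiders from consuming the schedule while the trade tables are being rebuilt. A standalone sketch of that guard with plain redis-py (the context-manager wrapper, helper name and localhost connection are assumptions; the key names follow the example):

import contextlib
import redis

@contextlib.contextmanager
def schedule_paused(r, key):
    # move the set out of the way so workers see an empty schedule, restore it afterwards
    bak = key + '_bak'
    if r.scard(key) > 0:
        r.rename(key, bak)
    try:
        yield
    finally:
        if r.scard(bak) > 0:
            r.rename(bak, key)

if __name__ == '__main__':
    r = redis.StrictRedis(host='localhost')
    with schedule_paused(r, 'Schedule_itemTrade2'):
        pass  # run the table maintenance here
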
import traceback
from datetime import timedelta

import ibbdlib
import ibbdlib.tornadolib

from dateutil.parser import parse as dateparser

keyword = '香港虚拟主机'
keyword_en = 'hongkongvirtualhost'
from_date_str = '2012-12-09'
to_date_str = '2012-12-20'
from_date = dateparser(from_date_str)
to_date = dateparser(to_date_str)
date_range_str = [(from_date + timedelta(i)).strftime('%Y%m%d') for i in range((to_date - from_date).days + 1)]

db_server = {'host': '223.4.155.152', 'user': '******', 'passwd': 'spider'}
db_conn = ibbdlib.get_db_conn(**db_server)

try:
    trade_source_table_list = []
    for date_str in date_range_str:
        select_query = \
            "SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA='topdata' AND TABLE_NAME LIKE %s ORDER BY TABLE_NAME"
        for table_name in db_conn.iter(select_query, 'top_item_trade_his_%s%%' % date_str):
            print table_name.TABLE_NAME
            trade_source_table_list.append(table_name.TABLE_NAME)
    if db_server['host'].find('152') != -1:
        trade_source_table_list.extend(['top_item_trade_his_%d' % (i+1) for i in range(26)])
except Exception, e:
    traceback.print_exc()
db_conn.close()
Example #25
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import MySQLdb
import ibbdlib

# db_conn = MySQLdb.connect(host='223.4.155.172', user='******', passwd='spider')
db_conn = ibbdlib.get_db_conn(host='223.4.155.172',
                              user='******',
                              passwd='spider')
# cursor = db_conn.cursor()
cursor = db_conn._db.cursor()
try:
    print cursor.callproc('ibbd2.sp_insert_date', tuple(['2012-12-12']))
except Exception, e:
    print e
finally:
    cursor.close()
    db_conn.close()
Example #26
db_server = {
    '223.4.155.152': {
        'host': '223.4.155.152',
        'user': '******',
        'passwd': 'spider',
        'port': 3306,
    },
    '223.4.155.172': {
        'host': '223.4.155.172',
        'user': '******',
        'passwd': 'spider',
        'port': 3306,
    }
}

if __name__ == '__main__':
    print 'start...'
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_dml_connpool = ibbdlib.connectionpool.ConnectionPool(
        connection_class=MySQLdb.connections.Connection,
        max_connections=100,
        **db_server['223.4.155.172'])
    iids = [
        row.item_id for row in db_conn.iter(
            "SELECT DISTINCT item_id FROM topspider.top_itemsearchresult WHERE keyword='{}'"
            .format(keyword))
    ]
    iids_errors = []
    for i, iid in enumerate(iids):
        print '%d of %d %s' % (i + 1, len(iids), iid)
        try:

            # (data, t1, t2) = getTrades3('http://item.taobao.com/item.htm?id=%s' % iid,
Example #27
def save_log_file(log):
    db_conn = ibbdlib.get_db_conn()
    with open(log) as f:
        # db_conn.executemany("INSERT IGNORE INTO ibbdlog.ibbd_log values(%s)"%(','.join(['%s']*8)), map(lambda row: row.replace('\n','').split('\t') + [hashlib.md5(row.split('\t')[0]+row.split('\t')[6]).hexdigest()] if len(row.split('\t')) >= 7, open(log).readlines()))
        db_conn.executemany("INSERT IGNORE INTO ibbdlog.ibbd_log values(%s)"%(','.join(['%s']*8)), [row.replace('\n','').split('\t') + [hashlib.md5(row.split('\t')[0]+row.split('\t')[6]).hexdigest()] for row in open(log).readlines() if len(row.split('\t')) >= 7])
    db_conn.close()
Example #28
def _get_db_server():
    return ibbdlib.get_db_conn(**db_server['223.4.155.172'])
Example #29
def master():
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_conn.execute("TRUNCATE TABLE ibbd2.lib_cat_industry_keyword_2")
    db_conn.close()
    ibbdlib.migratetable.migrate_table(
        db_server['223.4.155.152'],
        db_server['223.4.155.172'],
        'ibbd2.user2')
    ibbdlib.migratetable.migrate_table(
        db_server['223.4.155.152'],
        db_server['223.4.155.172'],
        'ibbd2.user_keywords')
    ibbdlib.migratetable.migrate_table(
        db_server['223.4.155.152'],
        db_server['223.4.155.172'],
        'ibbd2.lib_cat_industry_keyword_2')
    ibbdlib.migratetable.migrate_table_by_query(
        db_server['223.4.155.152'],
        db_server['223.4.155.172'],
        'ibbd2.user_shop_monitor_items',
        """SELECT user, shop_type,
        (CASE WHEN shop_id='' THEN 0 ELSE shop_id END) shop_id,
        user_num_id, item_id, status,
        population_tsmp
        FROM ibbd2.user_shop_monitor_items"""
    )

    # keep connection

    dbConnPool = None
    reConn = None
    while True:
        try:
            dbConnPool = ConnectionPool(max_connections=1000,
                                        host='localhost',
                                        user='******',
                                        passwd='ibbd_etl_secure',
                                        db='topspider',
                                        charset='utf8')
            # reConn = getRedisConn2()
            reConn = ibbdlib.get_redis_conn(**redis_server)
            break
        except Exception as e:
            print e
    runKeywordsSchedule(reConn, dbConnPool)
    _ensure_schedule_complete(reConn)
    dbConnPool.disconnect()
    dbConnPool = ConnectionPool(max_connections=1000,
                                host='localhost',
                                user='******',
                                passwd='ibbd_etl_secure',
                                db='topspider',
                                charset='utf8')
    while True:
        try:
            # migrateScheduleToRedis(reConn, dbConnPool)
            break
        except:
            traceback.print_exc()
    dbConnPool.disconnect()
    del reConn
Example #30
db_server = {'223.4.155.152': {
    'host': '223.4.155.152',
    'user': '******',
    'passwd': 'spider',
    'port': 3306,
    }, '223.4.155.172': {
    'host': '223.4.155.172',
    'user': '******',
    'passwd': 'spider',
    'port': 3306,
    }}

if __name__ == '__main__':
    print 'start...'
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_dml_connpool = ibbdlib.connectionpool.ConnectionPool(connection_class=MySQLdb.connections.Connection, 
        max_connections=100, **db_server['223.4.155.172'])
    iids = [row.item_id for row in db_conn.iter("SELECT DISTINCT item_id FROM topspider.top_itemsearchresult WHERE keyword='{}'".format(keyword))]
    iids_errors = []
    for i, iid in enumerate(iids):
        print '%d of %d %s' % (i+1, len(iids), iid)
        try:
            
            # (data, t1, t2) = getTrades3('http://item.taobao.com/item.htm?id=%s' % iid, 
            #     fromTsmp='1900/1/1', 
            #     toTsmp='2013/1/18',
            #     reqinterval=0.2)
            # saveTopTrade(data, connpool=db_dml_connpool, table='topspider.top_item_trade_hongkongvps')
            # print len(data)
            topitem = TopItem('http://item.taobao.com/item.htm?id=%s' % iid).getFullInfo()
Example #31
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import MySQLdb
import ibbdlib

# db_conn = MySQLdb.connect(host='223.4.155.172', user='******', passwd='spider')
db_conn = ibbdlib.get_db_conn(host='223.4.155.172', user='******', passwd='spider')
# cursor = db_conn.cursor()
cursor = db_conn._db.cursor()
try:
    print cursor.callproc('ibbd2.sp_insert_date', tuple(['2012-12-12']))
except Exception, e:
    print e
finally:
    cursor.close()
    db_conn.close()
Example #32
def _get_db_server():
    return ibbdlib.get_db_conn(**db_server['223.4.155.172'])
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import ibbdlib
import ibbdlib.redislib
from datetime import datetime, timedelta

cmd_str = "SELECT item_id, MAX(date_of) AS date_of FROM ibbd2.top_item2 GROUP BY item_id"

commands = []
db_conn = ibbdlib.get_db_conn()
for row in db_conn.iter(cmd_str):
    commands.append(['HSET', 'TopItemTradeUpdateTime', str(row.item_id), (row.date_of+timedelta(1)).strftime('%Y/%m/%d')])
print commands[0]
db_conn.close()
re_conn = ibbdlib.get_redis_conn()
ibbdlib.redislib.redis_pipe(re_conn, commands)
del re_conn
Example #34
def _get_db_conn():
    return ibbdlib.get_db_conn(**db_server)
Example #35
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import re
import requests

import ibbdlib

site = 'http://www.taobao.com/'

req = requests.get(site)
site_encoding = req.encoding
site_content = req.text.encode('utf-8')
meta_category_items = re.findall(r'<div\s+class\="category-item\s+.+?">.+?</div>', site_content, re.S)
category_items = []
for category_item in meta_category_items:
    cat = re.findall(r'<img.+?alt\=\"(.+?)\"', category_item, re.S)[0]
    industrys = re.findall(r'<h5>\s*<a.+?>(.+?)<', category_item, re.S)
    category_items.extend([[cat, t] for t in industrys])

db_conn = ibbdlib.get_db_conn(**{'host': '223.4.155.152', 'user': '******', 'passwd': 'spider'})
try:
    db_conn.executemany("INSERT IGNORE INTO ibbd2.lib_industry_3 values(%s, %s)", category_items)
except Exception, e:
    print e
finally:
    db_conn.close()
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import ibbdlib
import ibbdlib.redislib
from datetime import datetime, timedelta

cmd_str = "SELECT item_id, MAX(date_of) AS date_of FROM ibbd2.top_item2 GROUP BY item_id"

commands = []
db_conn = ibbdlib.get_db_conn()
for row in db_conn.iter(cmd_str):
    commands.append([
        'HSET', 'TopItemTradeUpdateTime',
        str(row.item_id), (row.date_of + timedelta(1)).strftime('%Y/%m/%d')
    ])
print commands[0]
db_conn.close()
re_conn = ibbdlib.get_redis_conn()
ibbdlib.redislib.redis_pipe(re_conn, commands)
del re_conn
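
ibbdlib.redislib.redis_pipe is not shown here; presumably it pushes the command list through a Redis pipeline. A rough equivalent with plain redis-py (the helper name and localhost connection are assumptions) might look like:

import redis

def push_update_times(commands):
    # batch the HSET commands in one round trip instead of one call per row
    r = redis.StrictRedis(host='localhost')
    pipe = r.pipeline(transaction=False)
    for cmd in commands:
        # each command is ['HSET', key, field, value] as built above
        pipe.hset(cmd[1], cmd[2], cmd[3])
    pipe.execute()
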
Example #37
import re
import requests

import ibbdlib

site = 'http://www.taobao.com/'

req = requests.get(site)
site_encoding = req.encoding
site_content = req.text.encode('utf-8')
meta_category_items = re.findall(
    r'<div\s+class\="category-item\s+.+?">.+?</div>', site_content, re.S)
category_items = []
for category_item in meta_category_items:
    cat = re.findall(r'<img.+?alt\=\"(.+?)\"', category_item, re.S)[0]
    industrys = re.findall(r'<h5>\s*<a.+?>(.+?)<', category_item, re.S)
    category_items.extend([[cat, t] for t in industrys])

db_conn = ibbdlib.get_db_conn(**{
    'host': '223.4.155.152',
    'user': '******',
    'passwd': 'spider'
})
try:
    db_conn.executemany(
        "INSERT IGNORE INTO ibbd2.lib_industry_3 values(%s, %s)",
        category_items)
except Exception, e:
    print e
finally:
    db_conn.close()
Example #38
import traceback
from datetime import timedelta

import ibbdlib
import ibbdlib.tornadolib

from dateutil.parser import parse as dateparser

keyword = '香港虚拟主机'
keyword_en = 'hongkongvirtualhost'
from_date_str = '2012-12-09'
to_date_str = '2012-12-20'
from_date = dateparser(from_date_str)
to_date = dateparser(to_date_str)
date_range_str = [(from_date + timedelta(i)).strftime('%Y%m%d')
                  for i in range((to_date - from_date).days + 1)]

db_server = {'host': '223.4.155.152', 'user': '******', 'passwd': 'spider'}
db_conn = ibbdlib.get_db_conn(**db_server)

try:
    trade_source_table_list = []
    for date_str in date_range_str:
        select_query = \
            "SELECT TABLE_NAME FROM information_schema.TABLES WHERE TABLE_SCHEMA='topdata' AND TABLE_NAME LIKE %s ORDER BY TABLE_NAME"
        for table_name in db_conn.iter(select_query,
                                       'top_item_trade_his_%s%%' % date_str):
            print table_name.TABLE_NAME
            trade_source_table_list.append(table_name.TABLE_NAME)
    if db_server['host'].find('152') != -1:
        trade_source_table_list.extend(
            ['top_item_trade_his_%d' % (i + 1) for i in range(26)])
except Exception, e:
    traceback.print_exc()
#!/usr/bin/python2.7
# -*- coding: utf-8 -*-

import traceback
import json
from collections import Counter

import ibbdlib

db_conn = ibbdlib.get_db_conn(host="223.4.155.152", user="******", passwd="spider")
# remote_db_conn = ibbdlib.get_db_conn(host='223.4.155.172', user='******', passwd='spider')


def magic():
    query = """SELECT T1.user, T1.keyword, T2.cat, T2.industry
    FROM ibbd2.user_keywords T1
    LEFT JOIN ibbd2.lib_cat_industry_keyword_2 T2
    ON T1.keyword = T2.keyword
    WHERE T1.status='1'
    AND T1.industry=''
    AND T2.keyword IS NOT NULL"""
    user_industry = dict()
    for row in db_conn.iter(query):
        user = row.user.encode("utf-8")
        industry = "{0}-{1}".format(row.cat.encode("utf-8"), row.industry.encode("utf-8"))
        if user_industry.get(user):
            user_industry[user].append(industry)
        else:
            user_industry[user] = [industry]
    for user in user_industry.keys():
        c = Counter()