Esempio n. 1
0
def master():
    """Refresh keyword tables on the target (172) MySQL server, then run
    the keyword schedule against the local MySQL pool and Redis.

    Relies on module-level names defined elsewhere in this file:
    db_server, ConnectionPool, redis_server, runKeywordsSchedule,
    _ensure_schedule_complete, traceback, ibbdlib.
    """
    import time  # local import: used for retry back-off below

    # Rebuild the keyword tables on the target (172) server from the
    # source (152) server.
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_conn.execute("TRUNCATE TABLE ibbd2.lib_cat_industry_keyword_2")
    db_conn.close()
    for table in ('ibbd2.user2', 'ibbd2.user_keywords',
                  'ibbd2.lib_cat_industry_keyword_2'):
        ibbdlib.migratetable.migrate_table(db_server['223.4.155.152'],
                                           db_server['223.4.155.172'],
                                           table)
    # shop_id may be stored as '' on the source; normalize it to 0 in flight.
    ibbdlib.migratetable.migrate_table_by_query(
        db_server['223.4.155.152'], db_server['223.4.155.172'],
        'ibbd2.user_shop_monitor_items', """SELECT user, shop_type,
        (CASE WHEN shop_id='' THEN 0 ELSE shop_id END) shop_id,
        user_num_id, item_id, status,
        population_tsmp
        FROM ibbd2.user_shop_monitor_items""")

    # keep connection

    dbConnPool = None
    reConn = None
    while True:
        try:
            dbConnPool = ConnectionPool(max_connections=1000,
                                        host='localhost',
                                        user='******',
                                        passwd='ibbd_etl_secure',
                                        db='topspider',
                                        charset='utf8')
            # reConn = getRedisConn2()
            reConn = ibbdlib.get_redis_conn(**redis_server)
            break
        except Exception as e:
            print(e)
            # Back off instead of spinning at full speed while the
            # DB/Redis servers are unreachable.
            time.sleep(5)
    runKeywordsSchedule(reConn, dbConnPool)
    _ensure_schedule_complete(reConn)
    dbConnPool.disconnect()
    dbConnPool = ConnectionPool(max_connections=1000,
                                host='localhost',
                                user='******',
                                passwd='ibbd_etl_secure',
                                db='topspider',
                                charset='utf8')
    while True:
        try:
            # migrateScheduleToRedis(reConn, dbConnPool)
            break
        except Exception:  # bare 'except' would also swallow KeyboardInterrupt
            traceback.print_exc()
    dbConnPool.disconnect()
    del reConn
Esempio n. 2
0
def main():
    """Run the category-schedule test against the local 'ibbd2' MySQL
    database (via a small connection pool) and the default Redis server,
    then tear both connections down."""
    mysql_conf = {
        'host': 'localhost',
        'user': '******',
        'passwd': '',
        'db': 'ibbd2',
        }
    # 20 connections is plenty for a single test run.
    pool = ConnectionPool(max_connections=20, **mysql_conf)
    redis_conn = getRedisConn2()
    test_schedule_cat(redis_conn, pool)
    # Release resources in the same order as the original: Redis handle
    # first, then the MySQL pool.
    del redis_conn
    pool.disconnect()
Esempio n. 3
0
def _reload_slave_db_pool(schedule_name):
    """Return (creating on demand) the ConnectionPool for the slave DB
    assigned to *schedule_name*.

    The JSON value stored under the Redis key 'slave_db_server' is read
    from RE_CONN; the config is chosen in order of preference: the entry
    for *schedule_name*, the 'default' entry, or the top-level scalar
    fields themselves.  Retries forever, sleeping 10s between attempts.
    """
    global RE_CONN, DB_POOLS
    host = None
    while 1:
        try:
            # Renamed from 'db_server' to avoid shadowing the module-level
            # db_server mapping used elsewhere in this file.
            cfg = json.loads(RE_CONN.get('slave_db_server'))
            cfg = cfg.get(schedule_name) or \
                cfg.get('default') or \
                dict((k, v) for k, v in cfg.items() if not isinstance(v, dict))
            host = cfg.get('host')
            if not DB_POOLS.get(host):
                log.info('add slave db server %s' % host)
                DB_POOLS[host] = ConnectionPool(
                    max_connections=1000, **cfg)
            break
        except Exception:
            # was a bare 'except:', which would also swallow
            # KeyboardInterrupt/SystemExit during the retry loop
            traceback.print_exc()
            time.sleep(10)
    return DB_POOLS.get(host)
Esempio n. 4
0
def prepar():
    """Initialize the slave process globals: SLAVEID, IP, log, and the
    shared DB_POOLS / RE_CONN connection handles."""
    # get slave ID (MAC-address based)
    global SLAVEID
    SLAVEID = getMacId()

    # get this slave's public IP (resolved via an external service)
    global IP
    IP = getMyIpInfo()['data']['ip']

    global log
    # Per-IP log configuration when one exists, otherwise 'default'.
    log_output = (slave_config.get(IP)['slave_info_log']
                  if slave_config.get(IP) else
                  slave_config.get('default')['slave_info_log'])
    log = ibbdlib.ibbdlog.get_logger(
        log_path='../log/',
        log_file='slave %s %s.log' %
        (PID, time.strftime('%Y%m%d', time.localtime())),
        log_name='spider.slave',
        msg_format=
        '%(asctime)s\t%(name)s\t%(process)d\t%(levelname)s\t%(message)s',
        output=log_output)

    _slave_info('initing...')

    # connection pool
    # NOTE(review): the original looked up slave_config.get(IP) here, but
    # immediately overwrote the result with the 'default' entry, so only
    # 'default' was ever used.  The dead lookup has been removed; restore
    # the per-IP lookup if that override was meant to be temporary.
    conn_config = slave_config.get('default')
    # Strip nested dicts so only scalar MySQL keyword args reach the pool.
    dbConnPool = ConnectionPool(
        max_connections=1000,
        **dict((k, v) for k, v in conn_config.get('mysql').items()
               if not isinstance(v, dict)))
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')

    # Publish the handles for the rest of the slave to use.
    global DB_POOLS, RE_CONN
    DB_POOLS[conn_config.get('mysql')['host']] = dbConnPool
    RE_CONN = reConn
Esempio n. 5
0
def main():
    """Initialize slave identity and DB/Redis connections, then shut
    both down (a connectivity smoke test of the slave setup path)."""
    _slave_info('initing...')

    # get slave ID (MAC-address based)

    global SLAVEID
    SLAVEID = getMacId()

    # get this slave's public IP

    global IP
    IP = getMyIpInfo()['data']['ip']

    # connection pool
    # NOTE(review): the original first did slave_config.get('IP') -- a
    # literal 'IP' string key, almost certainly meant slave_config.get(IP)
    # -- and then overwrote the result with the 'default' entry anyway.
    # The dead, buggy lookup has been removed; behavior is unchanged.

    conn_config = slave_config.get('default')
    dbConnPool = ConnectionPool(max_connections=1000,
                                **conn_config.get('mysql'))
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')
    dbConnPool.disconnect()
    del reConn
Esempio n. 6
0
@desc   library
'''

import re
import time
import json
import uuid

import requests
import MySQLdb
import redis
from config import spider_header, mysql_db_config, redis_config, cookies
from dfhspider.connectionpool import ConnectionPool

# Unique slave identifier derived from this machine's MAC address.
SLAVEID = uuid.getnode()
# Module-wide MySQL connection pool built from config.mysql_db_config.
connpool = ConnectionPool(max_connections=20, **mysql_db_config)


def getMyIpInfo():
    """Look up this machine's public IP via the Taobao IP info service.

    Returns the decoded JSON response as a dict on success, or
    {'error': <message>} when the request or JSON decoding fails.
    """
    refer = 'http://ip.taobao.com/service/getIpInfo.php'
    try:
        r = requests.request('POST', refer, data={'ip': 'myip'})
        # r.text is already decoded text, so no encoding argument is
        # needed (json.loads rejects it on Python 3.9+ anyway).
        j = json.loads(r.text)
    except Exception as e:
        # 'except Exception, e' / e.message are Python-2-only spellings;
        # 'as e' + str(e) work on Python 2.6+ and 3.
        j = {'error': str(e)}
    return j


try:
    IP = (getMyIpInfo().get('data') or {}).get('ip') or ''
except:
Esempio n. 7
0
import ibbdlib
from dfhspider.app import getItemsByKeyword
from dfhspider.connectionpool import ConnectionPool
from dfhspider.topbll import saveKeywordSearchResult

# Connection settings for the local 'ibbd2' MySQL database.
db_server = {
    'host': 'localhost',
    'user': '******',
    'passwd': 'ibbd_etl_secure',
    'db': 'ibbd2',
    'charset': 'utf8'
}

# Timestamp taken once at module load.
now = datetime.now()

# Smoke-test the pool: grab one connection and hand it straight back.
db_pool = ConnectionPool(**db_server)
db_conn_test = db_pool.get_connection()
db_conn_test.close()

# Direct (non-pooled) connection used for the keyword query below.
db_conn = ibbdlib.get_db_conn(**db_server)
query = """SELECT DISTINCT T2.keyword
    FROM(
        SELECT DISTINCT(keyword) AS keyword
        FROM ibbd2.user_keywords T3
        WHERE T3.status='1'
        ORDER BY keyword
    ) T2
    LEFT JOIN(
        SELECT DISTINCT(keyword) AS keyword
        FROM topspider.top_itemsearchresult T1
        WHERE T1.population_tsmp > curdate()
Esempio n. 8
0
def main():

    # get log

    global log
    log = _get_log()

    # slave info

    _slave_info('initing...')

    # get slave ID

    global SLAVEID
    SLAVEID = getMacId()

    # get slave ID

    global IP
    IP = getMyIpInfo()['data']['ip']

    # connection pool

    conn_config = slave_config.get('IP') or slave_config.get('default')

    # dbConnPool = ConnectionPool(max_connections=1000, **conn_config.get('mysql'))

    dbConnPoolList = dict()
    for (db_server_ip, db_server_config) in db_server.items():
        dbConnPoolList[db_server_ip] = ConnectionPool(max_connections=1000, **db_server_config)
    dbConnPoolList['master'] = ConnectionPool(max_connections=1000, **db_server_master)
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')

    # thread pool

    threadPool = []
    while 1:
        while 1:
            try:
                _slave_activity(reConn)
                _slave_interval(reConn, 10, 10)
                break
            except Exception, e:
                pass
        for priority in sorted(SCHEDULE_PRIORITY.keys()):

            # clear inactive Thread

            threadPool = [childThread for childThread in threadPool if childThread.isAlive()]
            for schedule in SCHEDULE_PRIORITY[priority]:
                log.debug('schedule %s', schedule)

                # how schedule should run
                # 1.this schedule exists
                # 2.schedule not active
                # 3.current thread
                # final:call schedule function by name

                if reConn.exists(schedule) and schedule not in [t.name for t in threadPool] and sum(map(lambda t: t \
                        in [otherSchedule for otherSchedule in SCHEDULE_PRIORITY[priority] if otherSchedule
                        != schedule], [thread.name for thread in threadPool])) == len(threadPool):
                    threadPool.append(threading.Thread(target=globals()[THREAD_TARGET[schedule]], name=schedule,
                                      args=(reConn, dbConnPoolList)))
                    threadPool[-1].start()
        time.sleep(10)
        log.debug('thread waiting for %d', 30)