def main():
    """Run the schedule-category test against the local MySQL and Redis."""
    # Credentials for the local ibbd2 database.
    mysql_config = {
        'host': 'localhost',
        'user': '******',
        'passwd': '',
        'db': 'ibbd2',
    }
    pool = ConnectionPool(max_connections=20, **mysql_config)
    redis_conn = getRedisConn2()
    test_schedule_cat(redis_conn, pool)
    # Release both connections when the test is done.
    del redis_conn
    pool.disconnect()
def master():
    """Master job: refresh the keyword tables on the ETL host, run one
    keyword scheduling pass against the local topspider DB, then stop.
    """
    import time  # local import: only needed for the retry backoff below

    # Rebuild the keyword table before re-importing it from the source host.
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_conn.execute("TRUNCATE TABLE ibbd2.lib_cat_industry_keyword_2")
    db_conn.close()

    # Mirror the scheduling input tables from .152 to .172.
    src = db_server['223.4.155.152']
    dst = db_server['223.4.155.172']
    for table in ('ibbd2.user2',
                  'ibbd2.user_keywords',
                  'ibbd2.lib_cat_industry_keyword_2'):
        ibbdlib.migratetable.migrate_table(src, dst, table)
    # shop_id may be stored as '' upstream; normalise it to 0 on the way over.
    ibbdlib.migratetable.migrate_table_by_query(
        src, dst, 'ibbd2.user_shop_monitor_items',
        """SELECT user, shop_type, (CASE WHEN shop_id='' THEN 0 ELSE shop_id END) shop_id, user_num_id, item_id, status, population_tsmp FROM ibbd2.user_shop_monitor_items""")

    # Bring up the MySQL pool and the Redis connection; retry until both work.
    dbConnPool = None
    reConn = None
    while True:
        try:
            dbConnPool = ConnectionPool(
                max_connections=1000, host='localhost', user='******',
                passwd='ibbd_etl_secure', db='topspider', charset='utf8')
            # reConn = getRedisConn2()
            reConn = ibbdlib.get_redis_conn(**redis_server)
            break
        except Exception as e:
            print(e)
            time.sleep(10)  # FIX: back off instead of busy-spinning on failure

    runKeywordsSchedule(reConn, dbConnPool)
    _ensure_schedule_complete(reConn)
    dbConnPool.disconnect()

    # Fresh pool for the schedule migration step (currently disabled).
    dbConnPool = ConnectionPool(
        max_connections=1000, host='localhost', user='******',
        passwd='ibbd_etl_secure', db='topspider', charset='utf8')
    while True:
        try:
            # migrateScheduleToRedis(reConn, dbConnPool)
            break
        except Exception:  # FIX: bare except also trapped KeyboardInterrupt
            traceback.print_exc()
    dbConnPool.disconnect()
    del reConn
def main():
    """Initialise slave identity and connections, then shut them down."""
    _slave_info('initing...')
    # Identify this slave by its MAC address.
    global SLAVEID
    SLAVEID = getMacId()
    # Resolve this slave's public IP.
    global IP
    IP = getMyIpInfo()['data']['ip']
    # FIX: look up the per-host config by the resolved IP *value*, not the
    # literal string 'IP' (which can never be a key in slave_config); this
    # matches the working pattern used in prepar().
    conn_config = slave_config.get(IP) or slave_config.get('default')
    # NOTE(review): this override forces the default config regardless of
    # the lookup above — confirm whether it is a leftover debugging line.
    conn_config = slave_config.get('default')
    dbConnPool = ConnectionPool(max_connections=1000,
                                **conn_config.get('mysql'))
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')
    dbConnPool.disconnect()
    del reConn
def _reload_slave_db_pool(schedule_name):
    """Return (creating on demand) the DB connection pool for *schedule_name*.

    Loads the JSON value of the Redis key 'slave_db_server', picks the entry
    for the schedule — falling back to 'default', then to the top-level
    scalar entries — and caches one ConnectionPool per DB host in DB_POOLS.
    Retries forever with a 10s pause between attempts.
    """
    global RE_CONN, DB_POOLS
    host = None
    while 1:
        try:
            db_server = json.loads(RE_CONN.get('slave_db_server'))
            db_server = db_server.get(schedule_name) or \
                db_server.get('default') or \
                dict((k, v) for k, v in db_server.items()
                     if not isinstance(v, dict))
            # Hoist the repeated db_server.get('host') lookups.
            host = db_server.get('host')
            if not DB_POOLS.get(host):
                log.info('add slave db server %s' % host)
                DB_POOLS[host] = ConnectionPool(
                    max_connections=1000, **db_server)
            break
        except Exception:  # FIX: bare except also trapped SystemExit/KeyboardInterrupt
            traceback.print_exc()
            time.sleep(10)
    return DB_POOLS.get(host)
def prepar():
    """Prepare slave globals: SLAVEID, IP, the logger, and the shared
    DB pool / Redis connection (stored in DB_POOLS and RE_CONN)."""
    global SLAVEID
    SLAVEID = getMacId()  # slave identity from the MAC address
    global IP
    IP = getMyIpInfo()['data']['ip']  # public IP of this host

    # Per-host log target, falling back to the 'default' section.
    global log
    host_cfg = slave_config.get(IP)
    if host_cfg:
        log_output = host_cfg['slave_info_log']
    else:
        log_output = slave_config.get('default')['slave_info_log']
    log = ibbdlib.ibbdlog.get_logger(
        log_path='../log/',
        log_file='slave %s %s.log' % (PID,
                                      time.strftime('%Y%m%d', time.localtime())),
        log_name='spider.slave',
        msg_format='%(asctime)s\t%(name)s\t%(process)d\t%(levelname)s\t%(message)s',
        output=log_output)

    _slave_info('initing...')

    # Connection configuration (the second assignment forces 'default').
    conn_config = slave_config.get(IP) or slave_config.get('default')
    conn_config = slave_config.get('default')
    # Strip nested dict entries before passing the config as kwargs.
    mysql_cfg = dict((k, v) for k, v in conn_config.get('mysql').items()
                     if not isinstance(v, dict))
    dbConnPool = ConnectionPool(max_connections=1000, **mysql_cfg)
    reConn = getRedisConn2(conn_config.get('redis'))

    _slave_info('init... complete')
    _slave_info('slave start to work')

    global DB_POOLS, RE_CONN
    DB_POOLS[conn_config.get('mysql')['host']] = dbConnPool
    RE_CONN = reConn
def master():
    """Master job: refresh the keyword tables on the ETL host, run one
    keyword scheduling pass against the local topspider DB, then stop.
    """
    import time  # local import: only needed for the retry backoff below

    # Rebuild the keyword table before re-importing it from the source host.
    db_conn = ibbdlib.get_db_conn(**db_server['223.4.155.172'])
    db_conn.execute("TRUNCATE TABLE ibbd2.lib_cat_industry_keyword_2")
    db_conn.close()

    # Mirror the scheduling input tables from .152 to .172.
    src = db_server['223.4.155.152']
    dst = db_server['223.4.155.172']
    for table in ('ibbd2.user2',
                  'ibbd2.user_keywords',
                  'ibbd2.lib_cat_industry_keyword_2'):
        ibbdlib.migratetable.migrate_table(src, dst, table)
    # shop_id may be stored as '' upstream; normalise it to 0 on the way over.
    ibbdlib.migratetable.migrate_table_by_query(
        src, dst, 'ibbd2.user_shop_monitor_items',
        """SELECT user, shop_type, (CASE WHEN shop_id='' THEN 0 ELSE shop_id END) shop_id, user_num_id, item_id, status, population_tsmp FROM ibbd2.user_shop_monitor_items""")

    # Bring up the MySQL pool and the Redis connection; retry until both work.
    dbConnPool = None
    reConn = None
    while True:
        try:
            dbConnPool = ConnectionPool(
                max_connections=1000, host='localhost', user='******',
                passwd='ibbd_etl_secure', db='topspider', charset='utf8')
            # reConn = getRedisConn2()
            reConn = ibbdlib.get_redis_conn(**redis_server)
            break
        except Exception as e:
            print(e)
            time.sleep(10)  # FIX: back off instead of busy-spinning on failure

    runKeywordsSchedule(reConn, dbConnPool)
    _ensure_schedule_complete(reConn)
    dbConnPool.disconnect()

    # Fresh pool for the schedule migration step (currently disabled).
    dbConnPool = ConnectionPool(
        max_connections=1000, host='localhost', user='******',
        passwd='ibbd_etl_secure', db='topspider', charset='utf8')
    while True:
        try:
            # migrateScheduleToRedis(reConn, dbConnPool)
            break
        except Exception:  # FIX: bare except also trapped KeyboardInterrupt
            traceback.print_exc()
    dbConnPool.disconnect()
    del reConn
@desc library ''' import re import time import json import uuid import requests import MySQLdb import redis from config import spider_header, mysql_db_config, redis_config, cookies from dfhspider.connectionpool import ConnectionPool SLAVEID = uuid.getnode() connpool = ConnectionPool(max_connections=20, **mysql_db_config) def getMyIpInfo(): refer = 'http://ip.taobao.com/service/getIpInfo.php' try: r = requests.request('POST', refer, data={'ip': 'myip'}) j = json.loads(r.text, encoding=r.encoding) except Exception, e: j = {'error': e.message} return j try: IP = (getMyIpInfo().get('data') or {}).get('ip') or '' except:
import ibbdlib from dfhspider.app import getItemsByKeyword from dfhspider.connectionpool import ConnectionPool from dfhspider.topbll import saveKeywordSearchResult db_server = { 'host': 'localhost', 'user': '******', 'passwd': 'ibbd_etl_secure', 'db': 'ibbd2', 'charset': 'utf8' } now = datetime.now() db_pool = ConnectionPool(**db_server) db_conn_test = db_pool.get_connection() db_conn_test.close() db_conn = ibbdlib.get_db_conn(**db_server) query = """SELECT DISTINCT T2.keyword FROM( SELECT DISTINCT(keyword) AS keyword FROM ibbd2.user_keywords T3 WHERE T3.status='1' ORDER BY keyword ) T2 LEFT JOIN( SELECT DISTINCT(keyword) AS keyword FROM topspider.top_itemsearchresult T1 WHERE T1.population_tsmp > curdate()
def main():
    """Slave main loop: initialise identity and connections, then run the
    configured schedules forever, one worker thread per active schedule."""
    global log
    log = _get_log()
    _slave_info('initing...')
    # Identify this slave by its MAC address.
    global SLAVEID
    SLAVEID = getMacId()
    # Resolve this slave's public IP.
    global IP
    IP = getMyIpInfo()['data']['ip']
    # FIX: look up the per-host config by the resolved IP *value*, not the
    # literal string 'IP' (which can never be a key in slave_config).
    conn_config = slave_config.get(IP) or slave_config.get('default')
    # One pool per configured DB server, plus one for the master DB.
    dbConnPoolList = dict()
    for (db_server_ip, db_server_config) in db_server.items():
        dbConnPoolList[db_server_ip] = ConnectionPool(
            max_connections=1000, **db_server_config)
    dbConnPoolList['master'] = ConnectionPool(
        max_connections=1000, **db_server_master)
    reConn = getRedisConn2(conn_config.get('redis'))
    _slave_info('init... complete')
    _slave_info('slave start to work')

    # Pool of worker threads, one per currently-running schedule.
    threadPool = []
    while 1:
        # Heartbeat / interval registration; retry with a short backoff.
        while 1:
            try:
                _slave_activity(reConn)
                _slave_interval(reConn, 10, 10)
                break
            except Exception:
                # FIX: was a silent busy-spin ('pass'); back off briefly
                # so a Redis outage does not peg the CPU.
                time.sleep(1)
        for priority in sorted(SCHEDULE_PRIORITY.keys()):
            # Drop threads that have finished.
            threadPool = [childThread for childThread in threadPool
                          if childThread.isAlive()]
            for schedule in SCHEDULE_PRIORITY[priority]:
                log.debug('schedule %s', schedule)
                # Start a schedule only when:
                # 1. the schedule key exists in Redis,
                # 2. it is not already running, and
                # 3. every running thread belongs to another schedule of
                #    this SAME priority level (nothing from other levels
                #    is in flight).
                running = [t.name for t in threadPool]
                peers = [s for s in SCHEDULE_PRIORITY[priority]
                         if s != schedule]
                if (reConn.exists(schedule) and schedule not in running
                        and all(name in peers for name in running)):
                    # Dispatch to the schedule's handler by configured name.
                    worker = threading.Thread(
                        target=globals()[THREAD_TARGET[schedule]],
                        name=schedule,
                        args=(reConn, dbConnPoolList))
                    threadPool.append(worker)
                    worker.start()
                    time.sleep(10)
        log.debug('thread waiting for %d', 30)