def _persist_media_result(result):
    """Add ``result`` to the DB session when it is a MediaInfo or ArtistInfo."""
    # NOTE(review): the original chain tested MediaInfo twice; possibly a
    # third model type was intended there -- confirm.
    if isinstance(result, (MediaInfo, ArtistInfo)):
        dbsession.add(result)


def _commit_media_session():
    """Commit the shared DB session; on failure log the error and roll back."""
    try:
        dbsession.commit()
    except Exception:
        logger.exception(sys.exc_info())
        logger.error('dbsession error')
        dbsession.rollback()


def request_workflow_thread():
    """Worker loop: take requests off ``requestQueue`` and run their handlers.

    Each queue entry is unpacked as ``(priority, requestInfo, param)``.  The
    handler is looked up by ``requestInfo.requestName`` and called with
    ``requestInfo.requestUrl`` (plus ``param`` for ``'down_media_file'``).
    A handler either returns one ``(resultInfo, result)`` pair or is a
    generator function yielding such pairs.  Pairs whose
    ``resultInfo.status`` is 999 are put back on the queue as new requests;
    MediaInfo/ArtistInfo results are added to ``dbsession``, which is
    committed once per processed request.

    The loop ends when fetching from the queue raises (``get`` is called with
    ``timeout=10``) or when an entry has no usable ``requestInfo``.

    NOTE(review): this file contains a second ``def request_workflow_thread``
    further down; if both are at module level the later one replaces this one.
    """
    while True:
        try:
            _priority, requestInfo, param = requestQueue.get(
                block=True, timeout=10)
            # NOTE(review): task_done() is signalled before the request is
            # processed, so a requestQueue.join() (if used anywhere) would
            # not wait for in-flight work.
            requestQueue.task_done()
            logger.debug('PriorityQueue size {}'.format(requestQueue.qsize()))
        except Exception:
            # Any failure here (including a malformed entry) ends the worker.
            logger.exception(sys.exc_info())
            logger.error('request_workflow_thread queue empty')
            break

        if not (requestInfo and requestInfo.requestName):
            logger.error('request_workflow_thread requestInfo none')
            break

        logger.debug('run {} params {}'.format(
            requestInfo.requestName, requestInfo.requestUrl))

        # SECURITY: the handler is resolved with eval() on a name carried by
        # the request object.  If requestName can ever come from untrusted
        # data this executes arbitrary code; an explicit name -> function
        # table would be safer.  Resolved once here instead of per use.
        handler = eval(requestInfo.requestName)
        if requestInfo.requestName == 'down_media_file':
            results = handler(requestInfo.requestUrl, param)
        else:
            results = handler(requestInfo.requestUrl)

        if isgeneratorfunction(handler):
            logger.debug('isgeneratorfunction {} true'.format(
                requestInfo.requestName))
            for resultInfo, result in results:
                if resultInfo.status == 999:
                    logger.debug('PriorityQueue put {},{} '.format(
                        resultInfo.requestName, resultInfo.requestUrl))
                    requestQueue.put(
                        (PRIORITYDEFINE[resultInfo.requestName],
                         resultInfo, result))
                else:
                    _persist_media_result(result)
            request_info_update_insert(requestInfo)
            _commit_media_session()
        else:
            logger.debug('isgeneratorfunction {} false'.format(
                requestInfo.requestName))
            resultInfo, result = results
            # Unlike the generator path, the result is persisted here
            # regardless of resultInfo.status.
            _persist_media_result(result)
            request_info_update_insert(requestInfo)
            if resultInfo.status != 999:
                logger.debug('dbsession add resultInfo {}'.format(
                    resultInfo.urlId))
            else:
                logger.debug('PriorityQueue put {},{} '.format(
                    resultInfo.requestName, resultInfo.requestUrl))
                requestQueue.put(
                    (PRIORITYDEFINE[resultInfo.requestName],
                     resultInfo, result))
            _commit_media_session()
def _store_or_requeue_request(requestInfo, param=None):
    """Persist an unsuccessful request and/or put it back on ``requestQueue``.

    ``param`` is carried as the third element of the queue entry so the
    entry has the ``(priority, requestInfo, param)`` shape that the queue
    consumers in this file unpack.
    """
    if not requestInfo:
        # The callers' own checks (`if requestInfo and ...`) allow for a
        # missing requestInfo; there is nothing to store or retry then.
        logger.error('get_media_workflow requestInfo none')
        return

    if requestInfo.status == 0:
        dbsession.add(requestInfo)
    # NOTE(review): these are two independent `if`s, as in the original, so a
    # request with status 0 is both stored and queued -- confirm whether
    # `elif` was intended.
    if requestInfo.status < 0 and (requestInfo.runCnt + requestInfo.status) > 0:
        dbsession.add(requestInfo)
    else:
        # A 3-tuple, not the former 2-element list: consumers unpack three
        # values, and a list cannot be ordered against the tuples that the
        # other producers put on the queue.
        requestQueue.put((requestInfo.status, requestInfo, param))


def get_media_workflow(songid):
    """Fetch the media info for ``songid``, download the file, record results.

    The media-info request and the download each return a request record.
    Successful records (download status 200) are added to ``dbsession``;
    other records are stored and/or re-queued by status.  The session is
    committed at the end, with a rollback and log entry on failure.

    Args:
        songid: song identifier substituted into the API URL.
    """
    # Normal flow: build the JSONP request for the song's play information.
    jQuery = '17204780742719340729_1586053549318'
    item = '1586053553445'
    url = 'http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&callback=jQuery{}&songid={}&from=web&_={}'.format(jQuery, songid, item)
    requestInfo, info = get_media_info_js_request(url)

    if info:
        # Media info was fetched: store the request record and the media row.
        dbsession.add(requestInfo)
        dbsession.add(info)

        s_time = time.time()
        requestInfo, _result = down_media_file(info.mediaUrl, info)
        e_time = time.time()

        # Update the media row with download status and elapsed time.
        info.downStatus = '00'
        info.downTime = e_time - s_time

        if requestInfo and requestInfo.status == 200:
            # Download succeeded: store its request record.
            dbsession.add(requestInfo)
        else:
            # Decide whether the failed download is stored or queued; `info`
            # travels with it so a retry has the media row available.
            _store_or_requeue_request(requestInfo, info)
    else:
        _store_or_requeue_request(requestInfo)

    logger.debug('requestQueue size {}'.format(requestQueue.qsize()))
    try:
        dbsession.commit()
    except Exception:
        dbsession.rollback()
        logger.exception(sys.exc_info())
def init_scrapy_work():
    """Initialise the session pool and seed ``requestQueue``.

    When the RequestInfo table already has rows, every row with status
    '999' is queued again with priority 1.  Otherwise a single request for
    the weibo navigation page is queued.
    """
    init_session_pool()

    if dbsession.query(RequestInfo).first() is not None:
        # Restart: resume from the requests stored in the database.
        logger.debug('scrapy reboot from db')
        pending = dbsession.query(RequestInfo).filter_by(status='999').all()
        for stored_request in pending:
            logger.debug('scrapy reboot from db {}'.format(stored_request))
            requestQueue.put((1, stored_request, None))
        return

    # Fresh start: queue the entry-point request.
    seed = request_variable_init('https://d.weibo.com/1087030002_2986_top')
    seed.requestName = 'weibo_http_get_navigation_page_list'
    requestQueue.put((PRIORITYDEFINE[seed.requestName], seed, None))
def test_scrapy_work():
    """Queue one artist page (artist 2517) and start ``scrapy_work``."""
    # The artist list is still fetched, but its result is not used here; a
    # disabled variant of this function queued one request per listed artist.
    _artists = get_artist_list(
        'http://music.taihe.com/artist',
        r'D:\project\python\pylib\artistjson.txt',
    )

    artist_request = request_variable_init(
        'http://music.taihe.com/artist/{}'.format('2517'))
    artist_request.requestName = 'get_artist_music_list'
    queue_entry = (
        PRIORITYDEFINE[artist_request.requestName],
        artist_request,
        None,
    )
    requestQueue.put(queue_entry)

    scrapy_work()
def init_work():
    """Seed ``requestQueue`` for the music crawler.

    When the RequestInfo table already has rows, every row with status
    '999' is queued again with priority 1.  Otherwise one
    'get_artist_music_list' request is queued per artist returned by
    ``get_artist_list`` and the resulting queue size is printed.
    """
    if dbsession.query(RequestInfo).first() is not None:
        # Restart: resume from the requests stored in the database.
        logger.debug('scrapy reboot from db')
        pending = dbsession.query(RequestInfo).filter_by(status='999').all()
        for stored_request in pending:
            logger.debug('scrapy reboot from db {}'.format(stored_request))
            requestQueue.put((1, stored_request, None))
        return

    artists = get_artist_list(
        'http://music.taihe.com/artist',
        r'D:\project\python\pylib\artistjson.txt',
    )
    for artist_id, _artist_name in artists.items():
        artist_request = request_variable_init(
            'http://music.taihe.com/artist/{}'.format(artist_id))
        artist_request.requestName = 'get_artist_music_list'
        requestQueue.put(
            (PRIORITYDEFINE[artist_request.requestName], artist_request, None))
    # NOTE(review): the original indentation was lost; the size is assumed to
    # be printed once after the loop, not per artist -- confirm.
    print(requestQueue.qsize())
def _persist_weibo_result(result):
    """Add the WeiboUser rows in ``result`` (one row or a list) to the session."""
    if isinstance(result, list):
        for item in result:
            if isinstance(item, WeiboUser):
                dbsession.add(item)
    elif isinstance(result, WeiboUser):
        dbsession.add(result)


def _commit_weibo_session():
    """Commit the shared DB session; on failure log the error and roll back."""
    try:
        dbsession.commit()
    except Exception:
        logger.exception(sys.exc_info())
        logger.error('dbsession error')
        dbsession.rollback()


def request_workflow_thread():
    """Worker loop: take requests off ``requestQueue`` and run their handlers.

    On each pass a session is taken from ``sessionQueue`` and immediately
    put back, then one ``(priority, requestInfo, param)`` entry is taken
    from ``requestQueue``.  After a 20 second pause the handler named by
    ``requestInfo.requestName`` is called with ``requestInfo.requestUrl``
    and that session.  A handler either returns one
    ``(resultInfo, result)`` pair or is a generator function yielding such
    pairs.  Pairs whose ``resultInfo.status`` is 999 are queued as new
    requests; WeiboUser results (single or in a list) are added to
    ``dbsession``, which is committed once per processed request.

    The loop ends when fetching from either queue raises (both ``get``
    calls use ``timeout=10``) or when an entry has no usable
    ``requestInfo``.
    """
    while True:
        try:
            session = sessionQueue.get(block=True, timeout=10)
            sessionQueue.task_done()
            # The session goes straight back so other workers can obtain it;
            # this worker keeps using the same object below.
            sessionQueue.put(session)
            _priority, requestInfo, _param = requestQueue.get(
                block=True, timeout=10)
            # NOTE(review): task_done() is signalled before the request is
            # processed, so a requestQueue.join() (if used anywhere) would
            # not wait for in-flight work.
            requestQueue.task_done()
            logger.debug('PriorityQueue size {}'.format(requestQueue.qsize()))
        except Exception:
            # Any failure here (including a malformed entry) ends the worker.
            logger.exception(sys.exc_info())
            logger.error('request_workflow_thread queue empty')
            break

        # Fixed pause before every request.
        time.sleep(20)

        if not (requestInfo and requestInfo.requestName):
            logger.error('request_workflow_thread requestInfo none')
            break

        logger.debug('run {} params {}'.format(
            requestInfo.requestName, requestInfo.requestUrl))

        # SECURITY: the handler is resolved with eval() on a name carried by
        # the request object.  If requestName can ever come from untrusted
        # data this executes arbitrary code; an explicit name -> function
        # table would be safer.  Resolved once here instead of per use.
        handler = eval(requestInfo.requestName)
        results = handler(requestInfo.requestUrl, session)

        # Generator handlers yield many pairs; plain ones return one pair.
        if isgeneratorfunction(handler):
            logger.debug('isgeneratorfunction {} true'.format(
                requestInfo.requestName))
            for resultInfo, result in results:
                if resultInfo.status == 999:
                    logger.debug('PriorityQueue put {},{} '.format(
                        resultInfo.requestName, resultInfo.requestUrl))
                    requestQueue.put(
                        (PRIORITYDEFINE[resultInfo.requestName],
                         resultInfo, result))
                else:
                    _persist_weibo_result(result)
            request_info_update_insert(requestInfo)
            _commit_weibo_session()
        else:
            logger.debug('isgeneratorfunction {} false'.format(
                requestInfo.requestName))
            resultInfo, result = results
            # Unlike the generator path, the result is persisted here
            # regardless of resultInfo.status.
            _persist_weibo_result(result)
            request_info_update_insert(requestInfo)
            if resultInfo.status != 999:
                logger.debug('dbsession add resultInfo {}'.format(
                    resultInfo.urlId))
            else:
                logger.debug('PriorityQueue put {},{} '.format(
                    resultInfo.requestName, resultInfo.requestUrl))
                requestQueue.put(
                    (PRIORITYDEFINE[resultInfo.requestName],
                     resultInfo, result))
            _commit_weibo_session()