コード例 #1
0
ファイル: music.py プロジェクト: viviluhui/splider
def _commit_or_rollback():
    """Commit the shared DB session; on any failure log it and roll back."""
    try:
        dbsession.commit()
    except Exception:
        logger.exception(sys.exc_info())
        logger.error('dbsession error')
        dbsession.rollback()


def request_workflow_thread():
    """Worker loop: take requests from ``requestQueue`` and run their handlers.

    Each queue item is a ``(priority, requestInfo, param)`` tuple.  The
    handler is the callable named by ``requestInfo.requestName``; it is
    called with ``requestInfo.requestUrl`` (plus ``param`` for
    ``'down_media_file'``).  A handler either returns one
    ``(resultInfo, result)`` pair or is a generator function yielding such
    pairs.  A ``resultInfo`` whose status is 999 is put back on the queue;
    ``MediaInfo`` / ``ArtistInfo`` results are added to ``dbsession``.

    The loop ends when the queue read fails (including the 10 second
    timeout) or when the item has no usable ``requestInfo``.
    """
    while True:
        try:
            _priority, requestInfo, param = requestQueue.get(block=True, timeout=10)
            # task_done() is signalled before the request is processed, so a
            # requestQueue.join() elsewhere does not wait for the work below.
            requestQueue.task_done()
            logger.debug('PriorityQueue size {}'.format(requestQueue.qsize()))
        except Exception:
            # Any failure here (timeout or malformed item) stops the worker.
            logger.exception(sys.exc_info())
            logger.error('request_workflow_thread queue empty')
            break

        if not (requestInfo and requestInfo.requestName):
            logger.error('request_workflow_thread requestInfo none')
            break

        logger.debug('run {} params {}'.format(requestInfo.requestName,requestInfo.requestUrl))

        # SECURITY(review): eval() executes whatever expression is stored in
        # requestName, and request rows can be reloaded from the database.
        # Resolve it once here; consider an explicit name -> function table.
        handler = eval(requestInfo.requestName)

        if requestInfo.requestName == 'down_media_file':
            results = handler(requestInfo.requestUrl, param)
        else:
            results = handler(requestInfo.requestUrl)

        if isgeneratorfunction(handler):
            logger.debug('isgeneratorfunction {} true'.format(requestInfo.requestName))
            for resultInfo, result in results:
                if resultInfo.status == 999:
                    # Follow-up request: schedule it instead of storing a result.
                    logger.debug('PriorityQueue put {},{} '.format(resultInfo.requestName,resultInfo.requestUrl))
                    requestQueue.put((PRIORITYDEFINE[resultInfo.requestName], resultInfo, result))
                elif isinstance(result, (MediaInfo, ArtistInfo)):
                    dbsession.add(result)
                # The originating request is recorded and committed once per
                # yielded pair.
                request_info_update_insert(requestInfo)
                _commit_or_rollback()
        else:
            logger.debug('isgeneratorfunction {} false'.format(requestInfo.requestName))
            resultInfo, result = results

            # Unlike the generator path, the result is stored regardless of
            # the resultInfo status.
            if isinstance(result, (MediaInfo, ArtistInfo)):
                dbsession.add(result)

            request_info_update_insert(requestInfo)
            if resultInfo.status != 999:
                logger.debug('dbsession add resultInfo {}'.format(resultInfo.urlId))
            else:
                logger.debug('PriorityQueue put {},{} '.format(resultInfo.requestName, resultInfo.requestUrl))
                requestQueue.put((PRIORITYDEFINE[resultInfo.requestName], resultInfo, result))

            _commit_or_rollback()
コード例 #2
0
ファイル: music.py プロジェクト: viviluhui/splider
def _store_or_requeue_request(requestInfo, param):
    """Store an unsuccessful request in the DB or put it back on the queue.

    A request is added to ``dbsession`` when its status is 0, or when the
    status is negative and ``runCnt + status`` is positive (presumably the
    retry budget is used up -- TODO confirm).  Any other request is put on
    ``requestQueue`` as a ``(priority, requestInfo, param)`` tuple, the shape
    the queue worker unpacks, using the status as the priority.
    """
    if not requestInfo:
        # Nothing to store or retry; avoid dereferencing a missing request.
        logger.error('get_media_workflow requestInfo none')
        return

    status = requestInfo.status
    keep_in_db = status == 0 or (status < 0 and (requestInfo.runCnt + status) > 0)
    if keep_in_db:
        dbsession.add(requestInfo)
    else:
        requestQueue.put((status, requestInfo, param))
        logger.debug('requestQueue size {}'.format(requestQueue.qsize()))


def get_media_workflow(songid):
    """Fetch the media info for ``songid``, download the file, record both.

    The media-info request and the media row are added to ``dbsession`` when
    the info lookup succeeds, then the file at ``info.mediaUrl`` is
    downloaded and the elapsed time is written to ``info.downTime``.
    Requests that did not succeed are either stored or re-queued by
    ``_store_or_requeue_request``.  The session is committed at the end and
    rolled back if the commit fails.

    Fixes relative to the previous version: a status-0 request is no longer
    both stored and re-queued, re-queued items use the 3-tuple shape the
    worker expects, and a missing ``requestInfo`` no longer raises.
    """
    # Normal flow
    jQuery = '17204780742719340729_1586053549318'
    item = '1586053553445'
    url = 'http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&callback=jQuery{}&songid={}&from=web&_={}'.format(jQuery,songid,item)

    requestInfo, info = get_media_info_js_request(url)

    if info:
        # Media info was fetched: store the request and the media row.
        dbsession.add(requestInfo)
        dbsession.add(info)

        s_time = time.time()
        requestInfo, _ = down_media_file(info.mediaUrl, info)
        e_time = time.time()

        # Update the media row with download status and elapsed time.
        # NOTE(review): downStatus is set to '00' whether or not the download
        # succeeded -- confirm this is intended.
        info.downStatus = '00'
        info.downTime = e_time - s_time

        if requestInfo and requestInfo.status == 200:
            # Successful download request: store it.
            dbsession.add(requestInfo)
        else:
            # Pass the media row along so a retried download can reuse it
            # (the worker hands the third tuple element to down_media_file).
            _store_or_requeue_request(requestInfo, info)
    else:
        _store_or_requeue_request(requestInfo, None)

    try:
        dbsession.commit()
    except Exception:
        dbsession.rollback()
        logger.exception(sys.exc_info())
コード例 #3
0
ファイル: weibo_scrapy.py プロジェクト: viviluhui/splider
def init_scrapy_work():
    """Prepare the session pool and seed ``requestQueue`` for a crawl.

    When the ``RequestInfo`` table already has rows, every stored request
    with status '999' is queued again with priority 1.  Otherwise a single
    request for the Weibo navigation page is queued.
    """
    init_session_pool()

    has_history = dbsession.query(RequestInfo).first() is not None
    if has_history:
        logger.debug('scrapy reboot from db')
        pending_requests = dbsession.query(RequestInfo).filter_by(status='999').all()
        for pending in pending_requests:
            logger.debug('scrapy reboot from db {}'.format(pending))
            requestQueue.put((1, pending, None))
        return

    # Empty table: start from the navigation page.
    seed = request_variable_init('https://d.weibo.com/1087030002_2986_top')
    seed.requestName = 'weibo_http_get_navigation_page_list'
    queue_entry = (PRIORITYDEFINE[seed.requestName], seed, None)
    requestQueue.put(queue_entry)
コード例 #4
0
ファイル: music.py プロジェクト: viviluhui/splider
def test_scrapy_work():
    """Manual run: queue one artist page and start ``scrapy_work``."""
    # The artist list is still fetched as before, but its result is not used:
    # only the single artist page below is queued, not every listed artist.
    get_artist_list('http://music.taihe.com/artist', r'D:\project\python\pylib\artistjson.txt')

    artist_url = 'http://music.taihe.com/artist/{}'.format('2517')
    seed = request_variable_init(artist_url)
    seed.requestName = 'get_artist_music_list'
    queue_entry = (PRIORITYDEFINE[seed.requestName], seed, None)
    requestQueue.put(queue_entry)

    scrapy_work()
コード例 #5
0
ファイル: music.py プロジェクト: viviluhui/splider
def init_work():
    """Seed ``requestQueue`` either from the database or from the artist list.

    When the ``RequestInfo`` table already has rows, every stored request
    with status '999' is queued again with priority 1.  Otherwise one
    'get_artist_music_list' request is queued per artist key returned by
    ``get_artist_list`` and the resulting queue size is printed.
    """
    if dbsession.query(RequestInfo).first() is not None:
        logger.debug('scrapy reboot from db')
        pending_requests = dbsession.query(RequestInfo).filter_by(status='999').all()
        for pending in pending_requests:
            logger.debug('scrapy reboot from db {}'.format(pending))
            requestQueue.put((1, pending, None))
        return

    # Empty table: build the initial work list from the artist index.
    artists = get_artist_list('http://music.taihe.com/artist', r'D:\project\python\pylib\artistjson.txt')
    for artist_key, _ in artists.items():
        artist_url = 'http://music.taihe.com/artist/{}'.format(artist_key)
        seed = request_variable_init(artist_url)
        seed.requestName = 'get_artist_music_list'
        requestQueue.put((PRIORITYDEFINE[seed.requestName], seed, None))

    print(requestQueue.qsize())
コード例 #6
0
ファイル: weibo_scrapy.py プロジェクト: viviluhui/splider
def _add_weibo_users(result):
    """Add the WeiboUser object(s) in ``result`` to the DB session.

    ``result`` may be a single object or a list; anything that is not a
    ``WeiboUser`` is ignored.
    """
    candidates = result if isinstance(result, list) else [result]
    for candidate in candidates:
        if isinstance(candidate, WeiboUser):
            dbsession.add(candidate)


def _commit_or_rollback():
    """Commit the shared DB session; on any failure log it and roll back."""
    try:
        dbsession.commit()
    except Exception:
        logger.exception(sys.exc_info())
        logger.error('dbsession error')
        dbsession.rollback()


def request_workflow_thread():
    """Worker loop: take requests from ``requestQueue`` and run their handlers.

    Each iteration takes a session from ``sessionQueue`` (and immediately
    puts it back), takes a ``(priority, requestInfo, param)`` item from
    ``requestQueue``, sleeps 20 seconds, then calls the handler named by
    ``requestInfo.requestName`` with ``requestInfo.requestUrl`` and the
    session.  A handler either returns one ``(resultInfo, result)`` pair or
    is a generator function yielding such pairs.  A ``resultInfo`` whose
    status is 999 is put back on the queue; ``WeiboUser`` results (single or
    in a list) are added to ``dbsession``.

    The loop ends when either queue read fails (including the 10 second
    timeout) or when the item has no usable ``requestInfo``.
    """
    while True:
        try:
            session = sessionQueue.get(block=True, timeout=10)
            sessionQueue.task_done()
            # The session goes straight back on the queue while this worker
            # keeps using it for the request below.
            sessionQueue.put(session)

            _priority, requestInfo, param = requestQueue.get(block=True,
                                                             timeout=10)
            # task_done() is signalled before the request is processed, so a
            # requestQueue.join() elsewhere does not wait for the work below.
            requestQueue.task_done()
            logger.debug('PriorityQueue size {}'.format(requestQueue.qsize()))
        except Exception:
            # Any failure here (timeout or malformed item) stops the worker.
            logger.exception(sys.exc_info())
            logger.error('request_workflow_thread queue empty')
            break

        time.sleep(20)
        if not (requestInfo and requestInfo.requestName):
            logger.error('request_workflow_thread requestInfo none')
            break

        logger.debug('run {} params {}'.format(requestInfo.requestName,
                                               requestInfo.requestUrl))

        # SECURITY(review): eval() executes whatever expression is stored in
        # requestName, and request rows can be reloaded from the database.
        # Resolve it once here; consider an explicit name -> function table.
        handler = eval(requestInfo.requestName)
        results = handler(requestInfo.requestUrl, session)

        # Generator handlers yield many pairs; plain handlers return one.
        if isgeneratorfunction(handler):
            logger.debug('isgeneratorfunction {} true'.format(
                requestInfo.requestName))
            for resultInfo, result in results:
                if resultInfo.status == 999:
                    # Follow-up request: schedule it instead of storing it.
                    logger.debug('PriorityQueue put {},{} '.format(
                        resultInfo.requestName, resultInfo.requestUrl))
                    requestQueue.put(
                        (PRIORITYDEFINE[resultInfo.requestName],
                         resultInfo, result))
                else:
                    _add_weibo_users(result)
                # The originating request is recorded and committed once per
                # yielded pair.
                request_info_update_insert(requestInfo)
                _commit_or_rollback()
        else:
            logger.debug('isgeneratorfunction {} false'.format(
                requestInfo.requestName))
            resultInfo, result = results

            # Unlike the generator path, the result is stored regardless of
            # the resultInfo status.
            _add_weibo_users(result)

            request_info_update_insert(requestInfo)
            if resultInfo.status != 999:
                logger.debug('dbsession add resultInfo {}'.format(
                    resultInfo.urlId))
            else:
                logger.debug('PriorityQueue put {},{} '.format(
                    resultInfo.requestName, resultInfo.requestUrl))
                requestQueue.put((PRIORITYDEFINE[resultInfo.requestName],
                                  resultInfo, result))

            _commit_or_rollback()