Ejemplo n.º 1
0
            if not exist:
                print u"worker[%d]结束" % pid
                break
            tableId, keyword = reuslt

            pfi.keyword = keyword
            pfi.tableId = tableId
            timeoutNum = OVER_TIME_PROCESS
            if pfi.needInitDriver:
                timeoutNum += OVER_TIME_INIT_DRIVER
            if not doByTimeout(getOnWtitch, pfi, timeoutNum):
                (STATUS_NON, tableId)
                print u"worker[%d]超时,进行重启" % pid
                pfi.needInitDriver = True
        except Exception, e:
            pfi.needInitDriver = True
            print 'process error', e, traceback.format_exc()


if __name__ == "__main__":
    # Script entry point: create one lock shared by every worker, make the
    # initial updateStatus call, then hand one request dict per worker to
    # doInThread.
    __lock = threading.Lock()
    updateStatus(STATUS_SUCCESS, None)
    poolNum = PROCESS_NUM
    # Each request carries the shared lock plus the worker's index as "pid".
    arg_list = [{"lock": __lock, "pid": i} for i in range(poolNum)]
    # NOTE(review): presumably runs `process` once per request using
    # poolNum threads -- confirm against doInThread's definition.
    doInThread(process, arg_list, poolNum=poolNum)
Ejemplo n.º 2
0
    recoreDate.append([href])
    driverContent.quit()


while True:
    retry(lambda: driver.find_elements_by_xpath("//p[@class='title']/a"))
    aEles = driver.find_elements_by_xpath("//p[@class='title']/a")
    hrefs = []
    for aEle in aEles:
        href = aEle.get_attribute("href")
        if href in matchRecordData:
            print "processd ", href
        else:
            hrefs.append(href)
    if 0 == len(hrefs):
        print "no new href"
    else:
        doInThread(processInNewDriver, hrefs, poolNum=4)
        write(recoreDate, FILENAME_RECORD)

    try:
        driver.find_element_by_xpath(
            "//a[@id='EntrezSystem2.PEntrez.PubMed.Pubmed_ResultsPanel.Entrez_Pager.Page' and @sid='3']"
        ).send_keys(Keys.ENTER)
        print "下一页"
        import time
        time.sleep(1)
    except Exception, e:
        print "已到尾页"
        break