Exemple #1
0
def getLnks(dv, nReq, nRst=None):
    """
    Generator that yields paper links starting from a search-result page.

    Parameters
    ----------
    dv : webdriver whose current handle is the search-result page
    nReq : number of papers requested
    nRst : number of search results; fetched with getNumOfRst when None

    Returns
    -------
    getLnks : generator producing the paper links
    """
    # XPath of the paper links
    linkXpath = '//a[@class="smallV110 snowplow-full-record"]'

    total = getNumOfRst(dv) if nRst is None else nRst

    # Wait for the page to open (until a paper link is present)
    waitTillOpen(dv, value=linkXpath)

    # Only the first link is read; the remaining links are built from the
    # pattern derived from its href.
    firstHref = dv.find_element_by_xpath(linkXpath).get_attribute('href')
    pat = __getPattern(firstHref)

    # Never yield more than requested, available, or allowed by MAX_DOC.
    limit = min(nReq, total, MAX_DOC)
    for doc in range(1, limit + 1):
        yield __getLnk(doc, pat)
Exemple #2
0
def sortResults(dv, sid, qid, sortReq=''):
    """
    Sort the search results as requested.

    Parameters
    ----------
    dv : webdriver whose current handle is the search-result page
    sid : sid
    qid : qid
    sortReq : requested ordering (looked up case-insensitively in
        str2SortId); a missing, non-string or unrecognized request falls
        back to SortId.PYD ("date descending" per the original notes)
    """
    # Wait for the page to open
    waitTillOpen(dv)

    # Anything that is not a string counts as "no request". isinstance is
    # used so that str subclasses are honored instead of being discarded.
    if not isinstance(sortReq, str):
        sortReq = ''
    sortId = str2SortId.get(sortReq.upper(), SortId.PYD)
    dv.execute_script(__getSortJs(sid, qid, sortId))
Exemple #3
0
def getNumOfRst(dv):
    """
    Read the number of search results from the result page.

    Parameters
    ----------
    dv : webdriver whose current handle is the search-result page

    Returns
    -------
    getNumOfRst : number of search results, as an int
    """
    countXpath = '//h3[@class="title4"]/*'

    # Wait for the page to open (until the counter element is present)
    waitTillOpen(dv, value=countXpath)

    rawText = dv.find_element_by_xpath(countXpath).text
    # Commas are removed before converting, e.g. "1,234" -> 1234.
    digits = rawText.replace(',', '')
    return int(digits)
Exemple #4
0
def getIds(dv):
    """
    Fetch sid and qid from the page.

    Per the original notes, sid identifies the user session and qid the
    search number; together they determine the search results.

    Parameters
    ----------
    dv : webdriver whose current handle is the search-result page

    Returns
    -------
    getIds : the id pair (sid, qid)
    """
    # Wait for the page to open
    waitTillOpen(dv)

    # Both values are read from JavaScript on the page, sid first.
    scripts = ('return SID', 'return qid.value')
    return tuple(dv.execute_script(js) for js in scripts)
Exemple #5
0
    def __getSynonyms(this, s: str, mode):
        '''
        Collect a set of synonyms for `s` using Youdao translate.

        Parameters
        ----------
        s : text typed into the translator's input area
        mode : 'ch2en' selects Chinese-to-English; any other value selects
            automatic language detection

        Returns
        -------
        set of the texts found in the translation answer, the improvement
        suggestions (when they appear in time) and the related entries

        Raises
        ------
        AssertionError : the previous translation could not be cleared
        '''
        dv = this.__dv
        pathInput = '//textarea[@class="input__original__area"]'
        pathAnswer = '//div[contains(@class,"input__target__text")]/p/span'
        pathSuggestWait = '//*[@class="suggest__title"]/../../*[contains(@style,"block")]'
        pathSuggest = '//*[@class="suggest__title"]/../ul/*'
        pathRelative = '//div[@class="dict__relative"]/*'
        pathTrans = '//a[@id="transMachine"]'

        # Translation language
        if mode == 'ch2en':
            this.__ch2en()
        else:
            this.__auto()

        # Input
        waitTillOpen(dv, 10, value=pathInput)
        ipts = dv.find_elements_by_xpath(pathInput)

        this.__iknow()

        ipt = ipts[0]
        ipt.clear()

        # Click the translate button until the old answer is gone, giving up
        # after 10 attempts.
        for _ in range(10):
            ans = dv.find_elements_by_xpath(pathAnswer)
            if not ans:
                break
            dv.find_element_by_xpath(pathTrans).click()
            sleep(0.1)
        else:
            # Raised explicitly (not via `assert`) so the check is not
            # stripped when Python runs with -O.
            raise AssertionError('translate area not cleared')

        ipt.send_keys(s)

        rst = set()

        # Translation result
        waitTillOpen(dv, value=pathAnswer)
        ans = dv.find_elements_by_xpath(pathAnswer)
        rst |= {x.text for x in ans}
        if ans:
            # Improved translation suggestions: clicking the first answer
            # span is expected to open the suggestion list.
            ans[0].click()

            try:
                waitTillOpen(dv, 10, value=pathSuggestWait)
                sug = dv.find_elements_by_xpath(pathSuggest)
                rst |= {x.text for x in sug}
            except TimeoutException:
                # Best effort: no suggestions appeared in time.
                pass

        # Related results
        relative = dv.find_elements_by_xpath(pathRelative)
        rst |= {x.text for x in relative}

        # Restore automatic mode and empty the input for the next call.
        this.__auto()
        ipt.clear()
        dv.find_element_by_xpath(pathTrans).click()
        return rst
Exemple #6
0
 def __waitTillOpen(this):
     """
     Wait for the paper page to load after opening it.

     Makes up to 6 attempts of waitTillOpen(this.driver, 10). If every
     attempt times out, the page is considered blocked by the site's
     anti-crawler system.

     Raise
     -----
     Exception : the paper page could not be opened
     """
     for attempt in range(1, 7):
         try:
             waitTillOpen(this.driver, 10)
         except TimeoutException:
             # '\r' keeps successive failure messages on one console line.
             print('ERROR : failed to open the page of paper, tried %dth' %
                   attempt,
                   end='\r')
             continue
         if attempt > 1:
             # Trailing spaces overwrite the leftover of the error line.
             print('INFO : open succeed, tried %dth' % attempt, ' ' * 20)
         return
     # Every attempt timed out: end the '\r' line, then report.
     print()
     raise Exception('maybe banned by wos, please check')
Exemple #7
0
    def __clickIfNotActive(this, element, secWait, pathWait, reClick=0):
        '''
        Click an element unless it is already active, then wait until an
        element matching a given path appears.

        Parameters
        ----------
        element : element to click
        secWait : maximum wait time after a single click
        pathWait : path used to decide that loading has finished
        reClick : number of extra clicks after the first one, to guard
            against the citation-count sort not reacting to a click;
            defaults to 0
        '''
        dv = this.__dv

        # get_attribute returns None when the element has no class
        # attribute; treat that as "not active" instead of raising
        # TypeError on the membership test.
        cssClass = element.get_attribute('class') or ''
        # NOTE(review): this is a substring match, so a class such as
        # "inactive" also counts as active. Confirm against the page markup.
        if 'active' in cssClass:
            return

        def clickAndWait():
            # The click is issued through JavaScript rather than
            # element.click().
            dv.execute_script('arguments[0].click();', element)
            sleep(waitUnit)
            waitTillOpen(dv, secWait, value=pathWait)

        clickAndWait()
        for _ in range(reClick):
            clickAndWait()
0
    def __auto(this):
        """
        Switch the translator's language selector to "AUTO".

        Does nothing when this.__getMode() already reports 'AUTO'.
        """
        if this.__getMode() == 'AUTO':
            return

        langXpath = '//*[@class="select-text"]'
        # The AUTO entry while its container's style contains "block"
        # (presumably the opened dropdown).
        optionXpath = '//*[@data-value="AUTO"]/../../*[contains(@style,"block")]/*[@data-value="AUTO"]/a'
        # The AUTO entry once its container's style contains "none"
        # (presumably the dropdown closed again).
        closedXpath = '//*[@data-value="AUTO"]/../../*[contains(@style,"none")]/*[@data-value="AUTO"]'

        dv = this.__dv
        # Click the language selector, then the AUTO entry, waiting for
        # each target before clicking it.
        for xpath in (langXpath, optionXpath):
            waitTillOpen(dv, 10, value=xpath)
            dv.find_element_by_xpath(xpath).click()
        waitTillOpen(dv, 10, value=closedXpath)
Exemple #9
0
    def __ch2en(this):
        """
        Switch the translator's language selector to "zh-CHS2en".

        Does nothing when this.__getMode() already reports 'zh-CHS2en'.
        """
        if this.__getMode() == 'zh-CHS2en':
            return

        langXpath = '//*[@class="select-text"]'
        # The zh-CHS2en entry while its container's style contains "block"
        # (presumably the opened dropdown).
        optionXpath = '//*[@data-value="zh-CHS2en"]/../../*[contains(@style,"block")]/*[@data-value="zh-CHS2en"]/a'
        # The zh-CHS2en entry once its container's style contains "none"
        # (presumably the dropdown closed again).
        closedXpath = '//*[@data-value="zh-CHS2en"]/../../*[contains(@style,"none")]/*[@data-value="zh-CHS2en"]'

        dv = this.__dv
        # Click the language selector, then the zh-CHS2en entry, waiting
        # for each target before clicking it.
        for xpath in (langXpath, optionXpath):
            waitTillOpen(dv, 10, value=xpath)
            dv.find_element_by_xpath(xpath).click()
        waitTillOpen(dv, 10, value=closedXpath)