コード例 #1
0
 def getBrief(this):
     '''
     Extract the scholar's personal biography.

     Locates the first child of the "ant-tabs-nav-list" element, hands it
     to ``__clickIfNotActive`` together with the biography xpath, and then
     reads the biography text from the active pane.

     Returns ``None`` when the tab element cannot be found; otherwise
     whatever ``getTextByXpath`` yields for the biography xpath.
     '''
     tab_xpath = '//*[contains(@class,"ant-tabs-nav-list")]/*[1]'
     bio_xpath = '//*[contains(@class,"active")]//*[contains(@class,"bio")]'

     tabs = this.__dv.find_elements_by_xpath(tab_xpath)
     if not tabs:
         return None

     # NOTE(review): presumably clicks the tab unless it is already active
     # and waits (up to waitLoading) for bio_xpath -- confirm against the
     # definition of __clickIfNotActive.
     this.__clickIfNotActive(tabs[0], waitLoading, bio_xpath)
     return getTextByXpath(this.__dv, bio_xpath)
コード例 #2
0
 def getExperience(this):
     '''
     Extract the scholar's work experience.

     Locates the first child of the "ant-tabs-nav-list" element, hands it
     to ``__clickIfNotActive`` together with the experience xpath, and then
     reads the ``div`` children of the element whose class is exactly
     "aff_inst" inside the active pane.

     Returns ``None`` when the tab element cannot be found; otherwise
     whatever ``getTextByXpath`` yields for the experience xpath.
     '''
     tab_xpath = '//*[contains(@class,"ant-tabs-nav-list")]/*[1]'
     experience_xpath = '//*[contains(@class,"active")]//*[@class="aff_inst"]/div'

     tabs = this.__dv.find_elements_by_xpath(tab_xpath)
     if not tabs:
         return None

     # NOTE(review): presumably clicks the tab unless it is already active
     # and waits (up to waitLoading) for experience_xpath -- confirm against
     # the definition of __clickIfNotActive.
     this.__clickIfNotActive(tabs[0], waitLoading, experience_xpath)
     return getTextByXpath(this.__dv, experience_xpath)
コード例 #3
0
    def getPapers(this):
        '''
        Extract one page of the scholar's papers, ordered by citation count.

        The page sorted by citations (descending) is used so that results
        can be de-duplicated more easily.

        Returns a list of dicts with the keys "id", "title", "authors",
        "venue" and "cited". An empty list is returned when the second tab
        is missing or marked "disabled", when no paper entry is present, or
        when the "noData" image is shown after sorting.

        Raises ``Exception("no sort by reference")`` if papers exist but the
        sort control is not found, and ``Exception("load failed")`` if
        ``waitTillLoaded`` reports failure.
        '''
        # --- absolute xpaths -------------------------------------------
        # Second tab ("academic achievements"), clickable.
        tab_xpath = '//*[contains(@class,"ant-tabs-nav-list")]/*[2]'
        # Section titles (papers / research projects) once that tab is active.
        section_xpath = '//*[contains(@class,"ant-tabs-nav-list")]/*[2]/self::*[contains(@class,"active")]/../../../..//span[@class="title"]'
        # "Sort by citations" control, clickable.
        sort_xpath = '//*[contains(@class,"pubs_sort_line")]/div/*[2]'
        # Paper entries, matched only while the citation sort is active.
        paper_xpath = (
            '//*[contains(@class,"pubs_sort_line")]/div'
            '/*[2]/self::*[contains(@class,"active")]/../../../../..'
            '//*[@class="content"]'
        )
        # Loading indicator.
        loading_xpath = '//*[contains(@class,"sk_chase")]'
        # "No data" placeholder image.
        nodata_xpath = '//div[contains(@class,"profilePapers___1bMnJ")]//img[contains(@src,"noData")]'
        # Any paper entry at all, regardless of the sort state.
        any_paper_xpath = '//div[contains(@class,"profilePapers___1bMnJ")]//*[@class="content"]'

        # --- xpaths relative to a single paper entry ---------------------
        id_rel = '..'
        title_rel = './/*[contains(@class,"title")]/span'
        authors_rel = './/*[contains(@class,"authors")]'
        venue_rel = './/*[contains(@class,"venue-line")]'
        cited_rel = './/*[@class="cited"]/strong'

        # Open the achievements tab; bail out if it is absent or disabled.
        tabs = this.__dv.find_elements_by_xpath(tab_xpath)
        if not tabs or 'disabled' in tabs[0].get_attribute('class'):
            return []
        this.__clickIfNotActive(tabs[0], waitLoading, section_xpath)

        # Switch to the citation ordering, but only when papers are listed.
        if not this.__dv.find_elements_by_xpath(any_paper_xpath):
            return []
        sort_controls = this.__dv.find_elements_by_xpath(sort_xpath)
        if not sort_controls:
            raise Exception("no sort by reference")
        # Either sorted paper entries or the "no data" image may appear.
        sorted_or_empty_xpath = paper_xpath + '|' + nodata_xpath
        this.__clickIfNotActive(
            sort_controls[0], waitLoading, sorted_or_empty_xpath, 1)

        loaded = waitTillLoaded(
            this.__dv, waitLoading, value=loading_xpath, waitUnit=waitUnit)
        if not loaded:
            raise Exception("load failed")

        if this.__dv.find_elements_by_xpath(nodata_xpath):
            return []

        # Collect the fields of every paper entry on the page.
        entries = this.__dv.find_elements_by_xpath(paper_xpath)
        return [
            {
                "id": stdAminerId(getAttributeByXpath(entry, id_rel, 'id')),
                "title": getTextByXpath(entry, title_rel),
                "authors": rmUnseen(getTextByXpath(entry, authors_rel)),
                "venue": getTextByXpath(entry, venue_rel),
                "cited": getTextByXpath(entry, cited_rel),
            }
            for entry in entries
        ]
コード例 #4
0
 def getAddress(this):
     '''
     Extract the scholar's address.

     Targets the ``span`` children of the parent of any element whose class
     contains "map-marker" and returns the result of ``getTextByXpath``.
     '''
     return getTextByXpath(
         this.__dv, '//*[contains(@class,"map-marker")]/../span')
コード例 #5
0
 def getFax(this):
     '''
     Extract the scholar's fax number.

     Targets the ``span`` children of the parent of any element whose class
     contains "fax" and returns the result of ``getTextByXpath``.
     '''
     return getTextByXpath(
         this.__dv, '//*[contains(@class,"fax")]/../span')
コード例 #6
0
 def getEmail(this):
     '''
     Extract the scholar's email.

     Targets the ``span`` children of the parent of any element whose class
     contains "envelope" and returns the result of ``getTextByXpath``.
     '''
     return getTextByXpath(
         this.__dv, '//*[contains(@class,"envelope")]/../span')
コード例 #7
0
 def getPhone(this):
     '''
     Extract the scholar's phone number.

     Targets the ``span`` children of the parent of any element whose class
     contains "phone" and returns the result of ``getTextByXpath``.
     '''
     return getTextByXpath(
         this.__dv, '//*[contains(@class,"phone")]/../span')
コード例 #8
0
 def getDepartment(this):
     '''
     Extract the scholar's institution.

     Targets the ``span`` children of the parent of any element whose class
     contains "institution" and returns the result of ``getTextByXpath``.
     '''
     return getTextByXpath(
         this.__dv, '//*[contains(@class,"institution")]/../span')
コード例 #9
0
 def getTitle(this):
     '''
     Extract the scholar's professional title.

     Targets the ``span`` children of the parent of any element whose class
     contains "briefcase" and returns the result of ``getTextByXpath``.
     '''
     return getTextByXpath(
         this.__dv, '//*[contains(@class,"briefcase")]/../span')