Python Paper Examples

Programming Language: Python

Namespace/Package Name: data

Class/Type: Paper

Examples at hotexamples.com: 3

Python Paper - 3 examples found. These are the top-rated real-world Python examples of data.Paper extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

add_author(3)

Example #1

Show file

File: scrape.py Project: zhangchitc/pkudblp

    def __get_paper_from_acm (self, entry_url):
        """Build a Paper (title, abstract, authors) from an ACM entry page.

        The page at entry_url is fetched through self.op and parsed with
        sp.fromstring. The abstract is on a separate page whose link is
        located by a regex over the raw response; when no such link is
        present the abstract is left empty. Every row of the author table
        contributes one Author (name, affiliation); the affiliation falls
        back to unlinked text and then to an empty string.

        Raises IndexError when the 'divmain' block, the title node, the
        second nested author table, or an author name link is missing.
        """
        resp_body = self.op.open (entry_url).read ()
        root = sp.fromstring (resp_body)

        divmain = root.xpath ("//div[@id='divmain']")[0]

        title = divmain.xpath ("div/h1/strong")[0].text

        # use regex to extract abstract link; a page without one yields an
        # empty abstract instead of failing on a None match
        abst = ''
        abst_match = re.search (r"tab.abstract.cfm[^']*", resp_body)
        if abst_match is not None:
            abst_url = 'http://dl.acm.org/' + abst_match.group (0)
            abst_body = self.op.open (abst_url).read ()

            # extract all text node from this dom tree
            abst = ''.join (sp.fromstring (abst_body).xpath ('//div/p/div/p/descendant-or-self::*/text()'))

        # instantiate a Paper class
        paper = Paper (title, abst)

        # locate the author table block (second nested table under divmain)
        author_table = divmain.xpath ("table/tr/td/table")[1]

        # add each author
        for author_row in author_table.xpath ('tr'):
            name = author_row.xpath ('td/a/text()')[0]

            # the affiliation sits inside <a> only when it is linked; otherwise
            # look for plain <small> text, and tolerate rows with none at all
            affn_texts = (author_row.xpath ('td/a/small/text()')
                          or author_row.xpath ('td/small/text()'))
            affn = affn_texts[0] if affn_texts else ""

            paper.add_author (Author (name, affn))

        return paper

Example #2

Show file

File: scrape3.py Project: zhangchitc/pkudblp

    def __get_paper_from_ms (self, entry_url):
        """Build a Paper (title and authors only) from the page at entry_url.

        The response is passed through __deljs_html before parsing. Each
        author link found in the paper block is resolved with
        __get_author_from_ms and attached to the Paper in page order.
        The abstract snippet is not extracted.

        Raises IndexError when the title span or the second div of the
        paper block cannot be found.
        """
        raw_html = self.op.open (entry_url).read ()
        doc = sp.fromstring (self.__deljs_html (raw_html))

        title_spans = doc.xpath ("//span[@id='ctl00_MainContent_PaperItem_title']")
        result = Paper (title_spans[0].text)

        # the second div matched under the paper block holds the author links
        block_divs = doc.xpath ("//div[@id='ctl00_MainContent_PaperItem_divPaper']/div")
        author_links = block_divs[1].xpath ("a[@class='author-name-tooltip']/@href")

        for link in author_links:
            author = self.__get_author_from_ms (link)
            result.add_author (author)

        return result

Example #3

Show file

File: scrape3.py Project: zhangchitc/pkudblp

    def __get_paper_from_acm (self, entry_url):
        """Build a Paper (title and authors only) from an ACM entry page.

        The URL is passed through __wrapper before being opened, and the
        response through __deljs_html before being parsed. The abstract is
        intentionally not fetched. Every row of the author table contributes
        one Author (name, affiliation); the affiliation is taken from linked
        text first, then unlinked text, and is an empty string when neither
        is present.

        Raises IndexError when the 'divmain' block, the title node, the
        second nested author table, or an author name link is missing.
        """
        resp_body = self.__deljs_html (self.op.open (self.__wrapper (entry_url)).read ())
        root = sp.fromstring (resp_body)

        divmain = root.xpath ("//div[@id='divmain']")[0]

        title = divmain.xpath ("div/h1/strong")[0].text

        # instantiate a Paper class (no abstract needed)
        paper = Paper (title)

        # locate the author table block (second nested table under divmain)
        author_table = divmain.xpath ("table/tr/td/table")[1]

        # add each author
        for author_row in author_table.xpath ('tr'):
            name = author_row.xpath ('td/a/text()')[0]

            # if the text is in tag <a>, then it has a link to this affiliation;
            # each lookup runs at most once and the second only when the first
            # found nothing
            affn_texts = (author_row.xpath ('td/a/small/text()')
                          or author_row.xpath ('td/small/text()'))
            affn = affn_texts[0] if affn_texts else ""

            paper.add_author (Author (name, affn))

        return paper