Exemple #1
0
def _first_tag_text(xml, tag, default=""):
    """Return the text of the first ``<tag>...</tag>`` element found in *xml*.

    The lookup is a plain regular-expression search, not real XML parsing:
    the element must have no attributes, and its content must be non-empty
    and must not contain a newline (``.`` does not match newlines here).

    Args:
        xml: Text to search.
        tag: Element name, without angle brackets.
        default: Value returned when no matching element is found.
    """
    match = re.search('<{0}>(.+?)</{0}>'.format(tag), xml)
    return match.group(1) if match is not None else default


def book_from_xml(xml):
    """Build a ``Publication`` from an XML record and print its details.

    Every ``<author>`` element is collected; only the first ``<title>``,
    ``<year>`` and ``<ee>`` (link) elements are used, and each of those
    falls back to an empty string when it is absent. When a ``<journal>``
    element is present, its text is passed to ``add_contributor``.

    Args:
        xml: Text of a single publication record.

    Returns:
        None. The result is only reported through ``print_info()``.
    """
    authors = re.findall('<author>(.+?)</author>', xml)
    title = _first_tag_text(xml, 'title')
    year = _first_tag_text(xml, 'year')
    link = _first_tag_text(xml, 'ee')

    book = Publication(title, authors, year, link)

    # The journal is optional, so None (rather than "") marks "not found"
    # and the contributor is only added when the element really exists.
    journal = _first_tag_text(xml, 'journal', None)
    if journal is not None:
        book.add_contributor(journal)

    book.print_info()
Exemple #2
0
    def get_book_from_block(self, block):
        """Build a ``Publication`` from one HTML result block and print it.

        The block is scanned with regular expressions:

        * title and link come from the first ``<a>`` inside an ``<h3>``;
          the link is prefixed with ``self.base_href``;
        * authors are the contents of every ``class="author-name-tooltip"``
          anchor, with any nested tags stripped;
        * the year is the first four-digit number, preceded by a space,
          in the ``<span>`` following ``class="conference">``.

        Title, year and link fall back to an empty string when not found.
        A description (``class="abstract"`` span) and a publisher (anchor
        right after the conference span) are added only when present.

        Args:
            block: HTML text of a single search result.

        Returns:
            None. The result is only reported through ``print_info()``.
        """
        def first_group(pattern):
            """Return group 1 of the first match in *block*, or None."""
            match = re.search(pattern, block)
            return None if match is None else match.group(1)

        title = first_group(r'<h3>.*?<a.*?>(.*?)</a>') or ""

        # Author names may be wrapped in extra markup; drop any tags.
        authors = [
            re.sub(r'<.*?>', '', name)
            for name in re.findall(
                r'class="author-name-tooltip".*?>(.*?)</a>', block)
        ]

        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        year = first_group(
            r'class="conference">.*?<span.*? (\d{4}).*?</span>') or ""

        # An empty href still counts as a match, so test against None.
        href = first_group(r'<h3>.*?<a.*?href="(.*?)">')
        link = "" if href is None else self.base_href + href

        book = Publication(title, authors, year, link)

        desc = first_group(
            r'class="abstract">.*?<span.*?>(.*?)</span>'
            r'.*?class="conference">.*?<span')
        if desc is not None:
            book.add_description(desc)

        publisher = first_group(
            r'class="conference">.*?</span><a.*?>(.*?)</a>')
        if publisher is not None:
            book.add_publisher(publisher)

        book.print_info()