# Example 1
def open_review_bib_iclr_2013_2017(conf, year, localfile):
    """Build a bibtex string for ICLR 2013/2017 from a saved OpenReview page.

    The listing page mixes accepted, workshop and rejected papers in order.
    Hard-coded sentinel titles mark where the workshop and reject sections
    begin: scraping stops at the first rejected paper, and the conference
    tag switches to 'ICLRWorkshop' at the first workshop paper.

    Args:
        conf: conference tag for accepted papers (e.g. 'ICLR').
        year: must be 2013 or 2017 (other years use different page layouts).
        localfile: path to the manually downloaded HTML listing.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    # need to download manually
    # Sentinel titles marking the start of the reject / workshop sections.
    first_reject_13 = 'Heteroscedastic Conditional Ordinal Random'
    first_reject_17 = 'Energy-Based Spherical Sparse Coding'
    first_workshop_13 = 'Why Size Matters: Feature Coding as Nystrom Sampling'
    first_workshop_17 = 'Learning Continuous Semantic Representations'
    assert year in (2013, 2017)
    if year == 2013:
        first_reject = first_reject_13
        first_workshop = first_workshop_13
    else:
        first_reject = first_reject_17
        first_workshop = first_workshop_17

    # Context manager closes the handle deterministically (the original
    # leaked the file object returned by open()).
    with open(localfile) as f:
        soup = BeautifulSoup(f, 'lxml')
    bibs = []
    for paper in soup.select('#notes .note.panel'):
        title = paper.select('h2')[0].get_text().strip()
        pdflink = paper.select('a.note_content_pdf')[0]['href']
        if first_reject in title:
            # Everything from here on is a rejected paper.
            break
        if first_workshop in title:
            # Papers from this title onward belong to the workshop track.
            conf = 'ICLRWorkshop'
        if not pdflink.startswith('http'):
            pdflink = 'https://openreview.net/' + pdflink
        authors_str = paper.select('.meta_row')[0].get_text()
        authors = authors_str2lst(authors_str)
        # 'bib_id' avoids shadowing the builtin 'id'.
        bib_id = gen_id(year, conf, authors, title)
        bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                   pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)
# Example 2
def iclr_bib_2015_2016(year):
    """Build a bibtex string for ICLR 2015 or 2016 from the archived page.

    Downloads the accepted-papers listing (via download_to_hash's cache)
    and scrapes title, pdf link and authors from each entry. For 2015, a
    sentinel title marks the start of the workshop section; from there on
    the conference tag becomes 'ICLRWorkshop'.

    Args:
        year: must be 2015 or 2016.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    assert year == 2015 or year == 2016
    # 2015: first workshop-track paper title (sentinel for the tag switch).
    first_workshop_15 = 'Learning Non-deterministic Representations with Energy-based Ensembles'
    link = ('https://iclr.cc/archive/www/doku.php%3Fid=iclr' + str(year) +
            ':accepted-main.html')
    conf = 'ICLR'
    html_file = download_to_hash(link)
    # Context manager closes the handle (the original leaked it).
    with open(html_file) as f:
        soup = BeautifulSoup(f, 'lxml')
    bibs = []
    for div in soup.select('li.level1 div'):
        title = div.a.get_text()
        pdflink = div.a['href']
        # Drop the <a> so the remaining text is just the author list.
        div.a.decompose()
        authors_str = div.get_text()
        authors = authors_str2lst(authors_str)
        if authors:
            # change title to workshop
            if year == 2015 and first_workshop_15 in title:
                conf = 'ICLRWorkshop'
            # 'bib_id' avoids shadowing the builtin 'id'.
            bib_id = gen_id(year, conf, authors, title)
            bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                       pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)
# Example 3
def iclr_bib_2014():
    """Build a bibtex string for ICLR 2014 from the archived proceedings page.

    The page alternates title paragraphs (containing an <a> to the pdf,
    actually an arxiv link) with author paragraphs (no <a>); the two lists
    are zipped back together pairwise.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    link = 'https://iclr.cc/archive/2014/conference-proceedings/'
    year = 2014
    conf = 'ICLR'
    html_file = download_to_hash(link)
    # Context manager closes the handle (the original leaked it).
    with open(html_file) as f:
        soup = BeautifulSoup(f, 'lxml')
    # all blocks
    ps = soup.select('#sites-canvas-main-content p')
    # remove the empty blocks
    ps = [p for p in ps if p.get_text().strip()]
    # remove the conference title: Listed below are the conference papers ..
    ps = ps[1:]
    # title paragraphs carry a link; author paragraphs do not
    ptitles = [p for p in ps if p.find('a')]
    pauthors = [p for p in ps if not p.find('a')]
    # (two dead list comprehensions that discarded their results were
    # removed here)
    bibs = []
    for ptitle, pauthor in zip(ptitles, pauthors):
        title = ptitle.get_text().strip()
        authors_str = pauthor.get_text().strip()
        authors = authors_str2lst(authors_str)
        # actually arxiv
        pdflink = ptitle.find('a')['href']
        # 'bib_id' avoids shadowing the builtin 'id'.
        bib_id = gen_id(year, conf, authors, title)
        bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                   pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)
# Example 4
def open_review_bib_iclr_2018_2019(year, localfile):
    """Build a bibtex string for ICLR 2018/2019 from a saved OpenReview page.

    This page layout differs from 2013/2017: papers are grouped into oral,
    poster and workshop sections, each with its own CSS selector; workshop
    papers get the 'ICLRWorkshop' tag.

    Args:
        year: the conference year (used only for the bibtex id/entry).
        localfile: path to the manually downloaded HTML listing.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    # Context manager closes the handle (the original leaked it).
    with open(localfile) as f:
        soup = BeautifulSoup(f, 'lxml')
    sections = [
        '#accepted-oral-papers li.note', '#accepted-poster-papers li.note',
        '#workshop-papers li.note'
    ]
    confs = ['ICLR', 'ICLR', 'ICLRWorkshop']
    bibs = []
    for section, conf in zip(sections, confs):
        for paper in soup.select(section):
            title = paper.h4.a.get_text().strip()
            # there is one error in 2018
            if title == 'No Title':
                continue
            pdflink = paper.select('.pdf-link')[0]['href']
            if not pdflink.startswith('http'):
                pdflink = 'https://openreview.net/' + pdflink
            authors_str = paper.select('.note-authors')[0].get_text()
            authors = authors_str2lst(authors_str)
            # 'bib_id' avoids shadowing the builtin 'id'.
            bib_id = gen_id(year, conf, authors, title)
            bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                       pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)
# Example 5
def acl_conference_bib(year, conf, link):
    """Build a bibtex string for an ACL-anthology event page.

    Args:
        year: conference year, e.g. 2018.
        conf: conference tag, e.g. 'ACL'.
        link: event URL, e.g. 'https://aclanthology.info/events/acl-2018'.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    html_file = download_to_hash(link)
    # Context manager closes the handle (the original leaked it).
    with open(html_file) as f:
        soup = BeautifulSoup(f, 'lxml')
    bibs = []
    for p in soup.select('#content p'):
        # Title sits inside <strong><a>; author links point under /people.
        strong = p.strong
        title = strong.a.get_text()
        authors = [a.get_text() for a in p.select('a[href^=/people]')]
        if authors:
            # The first <a> in the paragraph is the pdf link.
            pdflink = p.a['href']
            # 'bib_id' avoids shadowing the builtin 'id'.
            bib_id = gen_id(year, conf, authors, title)
            bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                       pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)
# Example 6
def nips_bib(year):
    """Build a bibtex string for a NIPS proceedings volume.

    The papers.nips.cc book URL encodes the volume number, which is
    the offset from the first volume (1988 == volume 1).

    Args:
        year: conference year (>= 1988).

    Returns:
        Concatenated bibtex entries as a single string, with a leading
        newline (preserved from the original behavior).
    """
    ID = year - 1988 + 1
    conf = 'NIPS'
    link = ('https://papers.nips.cc/book/'
            'advances-in-neural-information-processing-systems-' + str(ID) +
            '-' + str(year))
    html_file = download_to_hash(link)
    NIPS_pdf_prefix = 'https://papers.nips.cc/'
    bibs = []
    with open(html_file) as f:
        soup = BeautifulSoup(f, 'html.parser')
        for li in soup.select("div.main")[0].ul.find_all('li'):
            href = li.a['href']
            title = li.a.string
            # Comprehension instead of map(lambda ...) — clearer idiom.
            authors = [author.string for author in li.select('.author')]
            pdflink = NIPS_pdf_prefix + href + '.pdf'
            if title and authors:
                # 'bib_id' avoids shadowing the builtin 'id'.
                bib_id = gen_id(year, conf, authors, title)
                bibs.append(gen_single_bib(bib_id, title,
                                           ' and '.join(authors), pdflink,
                                           year, conf))
    # join() avoids quadratic string concatenation; keep the original's
    # leading '\n'.
    return '\n' + ''.join(bibs)
# Example 7
def ieee_conference_bib(year, conf, link):
    """Build a bibtex string for an IEEE Xplore conference listing page.

    Extracts the article number from each paper's document URL and builds
    a direct stamp.jsp pdf link from it.

    Args:
        year: conference year.
        conf: conference tag.
        link: URL of the IEEE Xplore listing page.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    html_file = download_to_hash(link)
    # Context manager closes the handle (the original leaked it).
    with open(html_file) as f:
        soup = BeautifulSoup(f, 'lxml')
    bibs = []
    for div in soup.select('.txt'):
        if div.h3.a and div.select('.authors a'):
            # Strip inline <formula> markup so it doesn't pollute the title.
            for formula in div.select('formula'):
                formula.decompose()
            title = div.h3.get_text()
            # href looks like "/document/8461262/"; pull out the arnumber.
            arnumber = re.findall(r'/(\d+)/', div.h3.a['href'])[0]
            pdflink = ('https://ieeexplore.ieee.org/stamp/stamp.jsp'
                       '?tp=&arnumber=' + arnumber)
            authors = [
                author.get_text().strip()
                for author in div.select('.authors a')
            ]
            # 'bib_id' avoids shadowing the builtin 'id'.
            bib_id = gen_id(year, conf, authors, title)
            bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                       pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)
# Example 8
def springer_bib(year, conf, link):
    """Return bib for this page only.

    Scrapes title, authors and (optional) chapter pdf link from a Springer
    book table-of-contents page.

    Args:
        year: conference year.
        conf: conference tag.
        link: URL of the Springer book TOC page.

    Returns:
        Concatenated bibtex entries as a single string.
    """
    html_file = download_to_hash(link)
    # Context manager closes the handle (the original leaked it).
    with open(html_file) as f:
        soup = BeautifulSoup(f, 'lxml')
    bibs = []
    for paper in soup.select('.chapter-item'):
        meta = paper.select('.content-type-list__meta')[0]
        title = meta.select('div')[0].get_text()
        authors_str = meta.select('div')[1].get_text()
        authors = authors_str2lst(authors_str)
        pdflink_a = paper.select('a.test-book-toc-download-link')
        pdflink = ''
        # some conference may not have a pdflink, e.g.
        # https://link.springer.com//book/10.1007/BFb0015518
        if pdflink_a:
            pdflink = pdflink_a[0]['href']
        # BUG FIX: only prefix the domain onto a real relative link.  The
        # original also prefixed the empty string, producing a bogus
        # 'https://link.springer.com/' entry when no pdf link existed.
        if pdflink and not pdflink.startswith('http'):
            pdflink = 'https://link.springer.com/' + pdflink
        # 'bib_id' avoids shadowing the builtin 'id'.
        bib_id = gen_id(year, conf, authors, title)
        bibs.append(gen_single_bib(bib_id, title, ' and '.join(authors),
                                   pdflink, year, conf))
    # join() avoids quadratic string concatenation.
    return ''.join(bibs)