Example #1
0
def add_doi(xml_node, collection_id, volume_id, force=False):
    """Attach a ``<doi>`` child to a paper (or frontmatter) XML node.

    The DOI text is ``data.DOI_PREFIX`` followed by the Anthology ID built
    from *collection_id*, *volume_id* and the node's paper ID. Nothing is
    written unless the corresponding DOI URL passes the ``test_url`` check.

    Args:
        xml_node: Element to update. Its ``id`` attribute, when present,
            is used as the paper ID; a node without ``id`` is treated as
            frontmatter and gets paper ID 0.
        collection_id: Collection part passed to ``build_anthology_id``.
        volume_id: Volume part passed to ``build_anthology_id``.
        force: When true, replace the text of an already present ``<doi>``
            child instead of refusing to touch it.

    Returns:
        True if a DOI was added or overwritten; False if the DOI URL failed
        the ``test_url`` check or an existing DOI was left untouched.
    """
    # Frontmatter nodes carry no 'id' attribute and are numbered 0.
    paper_id = int(xml_node.attrib['id']) if 'id' in xml_node.attrib else 0

    anth_id = build_anthology_id(collection_id, volume_id, paper_id)
    new_doi_text = f'{data.DOI_PREFIX}{anth_id}'
    doi_url = f'{data.DOI_URL_PREFIX}{new_doi_text}'
    if not test_url(doi_url):
        print(f"-> [{anth_id}] Skipping since DOI {doi_url} doesn't exist")
        return False

    doi = xml_node.find('doi')
    if doi is None:
        doi = make_simple_element('doi', text=new_doi_text)
        print(f'Adding DOI {new_doi_text}', file=sys.stderr)
        xml_node.append(doi)
        return True

    if not force:
        print(
            f'-> [{anth_id}] Cowardly refusing to overwrite existing DOI {doi.text} (use --force)',
            file=sys.stderr)
        return False

    # The caller explicitly asked for it: replace the existing value in
    # place so the element keeps its position in the document.
    print(
        f'-> [{anth_id}] Overwriting existing DOI {doi.text} with {new_doi_text}',
        file=sys.stderr)
    doi.text = new_doi_text
    return True
Example #2
0

def get_anth_url(volume_id, paper_id=None, width=4):
    """Return the aclweb.org Anthology URL for a paper.

    The paper number is rendered as a decimal integer, zero-padded to
    *width* digits, and joined to *volume_id* with a hyphen.

    NOTE(review): the integer format spec rejects the default
    ``paper_id=None`` with a TypeError, so callers must pass an int.
    """
    padded_number = f"{paper_id:0{width}d}"
    return f"https://www.aclweb.org/anthology/{volume_id}-{padded_number}"


if __name__ == "__main__":
    filename = sys.argv[1]
    outfilename = sys.argv[2]
    tree = etree.parse(filename)
    volume = tree.getroot()
    for paper in volume.findall("paper"):
        if "href" in paper.attrib:
            if not test_url(paper.attrib["href"]):
                sys.stderr.write(
                    "{}:{} removing href attribute: {}\n".format(
                        filename, paper.sourceline, paper.attrib["href"]
                    )
                )
                del paper.attrib["href"]

        href = paper.find("href")
        if href is not None:
            assert len(href) == 0
            if not test_url(href.text):
                sys.stderr.write(
                    "{}:{} removing href element: {}\n".format(
                        filename, href.sourceline, href.text
                    )