def add_doi(xml_node, collection_id, volume_id, force=False):
    """Attach a ``<doi>`` child to a paper (or frontmatter) XML node.

    The DOI text is ``data.DOI_PREFIX`` followed by the Anthology ID that
    ``build_anthology_id`` produces for the collection, volume and paper
    number. Nothing is written unless the corresponding DOI URL passes
    ``test_url``.

    Args:
        xml_node: Element to update. Its ``id`` attribute, when present, is
            the paper number; a node without ``id`` is treated as
            frontmatter and numbered 0.
        collection_id: Collection part of the Anthology ID.
        volume_id: Volume part of the Anthology ID.
        force: When true, the text of an existing ``<doi>`` child is
            replaced instead of being left untouched.

    Returns:
        True if a DOI was added or overwritten, False if the node was
        skipped (DOI URL failed ``test_url``, or a DOI already exists and
        ``force`` is false).
    """
    # Frontmatter nodes carry no ``id`` attribute and are numbered 0.
    paper_id = int(xml_node.attrib['id']) if 'id' in xml_node.attrib else 0

    anth_id = build_anthology_id(collection_id, volume_id, paper_id)
    new_doi_text = f'{data.DOI_PREFIX}{anth_id}'
    doi_url = f'{data.DOI_URL_PREFIX}{data.DOI_PREFIX}{anth_id}'

    if not test_url(doi_url):
        print(f"-> [{anth_id}] Skipping since DOI {doi_url} doesn't exist")
        return False

    doi = xml_node.find('doi')
    if doi is not None:
        if not force:
            print(
                f'-> [{anth_id}] Cowardly refusing to overwrite existing DOI {doi.text} (use --force)',
                file=sys.stderr,
            )
            return False
        # The refusal message above promises that --force overwrites, so
        # honour it by replacing the text of the existing element in place.
        print(
            f'-> [{anth_id}] Overwriting existing DOI {doi.text} with {new_doi_text}',
            file=sys.stderr,
        )
        doi.text = new_doi_text
        return True

    doi = make_simple_element('doi', text=new_doi_text)
    print(f'Adding DOI {new_doi_text}', file=sys.stderr)
    xml_node.append(doi)
    return True
def get_anth_url(volume_id, paper_id=None, width=4):
    """Build the aclweb.org Anthology URL for a paper.

    The result is ``https://www.aclweb.org/anthology/<volume_id>-<paper_id>``
    with ``paper_id`` zero-padded to ``width`` digits.

    NOTE(review): ``paper_id`` defaults to ``None``, but the ``d`` format
    code used below only accepts integers, so calling this without a paper
    ID looks like it would raise -- confirm callers always pass one.
    """
    return "https://www.aclweb.org/anthology/{volume_id}-{paper_id:0{width}d}".format(
        volume_id=volume_id, paper_id=paper_id, width=width
    )


if __name__ == "__main__":
    # argv[1] is the XML file parsed below; argv[2] is kept as the output
    # file name.
    filename = sys.argv[1]
    outfilename = sys.argv[2]
    tree = etree.parse(filename)
    volume = tree.getroot()
    for paper in volume.findall("paper"):
        # A paper may carry its link as an ``href`` attribute; drop the
        # attribute when test_url rejects it (test_url is defined elsewhere;
        # presumably it checks that the URL resolves -- confirm).
        if "href" in paper.attrib:
            if not test_url(paper.attrib["href"]):
                sys.stderr.write(
                    "{}:{} removing href attribute: {}\n".format(
                        filename, paper.sourceline, paper.attrib["href"]
                    )
                )
                del paper.attrib["href"]
        # The link may also be given as an ``href`` child element.
        href = paper.find("href")
        if href is not None:
            # The element is expected to contain text only, no children.
            assert len(href) == 0
            if not test_url(href.text):
                sys.stderr.write(
                    "{}:{} removing href element: {}\n".format(
                        filename, href.sourceline, href.text
                    )