class DOI(DOIMetadata):
    """Find and download a PDF for the given DOI.

    Constructing an instance performs all of the work: it queries the
    gen.lib.rus.ec "scimag" search page for the DOI, collects every PDF
    link found in the returned HTML, downloads the first one into the
    current working directory and records the saved file in
    ``self.graph``.

    ``self.doi`` and ``self.graph`` are not set here; they are presumably
    provided by ``DOIMetadata.__init__`` (verify against the base class).

    Attributes set by ``__init__``:
        url: ``URIRef`` of the search page queried for this DOI.
        links: all PDF hrefs found on the search page, in page order.
        filename: ``Literal`` holding the local file name (the DOI with
            ``/`` replaced by ``_``, plus a ``.pdf`` extension).
        path: ``file:`` ``URIRef`` of the downloaded file.

    Raises:
        requests.HTTPError: if the search page or the PDF download
            answers with an HTTP error status.
        ValueError: if the search page contains no PDF link.

    Example (performs network requests and writes a file):

    >>> DOI('10.1016/0166-218X(92)00170-Q')  # doctest: +SKIP
    """

    # Copy before customising: assigning into ``generic_headers`` itself
    # would leak this site's Host/Referer into every other user of it.
    headers = dict(generic_headers)
    headers['Host'] = 'gen.lib.rus.ec'
    headers['Referer'] = 'http://gen.lib.rus.ec/scimag/'

    # Search URL template; ``{}`` is filled with the DOI in ``__init__``.
    url = 'http://gen.lib.rus.ec/scimag/?s={}&journalid=&v=&i=&p=&redirect=1'

    # Matches anchors whose href ends in "pdf"; group 1 is the href value.
    _PDF_LINK_RE = re.compile(r'a href="([^"]+pdf)"')

    def __init__(self, doi, *args, **kwargs):
        """Look up *doi*, download its PDF and register it in the graph.

        Args:
            doi: the DOI string; also used to derive the local file name.
            *args, **kwargs: forwarded unchanged to ``DOIMetadata``.
        """
        super().__init__(doi, *args, **kwargs)

        # Shadow the class-level template with the concrete search URL.
        self.url = URIRef(self.url.format(self.doi))

        search = requests.get(self.url, headers=self.headers)
        search.raise_for_status()
        self.links = self._PDF_LINK_RE.findall(search.text)
        if not self.links:
            raise ValueError(
                'no pdf link found for doi {!r} at {}'.format(doi, self.url)
            )
        link = self.links[0]

        self.filename = Literal('.'.join((doi.replace('/', '_'), 'pdf')))

        # The site-specific headers are deliberately not sent here: the
        # link is fetched exactly as the original code did, without them.
        download = requests.get(link, stream=True)
        try:
            # Check the status before creating the file so an HTTP error
            # page is never saved to disk as if it were the PDF.
            download.raise_for_status()
            with open(self.filename, 'wb') as fd:
                for chunk in download.iter_content(1024 * 10):
                    fd.write(chunk)
        finally:
            # A streamed response keeps its connection open until closed.
            download.close()

        self.path = URIRef(urljoin('file:', pjoin(os.getcwd(), self.filename)))
        self.graph.add((
            self.path,
            URIRef('http://purl.org/dc/terms/identifier'),
            self.doi,
        ))
        self.graph.commit()