# Example no. 1
0
class DOI(DOIMetadata):
    """Find and download a PDF for the given DOI.

    On construction the class queries the gen.lib.rus.ec "scimag" search
    page for the DOI, collects every ``href`` on the result page that ends
    in ``pdf``, downloads the first one into the current working directory
    and records the resulting file in ``self.graph``.

    Instance attributes set by ``__init__``:

    * ``url`` -- ``URIRef`` of the search page that was queried.
    * ``links`` -- list of all PDF links found on that page.
    * ``filename`` -- ``Literal`` with the local file name
      (the DOI with ``/`` replaced by ``_``, plus ``.pdf``).
    * ``path`` -- ``URIRef`` ``file:`` URI of the downloaded file.

    Raises:
        requests.HTTPError: if the search page or the PDF request returns
            an HTTP error status.
        ValueError: if the search page contains no PDF link.

    >>> DOI('10.1016/0166-218X(92)00170-Q')  # doctest: +SKIP
    """
    # Copy the shared header mapping so the site-specific entries below do
    # not leak into ``generic_headers`` (and every other user of it).
    headers = dict(generic_headers)
    headers['Host'] = 'gen.lib.rus.ec'
    headers['Referer'] = 'http://gen.lib.rus.ec/scimag/'

    # Search URL template; ``{}`` is filled with the DOI.
    url = 'http://gen.lib.rus.ec/scimag/?s={}&journalid=&v=&i=&p=&redirect=1'

    # Matches anchors whose target ends in "pdf"; compiled once per class.
    _pdf_link_re = re.compile(r'a href="([^"]+pdf)"')

    def __init__(self, doi, *args, **kwargs):
        """Look up *doi*, download the first PDF found and record it.

        ``doi`` and any extra arguments are forwarded to the parent
        initializer.
        NOTE(review): ``self.doi`` and ``self.graph`` are presumably set up
        by ``DOIMetadata.__init__`` -- confirm against the parent class.
        """
        super().__init__(doi, *args, **kwargs)
        self.url = URIRef(self.url.format(self.doi))

        r = requests.get(self.url, headers=self.headers)
        r.raise_for_status()

        self.links = self._pdf_link_re.findall(r.text)
        if not self.links:
            # Same exception type the bare unpacking used to raise, but
            # with a message that says what actually went wrong.
            raise ValueError('no pdf link found for doi {!r} at {}'.format(doi, self.url))
        # Resolve against the search page in case the href is relative;
        # an absolute href is returned unchanged by urljoin.
        link = urljoin(str(self.url), self.links[0])

        self.filename = Literal('.'.join((doi.replace('/', '_'), 'pdf')))
        # The context manager releases the streamed connection even if
        # writing the file fails.
        with requests.get(link, stream=True) as r:
            # Fail before touching the disk so an HTTP error page is never
            # saved as if it were the PDF.
            r.raise_for_status()
            with open(self.filename, 'wb') as fd:
                for chunk in r.iter_content(1024 * 10):
                    fd.write(chunk)

        self.path = URIRef(urljoin('file:', pjoin(os.getcwd(), self.filename)))
        self.graph.add((self.path, URIRef('http://purl.org/dc/terms/identifier'), self.doi))
        self.graph.commit()