Example #1
0
def load_ftp(url):
    """Download an FTP resource using curl.

    Runs ``curl`` in silent mode with ``settings.timeout`` as the maximum
    transfer time, captures its standard output and returns it passed
    through ``redif.decode``.

    Raises:
        RuntimeError: if curl exits with a non-zero return code.
    """
    # Each option is its own argv element. Packing them together as
    # '-sm <timeout>' hands curl the value with a leading space, which
    # not every curl version accepts as a number.
    cmd = ['curl', '-s', '-m', str(settings.timeout), url]
    rslt = subprocess.run(cmd, stdout=subprocess.PIPE)
    if rslt.returncode != 0:
        raise RuntimeError('CURL Error {}'.format(rslt.returncode))
    return redif.decode(rslt.stdout)
Example #2
0
def load(file, cat):
    """Load a series or an archive file.

    Fetches ``settings.repec_ftp + file`` through ``ftp_get``, decodes and
    parses it with ``redif``, and returns one
    ``(file, cat, handle, url)`` tuple per parsed record.  ``url`` is the
    record's ``url`` value with trailing slashes collapsed to exactly one,
    or ``None`` when the record has no ``url`` key.
    """
    text = redif.decode(ftp_get(settings.repec_ftp + file))

    entries = []
    for record in redif.load(text):
        fields = dict(record)
        if 'url' in fields:
            # Normalise so the URL always ends with a single slash.
            url = fields['url'].rstrip('/') + '/'
        else:
            url = None
        entries.append((file, cat, fields['handle'], url))
    return entries
Example #3
0
def load(url):
    """Download ReDIF papers.

    ``ftp`` URLs are fetched with ``fetch_curl``; ``http`` and ``https``
    URLs are fetched with ``fetch``, whose reported encoding is passed to
    ``redif.decode`` as a hint.  The decoded text is parsed with
    ``redif.load`` and the parsed papers are returned.

    Raises:
        RuntimeError: for an unsupported URL scheme, for a paper lacking
            a ``handle`` or ``template-type`` field, or when no papers
            were parsed at all.
    """
    scheme = urlparse(url).scheme
    if scheme not in ('ftp', 'http', 'https'):
        raise RuntimeError('Unknown scheme {}'.format(scheme))

    if scheme == 'ftp':
        decoded = redif.decode(fetch_curl(url))
    else:
        content, encoding = fetch(url)
        decoded = redif.decode(content, hint=[encoding])

    papers = redif.load(decoded)

    required = ('handle', 'template-type')
    for paper in papers:
        # Each paper is iterated as (key, value) pairs.
        present = {name for name, _ in paper}
        missing = next((f for f in required if f not in present), None)
        if missing is not None:
            raise RuntimeError('{} is missing'.format(missing))

    if len(papers) == 0:
        raise RuntimeError('Empty series')
    return papers
Example #4
0
def load_http(url):
    """Download an HTTP resource.

    Issues a GET request with ``settings.user_agent`` as the User-Agent
    header and ``settings.timeout`` as the timeout, then returns the
    response body passed through ``redif.decode`` with the response's
    encoding as a hint.

    Raises:
        requests.exceptions.ConnectionError: if the connection fails.
            When the underlying cause is a urllib3 ``MaxRetryError`` the
            exception's args are replaced with a short message.
        RuntimeError: if the response status code is not 200.
    """
    headers = {'User-Agent': settings.user_agent}
    try:
        response = requests.get(url, timeout=settings.timeout, headers=headers)
    except requests.exceptions.ConnectionError as err:
        # Guard against empty args so an IndexError cannot mask the
        # original connection error.
        cause = err.args[0] if err.args else None
        if isinstance(cause, urllib3.exceptions.MaxRetryError):
            # Replace the verbose retry details with a short message.
            err.args = ('Max retries exceeded', )
        raise
    if response.status_code != 200:
        raise RuntimeError('HTTP Error {}'.format(response.status_code))
    return redif.decode(response.content, hint=[response.encoding])
Example #5
0
def collect_names(files):
    """Download files and collect handle -> name associations.

    Prints a progress line per file.  For every record that has a
    ``name`` entry, the first ``handle`` value (lower-cased) is mapped to
    the first ``name`` value; the first name seen for a handle is kept
    and later differing names are reported on stdout.  Any exception
    raised while processing a file is reported and the remaining records
    of that file are skipped.

    Returns:
        dict mapping lower-cased handle to name.
    """
    names_by_handle = {}
    for position, file in enumerate(files, start=1):
        print(f'[{position}/{len(files)}] {file}...')
        try:
            text = redif.decode(ftp_get(settings.repec_ftp + file))
            for raw in redif.load(text):
                fields = collect(raw)
                if 'name' not in fields:
                    continue
                # Handles are lower-cased because capitalization is
                # inconsistent across records.
                handle = fields['handle'][0].lower()
                candidate = fields['name'][0]
                known = names_by_handle.setdefault(handle, candidate)
                if candidate != known:
                    print(f'Conflicting names: "{known}" vs. "{candidate}"'
                          f' in {handle}')
        except Exception:
            print(f'Skipping {file} due to errors')
    return names_by_handle