コード例 #1
0
ファイル: download.py プロジェクト: np-nakayama/monnet
def extract_data(path):
    """Unpack a gzipped tar archive into the ``ted/xml/`` directory.

    Members whose destination path already exists are skipped, so running
    the function again on the same archive only extracts what is missing.
    Members whose resolved destination would fall outside the target
    directory (absolute names, ``..`` components) are skipped with a warning.

    :param path: filesystem path of the ``.tgz`` archive to extract.
    """
    # NOTE(review): ensure_path is defined elsewhere; it is assumed to return
    # the directory path as a string -- confirm.
    prefix = ensure_path("ted/xml/")
    # Trailing separator so "/data/xml-evil" does not match "/data/xml".
    root = os.path.join(os.path.realpath(prefix), "")
    log.info("Extracing %s", path)
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(path, "r:gz") as tf:
        for member in tf.getmembers():
            mpath = os.path.join(prefix, member.name)
            target = os.path.join(os.path.realpath(mpath), "")
            if not target.startswith(root):
                # Basic guard against path traversal from a crafted archive.
                log.warning("Skipping unsafe archive member: %s", member.name)
                continue
            if not os.path.exists(mpath):
                log.debug("Extracting member: %s", member.name)
                tf.extract(member, path=prefix)
コード例 #2
0
ファイル: iso_list.py プロジェクト: fyloso/monnet
def make_iso_list():
    """Write the URLs of all monthly TED ISO images to ``ted/iso_list.txt``.

    For every year from 2010 up to and including the current UTC year, the
    listing page at ``URL % year`` is fetched with ``AUTH`` and every link
    whose ``href`` contains ``monthly.iso`` is written to the output file,
    one per line (CRLF terminated), with the credentials from ``AUTH``
    embedded after the scheme separator.
    """
    # NOTE(review): the credentials are written in clear text and are not
    # URL-quoted; a ':' or '@' in AUTH would produce a malformed URL -- confirm
    # this is acceptable for the consumer of the list.
    credentials = AUTH[0] + ':' + AUTH[1] + '@'
    with open(ensure_path('ted/iso_list.txt'), 'w') as fh:
        for year in range(2010, datetime.utcnow().year + 1):
            log.info("Listing monthly TED ISOs for %s", year)
            res = requests.get(URL % year, auth=AUTH)
            doc = html.fromstring(res.content)
            for anchor in doc.findall('.//a'):
                href = anchor.get('href')
                # Anchors without an href yield None; skip them instead of
                # raising TypeError on the membership test.
                if not href or 'monthly.iso' not in href:
                    continue
                # Only the first '//' (the scheme separator) takes the
                # credentials; later occurrences in the path stay untouched.
                url = href.replace('//', '//' + credentials, 1)
                fh.write(url + '\r\n')
コード例 #3
0
ファイル: download.py プロジェクト: np-nakayama/monnet
def download_by_id(session, bulk_id):
    """Download one TED bulk export archive unless it is already on disk.

    :param session: object exposing a ``post(url, data=..., allow_redirects=...)``
        method (presumably a ``requests.Session`` -- confirm against callers).
    :param bulk_id: identifier interpolated into both the local file name and
        the download URL.
    :returns: the local path of the archive (whether freshly downloaded or
        already present), or ``False`` when the server answered with an HTML
        page instead of an archive.
    """
    dest_path = ensure_path("ted/archives/%s.tgz" % bulk_id)
    log.info("Loading: %s", dest_path)
    if os.path.exists(dest_path):
        log.info("Skip: exists")
        return dest_path

    url = "http://ted.europa.eu/TED/misc/bulkDownloadExport.do?dlTedExportojsId=%s"
    url = url % bulk_id
    data = {"action": "dlTedExport"}
    res = session.post(url, data=data, allow_redirects=True)

    # A missing Content-Type header yields None; default to "" so the
    # membership test cannot raise TypeError.
    content_type = res.headers.get("content-type") or ""
    if "html" in content_type:
        # An HTML body means no archive was served for this request.
        return False

    with open(dest_path, "wb") as fh:
        fh.write(res.content)
    log.info("Downloaded: %s", dest_path)
    return dest_path