Example #1
0
 def test_filepath(self):
     '''Check that the path a default Cache derives from a URL ends with
     the URL's last segment.
     '''
     url = 'http://www.abc.org/' + 'xyz'
     cached_path = Cache().filepath(url)
     # expected to look like ./xyz; the path is shown if the check fails
     assert cached_path.endswith('xyz'), cached_path
Example #2
0
 def test_cache(self):
     '''Retrieve self.url through a Cache rooted under self.tmp and check
     that a file named 'abc.txt' then exists in that cache directory.
     '''
     cache_dir = os.path.join(self.tmp, 'cache')
     Cache(cache_dir).retrieve(self.url)
     expected_file = os.path.join(cache_dir, 'abc.txt')
     assert os.path.exists(expected_file)
Example #3
0
import os
import re

import BeautifulSoup as bs
import json

from swiss.cache import Cache

# Directory containing this script; the cache and data directories sit beside it.
_here = os.path.dirname(__file__)
cache = os.path.join(_here, 'cache')
DATAPATH = os.path.join(_here, 'data')

# europarl.europa.eu addresses used by this script: the site root, the
# membersCom.do pages for the JURI and ITRE bodies, and the members view page.
europarl_url = 'http://www.europarl.europa.eu'
juri_url = 'http://www.europarl.europa.eu/activities/committees/membersCom.do?body=JURI'
itre_url = 'http://www.europarl.europa.eu/activities/committees/membersCom.do?body=ITRE'
member_base_url = 'http://www.europarl.europa.eu/members/expert/committees/view.do'

# Cache instance rooted at the local cache directory, plus the path of the
# 'info.js' file kept inside that same directory.
retriever = Cache(cache)
infopath = os.path.join(cache, 'info.js')

# from http://effbot.org/zone/re-sub.htm#unescape-html
import re, htmlentitydefs


def unescape(text):
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                else:
Example #4
0
'''Extract COFOG codes into usable (csv) form.


TODO: do other languages (French, Spanish, Russian)
TODO: footnotes about CS and IS ...
'''
import csv
import os
import zipfile
import commands
from StringIO import StringIO

from swiss.cache import Cache

# Remote zip archive and the names looked up inside it.
access_db_zip_url = 'http://unstats.un.org/unsd/cr/registry/regdntransfer.asp?f=186'
details_table_name = 'tblTitles_English_COFOG'
db_filename = 'COFOG_english.mdb'

# Local cache lives in a 'cache' directory next to this script.
cache_path = os.path.join(os.path.dirname(__file__), 'cache')
cache = Cache(cache_path)

# Path for the database file as reported by the cache object.
db_filepath = cache.cache_path(db_filename)


def retrieve():
    '''Retrieve remote files into local cache.
    '''
    fp = cache.retrieve(access_db_zip_url)
    zipfo = zipfile.ZipFile(fp)
    # extract is in 2.6
    # zipfo.extract('COFOG_english.mdb', cache.path)
    out = zipfo.read(db_filename)