Example #1
0
 def test_filepath(self):
     '''Check that the path Cache.filepath gives for a URL ends with 'xyz'.'''
     url = 'http://www.abc.org/' + 'xyz'
     store = Cache()
     path = store.filepath(url)
     # original note on the expected shape: ./xyz
     assert path.endswith('xyz'), path
Example #2
0
 def test_filepath(self):
     '''Check that the path Cache.filepath gives for a URL ends with 'xyz'.'''
     url = 'http://www.abc.org/' + 'xyz'
     store = Cache()
     path = store.filepath(url)
     # original note on the expected shape: ./xyz
     assert path.endswith('xyz'), path
Example #3
0
    def test_basename(self):
        '''Check Cache.basename on plain, nested and query-string URLs.'''
        root = 'http://www.abc.org/'

        # Single path segment.
        name = Cache.basename(root + 'xyz')
        assert name == 'xyz'

        # Nested path: only the final segment is expected back.
        name = Cache.basename(root + 'xyz/abc.txt')
        assert name == 'abc.txt'

        # A query string is expected to stay part of the name.
        name = Cache.basename(root + 'membersDo?body=ABC')
        assert name == 'membersDo?body=ABC', name

        # A '/' inside the query string is expected to come back as '%47'.
        # NOTE(review): the standard percent-encoding of '/' is '%2F'; this
        # value pins what the test expects from Cache.basename -- confirm
        # against the implementation before changing it.
        name = Cache.basename(root + 'membersDo?body=data/ABC')
        assert name == 'membersDo?body=data%47ABC', name
Example #4
0
    def test_basename(self):
        '''Check Cache.basename on plain, nested and query-string URLs.'''
        root = 'http://www.abc.org/'

        # Single path segment.
        name = Cache.basename(root + 'xyz')
        assert name == 'xyz'

        # Nested path: only the final segment is expected back.
        name = Cache.basename(root + 'xyz/abc.txt')
        assert name == 'abc.txt'

        # A query string is expected to stay part of the name.
        name = Cache.basename(root + 'membersDo?body=ABC')
        assert name == 'membersDo?body=ABC', name

        # A '/' inside the query string is expected to come back as '%47'.
        # NOTE(review): the standard percent-encoding of '/' is '%2F'; this
        # value pins what the test expects from Cache.basename -- confirm
        # against the implementation before changing it.
        name = Cache.basename(root + 'membersDo?body=data/ABC')
        assert name == 'membersDo?body=data%47ABC', name
Example #5
0

TODO: do other languages (French, Spanish, Russian)
TODO: footnotes about CS and IS ...
'''
import csv
import os
import zipfile
import commands
from StringIO import StringIO

from swiss.cache import Cache

# Source archive URL and the names looked up in it.
access_db_zip_url = 'http://unstats.un.org/unsd/cr/registry/regdntransfer.asp?f=186'
db_filename = 'COFOG_english.mdb'
details_table_name = 'tblTitles_English_COFOG'

# Cache directory, relative to the current working directory.
# Earlier alternative, kept for reference:
# cache_path = os.path.join(os.path.dirname(__file__), './cache')
cache_path = './cache'
cache = Cache(cache_path)

# Path for the Access database as reported by the cache object.
db_filepath = cache.cache_path(db_filename)

def retrieve():
    '''Retrieve remote files into the local cache.

    Fetches the zip archive at ``access_db_zip_url`` through
    ``cache.retrieve``, reads the ``db_filename`` member out of it and
    writes those bytes to ``db_filepath``.
    '''
    fp = cache.retrieve(access_db_zip_url)
    zipfo = zipfile.ZipFile(fp)
    try:
        # ZipFile.extract only exists from Python 2.6 on, so the member is
        # read into memory and written out by hand instead of:
        # zipfo.extract('COFOG_english.mdb', cache.path)
        out = zipfo.read(db_filename)
    finally:
        zipfo.close()
    # The .mdb payload is binary: 'wb' avoids newline translation on
    # platforms that distinguish text mode, and the explicit close makes
    # sure the data is flushed before anyone reads the file.
    dbfo = open(db_filepath, 'wb')
    try:
        dbfo.write(out)
    finally:
        dbfo.close()
Example #6
0
 def test_cache(self):
     '''Retrieve self.url via a Cache under self.tmp; 'abc.txt' must exist there.'''
     cache_dir = os.path.join(self.tmp, 'cache')
     Cache(cache_dir).retrieve(self.url)
     expected = os.path.join(cache_dir, 'abc.txt')
     assert os.path.exists(expected)
Example #7
0
 def test_dl(self):
     '''Download self.url to a file under self.tmp and check its content is 'abc'.'''
     dest = os.path.join(self.tmp, 'out.txt')
     Cache.dl(self.url, dest)
     assert os.path.exists(dest)
     # Close the handle explicitly instead of leaving it to the garbage
     # collector, so the temporary file is not held open after the test.
     fo = open(dest)
     try:
         content = fo.read()
     finally:
         fo.close()
     assert content == 'abc'
Example #8
0
import os
import re

import BeautifulSoup as bs
import json

from swiss.cache import Cache

# URLs under www.europarl.europa.eu used by this module.
europarl_url = 'http://www.europarl.europa.eu'
member_base_url = 'http://www.europarl.europa.eu/members/expert/committees/view.do'
juri_url = 'http://www.europarl.europa.eu/activities/committees/membersCom.do?body=JURI'
itre_url = 'http://www.europarl.europa.eu/activities/committees/membersCom.do?body=ITRE'

# Local directories, both resolved relative to this module's location.
DATAPATH = os.path.join(os.path.dirname(__file__), 'data')
cache = os.path.join(os.path.dirname(__file__), 'cache')

# Cache object built on the cache directory, and the path of 'info.js'
# inside that same directory.
retriever = Cache(cache)
infopath = os.path.join(cache, 'info.js')

# from http://effbot.org/zone/re-sub.htm#unescape-html
import re, htmlentitydefs


def unescape(text):
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return unichr(int(text[3:-1], 16))
                else:
Example #9
0
'''Extract COFOG codes into usable (csv) form.


TODO: do other languages (French, Spanish, Russian)
TODO: footnotes about CS and IS ...
'''
import csv
import os
import zipfile
import commands
from StringIO import StringIO

from swiss.cache import Cache

# Source archive URL and the names looked up in it.
access_db_zip_url = 'http://unstats.un.org/unsd/cr/registry/regdntransfer.asp?f=186'
db_filename = 'COFOG_english.mdb'
details_table_name = 'tblTitles_English_COFOG'

# Cache directory, located next to this module.
cache_path = os.path.join(os.path.dirname(__file__), 'cache')
cache = Cache(cache_path)

# Path for the Access database as reported by the cache object.
db_filepath = cache.cache_path(db_filename)


def retrieve():
    '''Retrieve remote files into local cache.

    Fetches ``access_db_zip_url`` through ``cache.retrieve``, opens the
    result as a zip archive and reads the ``db_filename`` member from it.

    NOTE(review): the visible part of this function ends once the member
    has been read; ``out`` is not used within these lines.
    '''
    # Whatever cache.retrieve returns is handed straight to ZipFile.
    fp = cache.retrieve(access_db_zip_url)
    zipfo = zipfile.ZipFile(fp)
    # extract is in 2.6
    # zipfo.extract('COFOG_english.mdb', cache.path)
    # Read the archive member fully into memory.
    out = zipfo.read(db_filename)
Example #10
0
 def test_cache(self):
     '''Retrieve self.url via a Cache under self.tmp; 'abc.txt' must exist there.'''
     cache_dir = os.path.join(self.tmp, 'cache')
     Cache(cache_dir).retrieve(self.url)
     expected = os.path.join(cache_dir, 'abc.txt')
     assert os.path.exists(expected)
Example #11
0
 def test_dl(self):
     '''Download self.url to a file under self.tmp and check its content is 'abc'.'''
     dest = os.path.join(self.tmp, 'out.txt')
     Cache.dl(self.url, dest)
     assert os.path.exists(dest)
     # Close the handle explicitly instead of leaving it to the garbage
     # collector, so the temporary file is not held open after the test.
     fo = open(dest)
     try:
         content = fo.read()
     finally:
         fo.close()
     assert content == 'abc'