def download():
    """Fetch the page at BASE_URL and hand its 'openXML' links to fetch_data.

    The response body is parsed with ``html.fromstring`` and every ``<a>``
    element nested under a ``<div class="centreBodyContent">`` is inspected.
    Each anchor whose ``href`` contains the substring ``'openXML'`` is passed
    to ``fetch_data`` together with ``'experts/expert_groups.xml'``.
    Anchors without an ``href`` attribute are skipped.
    """
    res = requests.get(BASE_URL)
    doc = html.fromstring(res.content)
    for a in doc.findall('.//div[@class="centreBodyContent"]//a'):
        link = a.get('href')
        # .get() returns None when the anchor has no href attribute; a
        # substring test against None would raise TypeError, so skip those.
        if link is None or 'openXML' not in link:
            continue

        # NOTE(review): if max_age is in seconds, 84600 * 7 is slightly less
        # than seven days (one day is 86400 s) -- possibly a typo; confirm.
        # NOTE(review): every matching link is passed with the same second
        # argument -- presumably only one link is expected to match; verify.
        fetch_data(link, 'experts/expert_groups.xml', max_age=84600 * 7)
Example #2
0
def download():
    """Scan the BASE_URL page for 'openXML' links and pass them to fetch_data.

    The page content is parsed with ``html.fromstring``; all ``<a>`` elements
    found below a ``<div class="centreBodyContent">`` are examined. An anchor
    is used only if it has an ``href`` containing ``'openXML'``; each such
    link is given to ``fetch_data`` along with ``'experts/expert_groups.xml'``.
    """
    res = requests.get(BASE_URL)
    doc = html.fromstring(res.content)
    for a in doc.findall('.//div[@class="centreBodyContent"]//a'):
        link = a.get('href')
        # An anchor lacking an href makes .get() return None, and
        # "'openXML' in None" raises TypeError -- guard against it.
        if link is None or 'openXML' not in link:
            continue

        # NOTE(review): 84600 * 7 falls a little short of seven days if
        # max_age is measured in seconds (86400 s per day) -- confirm intent.
        fetch_data(link, 'experts/expert_groups.xml', max_age=84600 * 7)
Example #3
0
def extract():
    """Obtain the accreditation XML through fetch_data and run extract_data.

    ``fetch_data`` is called with ``URL``, the name
    ``'interests/accredditation.xml'`` and ``max_age=84600 * 7``. Its return
    value is opened as a text-mode file, and the open handle is passed to
    ``extract_data``. The file is closed when the ``with`` block exits.
    """
    xml_path = fetch_data(
        URL,
        'interests/accredditation.xml',
        max_age=84600 * 7,
    )
    with open(xml_path, mode='r') as source:
        extract_data(source)
Example #4
0
def download():
    """Call fetch_data once per year from 2007 up to, not including, this year.

    For each year the URL is built by interpolating the year into
    ``BASE_URL``, and the second argument to ``fetch_data`` is
    ``'fts/export_<year>.xml'``. A log line is emitted before each call.
    """
    # range() excludes its upper bound, so the current year is not processed.
    stop_year = datetime.now().year
    for yr in range(2007, stop_year):
        log.info("Downloading FTS for %s", yr)
        source_url = BASE_URL % yr
        export_name = 'fts/export_%s.xml' % yr
        fetch_data(source_url, export_name)