def download():
    """Fetch the expert-groups XML export linked from the page at BASE_URL.

    Requests ``BASE_URL``, parses the response body as HTML and inspects
    every ``<a>`` that is a descendant of a ``<div>`` whose ``class``
    attribute equals ``centreBodyContent``. Each anchor whose ``href``
    contains ``openXML`` is handed to ``fetch_data`` with the target name
    ``experts/expert_groups.xml``.

    Anchors that carry no ``href`` attribute are skipped.
    """
    res = requests.get(BASE_URL)
    doc = html.fromstring(res.content)
    for a in doc.findall('.//div[@class="centreBodyContent"]//a'):
        link = a.get('href')
        # An anchor without an href yields None; testing membership on None
        # would raise TypeError and abort the whole download.
        if link is None or 'openXML' not in link:
            continue
        # NOTE(review): 84600 looks like a transposition of 86400 (seconds
        # per day), i.e. the intent is presumably "one week" -- confirm the
        # unit fetch_data expects before changing the value.
        fetch_data(link, 'experts/expert_groups.xml', max_age=84600 * 7)
def download():
    """Fetch the expert-groups XML export linked from the page at BASE_URL.

    Requests ``BASE_URL``, parses the response body as HTML and inspects
    every ``<a>`` that is a descendant of a ``<div>`` whose ``class``
    attribute equals ``centreBodyContent``. Each anchor whose ``href``
    contains ``openXML`` is handed to ``fetch_data`` with the target name
    ``experts/expert_groups.xml``.

    Anchors that carry no ``href`` attribute are skipped.
    """
    res = requests.get(BASE_URL)
    doc = html.fromstring(res.content)
    for a in doc.findall('.//div[@class="centreBodyContent"]//a'):
        link = a.get('href')
        # An anchor without an href yields None; testing membership on None
        # would raise TypeError and abort the whole download.
        if link is None or 'openXML' not in link:
            continue
        # NOTE(review): 84600 looks like a transposition of 86400 (seconds
        # per day), i.e. the intent is presumably "one week" -- confirm the
        # unit fetch_data expects before changing the value.
        fetch_data(link, 'experts/expert_groups.xml', max_age=84600 * 7)
def extract():
    """Obtain the accreditation XML via ``fetch_data`` and run ``extract_data`` on it.

    ``fetch_data`` is called with ``URL`` and the target name
    ``interests/accredditation.xml``; the value it returns is opened as a
    text file and the open handle is passed to ``extract_data``.
    """
    # NOTE(review): 84600 is probably meant to be 86400 (seconds per day);
    # value kept as-is -- confirm against fetch_data before changing.
    cache_lifetime = 84600 * 7
    xml_path = fetch_data(
        URL,
        'interests/accredditation.xml',
        max_age=cache_lifetime,
    )
    with open(xml_path, 'r') as source:
        extract_data(source)
def download():
    """Fetch one FTS export per year, starting at 2007.

    For every year from 2007 up to, but not including, the current calendar
    year, the year is substituted into ``BASE_URL`` and the resulting URL is
    passed to ``fetch_data`` with the target name ``fts/export_<year>.xml``.
    """
    first_year = 2007
    # NOTE(review): range() stops before the current year, so this year's
    # export is never requested -- presumably deliberate; confirm.
    current_year = datetime.now().year
    for year in range(first_year, current_year):
        log.info("Downloading FTS for %s", year)
        fetch_data(BASE_URL % year, 'fts/export_%s.xml' % year)