def parse(refs=None, force=False, limit=None):
    """Parse parliamentary objects and store the parsed values.

    :param refs: references of the objects to parse. When ``None``, the
        references are read from the ``ObjetParlementaire`` table.
    :param force: only used when ``refs`` is ``None``. If false, the query
        is restricted to rows whose ``etat`` is NULL; if true, every row
        is selected.
    :param limit: only used when ``refs`` is ``None``. A truthy value caps
        the number of rows selected; a falsy value (``None``, ``0``)
        applies no cap.
    """
    if refs is None:
        # NOTE: deferring the 'description' / 'beschreibung' columns via
        # ``.options(defer(...))`` was tried here and is currently disabled.
        query = Session.query(ObjetParlementaire)
        if not force:
            # ``== None`` is intentional: the comparison is handed to the
            # query layer (presumably SQLAlchemy, rendering IS NULL), so it
            # must not be rewritten as ``is None``.
            query = query.filter(ObjetParlementaire.etat == None)  # noqa: E711
        if limit:
            query = query.limit(limit)
        refs = [row.ref for row in query]

    # presumably yields one mapping of attribute values per ref, in the same
    # order as ``refs`` -- the pairing below relies on that; confirm against
    # ``map_async``.
    parsed = map_async(parse_objet, refs)
    for ref, fields in zip(refs, parsed):
        target = ObjetParlementaire.get_or_create(ref)
        for name, value in fields.iteritems():
            setattr(target, name, value)
    Session.commit()

    message = 'Updated %i `ObjetParlementaire`s' % len(refs)
    logger.info(message)
def get_doc(ref, lang):
    """Fetch the page of one parliamentary object and return its document node.

    :param ref: reference of the object; used to build the URL and passed
        on to ``x_doc`` as the ``ref`` keyword.
    :param lang: language passed to ``ObjetParlementaire.url``.
    :returns: the first item of the ``x_doc`` result for the parsed page.
    :raises IndexError: presumably, when ``x_doc`` yields an empty sequence
        (it is indexed with ``[0]`` unconditionally).
    """
    url = ObjetParlementaire.url(ref, lang)
    with parlament_ctx(url) as stream:
        # recover=True lets the HTML parser carry on past malformed markup.
        lenient_parser = etree.HTMLParser(recover=True)
        tree = etree.parse(stream, parser=lenient_parser)
    matches = x_doc(tree, ref=ref)
    return matches[0]