Beispiel #1
0
 def retrieve_many(legislature=max(LEGISLATURES), limit=300, force=False):
     """Retrieve the next batch of PDFs for a legislature.

     Candidate refs start right after the highest ref found among the files
     already present in the legislature's local directory, or at 1 when that
     directory is empty or `force` is set. The directory is created if it
     does not exist yet.

     :param legislature: value substituted into ``PDF.LOCAL_DIR`` and passed
         to ``PDF``; defaults to ``max(LEGISLATURES)``, which is evaluated
         once, when the function is defined.
     :param limit: number of consecutive refs to attempt.
     :param force: when true, ignore existing files and restart from ref 1.
     :return: list of the ``ref`` of every PDF for which ``retrieve()``
         returned a truthy result.
     """
     dir_ = PDF.LOCAL_DIR % {'legislature': legislature}
     if not os.path.exists(dir_):
         os.makedirs(dir_)
     fnames_existing = os.listdir(dir_)
     if force or not fnames_existing:
         start = 0
     else:
         # NOTE(review): assumes every file in the directory can be parsed by
         # PDF.from_filename -- confirm no stray files can end up here.
         start = max(PDF.from_filename(f).ref for f in fnames_existing)
     # The upper bound of xrange is exclusive, hence the extra +1: exactly
     # `limit` refs are attempted (the previous bound attempted limit - 1).
     refs = xrange(start + 1, start + limit + 1)
     pdfs = map_async(lambda ref: PDF(ref, legislature).retrieve(), refs)
     # Falsy results are dropped; presumably retrieve() returns the PDF on
     # success and None otherwise -- TODO confirm.
     retrieved = [pdf.ref for pdf in pdfs if pdf]
     if retrieved:
         logger.info(u'copié %i pdfs (legislature %s, refs %s à %s)',
                     len(retrieved), legislature, retrieved[0], retrieved[-1])
     return retrieved
Beispiel #2
0
def parse(refs=None, force=False, limit=None):
    """Parse parliamentary objects and store the parsed values.

    When `refs` is not given, the refs are read from the
    ``ObjetParlementaire`` query: all rows if `force` is set, otherwise only
    those whose ``etat`` is NULL, optionally capped at `limit` rows.

    Each ref is passed to ``parse_objet`` through ``map_async``; every
    key/value pair of the resulting mapping is set as an attribute on the
    matching ``ObjetParlementaire`` (fetched or created), and the session is
    committed after each object.

    :param refs: refs to parse, or None to select them from the database.
    :param force: when selecting from the database, do not restrict the
        selection to rows with a NULL ``etat``.
    :param limit: when selecting from the database, maximum number of rows
        (ignored when falsy).
    """
    if refs is None:
        query = Session.query(ObjetParlementaire)
        # NOTE: deferring the 'description' / 'beschreibung' columns via
        # .options(defer(...)) was tried here at some point and left disabled.
        if not force:
            # Presumably a SQLAlchemy column expression: `== None` must stay
            # as is (not `is None`) so that it builds the NULL test.
            query = query.filter(ObjetParlementaire.etat == None)
        if limit:
            query = query.limit(limit)
        refs = [objet.ref for objet in query]

    parsed = map_async(parse_objet, refs)
    for ref, attrs in zip(refs, parsed):
        objet = ObjetParlementaire.get_or_create(ref)
        for name, value in attrs.iteritems():
            setattr(objet, name, value)
        # Commit once per object, as soon as its attributes are set.
        Session.commit()

    count = len(refs)
    logger.info('Updated %i `ObjetParlementaire`s' % count)
Beispiel #3
0
            try:
                matches = lookup_bio_id(*args)
            except Exception, e:
                logger.info('bio_id error: %s' % e)
                return

            nb = len(set(matches))
            if nb == 1:
                pers.bio_id = bio_id = matches[0]
                return bio_id
            elif nb == 0:
                logger.info('bio_id - no match for %s', args)
            else:
                logger.info('bio_id - %i matches for %s (%s)', nb,args,matches)

    bio_ids = map_async(update_one, q, poolsize=100)
    Session.commit()
    logger.info('Updated %i bio-ids from web.' % len(filter(None, bio_ids)))

    if use_lookup_table:
        nb_from_lt = 0
        for ident, i in bio_ids_lookup_table.iteritems():
            p = db.Personne.query.get(ident)
            if force or p.bio_id is None:
                p.bio_id = i
                logger.info('bio_id - applying lookup table value (%s) for %s',
                            (i, p))
                nb_from_lt += 1
        Session.commit()
        logger.info('Updated %i bio-ids from lookup table.' % nb_from_lt)