def process(output_directory):
    """Return the ``id_legi -> common_name`` mapping of common laws.

    The mapping is cached in ``lois_dites.json`` inside ``output_directory``
    (created if missing). The cache is reused when the file exists and was
    modified within the last 86400 seconds; otherwise the mapping is rebuilt
    from ``LawService().common_laws()`` and written back with ``print_json``.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    destfile = os.path.join(output_directory, 'lois_dites.json')
    one_day_ago = time.time() - 86400

    # Guard clause: a cache file no older than a day is served directly.
    cache_is_fresh = (os.path.exists(destfile)
                      and os.path.getmtime(destfile) >= one_day_ago)
    if cache_is_fresh:
        return open_json(destfile)

    common_laws = {}
    for law in LawService().common_laws():
        common_laws[law.id_legi] = law.common_name
    print_json(common_laws, destfile)
    return common_laws
def process(dos, OUTPUT_DIR):
    """Regroup each step's flat article stream into one ``texte.json`` document.

    For every step of ``dos['steps']``:

    * ``step['directory']`` is set from ``get_step_id(step_i, step)``;
    * steps without ``articles_completed`` / ``articles`` are left as is;
    * otherwise the entries are folded into a single dict: the ``"texte"``
      entry provides the base fields, ``"section"`` and ``"article"`` entries
      are appended to its ``sections`` / ``articles`` lists, and an
      ``"echec"`` entry stores its ``texte`` under ``expose``. Entries of any
      other type are ignored.

    The resulting dict is written to ``<OUTPUT_DIR>/<directory>/texte/texte.json``
    via ``print_json`` and stored in ``step['texte.json']``.

    Returns the mutated ``dos``.

    Raises:
        Exception: if an entry is empty or has no ``type`` field, or if a
            non-"texte" entry shows up before the step's "texte" header
            (or the header's id is an empty string).
    """
    def log_err(txt, arg=None):
        # Always raises, so no follow-up exit is needed at the call sites.
        # `arg` is accepted for signature compatibility but unused.
        raise Exception(txt)

    for step_i, step in enumerate(dos['steps']):

        step['directory'] = get_step_id(step_i, step)
        step_dir = os.path.join(OUTPUT_DIR, step['directory'], 'texte')

        articles = step.get('articles_completed', step.get('articles'))
        if not articles:
            continue

        mkdirs(step_dir)

        # Reset per step: without this the "missing first line" check below
        # hit an unbound local on the first step, and on later steps silently
        # appended to the previous step's header dict.
        textid = ""
        alldata = None
        for data in articles:
            if not data or "type" not in data:
                log_err("JSON badly formatted, missing field type: %s" % data)
            if data["type"] == "texte":
                textid = data["id"]
                alldata = dict(data)
                alldata['sections'] = []
                alldata['articles'] = []
            elif textid == "":
                log_err("JSON missing first line with text infos")
            elif data["type"] == "section":
                alldata['sections'].append(data)
            elif data["type"] == "article":
                alldata['articles'].append(data)
            elif data["type"] == "echec":
                alldata['expose'] = data['texte']

        print_json(alldata, os.path.join(step_dir, 'texte.json'))

        step['texte.json'] = alldata

    return dos
    """ % (get_node_id('CMP'), get_node_id('3ème lecture • assemblee'),
           get_node_id('3ème lecture • senat'))

    dot_result += """
    {
        rank=same; %s; %s;
    }
    """ % (get_node_id('congrès • congrès'),
           get_node_id('constitutionnalité • conseil constitutionnel'))
else:
    for stage in ['1ère lecture', '2ème lecture', '3ème lecture', 'CMP']:
        for step in ['depot', 'commission', 'hemicycle']:
            if stage == 'CMP' and step == 'commission': continue
            dot_result += ("""
              {
                rank=same; %s; %s;
              }
            """ % (get_node_id('%s • assemblee • %s' % (stage, step)),
                   get_node_id('%s • senat • %s' % (stage, step))))

dot_result += '\n}'

# Suffix that keeps the outputs of the "detailed" mode apart from the default ones.
details = "_detailed" if mode == "detailed" else ""
# Context manager so the log file is flushed and closed deterministically
# instead of relying on garbage collection of the anonymous file object.
with open('_steps%s.log' % details, 'w') as steps_log_file:
    steps_log_file.write(steps_logs)
print_json(step_trans, 'steps%s_transitions.json' % details)

# The DOT graph itself goes to stdout.
print(dot_result)
# open('steps.dot','w').write(dot_result)

# improve layout: https://stackoverflow.com/questions/11588667/how-to-influence-layout-of-graph-items
# NOTE(review): this snippet is Python 2 (`iteritems`, `print >>`); those
# constructs are left untouched.
# Record, for each step directory, the integer made of the first two
# characters of the id_step of the first article step seen for it.
for _, a in articles.iteritems():
    for s in a['steps']:
        stepid = s['directory']
        if stepid not in good_steps:
            good_steps[stepid] = int(s['id_step'][:2])

for i, s in enumerate(procedure['steps']):
    s['debats_order'] = None
    # A step flagged as having interventions but absent from `intervs` loses
    # the flag.
    if s.get('has_interventions') and s['directory'] not in intervs:
        print >> sys.stderr, "WARNING: removing nearly empty interventions steps for %s" % s[
            'directory'].encode('utf-8')
        s['has_interventions'] = False
    if 'directory' in s:
        if i == len(procedure['steps']) - 1 and not s['enddate']:
            # Last step with no end date: ordered after every known step.
            s['debats_order'] = max(good_steps.values()) + 1
        else:
            s['debats_order'] = good_steps.get(s['directory'], None)
    # Identity test against the None singleton rather than `!= None`.
    if s.get('step', '') == 'depot' and s['debats_order'] is not None:
        # The depositing author is derived from the shape of the source URL.
        if '/propositions/' in s.get('source_url', ''):
            s['auteur_depot'] = u"Députés"
        elif '/leg/ppl' in s.get('source_url', ''):
            s['auteur_depot'] = u"Sénateurs"
        else:
            s['auteur_depot'] = u"Gouvernement"
    # Iterate over a snapshot of the keys since entries are deleted on the fly.
    for field in list(s):
        if field.endswith('_directory') or field.endswith('_files'):
            del s[field]

print_json(procedure)
Example #5
0
def process(dos):
    """Attach to every article the text spans flagged by ``metslesliens``.

    For each step of ``dos['steps']``, the entries of ``articles_completed``
    (falling back to ``articles``) whose ``type`` is ``"article"`` get a
    ``liens`` list. Alineas are read by their zero-padded keys ``"001"``,
    ``"002"``, ... up to the number of alineas, and each candidate returned by
    ``metslesliens.donnelescandidats(text, 'structuré')`` that carries a
    ``texte`` key contributes the slice of the alinea delimited by its
    ``index`` pair.

    Returns the same ``dos`` object, mutated in place.
    """
    for step in dos['steps']:
        articles = step.get('articles_completed', step.get('articles'))
        for data in articles or ():
            if data["type"] != "article":
                continue
            found = data['liens'] = []
            alineas = data["alineas"]
            for number in range(1, len(alineas) + 1):
                text = alineas["%03d" % number]
                candidates = metslesliens.donnelescandidats(text, 'structuré')
                for candidat in candidates:
                    if 'texte' not in candidat:
                        continue
                    start, end = candidat['index'][0], candidat['index'][1]
                    found.append(text[start:end])
                    # Disabled richer output format, kept for reference:
                    # data['liens'].append({
                    #     'url': 'https://duckduckgo.com/?q=!ducky+' + urllib.parse.quote_plus(link),
                    #     'texte': link,
                    #     'alinea': i,
                    #     # 'index': candidat['index'],
                    # })
    return dos


if __name__ == '__main__':
    # Script entry point: the first command-line argument goes through
    # open_json, the result through process(), and the outcome to print_json.
    print_json(process(open_json(sys.argv[1])))
Example #6
0
from monitoring_config import config
from measurement_tds_info import calculate_output_data
from common import print_json

# Run calculate_output_data on the imported monitoring config and hand the
# result to print_json.
print_json(calculate_output_data(config))
Example #7
0
def callbackstore(ch, method, properties, body):
    """Handle one delivered message for the "store" consumer.

    Waits via ``common.sleep_random()``, reports the routing key through
    ``print_method``, dumps ``body`` with ``common.print_json`` and finally
    acknowledges the delivery on ``ch``. ``properties`` is unused.

    NOTE(review): the signature looks like a message-broker consumer
    callback; confirm against the code that registers it.
    """
    common.sleep_random()

    routing_key = method.routing_key
    print_method("store", routing_key)

    common.print_json(body)

    # Acknowledge only after the body has been printed.
    tag = method.delivery_tag
    ch.basic_ack(delivery_tag=tag)
                out['articles'][id]['steps'] = []
                s = create_step(step_id, step['directory'], article)
                s['n_diff'] = 1
                s['diff'] = 'add'
                if nstep >= depots:
                    s['status'] = 'new'
                else:
                    s['status'] = 'depot'
                txt = "\n".join(s['text'])
            if s['status'] == 'sup':
                s['length'] = 50
                s['n_diff'] = 0
            else:
                s['length'] = len(txt)
            out['articles'][id]['steps'].append(s)
        if 'step' in step and not echec:
            old_step_id = step_id

    except Exception as e:
        sys.stderr.write("ERROR parsing step %s:\n%s: %s\n" % (step, type(e), e))
        exit(1)

# Remove the 'text' entry from every step of every article, then emit `out`.
for art_key in out['articles']:
    art_entry = out['articles'][art_key]
    textless_steps = []
    for step_entry in art_entry['steps']:
        del step_entry['text']
        textless_steps.append(step_entry)
    art_entry['steps'] = textless_steps

print_json(out)
        mkdirs(step_dir)
        for data in articles:
            if not data or not "type" in data:
                log_err("JSON badly formatted, missing field type: %s" % data)
                sys.exit(1)
            if data["type"] == "texte":
                textid = data["id"]
                alldata = dict(data)
                alldata['sections'] = []
                alldata['articles'] = []
            elif textid == "":
                log_err("JSON missing first line with text infos")
                sys.exit(1)
            elif data["type"] == "section":
                alldata['sections'].append(data)
            elif data["type"] == "article":
                alldata['articles'].append(data)
            elif data["type"] == "echec":
                alldata['expose'] = data['texte']

        print_json(alldata, os.path.join(step_dir, 'texte.json'))

        step['texte.json'] = alldata

    return dos


if __name__ == '__main__':
    # Script entry point: process the file named by the first command-line
    # argument (read through open_json), using 'test_out' as output directory.
    print_json(process(open_json(sys.argv[1]), 'test_out'))
Example #10
0
        if amdt["groupe"] == "Gouvernement":
            stats["total_amendements_gouvernement"] += 1
        else:
            stats["total_amendements_parlementaire"] += 1

    stats["echecs_procedure"] = len(
        [step for step in dos['steps'] if step.get("echec")])

    if 'end' in dos:
        stats["total_days"] = (datize(dos["end"]) -
                               datize(dos["beginning"])).days + 1

        first_text, first_arts, last_text, last_arts = find_first_and_last_texts(
            dos)

        stats["total_input_articles"] = len(first_arts)
        stats["total_output_articles"] = len(last_arts)
        stats["ratio_articles_growth"] = len(last_arts) / len(first_arts)

        stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles(
            first_arts, last_arts)
        stats["input_text_length"] = len("\n".join(first_text))
        stats["output_text_length"] = len("\n".join(last_text))

    return stats


if __name__ == '__main__':
    # Script entry point: the first command-line argument is passed to
    # process() as is, the second one goes through open_json first.
    print_json(process(sys.argv[1], open_json(sys.argv[2])))
                s = create_step(step_id, step['directory'], article)
                s['n_diff'] = 1
                s['diff'] = 'add'
                if nstep >= depots:
                    s['status'] = 'new'
                else:
                    s['status'] = 'depot'
                txt = "\n".join(s['text'])
            if s['status'] == 'sup':
                s['length'] = 50
                s['n_diff'] = 0
            else:
                s['length'] = len(txt)
            out['articles'][id]['steps'].append(s)
        if 'step' in step and not echec:
            old_step_id = step_id

    except Exception as e:
        sys.stderr.write("ERROR parsing step %s:\n%s: %s\n" %
                         (step, type(e), e))
        exit(1)

# Every step loses its 'text' entry before the structure is printed.
for article_id in out['articles']:
    article_entry = out['articles'][article_id]
    kept_steps = []
    for one_step in article_entry['steps']:
        del one_step['text']
        kept_steps.append(one_step)
    article_entry['steps'] = kept_steps

print_json(out)
# NOTE(review): this snippet is Python 2 (`iteritems`, `print >>`); those
# constructs are left untouched.
# step directory -> integer made of the first two characters of the id_step
# of the first article step seen for that directory.
good_steps = {}
for _, a in articles.iteritems():
    for s in a['steps']:
        stepid = s['directory']
        if stepid not in good_steps:
            good_steps[stepid] = int(s['id_step'][:2])

for i, s in enumerate(procedure['steps']):
    s['debats_order'] = None
    # A step flagged as having interventions but absent from `intervs` loses
    # the flag.
    if s.get('has_interventions') and s['directory'] not in intervs:
        print >> sys.stderr, "WARNING: removing nearly empty interventions steps for %s" % s['directory'].encode('utf-8')
        s['has_interventions'] = False
    if 'directory' in s:
        if i == len(procedure['steps'])-1 and not s['enddate']:
            # Last step with no end date: ordered after every known step.
            s['debats_order'] = max(good_steps.values()) + 1
        else:
            s['debats_order'] = good_steps.get(s['directory'], None)
    # Identity test against the None singleton rather than `!= None`.
    if s.get('step', '') == 'depot' and s['debats_order'] is not None:
        # The depositing author is derived from the shape of the source URL.
        if '/propositions/' in s.get('source_url', ''):
            s['auteur_depot'] = u"Députés"
        elif '/leg/ppl' in s.get('source_url',''):
            s['auteur_depot'] = u"Sénateurs"
        else:
            s['auteur_depot'] = u"Gouvernement"
    # Iterate over a snapshot of the keys since entries are deleted on the fly.
    for field in list(s):
        if field.endswith('_directory') or field.endswith('_files'):
            del s[field]

print_json(procedure)

Example #13
0
def process(OUTPUT_DIR, procedure):
    """Fetch amendments and debate interventions for the steps of a procedure.

    Only "commission" and "hemicycle" steps are handled; the first step and
    CMP commission steps are skipped. For each remaining step:

    * the amendments JSON is downloaded from nosdeputes.fr or nossenateurs.fr
      (chosen from the URL of the text under discussion), filtered down to the
      amendments belonging to this step, grouped by subject and written to
      ``viz/amendements_<directory>.json`` under ``context.sourcedir``;
      ``step['nb_amendements']`` is set;
    * the seances of the text are downloaded and each one is dumped into
      ``procedure/<directory>/interventions/``; when at least one was found
      ``step['has_interventions']`` and ``step['intervention_files']`` are set.

    Args:
        OUTPUT_DIR: directory handed to ``Context``.
        procedure: dict with a ``steps`` list; read for ``url_jo``,
            ``use_old_procedure`` and ``assemblee_legislature`` too.

    Returns:
        The same ``procedure`` dict, with its steps mutated in place.
    """
    context = Context([0, OUTPUT_DIR], load_parls=True)

    # Known raw values:
    # ['Indéfini', 'Adopté', 'Irrecevable', 'Rejeté', 'Retiré', 'Tombe', 'Non soutenu', 'Retiré avant séance', 'Rectifié', 'Favorable' ,'Satisfait']
    def simplify_sort(sort):
        """Collapse a raw amendment outcome into one of four categories."""
        # Exact comparison: the former substring tests (`sort in "adopté
        # favorable"`) also matched "" and arbitrary fragments, which were
        # then reported as adopted.
        sort = sort.lower().strip()
        if sort in ("adopté", "favorable"):
            return "adopté"
        if sort == "rejeté":
            return "rejeté"
        if sort == "indéfini":
            return "en attente"
        return "non-voté"

    re_clean_first = re.compile(r'^(.*?)(,| et) .*$')

    def first_author(signataires):
        """Return the first signatory followed by ", …" when there are several.

        Returns "" when there are no signatories or for the government.
        """
        if signataires is None or "gouvernement" in signataires.lower():
            return ""
        return re_clean_first.sub(r'\1, …', signataires)

    def find_groupe(amd):
        """Return the slug of the group with the most signatories ("" if none)."""
        if amd['signataires'] and "gouvernement" in amd['signataires'].lower():
            return "Gouvernement"
        ct = {}
        maxc = 0
        result = ""
        for gpe in amd['groupes_parlementaires']:
            g = gpe['groupe']
            count = 1

            # the new api compacts the groups as "<name>:<count>"
            if ':' in g:
                g, count = gpe['groupe'].split(':')
                count = int(count)

            g = slug_groupe(g)
            if g not in ct:
                ct[g] = 0
            ct[g] += count
            # Strict comparison: on a tie the first group to reach the count wins.
            if ct[g] > maxc:
                maxc = ct[g]
                result = g
        return result

    def add_link(links, pA, pB, weight=1):
        """Add ``weight`` to the undirected link between ``pA`` and ``pB``."""
        p1 = min(pA, pB)
        p2 = max(pA, pB)
        linkid = "%s-%s" % (p1, p2)
        if linkid not in links:
            links[linkid] = {
              "1": p1,
              "2": p2,
              "w": 0
            }
        links[linkid]["w"] += weight

    article_number_regexp = re.compile(r'article (1er.*|(\d+).*)$', re.I)

    def sort_amendements(texte, amendements):
        """Set ``ordre_article`` on every amendment from its subject.

        ``texte`` is the list of articles of the text; ``amendements`` is
        returned after being annotated in place.
        """
        articles = {}
        for article in texte:
            if article['type'] == 'article':
                titre = article.get('titre')
                if titre:
                    # x10 leaves room for the "avant"/"après" -1/+1 offsets.
                    articles[titre.lower()] = article.get('order') * 10

        def solveorder(art):
            art = art.lower()
            if art == 'titre' or art.startswith('intitul'):
                return 0
            if art.startswith('motion'):
                return 1
            if art.startswith('projet') \
                    or art.startswith('proposition') \
                    or art.startswith('texte'):
                return 5

            # Subjects that cannot be tied to an article sort last.
            order = 10000
            m = article_number_regexp.search(art)
            if m:
                if articles.get(m.group(1)):
                    order = articles.get(m.group(1))
                elif articles.get(m.group(2)):
                    order = articles.get(m.group(2))
                if 'avant' in art:
                    order -= 1
                elif 'après' in art or 'apres' in art:
                    order += 1
            return order

        for amendement in amendements:
            amdt = amendement['amendement']
            amdt['ordre_article'] = solveorder(amdt['sujet'])

        return amendements

    CACHE_BUSTING = 'cache=%d' % time()
    if 'url_jo' in procedure:
        CACHE_BUSTING = 'cache=5feb2018' # fixed cache busting for promulgated laws
    last_text_id, last_text_typeparl = None, None
    steps = procedure['steps']
    for i, step in enumerate(steps):
        print('    * step -', step.get('stage'), step.get('step'), step.get('source_url'))
        if step.get('step') not in ('commission', 'hemicycle'):
            continue
        if step.get('step') == 'commission' and step.get('stage') == 'CMP':
            continue

        if i == 0:
            continue

        last_step_index = get_previous_step(steps, i, is_old_procedure=procedure.get('use_old_procedure'))
        last_step = steps[last_step_index]
        last_step_with_good_text_number = steps[get_previous_step(steps, i,
            is_old_procedure=procedure.get('use_old_procedure'), get_depot_step=True)
        ]
        texte_url = last_step_with_good_text_number.get('source_url')

        if step.get('stage') != 'CMP' and last_step_with_good_text_number.get('institution') != step.get('institution'):
            print('ERROR - last step is from another institution', file=sys.stderr)
            continue

        # for a CMP hemicycle we have to get the right text inside the CMP commission
        if step.get('stage') == 'CMP' and step.get('step') == 'hemicycle':
            urls = [last_step.get('source_url')]
            if 'cmp_commission_other_url' in last_step:
                urls.append(last_step.get('cmp_commission_other_url'))
            # `url and`: a missing URL falls through to the warning below
            # instead of raising TypeError on `in None`.
            an_url = [url for url in urls if url and 'nationale.fr' in url]
            senat_url = [url for url in urls if url and 'senat.fr' in url]
            if step.get('institution') == 'assemblee' and an_url:
                texte_url = an_url[0]
            elif step.get('institution') == 'senat' and senat_url:
                texte_url = senat_url[0]
            else:
                print('WARNING - missing the CMP commission text for', step.get('source_url'), file=sys.stderr)
                continue

        if texte_url is None:
            print('ERROR - no texte url', step.get('source_url'), file=sys.stderr)
            continue

        texte = open_json(os.path.join(context.sourcedir, 'procedure', last_step['directory']), 'texte/texte.json')

        amdt_url = None
        if "nationale.fr" in texte_url:
            if 'assemblee_legislature' not in procedure:
                print('         + no AN legislature - pass text')
                continue
            amdt_url = 'https://nosdeputes.fr/%s/amendements/%s/json?%s' % (procedure.get('assemblee_legislature'), get_text_id(texte_url), CACHE_BUSTING)
        elif "senat.fr" in texte_url:
            amdt_url = 'https://nossenateurs.fr/amendements/%s/json?%s' % (get_text_id(texte_url), CACHE_BUSTING)

        if amdt_url is None:
            continue

        print('      * downloading amendments:', amdt_url, 'for', texte_url)

        amendements_src = download(amdt_url).json().get('amendements', [])

        # TA texts can be zero-padded or not (TA0XXX or TAXXX), we try both
        if 'amendements/TA' in amdt_url:
            textid = get_text_id(texte_url)
            if 'TA0' in textid:
                alternative_url = amdt_url.replace(textid, 'TA' + textid.replace('TA', '').lstrip('0'))
            else:
                alternative_url = amdt_url.replace(textid, 'TA' + textid.replace('TA', '').zfill(4))
            print(' WARNING: TA - trying alternative url too', alternative_url)
            amendements_src += download(alternative_url).json().get('amendements', [])

        print('        parsing amendments:', len(amendements_src))

        # ignore amendments if they are not for the correct step
        amendements_src_filtered = []
        for amd in amendements_src:
            a = amd['amendement']
            if step.get('institution') == 'assemblee':
                # commission amendments can have two forms
                #    - /amendements/LOI/NUM.asp (13th legislature)
                #    - /amendements/LOI/COMMISSION_NAME/NUM.asp (14+ legislature)
                # hemicycle amendments are:
                #    - /amendements/LOI/NUM.asp (13th legislature)
                #    - /amendements/LOI/AN/NUM.asp (14+ legislature)
                amdt_step = 'hemicycle'
                if '/cr-' in a['source']:
                    amdt_step = 'commission'
                else:
                    url_parts = a['source'].split('amendements/')[1].split('/')
                    if len(url_parts) == 3 and url_parts[1] != 'AN':
                        amdt_step = 'commission'
            elif step.get('institution') == 'senat':
                amdt_step = 'commission' if '/commissions/' in a['source'] else 'hemicycle'
            else:
                # CMP - there's no way for now to distinguish the step
                amdt_step = step['step']
            if step['step'] != amdt_step:
                continue
            amendements_src_filtered.append(amd)

        if len(amendements_src_filtered) != len(amendements_src):
            print('WARNING: amendments ignored (not the right step) %s' %
                    (len(amendements_src) - len(amendements_src_filtered)), file=sys.stderr)
        amendements_src = amendements_src_filtered

        step['nb_amendements'] = len(amendements_src)

        if len(amendements_src) > 0:
            amendements_src = sort_amendements(texte['articles'], amendements_src)

            typeparl, urlapi = identify_room(texte_url,
                legislature=step.get('assemblee_legislature', procedure.get('assemblee_legislature')))

            sujets = {}
            groupes = {}

            fix_order = False
            orders = []
            parls = {}
            links = {}
            idents = {}
            for amd in amendements_src:
                a = amd['amendement']
                if "sort" not in a:
                    print('WARNING: amendment has no sort %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                if a["sort"] == "Rectifié":
                    continue
                # A missing or empty subject is only worth a warning when the
                # amendment was actually discussed.
                if "sujet" not in a or not a["sujet"]:
                    if a["sort"] not in ["Irrecevable", "Retiré avant séance"]:
                        print('WARNING: amendment has no subject %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                key = a['sujet']
                if key not in sujets:
                    orders.append(key)
                    sujets[key] = {
                      'titre': key,
                      'order': a['ordre_article'],
                      'amendements': []
                    }
                # Orders above 9000 come from subjects solveorder could not
                # tie to an article: the subjects get re-sorted afterwards.
                if a['ordre_article'] > 9000:
                    fix_order = True

                gpe = find_groupe(a)
                if not gpe:
                    if a["sort"] != "Irrecevable":
                        sys.stderr.write('WARNING: no groupe found for %s\n' % a['url_nos%ss' % typeparl])
                    gpe = "Inconnu"
                context.add_groupe(groupes, gpe, urlapi)

                sujets[key]['amendements'].append({
                  'numero': a['numero'],
                  'date': a['date'],
                  'sort': simplify_sort(a['sort']),
                  'groupe': gpe,
                  'id_api': a['id'],
                  'aut': first_author(a['signataires'])
                })

                # Link every signatory to the co-signatories of this amendment
                # and to the signatories of amendments sharing `cle_unicite`.
                cosign = []
                hmd5 = a["cle_unicite"]
                if hmd5 not in idents:
                    idents[hmd5] = []
                for parll in a["parlementaires"]:
                    parl = parll["parlementaire"]
                    if parl not in parls:
                        p = context.get_parlementaire(urlapi, parl)
                        parls[parl] = {
                          "i": p["id"],
                          "s": parl,
                          "a": 0,
                          "n": p["nom"],
                          "g": p["groupe_sigle"],
                          "p": p["place_en_hemicycle"]
                        }
                    pid = parls[parl]["i"]
                    parls[parl]["a"] += 1
                    for cid in cosign:
                        add_link(links, pid, cid)
                        #add_link(links, pid, cid, 2)
                    cosign.append(pid)
                    for cid in idents[hmd5]:
                        add_link(links, pid, cid)
                    idents[hmd5].append(pid)

            if fix_order:
                orders.sort(key=cmp_to_key(compare_articles))
                # Distinct name so the index of the outer steps loop is not clobbered.
                for position, k in enumerate(orders):
                    sujets[k]["order"] = position

            amdtsfile = os.path.join(context.sourcedir, 'viz', 'amendements_%s.json' % step['directory'])
            data = {'id_step': step['directory'],
                    'api_root_url': amdapi_link(urlapi),
                    'groupes': groupes,
                    'sujets': sujets}
            print_json(data, amdtsfile)

            # The links file is still assembled but its dump is disabled.
            linksfile = os.path.join(context.sourcedir, 'viz', 'amendements_links_%s.json' % step['directory'])
            data = {'id_step': step['directory'],
                    'links': list(links.values()),
                    'parlementaires': {p["i"]: {k: p[k] for k in "psang"} for p in parls.values()}}
            # print_json(data, linksfile)


        ###########  INTERVENTIONS #############
        # TODO: move this to a dedicated file

        print('      * downloading interventions')
        typeparl, urlapi = identify_room(texte_url,
            legislature=step.get('assemblee_legislature', procedure.get('assemblee_legislature')))
        inter_dir = os.path.join(context.sourcedir, 'procedure', step['directory'], 'interventions')
        commission_or_hemicycle = '?commission=1' if step.get('step') == 'commission' else '?hemicycle=1'
        # TODO: TA texts can be zero-padded or not (TA0XXX or TAXXX), we should try both
        seance_name = None
        intervention_files = []

        # When the previous handled step was in the same chamber, its text id
        # is tried as a fallback.
        texts = (get_text_id(texte_url),)
        if last_text_typeparl == typeparl:
            texts = (get_text_id(texte_url), last_text_id)

        for loiid in texts:
            url_seances = 'https://{}.fr/seances/{}/json{}'.format(urlapi, loiid, commission_or_hemicycle)
            print('        * downloading seances - ', url_seances)
            for id_seance_obj in sorted(download(url_seances).json().get('seances', []), key=lambda x: x["seance"]):
                url_seance = 'https://{}.fr/seance/{}/{}/json'.format(urlapi, id_seance_obj['seance'], loiid)
                print('           downloading seance - ', url_seance)
                resp = download(url_seance).json()
                if resp.get('seance'):
                    # The file name is built from the first intervention of the seance.
                    inter = resp.get('seance')[0]['intervention']
                    seance_name = inter['date'] + 'T' + inter['heure'] + '_' + inter['seance_id']
                    print('            dumping seance -', seance_name)
                    intervention_files.append(seance_name)
                    if not os.path.exists(inter_dir):
                        os.makedirs(inter_dir)
                    print_json(resp, os.path.join(inter_dir, seance_name + '.json'))
            # Stop at the first text id that yielded at least one seance.
            if seance_name:
                step['has_interventions'] = True
                step['intervention_files'] = intervention_files
                break

        last_text_id = get_text_id(texte_url)
        last_text_typeparl = typeparl

    return procedure
                ali_num += 1
                article["alineas"]["%03d" % ali_num] = line
        else:
            #metas
            continue

    if article is not None:
        save_text(texte)
        pr_js(article)

    return all_articles


if __name__ == '__main__':
    if '--test' not in sys.argv:
        print_json(parse(sys.argv[1]))
    else:

        def assert_eq(x, y):
            if x != y:
                print(repr(x), "!=", repr(y))
                raise Exception()

        # keep the dots
        assert_eq(clean_html('<i>....................</i>'),
                  '<i>....................</i>')
        # but remove them for status
        assert_eq(clean_html('...........Conforme.........'), 'Conforme')
        assert_eq(clean_html('...……......……..Conforme....……...…….'),
                  'Conforme')
        # even with spaces