Esempio n. 1
0
def find_amendements(path):
    """Yield every amendment found under *path*, with its source file.

    Recursively scans for files named ``amendements_*`` and yields
    ``(amendement, file_path)`` pairs for each amendment listed under
    each subject.
    """
    pattern = os.path.join(path, '**/amendements_*')
    for filename in glob.glob(pattern, recursive=True):
        data = open_json(filename)
        for sujet in data.get('sujets', {}).values():
            for amendement in sujet.get('amendements', []):
                yield amendement, filename
def find_parsed_doslegs(api_directory):
    """Collect parsed ``procedure.json`` files, indexed by Senate id.

    Walks *api_directory* recursively, keeps only dossiers that carry a
    ``senat_id``, prints how many were found and returns the mapping.
    """
    parsed = {}
    pattern = os.path.join(api_directory, '**/procedure.json')
    for filepath in glob.glob(pattern, recursive=True):
        dossier = open_json(filepath)
        senat_id = dossier.get('senat_id')
        if senat_id:
            parsed[senat_id] = dossier
    print(len(parsed), 'parsed found')
    return parsed
def read_text(path):
    """Render the articles of a JSON text file as one plain-text string.

    Opens the JSON file at *path* (split into directory and basename, the
    two-argument form ``open_json`` expects) and renders each article as a
    ``# Article <titre>`` header followed by its non-empty alineas in key
    order.  Returns ``""`` when the file does not exist.
    """
    # TODO: format tables
    try:
        articles = open_json(os.path.dirname(path),
                             os.path.basename(path))["articles"]
    except FileNotFoundError:
        return ""

    # Accumulate pieces in a list and join once: avoids the quadratic
    # behaviour of repeated `str +=` on long texts.
    parts = []
    for art in articles:
        parts.append("# Article " + art["titre"] + "\n\n")
        for key in sorted(art["alineas"].keys()):
            if art["alineas"][key] != "":
                parts.append(art["alineas"][key] + "\n")
        parts.append("\n")
    return "".join(parts)
def process(output_dir, dos):
    """Compute summary statistics for one dosleg (parliamentary dossier).

    Reads ``viz/interventions.json`` under *output_dir* and the amendment
    files below it, measures the evolution of the text across the steps of
    *dos*, and returns a flat ``stats`` dict (word counts, article counts,
    growth ratios, procedure counters, ...).
    """
    stats = {}

    # # # INTERVENTIONS # # #

    intervs = open_json(os.path.join(output_dir, 'viz/interventions.json'))
    # only keep seances in hemicycle
    intervs = {step_name: step for step_name, step in intervs.items() if '_hemicycle' in step_name}

    stats['total_mots'] = sum(
        sum(i['total_mots'] for i in step['divisions'].values())
        for step in intervs.values()
    )

    stats["total_intervenants"] = len({orat for step in intervs.values() for orat in step['orateurs'].keys()})
    # BUG FIX: this previously summed a *set* comprehension, which silently
    # collapsed divisions having the same 'total_intervs' count; sum a
    # generator instead so every division is counted.
    stats["total_interventions"] = sum(division['total_intervs'] for step in intervs.values() for division in step['divisions'].values())

    stats["total_seances"] = sum(step['total_seances'] for step in intervs.values())
    # (loop variable renamed from `dir`, which shadowed the builtin)
    stats["total_seances_assemblee"] = sum(step['total_seances'] for step_name, step in intervs.items() if '_assemblee' in step_name)
    stats["total_seances_senat"] = sum(step['total_seances'] for step_name, step in intervs.items() if '_senat' in step_name)

    # # # AMENDMENTS # # #

    add_amendments_stats(stats, find_amendements(output_dir))

    # # # TEXTS # # #

    first_step, last_step = find_first_and_last_steps(dos)
    first_arts = read_articles(first_step)
    last_arts = read_articles(last_step)

    stats["total_input_articles"] = len(first_arts)
    stats["total_output_articles"] = len(last_arts)
    stats["ratio_articles_growth"] = (stats["total_output_articles"] - stats["total_input_articles"]) / stats["total_input_articles"]

    stats["input_text_length"] = step_text_length(first_step)
    stats["output_text_length"] = step_text_length(last_step)
    stats["ratio_text_length_growth"] = (stats["output_text_length"] - stats["input_text_length"]) / stats["input_text_length"]

    stats["input_text_word_count"] = step_word_count(first_step)
    stats["output_text_word_count"] = step_word_count(last_step)
    stats["ratio_word_count_growth"] = (stats["output_text_word_count"] - stats["input_text_word_count"]) / stats["input_text_word_count"]

    # last step with the Conseil constitutionnel stage excluded
    adopted_step = find_first_and_last_steps(dos, include_CC=False)[1]
    if has_been_censored(dos):
        stats["censored_articles"], stats["fully_censored_articles"] = count_censored_articles(last_step)
        stats["output_text_before_CC_length"] = step_text_length(adopted_step)
        stats["output_text_before_CC_word_count"] = step_word_count(adopted_step)

    stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles(first_arts, last_arts)

    # # # PROCEDURE # # #

    stats["echecs_procedure"] = len([step for step in dos['steps'] if step.get("echec")])

    # TODO: first institution
    stats['last_stage'] = adopted_step.get('stage')
    if stats['last_stage'] == 'CMP':
        stats['last_institution'] = 'CMP'
    else:
        stats['last_institution'] = adopted_step.get('institution')

    maxdate = dos.get('end')
    if not maxdate:
        # fall back to the latest dated step
        for step in dos['steps']:
            if step.get('date'):
                maxdate = step.get('enddate') or step.get('date')
    stats["total_days"] = (datize(maxdate) - datize(dos['beginning'])).days + 1

    stats["attached_law_proposals"] = count_initial_depots(dos['steps']) - 1
    stats["depots_in_institutions"] = count_navettes(dos['steps'])
    stats["texts_produced"] = count_texts(dos['steps'])

    # NOTE: an unreachable, byte-for-byte duplicate of the PROCEDURE
    # section used to follow this return; removed as dead code.
    return stats


# NOTE(review): everything after the print_json call references `out`,
# which is never defined here, and the span ends with a module-level
# `return` (a SyntaxError outside a function).  This looks like the tail
# of another function pasted after the __main__ guard -- confirm against
# the original source file.
if __name__ == '__main__':
    print_json(process(sys.argv[1], open_json(sys.argv[2])))
    # Strip heavy/internal fields from each step and drop consecutive
    # steps that repeat the same id_step.
    for a in sorted(out['articles']):
        new_steps = []
        for s in out['articles'][a]['steps']:
            del s['text']
            s.pop('_original_index', None)
            if len(new_steps) > 0 and new_steps[-1]['id_step'] == s['id_step']:
                print('same id_step', s['id_step'], file=sys.stderr)
                continue
            new_steps.append(s)
        out['articles'][a]['steps'] = new_steps

    # Set articles' order values after having reinserted missing ones
    orders = {k: n for n, k in enumerate(
        sorted(
            [a['titre'] for a in out['articles'].values() if a['id'] != 'echec'],
            key=cmp_to_key(compare_articles)
        ))
    }
    for a in out['articles'].values():
        if a['id'] == 'echec':
            # 'echec' (failure) pseudo-articles are pushed to the front
            a['order'] = -1
        else:
            a['order'] = orders[a['titre']]

    return out


# NOTE(review): the deeply indented code after this guard is a fragment of
# an article-completion loop from another function (it uses `a`, `texte`,
# `line`, `order`, `cur`, `write_json`, `re_suppr`, `log` and returns
# ALL_ARTICLES at module level) -- likely a concatenation artifact.
if __name__ == '__main__':
    print_json(process(open_json(sys.argv[1])))
            a["statut"] = "conforme"
            a["order"] = order
            order += 1
            write_json(a)
        # do not keep already deleted articles but mark as deleted missing ones
        elif not re_suppr.match(a["statut"]) or texte.get('echec', ''):
            # if the last line of text was some dots, it means that we should keep
            # the articles as-is if they are not deleted
            if line['type'] == 'dots':
                # ex: https://www.senat.fr/leg/ppl09-304.html
                log("DEBUG: Recovering art as non-modifié via dots %s (leftovers)" % cur)
                a["statut"] = "non modifié"
                a["order"] = order
                order += 1
                write_json(a)
            else:
                log("DEBUG: Marking art %s as supprimé (leftovers)" % cur)
                a["statut"] = "supprimé"
                a["alineas"] = dict()
                a["order"] = order
                order += 1
                write_json(a)

    return ALL_ARTICLES

if __name__ == '__main__':
    # Load the serialized call arguments, force debug mode, run `complete`
    # and dump its result as JSON.
    kwargs = open_json(sys.argv[1])
    kwargs["debug"] = True
    print_json(complete(**kwargs))
# Optional GitLab token: when given, wipe every existing bill repository
# in the 'parlement' group before re-exporting.
GITLAB_TOKEN = sys.argv[2] if len(sys.argv) == 3 else None
if GITLAB_TOKEN:
    gl = gitlab.Gitlab('https://git.regardscitoyens.org/',
                       private_token=GITLAB_TOKEN)
    group = gl.groups.list(search='parlement')[0]

    # delete existing bills
    projects = group.projects.list()
    for project in projects:
        print('delete', project.id)
        gl.projects.delete(project.id)

# Export each sufficiently large dossier (at least 5 steps and 5
# amendments) into its own git repository.
for procedure_file in sorted(
        glob.glob("data/**/procedure.json", recursive=True)):
    procedure = open_json(procedure_file)

    if len(procedure["steps"]) < 5:
        continue
    if procedure["stats"]["total_amendements"] < 5:
        continue

    project_dir = os.path.dirname(os.path.dirname(procedure_file))

    git_dir = Path(GIT_REPOS_DIRECTORY) / procedure["id"]

    # start from a clean directory for each bill
    shutil.rmtree(str(git_dir), ignore_errors=True)
    os.makedirs(str(git_dir))

    remote_url = "[email protected]:/parlement/{bill}.git".format(
        bill=procedure["id"])
    # NOTE(review): the loop body is truncated at this point in this view.
Esempio n. 9
0
import sys, os, random, glob

from tlfp.tools.common import open_json, print_json

# Require the data directory argument; print usage otherwise.
if len(sys.argv) < 2:
    print(
        'USAGE: "python steps_as_dot.py <data_directory> | dot -Tpng > steps.png"'
    )
    sys.exit()

# A third CLI argument switches the graph to the detailed rendering.
mode = "detailed" if len(sys.argv) == 3 else "simple"

# Reference list of valid procedure steps, shipped alongside the docs.
procedure_file = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '..', 'docs',
    'valid_procedure.json')
procedure = open_json(procedure_file)

API_DIRECTORY = sys.argv[1]
# Load every dosleg visualization and keep only finished ones ('end' set).
all_senat_jo = [open_json(path) for path \
                in glob.glob(os.path.join(API_DIRECTORY, '*/viz/procedure.json'))]
all_senat_jo = [dos for dos in all_senat_jo if dos.get('end')]
# all_senat_jo = [x for x in open_json(sys.argv[1]) if len(x['steps']) > 2]
# all_senat_jo = random.sample(all_senat_jo, 5)

nodes_names_size = {}
step_trans = {}
steps_logs = ""
for dos in all_senat_jo:
    prev_step = None
    last_step = ''
    for step_i, step in enumerate(dos.get('steps', [])):
        # NOTE(review): the loop body is truncated here in this view.
Esempio n. 10
0
            # NOTE(review): fragment -- tail of an article-completion loop
            # (same code appears earlier in this file); the enclosing
            # function definition is not visible from this chunk.
            order += 1
            write_json(a)
        # do not keep already deleted articles but mark as deleted missing ones
        elif not re_suppr.match(a["statut"]) or texte.get('echec', ''):
            # if the last line of text was some dots, it means that we should keep
            # the articles as-is if they are not deleted
            if line['type'] == 'dots':
                # ex: https://www.senat.fr/leg/ppl09-304.html
                log("DEBUG: Recovering art as non-modifié via dots %s (leftovers)"
                    % cur)
                a["statut"] = "non modifié"
                a["order"] = order
                order += 1
                write_json(a)
            else:
                log("DEBUG: Marking art %s as supprimé (leftovers)" % cur)
                a["statut"] = "supprimé"
                a["alineas"] = dict()
                a["order"] = order
                order += 1
                write_json(a)

    return ALL_ARTICLES


# Entry point: re-run `complete` on serialized arguments with debug on.
if __name__ == '__main__':
    serialized = open_json(sys.argv[1])
    serialized["debug"] = True
    result = complete(**serialized)
    print_json(result)
Esempio n. 11
0
"""
Usage: python generate_dossiers_csv.py <api_directory>

Output in <api_directory>:
- dossiers_promulgues.csv with all the doslegs ready
- home.json for the homepage informations
"""
import glob, os, sys, csv, re, copy, datetime

from tlfp.tools.common import upper_first, open_json, print_json

API_DIRECTORY = sys.argv[1]

# Keep only procedure.json paths whose segment right after the API
# directory contains no dot, and skip temporary ("_tmp") dumps.
re_dos_ok = re.compile(r"%s/[^.]+/" % API_DIRECTORY.strip('/'))
dossiers = [(open_json(path), path) for path \
                in glob.glob(os.path.join(API_DIRECTORY, '*/viz/procedure.json')) if re_dos_ok.search(path)]
dossiers = [(dos, path) for dos, path in dossiers if "_tmp" not in path]


# One CSV row per dossier; the header lists the columns written later.
csvfile = csv.writer(open(os.path.join(API_DIRECTORY, 'dossiers.csv'), 'w'), delimiter=';')
csvfile.writerow(('id;Titre;Type de dossier;Date initiale;URL du dossier;État du dossier;Décision du CC;'
    'Date de la décision;Date de promulgation;Numéro de la loi;Thèmes;total_amendements;total_mots;'
    'short_title;loi_dite;assemblee_id').split(';'))


def format_date_for_human(date):
    """Turn an ISO date string (YYYY-MM-DD) into DD/MM/YYYY; '' stays ''."""
    if not date:
        return ''
    pieces = date.split('-')
    pieces.reverse()
    return '/'.join(pieces)

def in_room(step):
Esempio n. 12
0
        "count": len(tosave),
        "page": npage,
        "next_page": None,
        "dossiers": tosave
    }
    if done < total:
        data["next_page"] = namefile(npage + 1)
    print('[assemble_procedure] >', namefile(npage))
    print_json(data, os.path.join(sourcedir, namefile(npage)))


done = 0
tosave = []

for d in dossiers:
    proc = open_json(os.path.join(sourcedir, d['id'], 'viz'), 'procedure.json')
    proc["id"] = d["id"]

    for f in ["table_concordance", "objet_du_texte"]:
        if f in proc:
            proc.pop(f)

    tosave.append(proc)
    done += 1
    if done % pagesize == 0:
        save_json_page(tosave, done)
        tosave = []

if tosave:
    save_json_page(tosave, done)
# quick script to produce a DOT file of the steps from a list of dosleg
# use "python steps_as_dot.py <data_directory> | dot -Tpng > steps.png" to produce the diagram

# the XKCD font is available here: https://github.com/ipython/xkcd-font/tree/master/xkcd/build
import sys, os, random, glob

from tlfp.tools.common import open_json, print_json

# Require the data directory argument; print usage otherwise.
if len(sys.argv) < 2:
    print('USAGE: "python steps_as_dot.py <data_directory> | dot -Tpng > steps.png"')
    sys.exit()

# A third CLI argument switches the graph to the detailed rendering.
mode = "detailed" if len(sys.argv) == 3 else "simple"

# Reference list of valid procedure steps, shipped alongside the docs.
procedure_file = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '..', 'docs', 'valid_procedure.json')
procedure = open_json(procedure_file)

API_DIRECTORY = sys.argv[1]
# Load every dosleg visualization and keep only finished ones ('end' set).
all_senat_jo = [open_json(path) for path \
                in glob.glob(os.path.join(API_DIRECTORY, '*/viz/procedure.json'))]
all_senat_jo = [dos for dos in all_senat_jo if dos.get('end')]
# all_senat_jo = [x for x in open_json(sys.argv[1]) if len(x['steps']) > 2]
# all_senat_jo = random.sample(all_senat_jo, 5)

nodes_names_size = {}
step_trans = {}
steps_logs = ""
for dos in all_senat_jo:
    prev_step = None
    last_step = ''
    for step_i, step in enumerate(dos.get('steps', [])):
        # NOTE(review): the loop body is truncated here in this view.
Esempio n. 14
0
def process(output_dir, dos):
    """Compute summary statistics for one dosleg (parliamentary dossier).

    Reads ``viz/interventions.json`` under *output_dir* and the amendment
    files below it, measures the evolution of the text across the steps
    of *dos*, and returns a flat ``stats`` dict.
    """
    stats = {}

    # # # INTERVENTIONS # # #

    intervs = open_json(os.path.join(output_dir, 'viz/interventions.json'))
    # only keep seances in hemicycle
    intervs = {
        step_name: step
        for step_name, step in intervs.items() if '_hemicycle' in step_name
    }

    stats['total_mots'] = sum([
        sum(i['total_mots'] for i in step['divisions'].values())
        for step in intervs.values()
    ])

    stats["total_intervenants"] = len({
        orat
        for step in intervs.values() for orat in step['orateurs'].keys()
    })
    # NOTE(review): this sums a *set* comprehension, so divisions with an
    # equal 'total_intervs' count are collapsed to one value -- probably
    # meant to be a generator; confirm before relying on this statistic.
    stats["total_interventions"] = sum({
        division['total_intervs']
        for step in intervs.values()
        for division in step['divisions'].values()
    })

    stats["total_seances"] = sum(
        [step['total_seances'] for step in intervs.values()])
    # NOTE(review): the loop variable `dir` shadows the builtin.
    stats["total_seances_assemblee"] = sum([
        step['total_seances'] for dir, step in intervs.items()
        if '_assemblee' in dir
    ])
    stats["total_seances_senat"] = sum([
        step['total_seances'] for dir, step in intervs.items()
        if '_senat' in dir
    ])

    # # # AMENDMENTS # # #

    add_amendments_stats(stats, find_amendements(output_dir))

    # # # TEXTS # # #

    first_step, last_step = find_first_and_last_steps(dos)
    first_arts = read_articles(first_step)
    last_arts = read_articles(last_step)

    stats["total_input_articles"] = len(first_arts)
    stats["total_output_articles"] = len(last_arts)
    stats["ratio_articles_growth"] = (
        stats["total_output_articles"] -
        stats["total_input_articles"]) / stats["total_input_articles"]

    stats["input_text_length"] = step_text_length(first_step)
    stats["output_text_length"] = step_text_length(last_step)
    stats["ratio_text_length_growth"] = (
        stats["output_text_length"] -
        stats["input_text_length"]) / stats["input_text_length"]

    stats["input_text_word_count"] = step_word_count(first_step)
    stats["output_text_word_count"] = step_word_count(last_step)
    stats["ratio_word_count_growth"] = (
        stats["output_text_word_count"] -
        stats["input_text_word_count"]) / stats["input_text_word_count"]

    # last step with the Conseil constitutionnel stage excluded
    adopted_step = find_first_and_last_steps(dos, include_CC=False)[1]
    if has_been_censored(dos):
        stats["censored_articles"], stats[
            "fully_censored_articles"] = count_censored_articles(last_step)
        stats["output_text_before_CC_length"] = step_text_length(adopted_step)
        stats["output_text_before_CC_word_count"] = step_word_count(
            adopted_step)

    stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles(
        first_arts, last_arts)

    # # # PROCEDURE # # #

    stats["echecs_procedure"] = len(
        [step for step in dos['steps'] if step.get("echec")])

    # TODO: first institution
    stats['last_stage'] = adopted_step.get('stage')
    if stats['last_stage'] == 'CMP':
        stats['last_institution'] = 'CMP'
    else:
        stats['last_institution'] = adopted_step.get('institution')

    maxdate = dos.get('end')
    if not maxdate:
        # fall back to the latest dated step
        for step in dos['steps']:
            if step.get('date'):
                maxdate = step.get('enddate') or step.get('date')
    stats["total_days"] = (datize(maxdate) - datize(dos['beginning'])).days + 1

    stats["attached_law_proposals"] = count_initial_depots(dos['steps']) - 1
    stats["depots_in_institutions"] = count_navettes(dos['steps'])
    stats["texts_produced"] = count_texts(dos['steps'])

    return stats
Esempio n. 15
0
        # NOTE(review): fragment -- tail of the `process` function (same
        # code as above); its def line is not visible from this chunk.
        first_arts, last_arts)

    # # # PROCEDURE # # #

    stats["echecs_procedure"] = len(
        [step for step in dos['steps'] if step.get("echec")])

    # TODO: first institution
    stats['last_stage'] = adopted_step.get('stage')
    if stats['last_stage'] == 'CMP':
        stats['last_institution'] = 'CMP'
    else:
        stats['last_institution'] = adopted_step.get('institution')

    maxdate = dos.get('end')
    if not maxdate:
        # fall back to the latest dated step
        for step in dos['steps']:
            if step.get('date'):
                maxdate = step.get('enddate') or step.get('date')
    stats["total_days"] = (datize(maxdate) - datize(dos['beginning'])).days + 1

    stats["attached_law_proposals"] = count_initial_depots(dos['steps']) - 1
    stats["depots_in_institutions"] = count_navettes(dos['steps'])
    stats["texts_produced"] = count_texts(dos['steps'])

    return stats


# Entry point: output dir and serialized dossier come from the CLI.
if __name__ == '__main__':
    print_json(process(sys.argv[1], open_json(sys.argv[2])))
def find_amendements(path):
    """Iterate over all amendments stored under *path*.

    Yields ``(amendement, source_file)`` for every amendment listed in
    the recursively discovered ``amendements_*`` JSON files.
    """
    for source in glob.glob(os.path.join(path, '**/amendements_*'),
                            recursive=True):
        content = open_json(source)
        for sujet in content.get('sujets', {}).values():
            amendements = sujet.get('amendements', [])
            for amendement in amendements:
                yield amendement, source
        # NOTE(review): fragment -- interior of an articles post-processing
        # loop over `out['articles']` (same code appears earlier in this
        # file); the enclosing function is not visible from this chunk.
        for s in out['articles'][a]['steps']:
            del s['text']
            s.pop('_original_index', None)
            if len(new_steps) > 0 and new_steps[-1]['id_step'] == s['id_step']:
                print('same id_step', s['id_step'], file=sys.stderr)
                continue
            new_steps.append(s)
        out['articles'][a]['steps'] = new_steps

    # Set articles' order values after having reinserted missing ones
    orders = {
        k: n
        for n, k in enumerate(
            sorted([
                a['titre']
                for a in out['articles'].values() if a['id'] != 'echec'
            ],
                   key=cmp_to_key(compare_articles)))
    }
    for a in out['articles'].values():
        if a['id'] == 'echec':
            # 'echec' (failure) pseudo-articles are pushed to the front
            a['order'] = -1
        else:
            a['order'] = orders[a['titre']]

    return out


if __name__ == '__main__':
    print_json(process(open_json(sys.argv[1])))
    # NOTE(review): fragment -- tail of a save_json_page-style helper;
    # its def line and the `total`/`namefile` definitions are not
    # visible from this chunk.
    npage = (done - 1) // pagesize
    data = {"total": total,
            "count": len(tosave),
            "page": npage,
            "next_page": None,
            "dossiers": tosave}
    if done < total:
        data["next_page"] = namefile(npage+1)
    print('[assemble_procedure] >', namefile(npage))
    print_json(data, os.path.join(sourcedir, namefile(npage)))

# Walk every dossier, strip heavy fields, and save one JSON page per
# `pagesize` procedures.
done = 0
tosave = []

for d in dossiers:
    proc = open_json(os.path.join(sourcedir, d['id'], 'viz'), 'procedure.json')
    proc["id"] = d["id"]

    # these fields are large and not needed in the paginated output
    for f in ["table_concordance", "objet_du_texte"]:
        if f in proc:
            proc.pop(f)

    tosave.append(proc)
    done += 1
    if done % pagesize == 0:
        save_json_page(tosave, done)
        tosave = []

# flush the last, partially filled page
if tosave:
    save_json_page(tosave, done)