예제 #1
0
def find_amendements(path):
    """Yield every amendment stored in the ``amendements_*`` files under *path*.

    The directory tree rooted at *path* is searched recursively.  Each
    matching file is loaded with ``open_json``; the amendments listed under
    every entry of its ``'sujets'`` mapping are yielded one by one.  Files
    without ``'sujets'`` and subjects without ``'amendements'`` contribute
    nothing.
    """
    pattern = os.path.join(path, '**/amendements_*')
    for filename in glob.glob(pattern, recursive=True):
        sujets = open_json(filename).get('sujets', {})
        for sujet in sujets.values():
            yield from sujet.get('amendements', [])
    def step_walker(self, step):
        """Feed the data files attached to one procedure step to the computation class.

        Depending on which keys *step* carries, this loads the amendments
        (``amendements.json``), the session files listed in
        ``step["intervention_files"]`` and the working text (``texte.json``)
        from the matching sub-directories of ``self.procedurePath`` and hands
        them to ``self.computationClass``.  ``articles_etapes.json`` from
        ``self.vizPath`` is processed last.

        If a directory named by *step* does not exist, a message is printed
        and the method returns immediately, skipping every later treatment
        (including the final ``articles_etapes.json`` one).
        """
        # Amendment treatment
        if "amendement_directory" in step:
            amdtDir = os.path.join(self.procedurePath,
                                   step["amendement_directory"])
            if not os.path.exists(amdtDir):
                # Single-argument print(...) behaves the same on Python 2
                # and Python 3, unlike the former print statement.
                print("ERROR > No Amendements Directory ")
                return

            amendements = open_json(amdtDir, "amendements.json")
            for amendement in amendements["amendements"]:
                self.computationClass.computeAmendements(amendement)

        # Intervention treatment
        if "intervention_directory" in step:
            intervDir = os.path.join(self.procedurePath,
                                     step["intervention_directory"])
            if not os.path.exists(intervDir):
                print(">No Intervention Directory ")
                return

            # One JSON file per session; the step lists their base names.
            seance_files = step["intervention_files"]
            for seance_file in seance_files:
                seance = open_json(intervDir, "%s.json" % seance_file)

                for interv in seance["seance"]:
                    self.computationClass.computeInterventions(interv)

        # Text treatment
        if "working_text_directory" in step:
            textDir = os.path.join(self.procedurePath,
                                   step["working_text_directory"])
            if not os.path.exists(textDir):
                print("ERROR > no Text directory")
                return

            text = open_json(textDir, "texte.json")

            self.computationClass.computeText(text)

        # Articles per step ("articles_etapes")
        articleEtape = open_json(self.vizPath, "articles_etapes.json")
        self.computationClass.computeArticleEtapes(articleEtape)
    def walk(self):
        """Process every step of ``procedure.json``, then finalize the computation.

        For each entry of the procedure's ``'steps'`` list, the step's data
        files are walked first (``step_walker``) and the step itself is then
        passed to ``computeStep``.  ``finalize`` is called once at the end.
        """
        steps = open_json(self.procedurePath, "procedure.json")['steps']

        for current_step in steps:
            self.step_walker(current_step)
            self.computationClass.computeStep(current_step)

        self.computationClass.finalize()
    def walk(self):
        """Run the computation class over the whole procedure.

        Loads ``procedure.json`` from ``self.procedurePath``; each of its
        steps goes through ``step_walker`` and then ``computeStep``, in
        file order.  ``finalize`` closes the run.
        """
        loaded = open_json(self.procedurePath, "procedure.json")
        step_list = loaded['steps']

        for item in step_list:
            self.step_walker(item)
            self.computationClass.computeStep(item)

        self.computationClass.finalize()
    def step_walker(self, step):
        """Dispatch the files referenced by *step* to ``self.computationClass``.

        Three optional treatments run in order, each triggered by a key of
        *step*: amendments (``"amendement_directory"``), interventions
        (``"intervention_directory"`` plus ``"intervention_files"``) and the
        working text (``"working_text_directory"``).  Directories are
        resolved relative to ``self.procedurePath``.  Finally
        ``articles_etapes.json`` is loaded from ``self.vizPath``.

        When a referenced directory is missing, a message is printed and the
        method returns at once; the remaining treatments are not run.
        """
        # Amendment treatment
        if "amendement_directory" in step:
            amdtDir = os.path.join(self.procedurePath,
                                   step["amendement_directory"])
            if not os.path.exists(amdtDir):
                # print(...) with one argument works on Python 2 and 3;
                # the print statement used before is Python 2 only.
                print("ERROR > No Amendements Directory ")
                return

            amendements = open_json(amdtDir, "amendements.json")
            for amendement in amendements["amendements"]:
                self.computationClass.computeAmendements(amendement)

        # Intervention treatment
        if "intervention_directory" in step:
            intervDir = os.path.join(self.procedurePath,
                                     step["intervention_directory"])
            if not os.path.exists(intervDir):
                print(">No Intervention Directory ")
                return

            # The step lists the base names of its session files.
            seance_files = step["intervention_files"]
            for seance_file in seance_files:
                seance = open_json(intervDir, "%s.json" % seance_file)

                for interv in seance["seance"]:
                    self.computationClass.computeInterventions(interv)

        # Text treatment
        if "working_text_directory" in step:
            textDir = os.path.join(self.procedurePath,
                                   step["working_text_directory"])
            if not os.path.exists(textDir):
                print("ERROR > no Text directory")
                return

            text = open_json(textDir, "texte.json")

            self.computationClass.computeText(text)

        # Articles per step ("articles_etapes")
        articleEtape = open_json(self.vizPath, "articles_etapes.json")
        self.computationClass.computeArticleEtapes(articleEtape)
def process(output_directory):
    """Return the ``id_legi -> common_name`` mapping of commonly named laws.

    The mapping is cached in ``lois_dites.json`` inside *output_directory*
    (created if missing).  A cache file modified within the last 86400
    seconds is read back with ``open_json``; otherwise the mapping is
    rebuilt from ``LawService().common_laws()`` and written with
    ``print_json``.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    one_day_ago = time.time() - 86400
    cache_path = os.path.join(output_directory, 'lois_dites.json')

    # Guard clause: a cache file that is present and recent enough is reused.
    if os.path.exists(cache_path) and not os.path.getmtime(cache_path) < one_day_ago:
        return open_json(cache_path)

    common_laws = {}
    for law in LawService().common_laws():
        common_laws[law.id_legi] = law.common_name
    print_json(common_laws, cache_path)
    return common_laws
예제 #7
0
import fresh_tomatoes
import common

# Load the raw data from 'data.json' through the project's `common` helpers.
datas = common.open_json('data.json')

# Convert the loaded data with common.dict_to_movies (presumably into movie
# objects -- the helper is defined outside this file).
movies = common.dict_to_movies(datas)

# Hand the movies to fresh_tomatoes.open_movies_page, which presumably
# renders and opens the HTML page (its behaviour is not visible here).
fresh_tomatoes.open_movies_page(movies)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, csv, os, sys
from difflib import ndiff, SequenceMatcher
from common import json, open_json, print_json

# Input directory: first command-line argument.  Indexing sys.argv[1]
# directly raised IndexError when the argument was missing, so the error
# message below could never be shown in that case.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else ''
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # sys.exit is always available; the bare `exit` helper is only injected
    # by the `site` module.
    sys.exit(1)
procedure = open_json(sourcedir, 'procedure.json')

def getParentFolder(root, f):
    """Return the name of the directory that directly contains ``root/f``."""
    target = os.path.abspath(os.path.join(root, f))
    parent = os.path.abspath(os.path.join(target, os.pardir))
    return os.path.basename(parent)

def unifyStatus(status):
    """Map a free-text status to ``"sup"``, ``"new"`` or ``"none"``.

    Leading whitespace and any trailing run of ``'s'``, ``'.'`` or spaces
    are removed first.  The result is ``"sup"`` when the text ends with
    ``'constitution'`` or starts with ``'sup'``, ``"new"`` when it starts
    with ``'nouveau'``, and ``"none"`` otherwise.
    """
    # Work on the text itself.  The former ``status.encode('utf-8')`` turned
    # it into bytes on Python 3, where ``rstrip('s. ')`` then raised
    # TypeError; every marker tested below is ASCII, so no encoding step is
    # needed on either Python version.
    status = status.lstrip().rstrip('s. ')
    if status.endswith('constitution') or status.startswith('sup'):
        return "sup"
    if status.startswith("nouveau"):
        return "new"
    return "none"

def create_step(step_id, directory, article=None, echec_type=None):
    s = {}
    s['id_step'] = step_id
    s['directory'] = directory
    s['text'] = []
예제 #9
0
    def computeStatOverFile(self, file):
        """Fold every dossier of a data file into the running statistics.

        The file is loaded with ``open_json("data", file)``.  For each entry
        of its ``"dossiers"`` list the counters and totals held on ``self``
        are updated, and a per-dossier summary is stored in
        ``self.textValues`` under the dossier's ``"id"``.
        """
        content = open_json("data", file)
        for dossier in content["dossiers"]:
            self.countDossiers += 1

            # Running totals.
            self.totalDays += dossier["total_days"]

            amendements = dossier["total_amendements"]
            self.totalAmendement += amendements
            self.totalAmendementParl += dossier[
                "total_amendements_parlementaire"]
            adoptes = dossier["total_amendements_adoptes"]
            self.totalAmendementAdoptes += adoptes
            self.totalAmendementParlAdoptes += dossier[
                "total_amendements_parlementaire_adoptes"]

            self.totalIntervenant += dossier["total_intervenant"]

            self.totalArticles += dossier["total_articles"]
            self.totalArticlesModified += dossier["total_articles_modified"]

            accidents = dossier["total_accident_procedure"]
            self.totalAccidentProcedure += accidents
            if accidents > 0:
                self.nbDossiersAccidentProcedure += 1

            # Text sizes: the "...2" measurements feed the totals.
            size_in = dossier["input_text_length2"]
            self.textSizeOrig += size_in
            size_out = dossier["output_text_length2"]
            self.textSizeFinal += size_out

            if dossier["output_text_length"] < dossier["input_text_length"]:
                self.countTextReduced += 1

            if float(size_out) / size_in > 2.0:
                self.countTextWithDoubledVolume2 += 1
            if size_out < size_in:
                self.countTextReduced2 += 1

            # Thresholds that only apply to amended dossiers.
            if amendements > 0:
                self.countDossiersAmende += 1

                if dossier["ratio_texte_modif"] >= 0.5:
                    self.countModifSup50 += 1
                growth = (size_out - size_in) / float(size_in)
                if growth > 0.5:
                    self.countInflaSup50 += 1
                if growth > 1:
                    self.countInflaSup100 += 1

            # Per-dossier summary, registered before being filled in.
            record = self.textValues[dossier["id"]] = {}
            record["short_title"] = dossier["short_title"]
            record["input_text_length"] = size_in
            record["output_text_length"] = size_out
            record["inflation"] = (size_out - size_in) / float(size_in)
            record["modification"] = dossier["ratio_texte_modif"]

            record["amendement"] = amendements
            record["amendementAdoptes"] = adoptes
            # A dossier without amendments divides by 1, giving a rate of 0.
            record["tauxAdoption"] = adoptes / float(
                amendements if amendements != 0 else 1)

            record["daysBeforeAdoption"] = dossier["total_days"]
            record["ProcedureAccident"] = accidents
            record["NbIntervenants"] = dossier["total_intervenant"]
예제 #10
0
        mkdirs(step_dir)
        for data in articles:
            if not data or not "type" in data:
                log_err("JSON badly formatted, missing field type: %s" % data)
                sys.exit(1)
            if data["type"] == "texte":
                textid = data["id"]
                alldata = dict(data)
                alldata['sections'] = []
                alldata['articles'] = []
            elif textid == "":
                log_err("JSON missing first line with text infos")
                sys.exit(1)
            elif data["type"] == "section":
                alldata['sections'].append(data)
            elif data["type"] == "article":
                alldata['articles'].append(data)
            elif data["type"] == "echec":
                alldata['expose'] = data['texte']

        print_json(alldata, os.path.join(step_dir, 'texte.json'))

        step['texte.json'] = alldata

    return dos


if __name__ == '__main__':
    # Command-line entry point: load the JSON file named by the first
    # argument, run process() on it with 'test_out' as second argument and
    # hand the result to print_json.
    print_json(process(open_json(sys.argv[1]), 'test_out'))
예제 #11
0
            texts = (get_text_id(texte_url), last_text_id)

        for loiid in texts:
            url_seances = 'https://{}.fr/seances/{}/json{}'.format(urlapi, loiid, commission_or_hemicycle)
            print('        * downloading seances - ', url_seances)
            for id_seance_obj in sorted(download(url_seances).json().get('seances', []), key=lambda x: x["seance"]):
                url_seance = 'https://{}.fr/seance/{}/{}/json'.format(urlapi, id_seance_obj['seance'], loiid)
                print('           downloading seance - ', url_seance)
                resp = download(url_seance).json()
                if resp.get('seance'):
                    inter = resp.get('seance')[0]['intervention']
                    seance_name = inter['date'] + 'T' + inter['heure'] + '_' + inter['seance_id']
                    print('            dumping seance -', seance_name)
                    intervention_files.append(seance_name)
                    if not os.path.exists(inter_dir):
                        os.makedirs(inter_dir)
                    print_json(resp, os.path.join(inter_dir, seance_name + '.json'))
            if seance_name:
                step['has_interventions'] = True
                step['intervention_files'] = intervention_files
                break

        last_text_id = get_text_id(texte_url)
        last_text_typeparl = typeparl

    return procedure


if __name__ == '__main__':
    # Command-line entry point: the first argument is passed to process()
    # as-is, together with the content of 'viz/procedure.json' loaded from
    # inside that same path.
    process(sys.argv[1], open_json(os.path.join(sys.argv[1], 'viz/procedure.json')))
import sys, os

try:
    from .common import open_json
except (ImportError, SystemError, ValueError):
    # The relative import fails when this file is run as a plain script
    # instead of as part of a package: ImportError on Python >= 3.6,
    # SystemError on 3.3-3.5, ValueError on Python 2.  The former bare
    # ``except:`` also swallowed KeyboardInterrupt and SystemExit.
    from common import open_json

# Table loaded from doc/valid_procedure.json, two directories above this
# file; find_anomalies() looks transitions up in it.
procedure_file = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'doc', 'valid_procedure.json')
procedure = open_json(procedure_file)

def find_anomalies(dossiers, verbose=True):
    """Count the step transitions that the ``procedure`` table does not allow.

    Each step of each dossier is named by joining its non-empty ``stage``,
    ``institution`` and ``step`` values with ``' • '``.  Going from the
    previous step name (``''`` before the first step) to the current one is
    an anomaly when ``procedure[previous][current]`` is missing or is
    ``False``.

    With *verbose* set, every anomaly is printed along with the dossier's
    Senate and Assembly URLs, and a summary line is printed when at least
    one anomaly was found.  Returns the number of anomalies.
    """
    anomalies = 0
    for dossier in dossiers:
        previous = ''
        for step in dossier['steps']:
            parts = (step.get('stage'), step.get('institution'), step.get('step', ''))
            current = ' • '.join(part for part in parts if part)

            allowed = procedure.get(previous, {}).get(current, False)
            if allowed is False:
                if verbose:
                    print('INCORRECT', previous, '->', current)
                    print(dossier.get('url_dossier_senat'), '|', dossier.get('url_dossier_assemblee'))
                    print()
                anomalies += 1

            previous = current

    if anomalies and verbose:
        print(anomalies, 'anomalies (', len(dossiers), 'doslegs)')
    return anomalies
예제 #13
0
def process(output_dir, dos):
    """Compute summary statistics for one dossier.

    Args:
        output_dir: directory holding the generated data;
            ``viz/interventions.json`` is read from it and
            ``find_amendements`` is run over it.
        dos: the dossier dict.  ``dos['steps']`` is always read;
            ``dos['beginning']`` and the texts are only read when
            ``'end'`` is present.

    Returns:
        A dict with word, speaker and intervention totals, amendment
        counters split by origin (government / parliament) and outcome, the
        number of steps flagged ``"echec"`` and, when the dossier has an
        ``'end'``, its duration plus article and text-size statistics.

    Raises:
        ZeroDivisionError: if the dossier has an ``'end'`` but its first
            text has no articles (``ratio_articles_growth``).
    """
    stats = {}

    intervs = open_json(os.path.join(output_dir, 'viz/interventions.json'))
    stats['total_mots'] = sum(
        sum(division['total_mots'] for division in step['divisions'].values())
        for step in intervs.values()
    )

    # A set is wanted here: a speaker present in several steps counts once.
    stats["total_intervenants"] = len({
        orat
        for step in intervs.values() for orat in step['orateurs']
    })
    # Sum over a generator, NOT a set: with a set comprehension two
    # divisions having the same number of interventions were counted once.
    stats["total_interventions"] = sum(
        division['total_intervs']
        for step in intervs.values()
        for division in step['divisions'].values()
    )

    # Zero every amendment counter (same key order as before).
    for counter in ('total_amendements',
                    'total_amendements_adoptes',
                    'total_amendements_parlementaire',
                    'total_amendements_parlementaire_adoptes',
                    'total_amendements_gouvernement',
                    'total_amendements_gouvernement_adoptes'):
        stats[counter] = 0

    for amdt in find_amendements(output_dir):
        origin = ('gouvernement' if amdt["groupe"] == "Gouvernement"
                  else 'parlementaire')
        stats['total_amendements'] += 1
        if amdt["sort"] == "adopté":
            stats["total_amendements_adoptes"] += 1
            stats["total_amendements_%s_adoptes" % origin] += 1
        stats["total_amendements_%s" % origin] += 1

    stats["echecs_procedure"] = sum(
        1 for step in dos['steps'] if step.get("echec"))

    if 'end' in dos:
        # "+ 1" makes the day count inclusive of both bounds (assuming
        # datize returns date-like objects -- it is defined elsewhere).
        stats["total_days"] = (datize(dos["end"]) -
                               datize(dos["beginning"])).days + 1

        first_text, first_arts, last_text, last_arts = find_first_and_last_texts(
            dos)

        stats["total_input_articles"] = len(first_arts)
        stats["total_output_articles"] = len(last_arts)
        stats["ratio_articles_growth"] = len(last_arts) / len(first_arts)

        stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles(
            first_arts, last_arts)
        stats["input_text_length"] = len("\n".join(first_text))
        stats["output_text_length"] = len("\n".join(last_text))

    return stats
예제 #14
0
        if amdt["groupe"] == "Gouvernement":
            stats["total_amendements_gouvernement"] += 1
        else:
            stats["total_amendements_parlementaire"] += 1

    stats["echecs_procedure"] = len(
        [step for step in dos['steps'] if step.get("echec")])

    if 'end' in dos:
        stats["total_days"] = (datize(dos["end"]) -
                               datize(dos["beginning"])).days + 1

        first_text, first_arts, last_text, last_arts = find_first_and_last_texts(
            dos)

        stats["total_input_articles"] = len(first_arts)
        stats["total_output_articles"] = len(last_arts)
        stats["ratio_articles_growth"] = len(last_arts) / len(first_arts)

        stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles(
            first_arts, last_arts)
        stats["input_text_length"] = len("\n".join(first_text))
        stats["output_text_length"] = len("\n".join(last_text))

    return stats


if __name__ == '__main__':
    # Command-line entry point: the first argument is passed to process()
    # as-is, the second names a JSON file loaded with open_json; the result
    # is handed to print_json.
    print_json(process(sys.argv[1], open_json(sys.argv[2])))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, csv, os, sys
from difflib import ndiff, SequenceMatcher
from common import json, open_json, print_json

# Input directory: first command-line argument.  A missing argument used to
# raise IndexError on sys.argv[1] before the check below could report it.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else ''
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # sys.exit does not depend on the `site` module, unlike bare `exit`.
    sys.exit(1)
procedure = open_json(sourcedir, 'procedure.json')


def getParentFolder(root, f):
    """Give the base name of the folder holding the path ``root/f``."""
    full_path = os.path.abspath(os.path.join(root, f))
    containing_dir = os.path.abspath(os.path.join(full_path, os.pardir))
    return os.path.basename(containing_dir)


def unifyStatus(status):
    """Normalize a status label to ``"sup"``, ``"new"`` or ``"none"``.

    After dropping leading whitespace and trailing ``'s'``, ``'.'`` and
    space characters, a label ending in ``'constitution'`` or starting with
    ``'sup'`` gives ``"sup"``; one starting with ``'nouveau'`` gives
    ``"new"``; anything else gives ``"none"``.
    """
    # No ``encode('utf-8')`` here: on Python 3 it produced bytes, and
    # ``bytes.rstrip('s. ')`` raises TypeError.  The comparisons below only
    # involve ASCII markers, so they work directly on the text.
    status = status.lstrip().rstrip('s. ')
    if status.endswith('constitution') or status.startswith('sup'):
        return "sup"
    if status.startswith("nouveau"):
        return "new"
    return "none"


def create_step(step_id, directory, article=None, echec_type=None):
    s = {}
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, csv, os, sys
from common import open_json, print_json

# Input directory: first command-line argument.  Reading sys.argv[1]
# unconditionally raised IndexError when it was absent, which made the
# error message below unreachable for a missing argument.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else ''
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # Prefer sys.exit: bare `exit` only exists when `site` is loaded.
    sys.exit(1)

# Load the procedure description from the 'procedure' sub-directory and the
# articles / interventions data from the 'viz' sub-directory of sourcedir.
procedure = open_json(os.path.join(sourcedir, 'procedure'), 'procedure.json')
articles = open_json(os.path.join(sourcedir, 'viz'), 'articles_etapes.json')['articles']
intervs = open_json(os.path.join(sourcedir, 'viz'), 'interventions.json')
# Map each step directory seen in the articles data to the integer formed by
# the first two characters of its 'id_step' (first occurrence wins).
good_steps = {}
for _, a in articles.iteritems():
    for s in a['steps']:
        stepid = s['directory']
        if stepid not in good_steps:
            good_steps[stepid] = int(s['id_step'][:2])

# Annotate every procedure step with a 'debats_order' value (None by default).
for i, s in enumerate(procedure['steps']):
    s['debats_order'] = None
    # A step flagged as having interventions but absent from the
    # interventions data gets its flag cleared, with a warning on stderr.
    # NOTE(review): s['directory'] is read here before the "'directory' in s"
    # check below -- confirm such steps always carry a directory.
    if 'has_interventions' in s and s['has_interventions'] and s['directory'] not in intervs:
        print >> sys.stderr, "WARNING: removing nearly empty interventions steps for %s" % s['directory'].encode('utf-8')
        s['has_interventions'] = False
    if 'directory' in s:
        # The last step, when its 'enddate' is empty, is ordered after every
        # step known from the articles data.
        if i == len(procedure['steps'])-1 and not s['enddate']:
            s['debats_order'] = max(good_steps.values()) + 1
        else:
            # Otherwise reuse the order recorded above (None when unknown).
            s['debats_order'] = good_steps.get(s['directory'], None)
예제 #17
0
    def computeStatOverFile(self, file):
        """Add the dossiers of one data file to the aggregated statistics.

        ``open_json("data", file)`` supplies a document whose ``"dossiers"``
        list is walked once.  Every dossier bumps the counters and totals
        stored on ``self`` and gets its own summary dict in
        ``self.textValues``, keyed by its ``"id"``.
        """
        loaded = open_json("data", file)
        for d in loaded["dossiers"]:
            self.countDossiers += 1

            self.totalDays += d["total_days"]

            # Amendment totals.
            nb_amdt = d["total_amendements"]
            self.totalAmendement += nb_amdt
            self.totalAmendementParl += d["total_amendements_parlementaire"]
            nb_adopted = d["total_amendements_adoptes"]
            self.totalAmendementAdoptes += nb_adopted
            self.totalAmendementParlAdoptes += d["total_amendements_parlementaire_adoptes"]

            self.totalIntervenant += d["total_intervenant"]

            self.totalArticles += d["total_articles"]
            self.totalArticlesModified += d["total_articles_modified"]

            nb_accidents = d["total_accident_procedure"]
            self.totalAccidentProcedure += nb_accidents
            has_accident = nb_accidents > 0
            if has_accident:
                self.nbDossiersAccidentProcedure += 1

            # Text volume, based on the "...2" length measurements.
            len_before = d["input_text_length2"]
            self.textSizeOrig += len_before
            len_after = d["output_text_length2"]
            self.textSizeFinal += len_after

            if d["output_text_length"] < d["input_text_length"]:
                self.countTextReduced += 1

            more_than_doubled = (float(len_after) / len_before) > 2.0
            if more_than_doubled:
                self.countTextWithDoubledVolume2 += 1
            if len_after < len_before:
                self.countTextReduced2 += 1

            if nb_amdt > 0:
                self.countDossiersAmende += 1

                if d["ratio_texte_modif"] >= 0.5:
                    self.countModifSup50 += 1
                relative_growth = (len_after - len_before) / float(len_before)
                if relative_growth > 0.5:
                    self.countInflaSup50 += 1
                if relative_growth > 1:
                    self.countInflaSup100 += 1

            # Summary of this dossier; the dict is registered first and then
            # filled key by key.
            summary = self.textValues[d["id"]] = {}
            summary["short_title"] = d["short_title"]
            summary["input_text_length"] = len_before
            summary["output_text_length"] = len_after
            summary["inflation"] = (len_after - len_before) / float(len_before)
            summary["modification"] = d["ratio_texte_modif"]

            summary["amendement"] = nb_amdt
            summary["amendementAdoptes"] = nb_adopted
            # Divide by 1 instead of 0 when the dossier has no amendment.
            divisor = float(nb_amdt if nb_amdt != 0 else 1)
            summary["tauxAdoption"] = nb_adopted / divisor

            summary["daysBeforeAdoption"] = d["total_days"]
            summary["ProcedureAccident"] = nb_accidents
            summary["NbIntervenants"] = d["total_intervenant"]
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, csv, os, sys
from common import open_json, print_json

# Input directory: first command-line argument.  Without the length check a
# missing argument raised IndexError and the message below was never shown.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else ''
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # sys.exit works even when the `site` module (source of bare `exit`)
    # is not loaded.
    sys.exit(1)

# Load the procedure description from the 'procedure' sub-directory and the
# articles / interventions data from the 'viz' sub-directory of sourcedir.
procedure = open_json(os.path.join(sourcedir, 'procedure'), 'procedure.json')
articles = open_json(os.path.join(sourcedir, 'viz'),
                     'articles_etapes.json')['articles']
intervs = open_json(os.path.join(sourcedir, 'viz'), 'interventions.json')
# Map each step directory seen in the articles data to the integer formed by
# the first two characters of its 'id_step' (first occurrence wins).
good_steps = {}
for _, a in articles.iteritems():
    for s in a['steps']:
        stepid = s['directory']
        if stepid not in good_steps:
            good_steps[stepid] = int(s['id_step'][:2])

for i, s in enumerate(procedure['steps']):
    s['debats_order'] = None
    if 'has_interventions' in s and s['has_interventions'] and s[
            'directory'] not in intervs:
        print >> sys.stderr, "WARNING: removing nearly empty interventions steps for %s" % s[
            'directory'].encode('utf-8')
        s['has_interventions'] = False
    if 'directory' in s:
        if i == len(procedure['steps']) - 1 and not s['enddate']:
예제 #19
0
def process(OUTPUT_DIR, procedure):
    context = Context([0, OUTPUT_DIR], load_parls=True)

    #['Indéfini', 'Adopté', 'Irrecevable', 'Rejeté', 'Retiré', 'Tombe', 'Non soutenu', 'Retiré avant séance', 'Rectifié', 'Favorable' ,'Satisfait']
    def simplify_sort(sort):
        """Reduce an amendment outcome label to one of four categories.

        Returns ``"adopté"`` for "adopté" / "favorable", ``"rejeté"`` for
        "rejeté", ``"en attente"`` for "indéfini" and ``"non-voté"`` for any
        other label.  Matching ignores case and surrounding whitespace.
        """
        # Compare whole labels.  The former ``sort in "adopté favorable"``
        # was a substring test on the string, so an empty label (or any
        # fragment such as "a") was reported as "adopté".
        sort = sort.lower().strip()
        if sort in ("adopté", "favorable"):
            return "adopté"
        if sort == "rejeté":
            return "rejeté"
        if sort == "indéfini":
            return "en attente"
        return "non-voté"

    # Captures whatever precedes the first ", " or " et " separator.
    re_clean_first = re.compile(r'^(.*?)(,| et) .*$')

    def first_author(signataires):
        """Shorten a signatory list to its first name followed by ", …".

        Returns ``""`` when *signataires* is ``None`` or mentions
        "gouvernement" (case-insensitive).  A string without a ", " or
        " et " separator is returned unchanged.
        """
        if signataires is not None and "gouvernement" not in signataires.lower():
            return re_clean_first.sub(r'\1, …', signataires)
        return ""

    def find_groupe(amd):
        """Return the group that carries the most signatories of *amd*.

        ``"Gouvernement"`` is returned as soon as the signatories mention
        "gouvernement".  Otherwise each entry of
        ``amd['groupes_parlementaires']`` adds 1 (or the count given after a
        ``':'``) to its group, identified through ``slug_groupe``.  The
        first group to reach the highest running total wins; ``""`` is
        returned when there is no entry.
        """
        signataires = amd['signataires']
        if signataires and "gouvernement" in signataires.lower():
            return "Gouvernement"

        totals = {}
        best_total = 0
        best_group = ""
        for entry in amd['groupes_parlementaires']:
            label = entry['groupe']
            weight = 1

            # the new api compacts the groups as "<group>:<count>"
            if ':' in label:
                label, raw_weight = label.split(':')
                weight = int(raw_weight)

            slug = slug_groupe(label)
            totals[slug] = totals.get(slug, 0) + weight
            if totals[slug] > best_total:
                best_total = totals[slug]
                best_group = slug
        return best_group

    def add_link(links, pA, pB, weight=1):
        """Add *weight* to the undirected link between *pA* and *pB*.

        Links are stored in *links* under the key ``"<smaller>-<larger>"``,
        so the order of *pA* and *pB* does not matter.  A missing link is
        created with weight 0 before *weight* is added.
        """
        low, high = min(pA, pB), max(pA, pB)
        key = "%s-%s" % (low, high)
        link = links.setdefault(key, {"1": low, "2": high, "w": 0})
        link["w"] += weight

    # Finds "article <ref>" at the end of a subject; group 1 is the whole
    # reference, group 2 its leading digits (unset for "1er...").
    article_number_regexp = re.compile(r'article (1er.*|(\d+).*)$', re.I)

    def sort_amendements(texte, amendements):
        """Set ``'ordre_article'`` on every amendment from its subject.

        *texte* is the list of text elements; entries of type ``'article'``
        with a title provide ``order * 10`` as the rank of that title.
        Each item of *amendements* wraps a dict under ``'amendement'`` whose
        ``'sujet'`` is ranked: 0 for the title, 1 for motions, 5 for the
        whole bill, the article's rank (minus 1 for "avant", plus 1 for
        "après") when the subject names a known article, and 10000 as the
        base rank otherwise.  The list is modified in place and returned.
        """
        ranks = {}
        for element in texte:
            if element['type'] != 'article':
                continue
            title = element.get('titre')
            if title:
                ranks[title.lower()] = element.get('order') * 10

        def solveorder(art):
            art = art.lower()
            if art == 'titre' or art.startswith('intitul'):
                return 0
            if art.startswith('motion'):
                return 1
            if art.startswith(('projet', 'proposition', 'texte')):
                return 5

            order = 10000
            found = article_number_regexp.search(art)
            if not found:
                return order

            # Prefer the full reference ("2 bis"), then the bare number.
            full_ref, number = found.group(1), found.group(2)
            if ranks.get(full_ref):
                order = ranks.get(full_ref)
            elif ranks.get(number):
                order = ranks.get(number)

            if 'avant' in art:
                order -= 1
            elif 'après' in art or 'apres' in art:
                order += 1
            return order

        for wrapper in amendements:
            inner = wrapper['amendement']
            inner['ordre_article'] = solveorder(inner['sujet'])

        return amendements


    CACHE_BUSTING = 'cache=%d' % time()
    if 'url_jo' in procedure:
        CACHE_BUSTING = 'cache=5feb2018' # fixed cache busting for promulgated laws
    steps = {}
    last_text_id, last_text_typeparl = None, None
    steps = procedure['steps']
    for i, step in enumerate(steps):
        print('    * step -', step.get('stage'), step.get('step'), step.get('source_url'))
        if step.get('step') not in ('commission', 'hemicycle'):
            continue
        if step.get('step') == 'commission' and step.get('stage') == 'CMP':
            continue

        if i == 0:
            continue

        last_step_index = get_previous_step(steps, i, is_old_procedure=procedure.get('use_old_procedure'))
        last_step = steps[last_step_index]
        last_step_with_good_text_number = steps[get_previous_step(steps, i,
            is_old_procedure=procedure.get('use_old_procedure'), get_depot_step=True)
        ]
        texte_url = last_step_with_good_text_number.get('source_url')

        if step.get('stage') != 'CMP' and last_step_with_good_text_number.get('institution') != step.get('institution'):
            print('ERROR - last step is from another institution', file=sys.stderr)
            continue

        # for a CMP hemicycle we have to get the right text inside the CMP commission
        if step.get('stage') == 'CMP' and step.get('step') == 'hemicycle':
            urls = [last_step.get('source_url')]
            if 'cmp_commission_other_url' in last_step:
                urls.append(last_step.get('cmp_commission_other_url'))
            an_url = [url for url in urls if 'nationale.fr' in url]
            senat_url = [url for url in urls if 'senat.fr' in url]
            if step.get('institution') == 'assemblee' and an_url:
                texte_url = an_url[0]
            elif step.get('institution') == 'senat' and senat_url:
                texte_url = senat_url[0]
            else:
                print('WARNING - missing the CMP commission text for', step.get('source_url'), file=sys.stderr)
                continue

        if texte_url is None:
            print('ERROR - no texte url', step.get('source_url'), file=sys.stderr)
            continue

        texte = open_json(os.path.join(context.sourcedir, 'procedure', last_step['directory']), 'texte/texte.json')

        amdt_url = None
        if "nationale.fr" in texte_url:
            if 'assemblee_legislature' not in procedure:
                print('         + no AN legislature - pass text')
                continue
            amdt_url = 'https://nosdeputes.fr/%s/amendements/%s/json?%s' % (procedure.get('assemblee_legislature'), get_text_id(texte_url), CACHE_BUSTING)
        elif "senat.fr" in texte_url:
            amdt_url = 'https://nossenateurs.fr/amendements/%s/json?%s' % (get_text_id(texte_url), CACHE_BUSTING)

        if amdt_url is None:
            continue

        print('      * downloading amendments:', amdt_url, 'for', texte_url)

        amendements_src = download(amdt_url).json().get('amendements', [])

        # TA texts can be zero-paded or not (TA0XXX or TAXXX), we try both
        if 'amendements/TA' in amdt_url:
            textid = get_text_id(texte_url)
            if 'TA0' in textid:
                alternative_url = amdt_url.replace(textid, 'TA' + textid.replace('TA', '').lstrip('0'))
            else:
                alternative_url = amdt_url.replace(textid, 'TA' + textid.replace('TA', '').zfill(4))
            print(' WARNING: TA - trying alternative url too', alternative_url)
            amendements_src += download(alternative_url).json().get('amendements', [])

        print('        parsing amendments:', len(amendements_src))

        # ignore amendments if they are not for the correct step
        amendements_src_filtered = []
        for amd in amendements_src:
            a = amd['amendement']
            if step.get('institution') == 'assemblee':
                # commission amendments can have two forms
                #    - /amendements/LOI/NUM.asp (13th legislature)
                #    - /amendements/LOI/COMMISSION_NAME/NUM.asp (14+ legislature)
                # hemicycle amendments are:
                #    - /amendements/LOI/NUM.asp (13th legislature)
                #    - /amendements/LOI/AN/NUM.asp (14+ legislature)
                amdt_step = 'hemicycle'
                if '/cr-' in a['source']:
                    amdt_step = 'commission'
                else:
                    url_parts = a['source'].split('amendements/')[1].split('/')
                    if len(url_parts) == 3 and url_parts[1] != 'AN':
                        amdt_step = 'commission'
            elif step.get('institution') == 'senat':
                amdt_step = 'commission' if '/commissions/' in a['source'] else 'hemicycle'
            else:
                # CMP - there's not way for now to distinguish the step
                amdt_step = step['step']
            if step['step'] != amdt_step:
                continue
            amendements_src_filtered.append(amd)

        if len(amendements_src_filtered) != len(amendements_src):
            print('WARNING: amendments ignored (not the right step) %s' %
                    (len(amendements_src) - len(amendements_src_filtered)), file=sys.stderr)
        amendements_src = amendements_src_filtered

        step['nb_amendements'] = len(amendements_src)

        if len(amendements_src) > 0:
            amendements_src = sort_amendements(texte['articles'], amendements_src)

            typeparl, urlapi = identify_room(texte_url,
                legislature=step.get('assemblee_legislature', procedure.get('assemblee_legislature')))

            sujets = {}
            groupes = {}

            fix_order = False
            orders = []
            parls = {}
            links = {}
            idents = {}
            for amd in amendements_src:
                a = amd['amendement']
                if "sort" not in a:
                    print('WARNING: amendment has no sort %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                if a["sort"] == "Rectifié":
                    continue
                if "sujet" not in a or not a["sujet"]:
                    if a["sort"] not in ["Irrecevable", "Retiré avant séance"]:
                        print('WARNING: amendment has no subject %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                key = a['sujet']
                if not key:
                    print('WARNING: amendment has no subject %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                if key not in sujets:
                    orders.append(key)
                    sujets[key] = {
                      'titre': key,
                      'order': a['ordre_article'],
                      'amendements': []
                    }
                if a['ordre_article'] > 9000:
                    fix_order = True

                gpe = find_groupe(a)
                if not gpe:
                    if a["sort"] != "Irrecevable":
                        sys.stderr.write('WARNING: no groupe found for %s\n' % a['url_nos%ss' % typeparl])
                    gpe = "Inconnu"
                context.add_groupe(groupes, gpe, urlapi)

                sujets[key]['amendements'].append({
                  'numero': a['numero'],
                  'date': a['date'],
                  'sort': simplify_sort(a['sort']),
                  'groupe': gpe,
                  'id_api': a['id'],
                  'aut': first_author(a['signataires'])
                })

                cosign = []
                hmd5 = a["cle_unicite"]
                if hmd5 not in idents:
                    idents[hmd5] = []
                for parll in a["parlementaires"]:
                    parl = parll["parlementaire"]
                    if parl not in parls:
                        p = context.get_parlementaire(urlapi, parl)
                        parls[parl] = {
                          "i": p["id"],
                          "s": parl,
                          "a": 0,
                          "n": p["nom"],
                          "g": p["groupe_sigle"],
                          "p": p["place_en_hemicycle"]
                        }
                    pid = parls[parl]["i"]
                    parls[parl]["a"] += 1
                    for cid in cosign:
                        add_link(links, pid, cid)
                        #add_link(links, pid, cid, 2)
                    cosign.append(pid)
                    for cid in idents[hmd5]:
                        add_link(links, pid, cid)
                    idents[hmd5].append(pid)

            if fix_order:
                orders.sort(key=cmp_to_key(compare_articles))
                for i, k in enumerate(orders):
                    sujets[k]["order"] = i

            amdtsfile = os.path.join(context.sourcedir, 'viz', 'amendements_%s.json' % step['directory'])
            data = {'id_step': step['directory'],
                    'api_root_url': amdapi_link(urlapi),
                    'groupes': groupes,
                    'sujets': sujets}
            print_json(data, amdtsfile)

            linksfile = os.path.join(context.sourcedir, 'viz', 'amendements_links_%s.json' % step['directory'])
            data = {'id_step': step['directory'],
                    'links': list(links.values()),
                    'parlementaires': dict((p["i"], dict((k, p[k]) for k in "psang")) for p in list(parls.values()))}
            # print_json(data, linksfile)


        ###########  INTERVENTIONS #############
        # TODO: move this to a dedicated file

        print('      * downloading interventions')
        typeparl, urlapi = identify_room(texte_url,
            legislature=step.get('assemblee_legislature', procedure.get('assemblee_legislature')))
        inter_dir = os.path.join(context.sourcedir, 'procedure', step['directory'], 'interventions')
        commission_or_hemicycle = '?commission=1' if step.get('step') == 'commission' else '?hemicycle=1'
        # TODO: TA texts can be zero-padded or not (TA0XXX or TAXXX), we should try both
        seance_name = None
        intervention_files = []

        texts = (get_text_id(texte_url),)
        if last_text_typeparl == typeparl:
            texts = (get_text_id(texte_url), last_text_id)

        for loiid in texts:
            url_seances = 'https://{}.fr/seances/{}/json{}'.format(urlapi, loiid, commission_or_hemicycle)
            print('        * downloading seances - ', url_seances)
            for id_seance_obj in sorted(download(url_seances).json().get('seances', []), key=lambda x: x["seance"]):
                url_seance = 'https://{}.fr/seance/{}/{}/json'.format(urlapi, id_seance_obj['seance'], loiid)
                print('           downloading seance - ', url_seance)
                resp = download(url_seance).json()
                if resp.get('seance'):
                    inter = resp.get('seance')[0]['intervention']
                    seance_name = inter['date'] + 'T' + inter['heure'] + '_' + inter['seance_id']
                    print('            dumping seance -', seance_name)
                    intervention_files.append(seance_name)
                    if not os.path.exists(inter_dir):
                        os.makedirs(inter_dir)
                    print_json(resp, os.path.join(inter_dir, seance_name + '.json'))
            if seance_name:
                step['has_interventions'] = True
                step['intervention_files'] = intervention_files
                break

        last_text_id = get_text_id(texte_url)
        last_text_typeparl = typeparl

    return procedure
예제 #20
0
def process(dos):
    """Collect the references detected in every article of a dossier.

    For each step of ``dos['steps']`` the articles are read from
    ``articles_completed`` when that key is present, otherwise from
    ``articles``; steps without articles are skipped.  Every entry whose
    ``type`` is ``"article"`` gets a fresh ``liens`` list holding, for each
    candidate returned by ``metslesliens.donnelescandidats`` that carries a
    ``texte`` key, the slice of the alinea delimited by the candidate's
    ``index`` bounds.

    The dossier is modified in place and returned.

    Raises:
        KeyError: if an entry lacks ``type``/``alineas`` or if the alineas
            are not keyed by consecutive zero-padded positions
            (``"001"``, ``"002"``, ...).
    """
    for step in dos['steps']:
        articles = step.get('articles_completed', step.get('articles'))
        if not articles:
            continue

        for data in articles:
            if data["type"] != "article":
                continue

            # Any pre-existing list is replaced, not extended.
            liens = data['liens'] = []
            alineas = data["alineas"]
            # Alineas are addressed by position, not by iterating the dict,
            # so they are visited in numeric order.
            for num in range(1, len(alineas) + 1):
                text = alineas["%03d" % num]
                for candidat in metslesliens.donnelescandidats(
                        text, 'structuré'):
                    if 'texte' in candidat:
                        start = candidat['index'][0]
                        end = candidat['index'][1]
                        # Links are stored as the raw matched text.
                        liens.append(text[start:end])
    return dos


if __name__ == '__main__':
    # Script entry point: load the dossier JSON named by the first CLI
    # argument, run process() on it and hand the result to print_json.
    dossier = open_json(sys.argv[1])
    print_json(process(dossier))
예제 #21
0
    myprint('NOK:', score_nok)
    myprint('OK:', score_ok)
    return score_nok, score_ok


if __name__ == '__main__':
    import glob

    # Aggregate counters for the whole run; they are only initialised in
    # this excerpt -- the code updating them is not visible here.
    sum_ok = 0
    sum_nok = 0
    missing = 0
    perfect = 0
    less_than_1 = 0

    # argv[2]: JSON file, iterated below as a sequence of dict-like dossiers.
    all_doslegs = open_json(sys.argv[2])
    # argv[1]: glob pattern selecting the dossier files to examine; matches
    # are processed in sorted order.
    lafabrique_doslegs = list(sorted(glob.glob(sys.argv[1])))
    scored = []
    for file in lafabrique_doslegs:
        print('======')
        print('======')
        print(file)
        me = None
        proc = open_json(file)
        # Normalise the Senate URL (https scheme, '/dossier-legislatif/'
        # path segment) -- presumably so both sides can be compared; the
        # comparison itself lies outside this excerpt.
        proc_url_senat = proc.get('url_dossier_senat', '').replace(
            'http://', 'https://').replace('/dossierleg/',
                                           '/dossier-legislatif/')
        for dos in all_doslegs:
            # Same normalisation applied to the candidate dossier's URL.
            dos_url_senat = dos.get('url_dossier_senat', '').replace(
                'http://', 'https://').replace('/dossierleg/',
                                               '/dossier-legislatif/')
try:
    from .common import open_json, print_json
except (ImportError, SystemError, ValueError):
    # Fallback for running this file directly as a script, where the
    # relative import fails.  The exception type depends on the interpreter:
    # ImportError (3.6+), SystemError (3.3-3.5), ValueError (2.x).  A bare
    # ``except`` would also have swallowed KeyboardInterrupt/SystemExit.
    from common import open_json, print_json

# The first CLI argument (directory scanned for procedure files) is required.
if len(sys.argv) < 2:
    print('USAGE: `steps_as_dot.py <path_to_json>`')
    sys.exit()

# Exactly one extra CLI argument selects "detailed" mode.
mode = "detailed" if len(sys.argv) == 3 else "simple"

# doc/valid_procedure.json lives one directory above this file's directory.
procedure_file = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'doc',
    'valid_procedure.json')
procedure = open_json(procedure_file)

API_DIRECTORY = sys.argv[1]
# Load every <API_DIRECTORY>/*/viz/procedure.json, then keep only the
# dossiers that have a truthy 'end' value.
all_senat_jo = [
    open_json(path)
    for path in glob.glob(os.path.join(API_DIRECTORY, '*/viz/procedure.json'))
]
all_senat_jo = [dos for dos in all_senat_jo if dos.get('end')]
# Alternative inputs kept for debugging:
# all_senat_jo = [x for x in open_json(sys.argv[1]) if len(x['steps']) > 2]
# all_senat_jo = random.sample(all_senat_jo, 5)

# Accumulators filled while walking the steps of each dossier.
nodes_names_size = {}
step_trans = {}
steps_logs = ""
for dos in all_senat_jo:
    prev_step = None
    last_step = ''
    for step_i, step in enumerate(dos.get('steps', [])):
예제 #23
0
            a["order"] = order
            order += 1
            write_json(a)
        # do not keep already deleted articles but mark as deleted missing ones
        elif not re_suppr.match(a["statut"]) or texte.get('echec', ''):
            # if the last line of text was some dots, it means that we should keep
            # the articles as-is if they are not deleted
            if line['type'] == 'dots':
                # ex: https://www.senat.fr/leg/ppl09-304.html
                log("DEBUG: Recovering art as non-modifié via dots %s (leftovers)"
                    % cur)
                a["statut"] = "non modifié"
                a["order"] = order
                order += 1
                write_json(a)
            else:
                log("DEBUG: Marking art %s as supprimé (leftovers)" % cur)
                a["statut"] = "supprimé"
                a["alineas"] = dict()
                a["order"] = order
                order += 1
                write_json(a)

    return ALL_ARTICLES


if __name__ == '__main__':
    # Script entry point: the JSON file given as first CLI argument holds
    # the keyword arguments for complete().  The result is kept but not
    # printed.
    complete_kwargs = open_json(sys.argv[1])
    result = complete(**complete_kwargs)
    # print_json(result)