def find_amendements(path):
    """Yield every amendment stored in the ``amendements_*`` files under *path*.

    Files are searched recursively. Each one is loaded with ``open_json``;
    the amendments listed under each entry of its ``'sujets'`` mapping are
    yielded one by one. Files without ``'sujets'`` and subjects without
    ``'amendements'`` contribute nothing.
    """
    pattern = os.path.join(path, '**/amendements_*')
    for filename in glob.glob(pattern, recursive=True):
        sujets = open_json(filename).get('sujets', {})
        for sujet in sujets.values():
            yield from sujet.get('amendements', [])
def step_walker(self, step):
    """Feed the data attached to one procedure step to the computation class.

    Depending on which keys *step* carries, this loads and forwards:

    * ``amendement_directory``   -> every entry of ``amendements.json``
    * ``intervention_directory`` -> every intervention of each file named in
      ``step["intervention_files"]``
    * ``working_text_directory`` -> ``texte.json``

    and finally ``articles_etapes.json`` from ``self.vizPath``.

    If a referenced directory does not exist, a message is printed and the
    method returns immediately: the remaining treatments, including the
    article-steps one, are skipped for this step.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the former print statement is a SyntaxError on Python 3.

    # Amendement treatment
    if "amendement_directory" in step:
        amdtDir = os.path.join(self.procedurePath, step["amendement_directory"])
        if not os.path.exists(amdtDir):
            print("ERROR > No Amendements Directory ")
            return
        amendements = open_json(amdtDir, "amendements.json")
        for amendement in amendements["amendements"]:
            self.computationClass.computeAmendements(amendement)

    # Intervention treatment
    if "intervention_directory" in step:
        intervDir = os.path.join(self.procedurePath, step["intervention_directory"])
        if not os.path.exists(intervDir):
            print(">No Intervention Directory ")
            return
        for seance_file in step["intervention_files"]:
            seance = open_json(intervDir, "%s.json" % seance_file)
            for interv in seance["seance"]:
                self.computationClass.computeInterventions(interv)

    # Text treatment
    if "working_text_directory" in step:
        textDir = os.path.join(self.procedurePath, step["working_text_directory"])
        if not os.path.exists(textDir):
            print("ERROR > no Text directory")
            return
        text = open_json(textDir, "texte.json")
        self.computationClass.computeText(text)

    # Article Etape
    articleEtape = open_json(self.vizPath, "articles_etapes.json")
    self.computationClass.computeArticleEtapes(articleEtape)
def walk(self):
    """Process every step of ``procedure.json``, then finalize the computation.

    Each step is first passed to ``step_walker`` and then to the computation
    class' ``computeStep``; ``finalize`` is called once after the last step.
    """
    steps = open_json(self.procedurePath, "procedure.json")['steps']
    for current in steps:
        self.step_walker(current)
        self.computationClass.computeStep(current)
    self.computationClass.finalize()
def walk(self):
    """Walk the steps listed in ``procedure.json`` and finalize the results.

    For each step, ``step_walker`` is run before the computation class'
    ``computeStep``. ``finalize`` runs once, after all steps.
    """
    procedure_data = open_json(self.procedurePath, "procedure.json")
    for procedure_step in procedure_data['steps']:
        self.step_walker(procedure_step)
        self.computationClass.computeStep(procedure_step)
    self.computationClass.finalize()
def step_walker(self, step):
    """Forward one step's amendments, interventions and text to the computation class.

    Each treatment runs only when *step* has the matching key
    (``amendement_directory``, ``intervention_directory``,
    ``working_text_directory``). ``articles_etapes.json`` from
    ``self.vizPath`` is processed last.

    When a referenced directory is missing, a message is printed and the
    method returns at once, so the later treatments are not run for this
    step.
    """
    # Single-argument print(...) calls replace the Python 2 print statements:
    # same output on Python 2, and valid syntax on Python 3.

    # Amendement treatment
    if "amendement_directory" in step:
        amdtDir = os.path.join(self.procedurePath, step["amendement_directory"])
        if not os.path.exists(amdtDir):
            print("ERROR > No Amendements Directory ")
            return
        amendements = open_json(amdtDir, "amendements.json")
        for amendement in amendements["amendements"]:
            self.computationClass.computeAmendements(amendement)

    # Intervention treatment
    if "intervention_directory" in step:
        intervDir = os.path.join(self.procedurePath, step["intervention_directory"])
        if not os.path.exists(intervDir):
            print(">No Intervention Directory ")
            return
        seance_files = step["intervention_files"]
        for seance_file in seance_files:
            seance = open_json(intervDir, "%s.json" % seance_file)
            for interv in seance["seance"]:
                self.computationClass.computeInterventions(interv)

    # Text treatment
    if "working_text_directory" in step:
        textDir = os.path.join(self.procedurePath, step["working_text_directory"])
        if not os.path.exists(textDir):
            print("ERROR > no Text directory")
            return
        text = open_json(textDir, "texte.json")
        self.computationClass.computeText(text)

    # Article Etape
    articleEtape = open_json(self.vizPath, "articles_etapes.json")
    self.computationClass.computeArticleEtapes(articleEtape)
def process(output_directory):
    """Return the mapping of law ids to common names, cached for one day.

    The mapping is stored in ``lois_dites.json`` inside *output_directory*
    (created if missing). If that file exists and was modified within the
    last 86400 seconds it is loaded with ``open_json``; otherwise the mapping
    is rebuilt from ``LawService().common_laws()``, written with
    ``print_json`` and returned.
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    one_day_ago = time.time() - 86400
    cache_path = os.path.join(output_directory, 'lois_dites.json')

    cache_is_fresh = (
        os.path.exists(cache_path)
        and os.path.getmtime(cache_path) >= one_day_ago
    )
    if cache_is_fresh:
        return open_json(cache_path)

    names_by_id = {}
    for law in LawService().common_laws():
        names_by_id[law.id_legi] = law.common_name
    print_json(names_by_id, cache_path)
    return names_by_id
# Script: load `data.json` with `common.open_json`, convert it with
# `common.dict_to_movies` and pass the result to
# `fresh_tomatoes.open_movies_page`.
import fresh_tomatoes
import common

# get data (path is relative to the current working directory)
datas = common.open_json('data.json')

# transform data to movies
movies = common.dict_to_movies(datas)

# get html
fresh_tomatoes.open_movies_page(movies)
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, csv, os, sys
from difflib import ndiff, SequenceMatcher
from common import json, open_json, print_json

# Read the input directory defensively: indexing sys.argv[1] directly raises
# IndexError when no argument is given, so the message below was never shown.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else None
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # sys.exit rather than the interactive-only `exit` helper from `site`
    sys.exit(1)
procedure = open_json(sourcedir, 'procedure.json')


def getParentFolder(root, f):
    """Return the base name of the directory that contains ``root/f``."""
    # renamed from `abs`, which shadowed the builtin
    abs_path = os.path.abspath(os.path.join(root, f))
    return os.path.basename(os.path.abspath(os.path.join(abs_path, os.pardir)))


def unifyStatus(status):
    """Reduce a status string to one of "sup", "new" or "none".

    The status is UTF-8 encoded, stripped of leading whitespace and of any
    trailing ``s``, ``.`` or space characters before being matched.
    """
    status = status.encode('utf-8')
    status = status.lstrip().rstrip('s. ')
    if status.endswith('constitution') or status.startswith('sup'):
        return "sup"
    if status.startswith("nouveau"):
        return "new"
    return "none"


def create_step(step_id, directory, article=None, echec_type=None):
    # NOTE(review): only the first statements of this function are visible in
    # the reviewed excerpt; they are kept unchanged.
    s = {}
    s['id_step'] = step_id
    s['directory'] = directory
    s['text'] = []
def computeStatOverFile(self, file):
    """Accumulate statistics for every dossier loaded via ``open_json("data", file)``.

    For each entry of the loaded ``"dossiers"`` list this updates the running
    totals and counters held on ``self`` and stores a per-dossier summary in
    ``self.textValues`` under the dossier's ``"id"``.

    Text "inflation" is ``(output_text_length2 - input_text_length2) /
    input_text_length2``; a zero input length therefore raises
    ``ZeroDivisionError``.
    """
    dossiers = open_json("data", file)
    for dossier in dossiers["dossiers"]:
        self.countDossiers += 1

        nb_days = dossier["total_days"]
        self.totalDays += nb_days
        nb_amdts = dossier["total_amendements"]
        self.totalAmendement += nb_amdts
        self.totalAmendementParl += dossier["total_amendements_parlementaire"]
        nb_adopted = dossier["total_amendements_adoptes"]
        self.totalAmendementAdoptes += nb_adopted
        self.totalAmendementParlAdoptes += dossier[
            "total_amendements_parlementaire_adoptes"]
        nb_speakers = dossier["total_intervenant"]
        self.totalIntervenant += nb_speakers
        self.totalArticles += dossier["total_articles"]
        self.totalArticlesModified += dossier["total_articles_modified"]
        nb_accidents = dossier["total_accident_procedure"]
        self.totalAccidentProcedure += nb_accidents
        if nb_accidents > 0:
            self.nbDossiersAccidentProcedure += 1

        # Text sizes (the "...2" length fields)
        size_in = dossier["input_text_length2"]
        self.textSizeOrig += size_in
        size_out = dossier["output_text_length2"]
        self.textSizeFinal += size_out

        if dossier["output_text_length"] < dossier["input_text_length"]:
            self.countTextReduced += 1
        if float(size_out) / size_in > 2.0:
            self.countTextWithDoubledVolume2 += 1
        if size_out < size_in:
            self.countTextReduced2 += 1
        if nb_amdts > 0:
            self.countDossiersAmende += 1
        modif_ratio = dossier["ratio_texte_modif"]
        if modif_ratio >= 0.5:
            self.countModifSup50 += 1

        # Relative growth of the text, reused for the summary below
        inflation = (size_out - size_in) / float(size_in)
        if inflation > 0.5:
            self.countInflaSup50 += 1
        if inflation > 1:
            self.countInflaSup100 += 1

        # Per-dossier summary
        record = {}
        self.textValues[dossier["id"]] = record
        record["short_title"] = dossier["short_title"]
        record["input_text_length"] = size_in
        record["output_text_length"] = size_out
        record["inflation"] = inflation
        record["modification"] = modif_ratio
        record["amendement"] = nb_amdts
        record["amendementAdoptes"] = nb_adopted
        # avoid dividing by zero when the dossier has no amendment
        record["tauxAdoption"] = nb_adopted / float(
            nb_amdts if nb_amdts != 0 else 1)
        record["daysBeforeAdoption"] = nb_days
        record["ProcedureAccident"] = nb_accidents
        record["NbIntervenants"] = nb_speakers
mkdirs(step_dir) for data in articles: if not data or not "type" in data: log_err("JSON badly formatted, missing field type: %s" % data) sys.exit(1) if data["type"] == "texte": textid = data["id"] alldata = dict(data) alldata['sections'] = [] alldata['articles'] = [] elif textid == "": log_err("JSON missing first line with text infos") sys.exit(1) elif data["type"] == "section": alldata['sections'].append(data) elif data["type"] == "article": alldata['articles'].append(data) elif data["type"] == "echec": alldata['expose'] = data['texte'] print_json(alldata, os.path.join(step_dir, 'texte.json')) step['texte.json'] = alldata return dos if __name__ == '__main__': print_json(process(open_json(sys.argv[1]), 'test_out'))
texts = (get_text_id(texte_url), last_text_id) for loiid in texts: url_seances = 'https://{}.fr/seances/{}/json{}'.format(urlapi, loiid, commission_or_hemicycle) print(' * downloading seances - ', url_seances) for id_seance_obj in sorted(download(url_seances).json().get('seances', []), key=lambda x: x["seance"]): url_seance = 'https://{}.fr/seance/{}/{}/json'.format(urlapi, id_seance_obj['seance'], loiid) print(' downloading seance - ', url_seance) resp = download(url_seance).json() if resp.get('seance'): inter = resp.get('seance')[0]['intervention'] seance_name = inter['date'] + 'T' + inter['heure'] + '_' + inter['seance_id'] print(' dumping seance -', seance_name) intervention_files.append(seance_name) if not os.path.exists(inter_dir): os.makedirs(inter_dir) print_json(resp, os.path.join(inter_dir, seance_name + '.json')) if seance_name: step['has_interventions'] = True step['intervention_files'] = intervention_files break last_text_id = get_text_id(texte_url) last_text_typeparl = typeparl return procedure if __name__ == '__main__': process(sys.argv[1], open_json(os.path.join(sys.argv[1], 'viz/procedure.json')))
import sys, os

try:
    from .common import open_json
except (ImportError, SystemError, ValueError):
    # The relative import is unavailable (the exception type depends on the
    # Python version); fall back to the top-level module. A bare `except:`
    # here would also have swallowed KeyboardInterrupt and SystemExit.
    from common import open_json

# Table of valid transitions: procedure[previous_step][next_step] is looked up
# below; it is loaded from doc/valid_procedure.json, one directory above this
# file's directory.
procedure_file = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'doc', 'valid_procedure.json')
procedure = open_json(procedure_file)


def find_anomalies(dossiers, verbose=True):
    """Count step transitions that the valid-procedure table does not allow.

    Each step is named by joining its non-empty ``stage``, ``institution``
    and ``step`` values with `` • ``. A transition from the previous step
    name (``''`` for the first step) is an anomaly when
    ``procedure[prev][name]`` is missing or is ``False``.

    Args:
        dossiers: sized iterable of dicts with a ``'steps'`` list.
        verbose: when true, print each anomaly with the dossier URLs and a
            final summary line.

    Returns:
        The number of anomalies found over all dossiers.
    """
    anomalies = 0
    for dos in dossiers:
        prev_step = ''
        for step in dos['steps']:
            parts = (step.get('stage'), step.get('institution'), step.get('step', ''))
            step_name = ' • '.join(x for x in parts if x)
            # `is False` on purpose: other falsy values stored in the table
            # are treated as valid transitions.
            if procedure.get(prev_step, {}).get(step_name, False) is False:
                if verbose:
                    print('INCORRECT', prev_step, '->', step_name)
                    print(dos.get('url_dossier_senat'), '|', dos.get('url_dossier_assemblee'))
                    print()
                anomalies += 1
            prev_step = step_name

    if verbose and anomalies:
        print(anomalies, 'anomalies (', len(dossiers), 'doslegs)')
    return anomalies
def process(output_dir, dos):
    """Compute summary statistics for one dossier.

    Reads ``viz/interventions.json`` under *output_dir*, counts the
    amendments yielded by ``find_amendements(output_dir)`` and compares the
    first and last texts returned by ``find_first_and_last_texts(dos)``.

    Args:
        output_dir: directory holding the generated ``viz`` files.
        dos: dossier dict with at least ``'steps'``; ``'total_days'`` is only
            computed when it also has ``'end'`` (and then ``'beginning'``).

    Returns:
        A dict of counters and ratios (word, speaker, intervention and
        amendment totals, failed steps, article counts, text lengths, ...).

    Raises:
        ZeroDivisionError: if the first text has no article, because
            ``ratio_articles_growth`` divides by ``len(first_arts)``.
    """
    stats = {}

    intervs = open_json(os.path.join(output_dir, 'viz/interventions.json'))
    stats['total_mots'] = sum(
        division['total_mots']
        for step in intervs.values()
        for division in step['divisions'].values()
    )
    # a set on purpose: each speaker is counted once across all steps
    stats["total_intervenants"] = len({
        orat
        for step in intervs.values()
        for orat in step['orateurs'].keys()
    })
    # Sum over a generator, NOT a set: the former set comprehension collapsed
    # divisions that happened to have the same count and under-reported the
    # total.
    stats["total_interventions"] = sum(
        division['total_intervs']
        for step in intervs.values()
        for division in step['divisions'].values()
    )

    for counter in (
        'total_amendements',
        "total_amendements_adoptes",
        "total_amendements_parlementaire",
        "total_amendements_parlementaire_adoptes",
        "total_amendements_gouvernement",
        "total_amendements_gouvernement_adoptes",
    ):
        stats[counter] = 0

    for amdt in find_amendements(output_dir):
        from_government = amdt["groupe"] == "Gouvernement"
        stats['total_amendements'] += 1
        if amdt["sort"] == "adopté":
            stats["total_amendements_adoptes"] += 1
            if from_government:
                stats["total_amendements_gouvernement_adoptes"] += 1
            else:
                stats["total_amendements_parlementaire_adoptes"] += 1
        if from_government:
            stats["total_amendements_gouvernement"] += 1
        else:
            stats["total_amendements_parlementaire"] += 1

    stats["echecs_procedure"] = sum(
        1 for step in dos['steps'] if step.get("echec"))

    if 'end' in dos:
        # inclusive day count, hence the + 1
        stats["total_days"] = (datize(dos["end"]) - datize(dos["beginning"])).days + 1

    first_text, first_arts, last_text, last_arts = find_first_and_last_texts(dos)

    stats["total_input_articles"] = len(first_arts)
    stats["total_output_articles"] = len(last_arts)
    stats["ratio_articles_growth"] = len(last_arts) / len(first_arts)

    stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles(
        first_arts, last_arts)
    stats["input_text_length"] = len("\n".join(first_text))
    stats["output_text_length"] = len("\n".join(last_text))

    return stats
if amdt["groupe"] == "Gouvernement": stats["total_amendements_gouvernement"] += 1 else: stats["total_amendements_parlementaire"] += 1 stats["echecs_procedure"] = len( [step for step in dos['steps'] if step.get("echec")]) if 'end' in dos: stats["total_days"] = (datize(dos["end"]) - datize(dos["beginning"])).days + 1 first_text, first_arts, last_text, last_arts = find_first_and_last_texts( dos) stats["total_input_articles"] = len(first_arts) stats["total_output_articles"] = len(last_arts) stats["ratio_articles_growth"] = len(last_arts) / len(first_arts) stats["ratio_texte_modif"] = 1 - compute_similarity_by_articles( first_arts, last_arts) stats["input_text_length"] = len("\n".join(first_text)) stats["output_text_length"] = len("\n".join(last_text)) return stats if __name__ == '__main__': print_json(process(sys.argv[1], open_json(sys.argv[2])))
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re, csv, os, sys
from difflib import ndiff, SequenceMatcher
from common import json, open_json, print_json

# sys.argv[1] used to be indexed directly, which raises IndexError when the
# script is run without argument and made the message below unreachable.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else None
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # sys.exit rather than the interactive-only `exit` helper from `site`
    sys.exit(1)
procedure = open_json(sourcedir, 'procedure.json')


def getParentFolder(root, f):
    """Return the base name of the directory that contains ``root/f``."""
    # renamed from `abs`, which shadowed the builtin
    abs_path = os.path.abspath(os.path.join(root, f))
    return os.path.basename(os.path.abspath(os.path.join(abs_path, os.pardir)))


def unifyStatus(status):
    """Reduce a status string to one of "sup", "new" or "none".

    The status is UTF-8 encoded, stripped of leading whitespace and of any
    trailing ``s``, ``.`` or space characters before being matched.
    """
    status = status.encode('utf-8')
    status = status.lstrip().rstrip('s. ')
    if status.endswith('constitution') or status.startswith('sup'):
        return "sup"
    if status.startswith("nouveau"):
        return "new"
    return "none"


def create_step(step_id, directory, article=None, echec_type=None):
    # NOTE(review): only the first statement of this function is visible in
    # the reviewed excerpt; it is kept unchanged.
    s = {}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 2 script: sets `debats_order` on every step of procedure.json and
# clears `has_interventions` on steps absent from interventions.json.

import re, csv, os, sys
from common import open_json, print_json

# sys.argv[1] used to be indexed directly, which raises IndexError when the
# script is run without argument and made the message below unreachable.
sourcedir = sys.argv[1] if len(sys.argv) > 1 else None
if not sourcedir:
    sys.stderr.write('Error, no input directory given')
    # sys.exit rather than the interactive-only `exit` helper from `site`
    sys.exit(1)

procedure = open_json(os.path.join(sourcedir, 'procedure'), 'procedure.json')
articles = open_json(os.path.join(sourcedir, 'viz'), 'articles_etapes.json')['articles']
intervs = open_json(os.path.join(sourcedir, 'viz'), 'interventions.json')

# directory -> order, taken from the first two characters of the first
# `id_step` seen for that directory
good_steps = {}
for _, a in articles.iteritems():
    for s in a['steps']:
        stepid = s['directory']
        if stepid not in good_steps:
            good_steps[stepid] = int(s['id_step'][:2])

for i, s in enumerate(procedure['steps']):
    s['debats_order'] = None
    if 'has_interventions' in s and s['has_interventions'] and s['directory'] not in intervs:
        print >> sys.stderr, "WARNING: removing nearly empty interventions steps for %s" % s['directory'].encode('utf-8')
        s['has_interventions'] = False
    if 'directory' in s:
        if i == len(procedure['steps'])-1 and not s['enddate']:
            # last step without end date: placed after every known step.
            # NOTE(review): max() raises ValueError if good_steps is empty.
            s['debats_order'] = max(good_steps.values()) + 1
        else:
            s['debats_order'] = good_steps.get(s['directory'], None)
def computeStatOverFile(self, file):
    """Fold the dossiers loaded via ``open_json("data", file)`` into the running statistics.

    Every entry of the loaded ``"dossiers"`` list updates the totals and
    counters stored on ``self`` and adds one summary dict to
    ``self.textValues``, keyed by the dossier's ``"id"``.

    The text growth ratio divides by ``input_text_length2``, so a zero input
    length raises ``ZeroDivisionError``.
    """
    loaded = open_json("data", file)
    for dossier in loaded["dossiers"]:
        self.countDossiers += 1

        days = dossier["total_days"]
        self.totalDays += days
        amendments = dossier["total_amendements"]
        self.totalAmendement += amendments
        self.totalAmendementParl += dossier["total_amendements_parlementaire"]
        adopted = dossier["total_amendements_adoptes"]
        self.totalAmendementAdoptes += adopted
        self.totalAmendementParlAdoptes += dossier["total_amendements_parlementaire_adoptes"]
        speakers = dossier["total_intervenant"]
        self.totalIntervenant += speakers
        self.totalArticles += dossier["total_articles"]
        self.totalArticlesModified += dossier["total_articles_modified"]
        accidents = dossier["total_accident_procedure"]
        self.totalAccidentProcedure += accidents
        if accidents > 0:
            self.nbDossiersAccidentProcedure += 1

        # Text sizes (the "...2" length fields)
        length_before = dossier["input_text_length2"]
        self.textSizeOrig += length_before
        length_after = dossier["output_text_length2"]
        self.textSizeFinal += length_after

        if dossier["output_text_length"] < dossier["input_text_length"]:
            self.countTextReduced += 1
        if float(length_after) / length_before > 2.0:
            self.countTextWithDoubledVolume2 += 1
        if length_after < length_before:
            self.countTextReduced2 += 1
        if amendments > 0:
            self.countDossiersAmende += 1
        modified_ratio = dossier["ratio_texte_modif"]
        if modified_ratio >= 0.5:
            self.countModifSup50 += 1

        # Relative growth of the text, also stored in the summary below
        growth = (length_after - length_before) / float(length_before)
        if growth > 0.5:
            self.countInflaSup50 += 1
        if growth > 1:
            self.countInflaSup100 += 1

        # Per-dossier summary
        summary = {}
        self.textValues[dossier["id"]] = summary
        summary["short_title"] = dossier["short_title"]
        summary["input_text_length"] = length_before
        summary["output_text_length"] = length_after
        summary["inflation"] = growth
        summary["modification"] = modified_ratio
        summary["amendement"] = amendments
        summary["amendementAdoptes"] = adopted
        # a dossier without amendment divides by 1 instead of 0
        divisor = float(amendments if amendments != 0 else 1)
        summary["tauxAdoption"] = adopted / divisor
        summary["daysBeforeAdoption"] = days
        summary["ProcedureAccident"] = accidents
        summary["NbIntervenants"] = speakers
#!/usr/bin/env python # -*- coding: utf-8 -*- import re, csv, os, sys from common import open_json, print_json sourcedir = sys.argv[1] if not sourcedir: sys.stderr.write('Error, no input directory given') exit(1) procedure = open_json(os.path.join(sourcedir, 'procedure'), 'procedure.json') articles = open_json(os.path.join(sourcedir, 'viz'), 'articles_etapes.json')['articles'] intervs = open_json(os.path.join(sourcedir, 'viz'), 'interventions.json') good_steps = {} for _, a in articles.iteritems(): for s in a['steps']: stepid = s['directory'] if stepid not in good_steps: good_steps[stepid] = int(s['id_step'][:2]) for i, s in enumerate(procedure['steps']): s['debats_order'] = None if 'has_interventions' in s and s['has_interventions'] and s[ 'directory'] not in intervs: print >> sys.stderr, "WARNING: removing nearly empty interventions steps for %s" % s[ 'directory'].encode('utf-8') s['has_interventions'] = False if 'directory' in s: if i == len(procedure['steps']) - 1 and not s['enddate']:
def process(OUTPUT_DIR, procedure):
    """Download and store the amendments and interventions of each debate step.

    For every step of ``procedure['steps']`` whose ``step`` is ``commission``
    or ``hemicycle`` (the first step and CMP commission steps are skipped):

    * the amendments of the text discussed at that step are downloaded from
      nosdeputes.fr or nossenateurs.fr, filtered to those matching the step,
      grouped by subject and written to ``viz/amendements_<directory>.json``;
    * the seances about that text are downloaded and each non-empty one is
      dumped under ``procedure/<directory>/interventions``.

    Steps are updated in place with ``nb_amendements`` and, when seances were
    found, ``has_interventions`` and ``intervention_files``.

    Returns the (mutated) ``procedure``.
    """
    context = Context([0, OUTPUT_DIR], load_parls=True)

    #['Indéfini', 'Adopté', 'Irrecevable', 'Rejeté', 'Retiré', 'Tombe', 'Non soutenu', 'Retiré avant séance', 'Rectifié', 'Favorable' ,'Satisfait']
    def simplify_sort(sort):
        """Map an amendment outcome to "adopté", "rejeté", "en attente" or "non-voté"."""
        sort = sort.lower()
        # NOTE(review): these are substring tests against a literal, so any
        # substring of "adopté favorable" (including the empty string) is
        # reported as "adopté".
        if sort in "adopté favorable":
            return "adopté"
        if sort in "rejeté ":
            return "rejeté"
        if sort in "indéfini":
            return "en attente"
        return "non-voté"

    re_clean_first = re.compile(r'^(.*?)(,| et) .*$')
    def first_author(signataires):
        """Return the first signatory followed by ", …", or "" for none/government."""
        if signataires is None or "gouvernement" in signataires.lower():
            return ""
        # unchanged when there is a single signatory (the pattern does not match)
        return re_clean_first.sub(r'\1, …', signataires)

    def find_groupe(amd):
        """Return "Gouvernement" or the slug of the group with the most signatories.

        Returns "" when the amendment lists no group. On a tie the group that
        reached the maximum first wins (strict ``>`` comparison).
        """
        if amd['signataires'] and "gouvernement" in amd['signataires'].lower():
            return "Gouvernement"
        ct = {}
        maxc = 0
        result = ""
        for gpe in amd['groupes_parlementaires']:
            g = gpe['groupe']
            count = 1
            # the new API compacts the groups as "<group>:<count>"
            if ':' in g:
                g, count = gpe['groupe'].split(':')
                count = int(count)
            g = slug_groupe(g)
            if g not in ct:
                ct[g] = 0
            ct[g] += count
            if ct[g] > maxc:
                maxc = ct[g]
                result = g
        return result

    def add_link(links, pA, pB, weight=1):
        """Add *weight* to the undirected link between *pA* and *pB* in *links*."""
        # order the pair so that (a, b) and (b, a) share the same key
        p1 = min(pA, pB)
        p2 = max(pA, pB)
        linkid = "%s-%s" % (p1, p2)
        if linkid not in links:
            links[linkid] = {
                "1": p1,
                "2": p2,
                "w": 0
            }
        links[linkid]["w"] += weight

    article_number_regexp = re.compile(r'article (1er.*|(\d+).*)$', re.I)
    def sort_amendements(texte, amendements):
        """Set ``ordre_article`` on every amendment and return *amendements*.

        *texte* is the list of text elements; the ``order`` of each titled
        article, multiplied by 10, is the base used to place amendments.
        """
        articles = {}
        for article in texte:
            if article['type'] == 'article':
                titre = article.get('titre')
                if titre:
                    # x10 leaves room for the -1 / +1 "avant" / "après" offsets
                    articles[titre.lower()] = article.get('order') * 10

        def solveorder(art):
            """Return the sort position of an amendment subject.

            0 for the title, 1 for motions, 5 for whole-text subjects,
            otherwise the matching article's position (minus or plus 1 for
            "avant" / "après"); 10000 is the base when no article matches.
            """
            nonlocal articles
            art = art.lower()
            order = 10000;
            if art == 'titre' or art.startswith('intitul'):
                return 0
            elif art.startswith('motion'):
                return 1
            elif art.startswith('projet') \
                    or art.startswith('proposition') \
                    or art.startswith('texte'):
                return 5
            else:
                m = article_number_regexp.search(art)
                if m:
                    # try the full article name first, then its number alone
                    if articles.get(m.group(1)):
                        order = articles.get(m.group(1))
                    elif articles.get(m.group(2)):
                        order = articles.get(m.group(2))
                    if 'avant' in art:
                        order -= 1
                    elif 'après' in art or 'apres' in art:
                        order += 1
            return order

        for amendement in amendements:
            amdt = amendement['amendement']
            amdt['ordre_article'] = solveorder(amdt['sujet'])

        return amendements

    CACHE_BUSTING = 'cache=%d' % time()
    if 'url_jo' in procedure:
        CACHE_BUSTING = 'cache=5feb2018' # fixed cache busting for promulgated laws
    steps = {}  # immediately replaced two lines below
    last_text_id, last_text_typeparl = None, None
    steps = procedure['steps']
    for i, step in enumerate(steps):
        print(' * step -', step.get('stage'), step.get('step'), step.get('source_url'))
        if step.get('step') not in ('commission', 'hemicycle'):
            continue
        if step.get('step') == 'commission' and step.get('stage') == 'CMP':
            continue

        if i == 0:
            continue

        last_step_index = get_previous_step(steps, i, is_old_procedure=procedure.get('use_old_procedure'))
        last_step = steps[last_step_index]
        last_step_with_good_text_number = steps[get_previous_step(steps, i, is_old_procedure=procedure.get('use_old_procedure'), get_depot_step=True) ]
        texte_url = last_step_with_good_text_number.get('source_url')

        if step.get('stage') != 'CMP' and last_step_with_good_text_number.get('institution') != step.get('institution'):
            print('ERROR - last step is from another institution', file=sys.stderr)
            continue

        # for a CMP hemicycle we have to get the right text inside the CMP commission
        if step.get('stage') == 'CMP' and step.get('step') == 'hemicycle':
            urls = [last_step.get('source_url')]
            if 'cmp_commission_other_url' in last_step:
                urls.append(last_step.get('cmp_commission_other_url'))
            an_url = [url for url in urls if 'nationale.fr' in url]
            senat_url = [url for url in urls if 'senat.fr' in url]
            if step.get('institution') == 'assemblee' and an_url:
                texte_url = an_url[0]
            elif step.get('institution') == 'senat' and senat_url:
                texte_url = senat_url[0]
            else:
                print('WARNING - missing the CMP commission text for', step.get('source_url'), file=sys.stderr)
                continue

        if texte_url is None:
            print('ERROR - no texte url', step.get('source_url'), file=sys.stderr)
            continue

        texte = open_json(os.path.join(context.sourcedir, 'procedure', last_step['directory']), 'texte/texte.json')

        # the API to query is chosen from the domain of the text URL
        amdt_url = None
        if "nationale.fr" in texte_url:
            if 'assemblee_legislature' not in procedure:
                print(' + no AN legislature - pass text')
                continue
            amdt_url = 'https://nosdeputes.fr/%s/amendements/%s/json?%s' % (procedure.get('assemblee_legislature'), get_text_id(texte_url), CACHE_BUSTING)
        elif "senat.fr" in texte_url:
            amdt_url = 'https://nossenateurs.fr/amendements/%s/json?%s' % (get_text_id(texte_url), CACHE_BUSTING)

        if amdt_url is None:
            continue

        print(' * downloading amendments:', amdt_url, 'for', texte_url)

        amendements_src = download(amdt_url).json().get('amendements', [])

        # TA texts can be zero-padded or not (TA0XXX or TAXXX), we try both
        if 'amendements/TA' in amdt_url:
            textid = get_text_id(texte_url)
            if 'TA0' in textid:
                alternative_url = amdt_url.replace(textid, 'TA' + textid.replace('TA', '').lstrip('0'))
            else:
                alternative_url = amdt_url.replace(textid, 'TA' + textid.replace('TA', '').zfill(4))
            print(' WARNING: TA - trying alternative url too', alternative_url)
            amendements_src += download(alternative_url).json().get('amendements', [])

        print(' parsing amendments:', len(amendements_src))

        # ignore amendments if they are not for the correct step
        amendements_src_filtered = []
        for amd in amendements_src:
            a = amd['amendement']
            if step.get('institution') == 'assemblee':
                # commission amendments can have two forms
                #    - /amendements/LOI/NUM.asp (13th legislature)
                #    - /amendements/LOI/COMMISSION_NAME/NUM.asp (14+ legislature)
                # hemicycle amendments are:
                #    - /amendements/LOI/NUM.asp (13th legislature)
                #    - /amendements/LOI/AN/NUM.asp (14+ legislature)
                amdt_step = 'hemicycle'
                if '/cr-' in a['source']:
                    amdt_step = 'commission'
                else:
                    url_parts = a['source'].split('amendements/')[1].split('/')
                    if len(url_parts) == 3 and url_parts[1] != 'AN':
                        amdt_step = 'commission'
            elif step.get('institution') == 'senat':
                amdt_step = 'commission' if '/commissions/' in a['source'] else 'hemicycle'
            else:
                # CMP - there's no way for now to distinguish the step
                amdt_step = step['step']
            if step['step'] != amdt_step:
                continue
            amendements_src_filtered.append(amd)

        if len(amendements_src_filtered) != len(amendements_src):
            print('WARNING: amendments ignored (not the right step) %s' % (len(amendements_src) - len(amendements_src_filtered)), file=sys.stderr)
        amendements_src = amendements_src_filtered

        step['nb_amendements'] = len(amendements_src)

        if len(amendements_src) > 0:
            amendements_src = sort_amendements(texte['articles'], amendements_src)

            typeparl, urlapi = identify_room(texte_url, legislature=step.get('assemblee_legislature', procedure.get('assemblee_legislature')))

            sujets = {}    # subject title -> {'titre', 'order', 'amendements'}
            groupes = {}   # filled by context.add_groupe

            fix_order = False
            orders = []    # subject titles in order of first appearance
            parls = {}     # parliamentarian slug -> compact description
            links = {}     # co-signature links, see add_link
            idents = {}    # cle_unicite -> ids of signatories seen with that key
            for amd in amendements_src:
                a = amd['amendement']
                if "sort" not in a:
                    print('WARNING: amendment has no sort %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                if a["sort"] == "Rectifié":
                    continue
                if "sujet" not in a or not a["sujet"]:
                    if a["sort"] not in ["Irrecevable", "Retiré avant séance"]:
                        print('WARNING: amendment has no subject %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                key = a['sujet']
                if not key:
                    print('WARNING: amendment has no subject %s\n' % a['url_nos%ss' % typeparl], file=sys.stderr)
                    continue
                if key not in sujets:
                    orders.append(key)
                    sujets[key] = {
                        'titre': key,
                        'order': a['ordre_article'],
                        'amendements': []
                    }
                # values above 9000 come from solveorder's 10000 base (no article matched)
                if a['ordre_article'] > 9000:
                    fix_order = True

                gpe = find_groupe(a)
                if not gpe:
                    if a["sort"] != "Irrecevable":
                        sys.stderr.write('WARNING: no groupe found for %s\n' % a['url_nos%ss' % typeparl])
                    gpe = "Inconnu"
                context.add_groupe(groupes, gpe, urlapi)

                sujets[key]['amendements'].append({
                    'numero': a['numero'],
                    'date': a['date'],
                    'sort': simplify_sort(a['sort']),
                    'groupe': gpe,
                    'id_api': a['id'],
                    'aut': first_author(a['signataires'])
                })

                cosign = []
                hmd5 = a["cle_unicite"]
                if hmd5 not in idents:
                    idents[hmd5] = []
                for parll in a["parlementaires"]:
                    parl = parll["parlementaire"]
                    if parl not in parls:
                        p = context.get_parlementaire(urlapi, parl)
                        parls[parl] = {
                            "i": p["id"],
                            "s": parl,
                            "a": 0,
                            "n": p["nom"],
                            "g": p["groupe_sigle"],
                            "p": p["place_en_hemicycle"]
                        }
                    pid = parls[parl]["i"]
                    parls[parl]["a"] += 1
                    # link with the co-signatories of this amendment seen so far
                    for cid in cosign:
                        add_link(links, pid, cid)
                        #add_link(links, pid, cid, 2)
                    cosign.append(pid)
                    # link with everyone already recorded under the same cle_unicite
                    for cid in idents[hmd5]:
                        add_link(links, pid, cid)
                    idents[hmd5].append(pid)

            if fix_order:
                orders.sort(key=cmp_to_key(compare_articles))
                # NOTE: rebinds the outer loop variable `i`; it is not read
                # again during this iteration.
                for i, k in enumerate(orders):
                    sujets[k]["order"] = i

            amdtsfile = os.path.join(context.sourcedir, 'viz', 'amendements_%s.json' % step['directory'])
            data = {'id_step': step['directory'],
                    'api_root_url': amdapi_link(urlapi),
                    'groupes': groupes,
                    'sujets': sujets}
            print_json(data, amdtsfile)

            # the links data is built but not written: the call below is disabled
            linksfile = os.path.join(context.sourcedir, 'viz', 'amendements_links_%s.json' % step['directory'])
            data = {'id_step': step['directory'],
                    'links': list(links.values()),
                    'parlementaires': dict((p["i"], dict((k, p[k]) for k in "psang")) for p in list(parls.values()))}
            # print_json(data, linksfile)

        ########### INTERVENTIONS #############
        # TODO: move this to a dedicated file

        print(' * downloading interventions')
        typeparl, urlapi = identify_room(texte_url, legislature=step.get('assemblee_legislature', procedure.get('assemblee_legislature')))
        inter_dir = os.path.join(context.sourcedir, 'procedure', step['directory'], 'interventions')
        commission_or_hemicycle = '?commission=1' if step.get('step') == 'commission' else '?hemicycle=1'
        # TODO: TA texts can be zero-padded or not (TA0XXX or TAXXX), we should try both
        seance_name = None
        intervention_files = []

        # also try the previously processed text id when it belongs to the same room
        texts = (get_text_id(texte_url),)
        if last_text_typeparl == typeparl:
            texts = (get_text_id(texte_url), last_text_id)

        for loiid in texts:
            url_seances = 'https://{}.fr/seances/{}/json{}'.format(urlapi, loiid, commission_or_hemicycle)
            print(' * downloading seances - ', url_seances)
            for id_seance_obj in sorted(download(url_seances).json().get('seances', []), key=lambda x: x["seance"]):
                url_seance = 'https://{}.fr/seance/{}/{}/json'.format(urlapi, id_seance_obj['seance'], loiid)
                print(' downloading seance - ', url_seance)
                resp = download(url_seance).json()
                if resp.get('seance'):
                    # the file name is derived from the first intervention of the seance
                    inter = resp.get('seance')[0]['intervention']
                    seance_name = inter['date'] + 'T' + inter['heure'] + '_' + inter['seance_id']
                    print(' dumping seance -', seance_name)
                    intervention_files.append(seance_name)
                    if not os.path.exists(inter_dir):
                        os.makedirs(inter_dir)
                    print_json(resp, os.path.join(inter_dir, seance_name + '.json'))
            # stop at the first text id for which at least one seance was dumped
            if seance_name:
                step['has_interventions'] = True
                step['intervention_files'] = intervention_files
                break

        last_text_id = get_text_id(texte_url)
        last_text_typeparl = typeparl

    return procedure
def process(dos):
    """Add a ``'liens'`` list to every article of every step of *dos*.

    For each step, the articles are taken from ``'articles_completed'`` when
    present, else from ``'articles'``; steps without articles are skipped.
    Each alinea (looked up by its zero-padded key ``"001"``, ``"002"``, ...)
    is scanned with ``metslesliens.donnelescandidats``; for every candidate
    carrying a ``'texte'`` key, the slice of the alinea delimited by the
    candidate's ``'index'`` pair is appended to the article's ``'liens'``.

    *dos* is modified in place and returned.
    """
    for step in dos['steps']:
        articles = step.get('articles_completed', step.get('articles'))
        if not articles:
            continue
        for item in articles:
            if item["type"] != "article":
                continue
            item['liens'] = []
            for number in range(1, len(item["alineas"]) + 1):
                text = item["alineas"]["%03d" % number]
                candidates = metslesliens.donnelescandidats(text, 'structuré')
                for candidat in candidates:
                    if 'texte' not in candidat:
                        continue
                    start = candidat['index'][0]
                    end = candidat['index'][1]
                    # A richer entry (url / texte / alinea dict) was sketched
                    # here but is disabled; only the raw link text is stored.
                    item['liens'].append(text[start:end])
    return dos


if __name__ == '__main__':
    print_json(process(open_json(sys.argv[1])))
myprint('NOK:', score_nok) myprint('OK:', score_ok) return score_nok, score_ok if __name__ == '__main__': import glob sum_ok = 0 sum_nok = 0 missing = 0 perfect = 0 less_than_1 = 0 all_doslegs = open_json(sys.argv[2]) lafabrique_doslegs = list(sorted(glob.glob(sys.argv[1]))) scored = [] for file in lafabrique_doslegs: print('======') print('======') print(file) me = None proc = open_json(file) proc_url_senat = proc.get('url_dossier_senat', '').replace( 'http://', 'https://').replace('/dossierleg/', '/dossier-legislatif/') for dos in all_doslegs: dos_url_senat = dos.get('url_dossier_senat', '').replace( 'http://', 'https://').replace('/dossierleg/', '/dossier-legislatif/')
try: from .common import open_json, print_json except: from common import open_json, print_json if len(sys.argv) < 2: print('USAGE: `steps_as_dot.py <path_to_json>`') sys.exit() mode = "detailed" if len(sys.argv) == 3 else "simple" procedure_file = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'doc', 'valid_procedure.json') procedure = open_json(procedure_file) API_DIRECTORY = sys.argv[1] all_senat_jo = [open_json(path) for path \ in glob.glob(os.path.join(API_DIRECTORY, '*/viz/procedure.json'))] all_senat_jo = [dos for dos in all_senat_jo if dos.get('end')] # all_senat_jo = [x for x in open_json(sys.argv[1]) if len(x['steps']) > 2] # all_senat_jo = random.sample(all_senat_jo, 5) nodes_names_size = {} step_trans = {} steps_logs = "" for dos in all_senat_jo: prev_step = None last_step = '' for step_i, step in enumerate(dos.get('steps', [])):
a["order"] = order order += 1 write_json(a) # do not keep already deleted articles but mark as deleted missing ones elif not re_suppr.match(a["statut"]) or texte.get('echec', ''): # if the last line of text was some dots, it means that we should keep # the articles as-is if they are not deleted if line['type'] == 'dots': # ex: https://www.senat.fr/leg/ppl09-304.html log("DEBUG: Recovering art as non-modifié via dots %s (leftovers)" % cur) a["statut"] = "non modifié" a["order"] = order order += 1 write_json(a) else: log("DEBUG: Marking art %s as supprimé (leftovers)" % cur) a["statut"] = "supprimé" a["alineas"] = dict() a["order"] = order order += 1 write_json(a) return ALL_ARTICLES if __name__ == '__main__': serialized = open_json(sys.argv[1]) result = complete(**serialized) # print_json(result)