Beispiel #1
0
def _get_next_documents(chambre_dico, chambre_dico_nl, document_chambre):
    """Attach the follow-up PDFs of a Chambre document to ``document_chambre``.

    Reads the French ``'Document(s) suivant(s)'`` entry of ``chambre_dico`` and
    the Dutch ``'Opvolgend(e) document(en)'`` entry of ``chambre_dico_nl``,
    splits both with ``document_pdf_part_cutter`` and walks the two results in
    lockstep.  Every pair produces one saved ``OtherDocumentChambrePdf`` that
    is appended to ``document_chambre.other_pdfs``.

    Nothing happens when the French entry is missing or falsy.

    Returns:
        None.

    Raises:
        KeyError: the French entry is present but the Dutch one is not
            (assuming ``chambre_dico_nl`` behaves like a dict).
        AttributeError: an author link has no ``key=<digits>`` in its href,
            or an expected tag (``font``, ``i``, ...) is absent.
        Whatever ``Deputy.objects.get`` raises when the lookup fails.
    """
    if not chambre_dico.get('Document(s) suivant(s)'):
        return

    parts_fr = document_pdf_part_cutter(chambre_dico[u'Document(s) suivant(s)'])
    parts_nl = document_pdf_part_cutter(chambre_dico_nl[u'Opvolgend(e) document(en)'])

    # zip() stops at the shorter sequence: an entry without a counterpart in
    # the other language is silently skipped.
    for d, d_nl in zip(parts_fr, parts_nl):
        # NOTE(review): clean_text is assumed to be a pure function, so its
        # result is computed once and shared by the log line and the model.
        title_fr = clean_text(d[0].font.text)
        logger.debug("add pdf %s", title_fr)

        doc = OtherDocumentChambrePdf()
        # Use the link target when the first row has one, else the cell text.
        doc.url = d[0].a['href'] if d[0].a else d[0].td.text
        doc.type["fr"] = title_fr
        doc.type["nl"] = clean_text(d_nl[0].font.text)
        doc.distribution_date = d[1]('td')[-1].text

        # Every row from index 2 onwards is read as one author.
        for dep, dep_nl in zip(d[2:], d_nl[2:]):
            if dep.a:
                # Raw string: ``\d`` must reach the regex engine untouched
                # instead of relying on Python keeping an unknown escape.
                lachambre_id = re.search(r'key=(\d+)', dep.a["href"]).groups()[0]
                deputy = Deputy.objects.get(lachambre_id=lachambre_id)
                author = {
                    "lachambre_id": deputy.lachambre_id,
                    "id": deputy.id,
                    "full_name": deputy.full_name,
                }
            else:
                # No link on the row: -1 ids are stored and the name is taken
                # from the third child node of the last cell.
                author = {
                    "lachambre_id": -1,
                    "id": -1,
                    "full_name": dep('td')[-1].contents[2].strip(),
                }
            # The role is the <i> text minus its first and last character
            # (presumably surrounding brackets -- confirm against the page).
            author["role"] = {
                "fr": dep('td')[-1].i.text[1:-1],
                "nl": dep_nl('td')[-1].i.text[1:-1],
            }
            doc.authors.append(author)

        doc.save()
        document_chambre.other_pdfs.append(doc)
Beispiel #2
0
def _get_next_documents(chambre_dico, chambre_dico_nl, document_chambre):
    """Attach the follow-up PDFs of a Chambre document to ``document_chambre``.

    Reads the French ``'Document(s) suivant(s)'`` entry of ``chambre_dico`` and
    the Dutch ``'Opvolgend(e) document(en)'`` entry of ``chambre_dico_nl``,
    splits both with ``document_pdf_part_cutter`` and walks the two results in
    lockstep.  Every pair produces one saved ``OtherDocumentChambrePdf`` that
    is appended to ``document_chambre.other_pdfs``.

    Nothing happens when the French entry is missing or falsy.

    Returns:
        None.

    Raises:
        KeyError: the French entry is present but the Dutch one is not
            (assuming ``chambre_dico_nl`` behaves like a dict).
        AttributeError: an author link has no ``key=<digits>`` in its href,
            or an expected tag (``font``, ``i``, ...) is absent.
        Whatever ``Deputy.objects.get`` raises when the lookup fails.
    """
    if not chambre_dico.get('Document(s) suivant(s)'):
        return

    parts_fr = document_pdf_part_cutter(chambre_dico[u'Document(s) suivant(s)'])
    parts_nl = document_pdf_part_cutter(chambre_dico_nl[u'Opvolgend(e) document(en)'])

    # zip() stops at the shorter sequence: an entry without a counterpart in
    # the other language is silently skipped.
    for d, d_nl in zip(parts_fr, parts_nl):
        # NOTE(review): clean_text is assumed to be a pure function, so its
        # result is computed once and shared by the log line and the model.
        title_fr = clean_text(d[0].font.text)
        logger.debug("add pdf %s", title_fr)

        doc = OtherDocumentChambrePdf()
        # Use the link target when the first row has one, else the cell text.
        doc.url = d[0].a['href'] if d[0].a else d[0].td.text
        doc.type["fr"] = title_fr
        doc.type["nl"] = clean_text(d_nl[0].font.text)
        doc.distribution_date = d[1]('td')[-1].text

        # Every row from index 2 onwards is read as one author.
        for dep, dep_nl in zip(d[2:], d_nl[2:]):
            if dep.a:
                # Raw string: ``\d`` must reach the regex engine untouched
                # instead of relying on Python keeping an unknown escape.
                lachambre_id = re.search(r'key=(\d+)', dep.a["href"]).groups()[0]
                deputy = Deputy.objects.get(lachambre_id=lachambre_id)
                author = {
                    "lachambre_id": deputy.lachambre_id,
                    "id": deputy.id,
                    "full_name": deputy.full_name,
                }
            else:
                # No link on the row: -1 ids are stored and the name is taken
                # from the third child node of the last cell.
                author = {
                    "lachambre_id": -1,
                    "id": -1,
                    "full_name": dep('td')[-1].contents[2].strip(),
                }
            # The role is the <i> text minus its first and last character
            # (presumably surrounding brackets -- confirm against the page).
            author["role"] = {
                "fr": dep('td')[-1].i.text[1:-1],
                "nl": dep_nl('td')[-1].i.text[1:-1],
            }
            doc.authors.append(author)

        doc.save()
        document_chambre.other_pdfs.append(doc)
Beispiel #3
0
def _get_document_senat(dico, dico_nl, document):
    """Build the Senate record for ``document`` from the FR and NL dictionaries.

    When ``dico`` has a truthy ``u"Document Sénat"`` entry, a ``DocumentSenat``
    is filled from that section and from the ``u"Document Senaat"`` section of
    ``dico_nl``, its main PDF is created through ``DocumentSenatPdf.objects``,
    every follow-up document is stored as an ``OtherDocumentSenatPdf``, and
    the saved result is assigned to ``document.document_senat``.

    Returns ``None``; returns early, touching nothing, when the French section
    is missing or falsy.
    """
    if not dico.get(u"Document Sénat"):
        return

    fr_section = dico[u"Document Sénat"]
    nl_section = dico_nl[u"Document Senaat"]

    senat = DocumentSenat()

    # Plain fields: dates, type, comments, author and status.
    senat.deposition_date = fr_section[u"Date de dépôt"].text
    senat.ending_date = get_text_else_blank(fr_section, u"Date de fin")
    senat.type["fr"] = fr_section[u"Type de document"].text
    senat.type["nl"] = nl_section[u"Document type"].text
    comments_fr = get_text_else_blank(fr_section, u'Commentaire')
    senat.comments["fr"] = comments_fr.split(' - ')
    comments_nl = get_text_else_blank(nl_section, u'Commentaar')
    senat.comments["nl"] = comments_nl.split(' - ')
    raw_author = get_text_else_blank(fr_section, u"Auteur(s)")
    senat.author = clean_text(raw_author)
    senat.status["fr"] = get_text_else_blank(fr_section, u'Statut')
    senat.status["nl"] = get_text_else_blank(nl_section, u'Status')

    # The head is expected to hold exactly three "<br />"-separated segments:
    # link (or bare url), document type, session line.
    head_fr = clean_text(str(fr_section[u'head']).replace("&#160;", ""))
    link, kind_fr, session_line = head_fr.split("<br />")
    head_nl = clean_text(str(nl_section[u'head']).replace("&#160;", ""))
    _, kind_nl, _ = head_nl.split("<br />")
    if "href" in link:
        # Keep only the target of the anchor.
        link = re.search('href="([^"]+)', link).groups()[0]

    pdf_type = {"fr": kind_fr.strip(), "nl": kind_nl.strip()}
    # The session value is the second-to-last whitespace-separated token.
    session_token = session_line.split()[-2]
    senat.pdf = DocumentSenatPdf.objects.create(url=link, type=pdf_type, session=session_token)

    if fr_section.get('Document(s) suivant(s)'):
        followups_fr = document_pdf_part_cutter(fr_section[u'Document(s) suivant(s)'])
        followups_nl = document_pdf_part_cutter(nl_section[u'Opvolgend(e) document(en)'])
        for entry_fr, entry_nl in zip(followups_fr, followups_nl):
            logger.debug("add pdf %s" % clean_text(entry_fr[0].font.text))

            extra = OtherDocumentSenatPdf()
            if entry_fr[0].a:
                extra.url = entry_fr[0].a['href']
            else:
                extra.url = entry_fr[0].td.text
            extra.type["fr"] = clean_text(entry_fr[0].font.text)
            extra.type["nl"] = clean_text(entry_nl[0].font.text)
            extra.date = entry_fr[0]('td')[-1].contents[0]

            # One author per row after the first; the name is the third child
            # node of the last cell, the role is the <i> text minus its first
            # and last character.
            extra.authors = [
                {
                    "full_name": unicode(row_fr('td')[-1].contents[2]).strip(),
                    "role": {
                        "fr": row_fr('td')[-1].i.text[1:-1],
                        "nl": row_nl('td')[-1].i.text[1:-1],
                    },
                }
                for row_fr, row_nl in zip(entry_fr[1:], entry_nl[1:])
            ]

            extra.save()
            senat.other_pdfs.append(extra)

    senat.save()
    document.document_senat = senat
Beispiel #4
0
def _get_document_senat(dico, dico_nl, document):
    """Build the Senate record for ``document`` from the FR and NL dictionaries.

    When ``dico`` has a truthy ``u"Document Sénat"`` entry, a ``DocumentSenat``
    is filled from that section and from the ``u"Document Senaat"`` section of
    ``dico_nl``, its main PDF is created through ``DocumentSenatPdf.objects``,
    every follow-up document is stored as an ``OtherDocumentSenatPdf``, and
    the saved result is assigned to ``document.document_senat``.

    Returns ``None``; returns early, touching nothing, when the French section
    is missing or falsy.
    """
    if not dico.get(u"Document Sénat"):
        return

    fr_section = dico[u"Document Sénat"]
    nl_section = dico_nl[u"Document Senaat"]

    senat = DocumentSenat()

    # Plain fields: dates, type, comments, author and status.
    senat.deposition_date = fr_section[u"Date de dépôt"].text
    senat.ending_date = get_text_else_blank(fr_section, u"Date de fin")
    senat.type["fr"] = fr_section[u"Type de document"].text
    senat.type["nl"] = nl_section[u"Document type"].text
    comments_fr = get_text_else_blank(fr_section, u'Commentaire')
    senat.comments["fr"] = comments_fr.split(' - ')
    comments_nl = get_text_else_blank(nl_section, u'Commentaar')
    senat.comments["nl"] = comments_nl.split(' - ')
    raw_author = get_text_else_blank(fr_section, u"Auteur(s)")
    senat.author = clean_text(raw_author)
    senat.status["fr"] = get_text_else_blank(fr_section, u'Statut')
    senat.status["nl"] = get_text_else_blank(nl_section, u'Status')

    # The head is expected to hold exactly three "<br />"-separated segments:
    # link (or bare url), document type, session line.
    head_fr = clean_text(str(fr_section[u'head']).replace("&#160;", ""))
    link, kind_fr, session_line = head_fr.split("<br />")
    head_nl = clean_text(str(nl_section[u'head']).replace("&#160;", ""))
    _, kind_nl, _ = head_nl.split("<br />")
    if "href" in link:
        # Keep only the target of the anchor.
        link = re.search('href="([^"]+)', link).groups()[0]

    pdf_type = {"fr": kind_fr.strip(), "nl": kind_nl.strip()}
    # The session value is the second-to-last whitespace-separated token.
    session_token = session_line.split()[-2]
    senat.pdf = DocumentSenatPdf.objects.create(url=link, type=pdf_type, session=session_token)

    if fr_section.get('Document(s) suivant(s)'):
        followups_fr = document_pdf_part_cutter(fr_section[u'Document(s) suivant(s)'])
        followups_nl = document_pdf_part_cutter(nl_section[u'Opvolgend(e) document(en)'])
        for entry_fr, entry_nl in zip(followups_fr, followups_nl):
            logger.debug("add pdf %s" % clean_text(entry_fr[0].font.text))

            extra = OtherDocumentSenatPdf()
            if entry_fr[0].a:
                extra.url = entry_fr[0].a['href']
            else:
                extra.url = entry_fr[0].td.text
            extra.type["fr"] = clean_text(entry_fr[0].font.text)
            extra.type["nl"] = clean_text(entry_nl[0].font.text)
            extra.date = entry_fr[0]('td')[-1].contents[0]

            # One author per row after the first; the name is the third child
            # node of the last cell, the role is the <i> text minus its first
            # and last character.
            extra.authors = [
                {
                    "full_name": unicode(row_fr('td')[-1].contents[2]).strip(),
                    "role": {
                        "fr": row_fr('td')[-1].i.text[1:-1],
                        "nl": row_nl('td')[-1].i.text[1:-1],
                    },
                }
                for row_fr, row_nl in zip(entry_fr[1:], entry_nl[1:])
            ]

            extra.save()
            senat.other_pdfs.append(extra)

    senat.save()
    document.document_senat = senat