예제 #1
0
 def get_context_data(self, **kwargs):
     """Extend the template context with the dossiers that are no wetsvoorstel.

     Returns:
         The parent context with an extra ``dossiers_no_wetsvoorstel`` entry:
         every ``Dossier`` whose ``dossier_id`` is in neither the active nor
         the inactive dossier id list.
     """
     context = super().get_context_data(**kwargs)
     # Build a new list instead of extending the active list with ``+=``:
     # the in-place form modifies the very object returned by
     # ``get_active_dossier_ids``, which corrupts later calls if the model
     # keeps that list around (NOTE(review): confirm whether it is cached).
     wetsvoorstel_ids = [
         *Dossier.get_active_dossier_ids(),
         *Dossier.get_inactive_dossier_ids(),
     ]
     context['dossiers_no_wetsvoorstel'] = Dossier.objects.exclude(
         dossier_id__in=wetsvoorstel_ids)
     return context
예제 #2
0
def create_wetsvoorstellen_all(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for all dossier ids, processed in reversed order.

    Args:
        skip_existing: Forwarded to ``create_wetsvoorstellen``.
        max_tries: Forwarded to ``create_wetsvoorstellen``.

    Returns:
        Whatever ``create_wetsvoorstellen`` returns (the failed dossiers).
    """
    logger.info('BEGIN')
    all_ids = Dossier.get_dossier_ids()
    # Reverse in place so the last id of the list is handled first.
    all_ids.reverse()
    failed = create_wetsvoorstellen(
        all_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
 def do(self):
     """Delete every ``Dossier`` whose id is not produced by ``get_dossier_ids``."""
     valid_ids = []
     for entry in get_dossier_ids():
         # Combine the main and sub id into the id format stored on Dossier.
         valid_ids.append(
             Dossier.create_dossier_id(entry.dossier_id, entry.dossier_sub_id))
     stale_dossiers = Dossier.objects.exclude(dossier_id__in=valid_ids)
     stale_dossiers.delete()
예제 #4
0
 def handle(self, *args, **options):
     """Delete all ``Dossier`` rows whose id is not in ``Dossier.get_dossier_ids()``.

     Nothing is deleted when fewer than 1500 dossier ids are found; an error
     is logged instead and the command returns.
     """
     dossier_ids = Dossier.get_dossier_ids()
     # Guard against wiping the table when the id list is suspiciously short.
     if len(dossier_ids) < 1500:
         logger.error('Less than 1500 dossiers found, something wrong, abort!')
         return
     dossiers_to_delete = Dossier.objects.exclude(dossier_id__in=dossier_ids)
     # count() lets the database count the rows; len() would fetch and
     # instantiate every matching Dossier only to report a number.
     logger.info('Deleting %s dossiers and related items',
                 dossiers_to_delete.count())
     dossiers_to_delete.delete()
예제 #5
0
 def create_votings(self, dossier_id):
     """Create the votings for each besluit with stemmingen of one dossier.

     Args:
         dossier_id: Combined dossier id; split into main and sub part with
             ``Dossier.split_dossier_id`` before querying.
     """
     logger.info('BEGIN')
     logger.info('dossier id: ' + str(dossier_id))
     main_nr, sub_nr = Dossier.split_dossier_id(dossier_id)
     besluiten = queries.get_dossier_besluiten_with_stemmingen(
         nummer=main_nr,
         toevoeging=sub_nr,
     )
     for besluit in besluiten:
         self.create_votings_dossier_besluit(besluit, dossier_id)
     logger.info('END')
예제 #6
0
 def handle(self, *args, **options):
     """Delete all ``Dossier`` rows whose id is not in ``Dossier.get_dossier_ids()``.

     Nothing is deleted when fewer than 1500 dossier ids are found; an error
     is logged instead and the command returns.
     """
     dossier_ids = Dossier.get_dossier_ids()
     # Guard against wiping the table when the id list is suspiciously short.
     if len(dossier_ids) < 1500:
         logger.error(
             'Less than 1500 dossiers found, something wrong, abort!')
         return
     dossiers_to_delete = Dossier.objects.exclude(
         dossier_id__in=dossier_ids)
     # count() lets the database count the rows; len() would fetch and
     # instantiate every matching Dossier only to report a number.
     logger.info('Deleting %s dossiers and related items',
                 dossiers_to_delete.count())
     dossiers_to_delete.delete()
예제 #7
0
def create_wetsvoorstellen_active(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for every dossier id that is not inactive.

    Args:
        skip_existing: Forwarded to ``create_wetsvoorstellen``.
        max_tries: Forwarded to ``create_wetsvoorstellen``.

    Returns:
        Whatever ``create_wetsvoorstellen`` returns (the failed dossiers).
    """
    logger.info('BEGIN')
    all_ids = Dossier.get_dossier_ids()
    inactive_ids = get_inactive_dossier_ids()
    # Active means: present in the full list but absent from the inactive one.
    active_ids = [
        candidate for candidate in all_ids if candidate not in inactive_ids
    ]
    failed = create_wetsvoorstellen(
        active_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
예제 #8
0
def create_wetsvoorstellen_active(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for every non-inactive dossier id, last id first.

    Args:
        skip_existing: Forwarded to ``create_wetsvoorstellen``.
        max_tries: Forwarded to ``create_wetsvoorstellen``.

    Returns:
        Whatever ``create_wetsvoorstellen`` returns (the failed dossiers).
    """
    logger.info('BEGIN')
    all_ids = Dossier.get_dossier_ids()
    inactive_ids = get_inactive_dossier_ids()
    # Active means: present in the full list but absent from the inactive one.
    active_ids = [
        candidate for candidate in all_ids if candidate not in inactive_ids
    ]
    # Process the list back to front.
    active_ids.reverse()
    failed = create_wetsvoorstellen(
        active_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
예제 #9
0
    def handle(self, *args, **options):
        """Delete all ``Dossier`` rows whose id is not produced by ``get_dossier_ids``.

        Nothing is deleted when fewer than 1500 dossiers are found; an error
        is logged instead and the command returns.
        """
        dossiers = get_dossier_ids()
        # Guard against wiping the table when the list is suspiciously short.
        if len(dossiers) < 1500:
            logger.error(
                'Less than 1500 dossiers found, something wrong, abort!')
            return
        # Combine each main/sub id pair into the id format stored on Dossier.
        dossiers_valid = [
            Dossier.create_dossier_id(dossier.dossier_id,
                                      dossier.dossier_sub_id)
            for dossier in dossiers
        ]

        dossiers_to_delete = Dossier.objects.exclude(
            dossier_id__in=dossiers_valid)
        # count() lets the database count the rows; len() would fetch and
        # instantiate every matching Dossier only to report a number.
        logger.info('Deleting %s dossiers and related items',
                    dossiers_to_delete.count())
        dossiers_to_delete.delete()
예제 #10
0
def get_inactive_dossier_ids(year=None) -> List[DossierId]:
    """Return the ids of all dossiers with an inactive status.

    A dossier counts as inactive when its status is one of ``VERWORPEN``,
    ``AANGENOMEN``, ``INGETROKKEN`` or ``CONTROVERSIEEL``.

    Args:
        year: Optional year (anything ``int()`` accepts). When given, only
            dossiers whose ``start_date`` falls in that year are returned;
            dossiers without a ``start_date`` are left out.

    Returns:
        One ``DossierId`` per matching dossier, built from the parts returned
        by ``Dossier.split_dossier_id``.
    """
    inactive_dossiers = Dossier.objects.filter(status__in=[
        Dossier.VERWORPEN, Dossier.AANGENOMEN, Dossier.INGETROKKEN,
        Dossier.CONTROVERSIEEL
    ])
    if year is None:
        raw_ids = list(inactive_dossiers.values_list('dossier_id', flat=True))
    else:
        # Convert once instead of on every loop iteration.
        wanted_year = int(year)
        # Iterate the dossiers of the single query above rather than fetching
        # the ids first and then re-loading every dossier with its own
        # ``objects.get`` call (one extra query per dossier).
        raw_ids = [
            dossier.dossier_id for dossier in inactive_dossiers
            if dossier.start_date and dossier.start_date.year == wanted_year
        ]
    return [
        DossierId(*Dossier.split_dossier_id(raw_id))
        for raw_id in raw_ids
    ]
예제 #11
0
def get_verlag_algemeen_overleg_infos(year):
    """Download and parse the 'verslagen algemeen overleg' CSV for one year.

    Args:
        year: Year used to build the CSV file name; converted with ``str()``.

    Returns:
        A list of dicts with the keys ``date_published``, ``dossier_id``,
        ``dossier_extra_id``, ``kamerstuk_nr`` and ``document_url`` (the first
        five comma-separated columns). Rows without a document url, and rows
        with fewer than five columns such as blank lines, are skipped.
    """
    lines = Dossier.get_lines_from_url(
        'https://raw.githubusercontent.com/openkamer/ok-tk-data/master/verslagen/verslagen_algemeen_overleg_' + str(year) + '.csv')
    verslagen_info = []
    # Slicing skips the header row without mutating the list returned by the
    # helper and, unlike pop(0), does not raise when no lines were returned.
    for line in lines[1:]:
        columns = line.split(',')
        # Short rows (e.g. a trailing blank line) have no url column at all.
        if len(columns) < 5 or columns[4] == '':  # no document url
            continue
        verslagen_info.append({
            'date_published': columns[0],
            'dossier_id': columns[1],
            'dossier_extra_id': columns[2],
            'kamerstuk_nr': columns[3],
            'document_url': columns[4],
        })
    return verslagen_info
예제 #12
0
def create_or_update_dossier(dossier_id):
    """Recreate the ``Dossier`` with the given id from TK API data.

    Existing ``Dossier`` rows with this ``dossier_id`` are deleted first. A
    new row is then created, followed by its documents, decisions and
    votings, after which the derived fields are set.

    Args:
        dossier_id: Combined dossier id; split into a main and a sub id with
            ``Dossier.split_dossier_id``.

    Returns:
        The newly created ``Dossier``.

    Raises:
        IndexError: If the TK API returns no dossier for this id.
    """
    logger.info('BEGIN - dossier id: {}'.format(dossier_id))
    Dossier.objects.filter(dossier_id=dossier_id).delete()
    dossier_url = 'https://zoek.officielebekendmakingen.nl/dossier/{}'.format(
        dossier_id)
    dossier_id_main, dossier_id_sub = Dossier.split_dossier_id(dossier_id)

    dossier_filter = TKDossier.create_filter()
    dossier_filter.filter_nummer(dossier_id_main)
    if dossier_id_sub:
        dossier_filter.filter_toevoeging(dossier_id_sub)
    dossiers = TKApi.get_dossiers(filter=dossier_filter)

    if len(dossiers) != 1:
        logger.error('{} dossiers found while one expected for {}'.format(
            len(dossiers), dossier_id))
    if len(dossiers) == 0:
        # Fail with an explicit message instead of the bare
        # "list index out of range" that ``dossiers[0]`` would produce; the
        # exception type is kept so existing handlers behave the same.
        raise IndexError(
            'no TK dossier found for dossier id {}'.format(dossier_id))

    # With more than one result the first one is used (after the error log).
    tk_dossier = dossiers[0]

    # TODO BR: create a list of related dossier decisions instead of one, see dossier 34792 for example
    logger.info('dossier id main: {} | dossier id sub: {}'.format(
        dossier_id_main, dossier_id_sub))
    # Prefer the last besluit that has a voting; fall back to the last besluit.
    last_besluit = get_besluit_last_with_voting(dossier_id_main,
                                                dossier_id_sub)
    if not last_besluit:
        last_besluit = get_besluit_last(dossier_id_main, dossier_id_sub)

    decision_text = 'Onbekend'
    if last_besluit:
        decision_text = last_besluit.tekst.replace('.', '')

    dossier_new = Dossier.objects.create(dossier_id=dossier_id,
                                         dossier_main_id=dossier_id_main,
                                         dossier_sub_id=dossier_id_sub,
                                         title=tk_dossier.titel,
                                         url=dossier_url,
                                         decision_text=decision_text)
    create_dossier_documents(dossier_new, dossier_id)
    create_dossier_decisions(dossier_id_main, dossier_id_sub, dossier_new)
    voting_factory = VotingFactory()
    voting_factory.create_votings(dossier_id)
    dossier_new.set_derived_fields()
    logger.info('END - dossier id: ' + str(dossier_id))
    return dossier_new
예제 #13
0
def get_verlag_algemeen_overleg_infos(year):
    """Download and parse the 'verslagen algemeen overleg' CSV for one year.

    Args:
        year: Year used to build the CSV file name; converted with ``str()``.

    Returns:
        A list of dicts with the keys ``date_published``, ``dossier_id``,
        ``dossier_extra_id``, ``kamerstuk_nr`` and ``document_url`` (the first
        five comma-separated columns). Rows without a document url, and rows
        with fewer than five columns such as blank lines, are skipped.
    """
    lines = Dossier.get_lines_from_url(
        'https://raw.githubusercontent.com/openkamer/ok-tk-data/master/verslagen/verslagen_algemeen_overleg_'
        + str(year) + '.csv')
    verslagen_info = []
    # Slicing skips the header row without mutating the list returned by the
    # helper and, unlike pop(0), does not raise when no lines were returned.
    for line in lines[1:]:
        columns = line.split(',')
        # Short rows (e.g. a trailing blank line) have no url column at all.
        if len(columns) < 5 or columns[4] == '':  # no document url
            continue
        verslagen_info.append({
            'date_published': columns[0],
            'dossier_id': columns[1],
            'dossier_extra_id': columns[2],
            'kamerstuk_nr': columns[3],
            'document_url': columns[4],
        })
    return verslagen_info
예제 #14
0
def create_wetsvoorstellen(dossier_ids: List[DossierId],
                           skip_existing=False,
                           max_tries=3):
    """Create a dossier for each given id and collect the ids that failed.

    Args:
        dossier_ids: Entries exposing ``dossier_id`` and ``dossier_sub_id``.
        skip_existing: When true, ids that already have a ``Dossier`` row are
            skipped.
        max_tries: Forwarded to ``create_dossier_retry_on_error``.

    Returns:
        The combined dossier ids for which creation raised an exception.
    """
    logger.info('BEGIN')
    failures = []
    for entry in dossier_ids:
        current_id = Dossier.create_dossier_id(entry.dossier_id,
                                               entry.dossier_sub_id)
        logger.info('dossier id: {}'.format(current_id))
        already_present = Dossier.objects.filter(dossier_id=current_id)
        if skip_existing and already_present.exists():
            logger.info('dossier already exists, skip')
            continue
        try:
            create_dossier_retry_on_error(dossier_id=current_id,
                                          max_tries=max_tries)
        except Exception:
            # One failing dossier must not stop the batch: log the traceback
            # and remember the id for the caller.
            logger.exception('error for dossier id: ' + str(current_id))
            failures.append(current_id)
    logger.info('END')
    return failures
예제 #15
0
def create_verslag_ao(tk_verslag, skip_if_exists=False):
    """Create the commissie document for one 'verslag algemeen overleg'.

    The verslag is attached to its first dossier and to the commissie named
    first in ``voortouwcommissie_namen`` (an empty name when there is none).

    Args:
        tk_verslag: Verslag object providing ``dossiers``,
            ``voortouwcommissie_namen``, ``document_url`` and ``volgnummer``.
        skip_if_exists: Forwarded to ``create_verslag``.

    Returns:
        The result of ``create_verslag``.
    """
    first_dossier = tk_verslag.dossiers[0]
    dossier_id = Dossier.create_dossier_id(first_dossier.nummer,
                                           first_dossier.toevoeging)
    name = ''
    if tk_verslag.voortouwcommissie_namen:
        name = tk_verslag.voortouwcommissie_namen[0]
    logger.info('commissie name: {}'.format(name))
    name_short = Commissie.create_short_name(name)
    slug = Commissie.create_slug(name_short)
    commissie, _ = Commissie.objects.get_or_create(
        name=name,
        name_short=name_short,
        slug=slug,
    )
    # The overheid.nl document id is the document url without the site prefix.
    overheidnl_id = tk_verslag.document_url.replace(
        'https://zoek.officielebekendmakingen.nl/', '')
    return create_verslag(
        tk_document=tk_verslag,
        overheidnl_document_id=overheidnl_id,
        dossier_id=dossier_id,
        kamerstuk_nr=tk_verslag.volgnummer,
        commissie=commissie,
        skip_if_exists=skip_if_exists,
    )
예제 #16
0
 def get_context_data(self, **kwargs):
     """Extend the template context with the dossiers that are no wetsvoorstel.

     Returns:
         The parent context with an extra ``dossiers_no_wetsvoorstel`` entry:
         every ``Dossier`` whose id is not in ``Dossier.get_dossier_ids()``.
     """
     ctx = super().get_context_data(**kwargs)
     ctx['dossiers_no_wetsvoorstel'] = Dossier.objects.exclude(
         dossier_id__in=Dossier.get_dossier_ids(),
     )
     return ctx
예제 #17
0
 def test_get_active_ids(self):
     """Two consecutive calls must report the same number of active ids."""
     first_call = Dossier.get_active_dossier_ids()
     second_call = Dossier.get_active_dossier_ids()
     self.assertEqual(len(first_call), len(second_call))
예제 #18
0
def create_wetsvoorstellen_inactive(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for all inactive dossier ids.

    Args:
        skip_existing: Forwarded to ``create_wetsvoorstellen``.
        max_tries: Forwarded to ``create_wetsvoorstellen``.

    Returns:
        Whatever ``create_wetsvoorstellen`` returns (the failed dossiers).
    """
    logger.info('BEGIN')
    inactive_ids = Dossier.get_inactive_dossier_ids()
    failed = create_wetsvoorstellen(
        inactive_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
 def do(self):
     """Delete every ``Dossier`` that is neither an active nor an inactive dossier."""
     # Build a new list instead of extending the active list with ``+=``:
     # the in-place form modifies the very object returned by
     # ``get_active_dossier_ids``, which corrupts later calls if the model
     # keeps that list around (NOTE(review): confirm whether it is cached).
     wetsvoorstel_ids = [
         *Dossier.get_active_dossier_ids(),
         *Dossier.get_inactive_dossier_ids(),
     ]
     Dossier.objects.exclude(dossier_id__in=wetsvoorstel_ids).delete()
 def do(self):
     """Delete every ``Dossier`` whose id is not in ``Dossier.get_dossier_ids()``."""
     known_ids = Dossier.get_dossier_ids()
     stale_dossiers = Dossier.objects.exclude(dossier_id__in=known_ids)
     stale_dossiers.delete()
예제 #21
0
def create_or_update_dossier(dossier_id):
    """Recreate the ``Dossier`` with the given id from scraped data.

    Existing ``Dossier`` rows with this ``dossier_id`` are deleted first. A
    new row is created and, for every search result of the dossier, a
    ``Document`` is created with its categories, submitters and (where the
    metadata says so) a kamerstuk or agenda. Finally the votings are created
    and the derived fields are set.

    Eerste Kamer and Staatscourant results, results without a document id and
    documents that already exist are skipped.

    Args:
        dossier_id: Id of the dossier to (re)create.

    Returns:
        The newly created ``Dossier``.
    """
    logger.info('BEGIN - dossier id: ' + str(dossier_id))
    Dossier.objects.filter(dossier_id=dossier_id).delete()
    dossier_url = scraper.dossiers.search_dossier_url(dossier_id)
    decision = scraper.dossiers.get_dossier_decision(dossier_url)
    dossier_new = Dossier.objects.create(
        dossier_id=dossier_id,
        is_active=Dossier.is_active_id(dossier_id),
        url=dossier_url,
        decision=decision
    )
    search_results = scraper.documents.search_politieknl_dossier(dossier_id)
    for result in search_results:
        # skip eerste kamer documents, first focus on the tweede kamer
        # TODO: handle eerste kamer documents
        if 'eerste kamer' in result['publisher'].lower():
            logger.info('skipping Eerste Kamer document')
            continue
        # skip documents of some types and/or sources, no models implemented yet
        # TODO: handle all document types
        if 'Staatscourant' in result['type']:
            logger.info('Staatscourant, skip for now')
            continue

        document_id, content_html, title = scraper.documents.get_document_id_and_content(result['page_url'])
        if not document_id:
            logger.error('No document id found for url: ' + result['page_url'] + ' - will not create document')
            continue

        metadata = scraper.documents.get_metadata(document_id)

        # Prefer the publication date from the metadata, fall back to the
        # date of the search result.
        if metadata['date_published']:
            date_published = metadata['date_published']
        else:
            date_published = result['date_published']

        if 'submitter' not in metadata:
            metadata['submitter'] = 'undefined'

        # Default to the dossier being created. Without this default the name
        # was unbound on the first iteration (or silently reused the dossier
        # of the previous document) whenever the metadata has no 'dossier_id'.
        dossier_for_document = dossier_new
        if 'dossier_id' in metadata:
            main_dossier_id = metadata['dossier_id'].split(';')[0].strip()
            main_dossier_id = main_dossier_id.split('-')[0]  # remove rijkswetdossier id, for example 34158-(R2048)
            # The document belongs to another dossier: attach it to that one.
            if main_dossier_id != '' and str(main_dossier_id) != str(dossier_id):
                dossier_for_document, _ = Dossier.objects.get_or_create(dossier_id=main_dossier_id)

        documents = Document.objects.filter(document_id=document_id)
        if documents.exists():
            logger.warning('document with id: ' + document_id + ' already exists, skip creating document.')
            continue

        content_html = update_document_html_links(content_html)

        document = Document.objects.create(
            dossier=dossier_for_document,
            document_id=document_id,
            title_full=metadata['title_full'],
            title_short=metadata['title_short'],
            publication_type=metadata['publication_type'],
            publisher=metadata['publisher'],
            date_published=date_published,
            source_url=result['page_url'],
            content_html=content_html,
        )
        category_list = get_categories(text=metadata['category'], category_class=CategoryDocument, sep_char='|')
        document.categories.add(*category_list)

        # Several submitters are separated by '|' in the metadata.
        submitters = metadata['submitter'].split('|')
        for submitter in submitters:
            create_submitter(document, submitter, date_published)

        if metadata['is_kamerstuk']:
            is_attachement = "Bijlage" in result['type']
            # NOTE(review): the existence check uses the requested dossier_id,
            # while the kamerstuk is created for dossier_for_document — verify
            # that this difference is intended.
            if not Kamerstuk.objects.filter(id_main=dossier_id, id_sub=metadata['id_sub']).exists():
                create_kamerstuk(document, dossier_for_document.dossier_id, title, metadata, is_attachement)
                category_list = get_categories(text=metadata['category'], category_class=CategoryDossier, sep_char='|')
                dossier_for_document.categories.add(*category_list)

        if metadata['is_agenda']:
            create_agenda(document, metadata)

    create_votings(dossier_id)
    dossier_new.set_derived_fields()
    logger.info('END - dossier id: ' + str(dossier_id))
    return dossier_new