def get_context_data(self, **kwargs):
    """Add the dossiers that are not a known wetsvoorstel to the context.

    A dossier counts as a wetsvoorstel when its id is returned by
    ``Dossier.get_active_dossier_ids()`` or
    ``Dossier.get_inactive_dossier_ids()``. All other stored dossiers are
    exposed to the template as ``dossiers_no_wetsvoorstel``.
    """
    context = super().get_context_data(**kwargs)
    # Build a new list instead of extending the returned one with ``+=``:
    # in-place extension would modify the helper's return value, which leaks
    # into later calls if the helper hands out a shared (e.g. cached) list.
    wetsvoorstel_ids = (
        list(Dossier.get_active_dossier_ids())
        + list(Dossier.get_inactive_dossier_ids())
    )
    context['dossiers_no_wetsvoorstel'] = Dossier.objects.exclude(
        dossier_id__in=wetsvoorstel_ids)
    return context
def create_wetsvoorstellen_all(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for all dossier ids, processed in reversed order.

    The ids come from ``Dossier.get_dossier_ids()``; ``skip_existing`` and
    ``max_tries`` are passed through to ``create_wetsvoorstellen``.

    Returns the value returned by ``create_wetsvoorstellen`` (the failed
    dossiers).
    """
    logger.info('BEGIN')
    all_ids = Dossier.get_dossier_ids()
    all_ids.reverse()  # reverses the returned list in place
    failed = create_wetsvoorstellen(
        all_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
def do(self):
    """Delete every stored dossier whose id is not in the current id list.

    The valid ids are built from the entries returned by
    ``get_dossier_ids()`` (each providing ``dossier_id`` and
    ``dossier_sub_id``) via ``Dossier.create_dossier_id``.

    Nothing is deleted when no ids are found at all.
    """
    import logging

    valid_ids = [
        Dossier.create_dossier_id(dossier.dossier_id, dossier.dossier_sub_id)
        for dossier in get_dossier_ids()
    ]
    if not valid_ids:
        # With an empty id list the exclude() below matches every dossier,
        # so a failed or empty lookup would wipe the whole table.
        logging.getLogger(__name__).error(
            'No dossier ids found, skip deleting dossiers')
        return
    Dossier.objects.exclude(dossier_id__in=valid_ids).delete()
def handle(self, *args, **options):
    """Delete all stored dossiers whose id is not in the dossier id list.

    The valid ids come from ``Dossier.get_dossier_ids()``. When fewer than
    1500 ids are returned the command logs an error and aborts without
    deleting anything.
    """
    dossier_ids = Dossier.get_dossier_ids()
    if len(dossier_ids) < 1500:
        logger.error('Less than 1500 dossiers found, something wrong, abort!')
        return
    dossiers_to_delete = Dossier.objects.exclude(dossier_id__in=dossier_ids)
    # count() asks the database for the number only; len() on the queryset
    # would load every matching row into memory just to log it.
    logger.info('Deleting ' + str(dossiers_to_delete.count()) + ' dossiers and related items')
    dossiers_to_delete.delete()
def create_votings(self, dossier_id):
    """Create the votings for every besluit with stemmingen of a dossier.

    The dossier id is split into its main and sub part, which are used to
    query the besluiten; each besluit is handed to
    ``create_votings_dossier_besluit`` together with the full dossier id.
    """
    logger.info('BEGIN')
    logger.info('dossier id: ' + str(dossier_id))
    main_id, sub_id = Dossier.split_dossier_id(dossier_id)
    besluiten = queries.get_dossier_besluiten_with_stemmingen(
        nummer=main_id,
        toevoeging=sub_id,
    )
    for besluit in besluiten:
        self.create_votings_dossier_besluit(besluit, dossier_id)
    logger.info('END')
def handle(self, *args, **options):
    """Remove stored dossiers that are absent from the dossier id list.

    The valid ids come from ``Dossier.get_dossier_ids()``. As a safety
    check the command logs an error and returns without deleting when
    fewer than 1500 ids are found.
    """
    dossier_ids = Dossier.get_dossier_ids()
    if len(dossier_ids) < 1500:
        logger.error(
            'Less than 1500 dossiers found, something wrong, abort!')
        return
    dossiers_to_delete = Dossier.objects.exclude(
        dossier_id__in=dossier_ids)
    # Use count() so only the number is fetched; len() would evaluate the
    # queryset and load all rows that are about to be deleted anyway.
    delete_count = dossiers_to_delete.count()
    logger.info('Deleting ' + str(delete_count) +
                ' dossiers and related items')
    dossiers_to_delete.delete()
def create_wetsvoorstellen_active(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for all dossier ids that are not inactive.

    Active ids are those returned by ``Dossier.get_dossier_ids()`` that do
    not occur in ``get_inactive_dossier_ids()``. ``skip_existing`` and
    ``max_tries`` are passed through to ``create_wetsvoorstellen``.

    Returns the value returned by ``create_wetsvoorstellen`` (the failed
    dossiers).
    """
    logger.info('BEGIN')
    all_ids = Dossier.get_dossier_ids()
    inactive_ids = get_inactive_dossier_ids()
    active_ids = [
        candidate for candidate in all_ids
        if candidate not in inactive_ids
    ]
    failed = create_wetsvoorstellen(
        active_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
def create_wetsvoorstellen_active(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for the non-inactive dossier ids, reversed.

    Active ids are those returned by ``Dossier.get_dossier_ids()`` that do
    not occur in ``get_inactive_dossier_ids()``; they are processed in
    reversed order. ``skip_existing`` and ``max_tries`` are passed through
    to ``create_wetsvoorstellen``.

    Returns the value returned by ``create_wetsvoorstellen`` (the failed
    dossiers).
    """
    logger.info('BEGIN')
    all_ids = Dossier.get_dossier_ids()
    inactive_ids = get_inactive_dossier_ids()
    active_ids = [
        candidate for candidate in all_ids
        if candidate not in inactive_ids
    ]
    active_ids.reverse()
    failed = create_wetsvoorstellen(
        active_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
def handle(self, *args, **options):
    """Delete all stored dossiers that are not in the current id list.

    The entries returned by ``get_dossier_ids()`` (each providing
    ``dossier_id`` and ``dossier_sub_id``) are converted to dossier id
    values with ``Dossier.create_dossier_id``. When fewer than 1500 entries
    are returned the command logs an error and aborts without deleting.
    """
    dossiers = get_dossier_ids()
    if len(dossiers) < 1500:
        logger.error(
            'Less than 1500 dossiers found, something wrong, abort!')
        return
    dossiers_valid = [
        Dossier.create_dossier_id(dossier.dossier_id, dossier.dossier_sub_id)
        for dossier in dossiers
    ]
    dossiers_to_delete = Dossier.objects.exclude(
        dossier_id__in=dossiers_valid)
    # count() fetches only the number; len() would load every row that is
    # about to be deleted into memory first.
    delete_count = dossiers_to_delete.count()
    logger.info('Deleting ' + str(delete_count) +
                ' dossiers and related items')
    dossiers_to_delete.delete()
def get_inactive_dossier_ids(year=None) -> List[DossierId]:
    """Return the ids of all stored dossiers with an inactive status.

    A dossier is inactive when its status is one of ``VERWORPEN``,
    ``AANGENOMEN``, ``INGETROKKEN`` or ``CONTROVERSIEEL``.

    Args:
        year: optional year (anything accepted by ``int()``). When given,
            only dossiers that have a ``start_date`` in that year are
            returned.

    Returns:
        A list of ``DossierId`` objects built from the split dossier ids.
    """
    inactive_dossiers = Dossier.objects.filter(status__in=[
        Dossier.VERWORPEN,
        Dossier.AANGENOMEN,
        Dossier.INGETROKKEN,
        Dossier.CONTROVERSIEEL,
    ])
    if year is None:
        dossier_ids = list(
            inactive_dossiers.values_list('dossier_id', flat=True))
    else:
        year = int(year)
        # Iterate the filtered dossiers directly instead of fetching each
        # one again by id, which cost one extra query per dossier.
        dossier_ids = [
            dossier.dossier_id
            for dossier in inactive_dossiers
            if dossier.start_date and dossier.start_date.year == year
        ]
    return [
        DossierId(*Dossier.split_dossier_id(dossier_id))
        for dossier_id in dossier_ids
    ]
def get_verlag_algemeen_overleg_infos(year):
    """Return the 'verslag algemeen overleg' entries listed for a year.

    Reads the CSV file for ``year`` through ``Dossier.get_lines_from_url``
    and returns one dict per row that has a document url, with the keys
    ``date_published``, ``dossier_id``, ``dossier_extra_id``,
    ``kamerstuk_nr`` and ``document_url`` (taken from the first five
    columns, in that order).

    Rows with fewer than five columns (for example blank lines) and rows
    with an empty document url are skipped.
    """
    import csv

    lines = Dossier.get_lines_from_url(
        'https://raw.githubusercontent.com/openkamer/ok-tk-data/master/verslagen/verslagen_algemeen_overleg_'
        + str(year) + '.csv')
    verslagen_info = []
    # lines[1:] drops the table headers without modifying the returned list
    # and also copes with an empty file. csv.reader handles quoted fields
    # that contain commas, which a plain split(',') would break apart.
    for columns in csv.reader(lines[1:]):
        if len(columns) < 5 or columns[4] == '':  # incomplete row or no document url
            continue
        verslagen_info.append({
            'date_published': columns[0],
            'dossier_id': columns[1],
            'dossier_extra_id': columns[2],
            'kamerstuk_nr': columns[3],
            'document_url': columns[4],
        })
    return verslagen_info
def create_or_update_dossier(dossier_id):
    """Recreate the stored dossier with the given id and return it.

    The dossier is looked up through ``TKApi`` by its main number and, when
    present, its sub id (toevoeging). After a successful lookup any stored
    ``Dossier`` with this id is deleted and a new one is created with the
    title from the lookup and the text of its last besluit (preferring one
    with a voting, ``'Onbekend'`` when there is none). Documents, decisions
    and votings are then created and the derived fields are set.

    Raises:
        IndexError: when the lookup returns no dossier at all. The stored
            dossier is left untouched in that case.
    """
    logger.info('BEGIN - dossier id: {}'.format(dossier_id))
    dossier_url = 'https://zoek.officielebekendmakingen.nl/dossier/{}'.format(
        dossier_id)
    dossier_id_main, dossier_id_sub = Dossier.split_dossier_id(dossier_id)

    dossier_filter = TKDossier.create_filter()
    dossier_filter.filter_nummer(dossier_id_main)
    if dossier_id_sub:
        dossier_filter.filter_toevoeging(dossier_id_sub)
    dossiers = TKApi.get_dossiers(filter=dossier_filter)
    if len(dossiers) != 1:
        logger.error('{} dossiers found while one expected for {}'.format(
            len(dossiers), dossier_id))
    # Indexing raises IndexError when nothing was found. This happens before
    # the stored dossier is deleted, so a failed lookup does not lose data.
    tk_dossier = dossiers[0]

    # Only now remove the old dossier; it is recreated below.
    Dossier.objects.filter(dossier_id=dossier_id).delete()

    # TODO BR: create a list of related dossier decisions instead of one, see dossier 34792 for example
    logger.info('dossier id main: {} | dossier id sub: {}'.format(
        dossier_id_main, dossier_id_sub))
    last_besluit = get_besluit_last_with_voting(dossier_id_main, dossier_id_sub)
    if not last_besluit:
        last_besluit = get_besluit_last(dossier_id_main, dossier_id_sub)
    decision_text = 'Onbekend'
    if last_besluit:
        decision_text = last_besluit.tekst.replace('.', '')

    dossier_new = Dossier.objects.create(dossier_id=dossier_id,
                                         dossier_main_id=dossier_id_main,
                                         dossier_sub_id=dossier_id_sub,
                                         title=tk_dossier.titel,
                                         url=dossier_url,
                                         decision_text=decision_text)
    create_dossier_documents(dossier_new, dossier_id)
    create_dossier_decisions(dossier_id_main, dossier_id_sub, dossier_new)
    voting_factory = VotingFactory()
    voting_factory.create_votings(dossier_id)
    dossier_new.set_derived_fields()
    logger.info('END - dossier id: ' + str(dossier_id))
    return dossier_new
def create_wetsvoorstellen(dossier_ids: List[DossierId],
                           skip_existing=False,
                           max_tries=3):
    """Create a dossier for each given id and collect the ones that fail.

    Args:
        dossier_ids: entries providing ``dossier_id`` and ``dossier_sub_id``.
        skip_existing: when true, ids that already have a stored ``Dossier``
            are skipped.
        max_tries: passed through to ``create_dossier_retry_on_error``.

    Returns:
        A list with the dossier id of every dossier whose creation raised
        an exception; the exception itself is logged, not re-raised.
    """
    logger.info('BEGIN')
    failed_dossiers = []
    for entry in dossier_ids:
        dossier_id = Dossier.create_dossier_id(entry.dossier_id,
                                               entry.dossier_sub_id)
        logger.info('dossier id: {}'.format(dossier_id))
        already_stored = (
            skip_existing
            and Dossier.objects.filter(dossier_id=dossier_id).exists()
        )
        if already_stored:
            logger.info('dossier already exists, skip')
            continue
        try:
            create_dossier_retry_on_error(dossier_id=dossier_id,
                                          max_tries=max_tries)
        except Exception:
            # Keep going with the remaining dossiers; report this one back.
            failed_dossiers.append(dossier_id)
            logger.exception('error for dossier id: ' + str(dossier_id))
    logger.info('END')
    return failed_dossiers
def create_verslag_ao(tk_verslag, skip_if_exists=False):
    """Create the commissie document for a verslag and return it.

    The dossier id is built from the first dossier of ``tk_verslag``. The
    commissie is fetched or created from the first voortouwcommissie name
    (an empty name when there are none). The document id passed to
    ``create_verslag`` is the verslag's document url with the
    officielebekendmakingen.nl prefix removed.
    """
    first_dossier = tk_verslag.dossiers[0]
    dossier_id = Dossier.create_dossier_id(first_dossier.nummer,
                                           first_dossier.toevoeging)

    if tk_verslag.voortouwcommissie_namen:
        name = tk_verslag.voortouwcommissie_namen[0]
    else:
        name = ''
    logger.info('commissie name: {}'.format(name))

    name_short = Commissie.create_short_name(name)
    slug = Commissie.create_slug(name_short)
    commissie, _ = Commissie.objects.get_or_create(name=name,
                                                   name_short=name_short,
                                                   slug=slug)

    overheidnl_document_id = tk_verslag.document_url.replace(
        'https://zoek.officielebekendmakingen.nl/', '')
    return create_verslag(
        tk_document=tk_verslag,
        overheidnl_document_id=overheidnl_document_id,
        dossier_id=dossier_id,
        kamerstuk_nr=tk_verslag.volgnummer,
        commissie=commissie,
        skip_if_exists=skip_if_exists,
    )
def get_context_data(self, **kwargs):
    """Add the dossiers that are not a known wetsvoorstel to the context.

    Stored dossiers whose id is not returned by
    ``Dossier.get_dossier_ids()`` are exposed to the template as
    ``dossiers_no_wetsvoorstel``.
    """
    context = super().get_context_data(**kwargs)
    known_ids = Dossier.get_dossier_ids()
    not_wetsvoorstel = Dossier.objects.exclude(dossier_id__in=known_ids)
    context['dossiers_no_wetsvoorstel'] = not_wetsvoorstel
    return context
def test_get_active_ids(self):
    """Two consecutive calls return the same number of active dossier ids.

    The second call is presumably served from a cache; verify against
    ``Dossier.get_active_dossier_ids``.
    """
    first_call = Dossier.get_active_dossier_ids()
    second_call = Dossier.get_active_dossier_ids()
    self.assertEqual(len(first_call), len(second_call))
def create_wetsvoorstellen_inactive(skip_existing=False, max_tries=3):
    """Create wetsvoorstellen for all inactive dossier ids.

    The ids come from ``Dossier.get_inactive_dossier_ids()``;
    ``skip_existing`` and ``max_tries`` are passed through to
    ``create_wetsvoorstellen``.

    Returns the value returned by ``create_wetsvoorstellen`` (the failed
    dossiers).
    """
    logger.info('BEGIN')
    inactive_ids = Dossier.get_inactive_dossier_ids()
    failed = create_wetsvoorstellen(
        inactive_ids,
        skip_existing=skip_existing,
        max_tries=max_tries,
    )
    logger.info('END')
    return failed
def do(self):
    """Delete every stored dossier that is not a known wetsvoorstel.

    A dossier is kept when its id is returned by
    ``Dossier.get_active_dossier_ids()`` or
    ``Dossier.get_inactive_dossier_ids()``.

    Nothing is deleted when no ids are found at all.
    """
    import logging

    # Build a new list instead of extending the returned one with ``+=``:
    # in-place extension would modify the helper's return value, which leaks
    # into later calls if the helper hands out a shared (e.g. cached) list.
    wetsvoorstel_ids = (
        list(Dossier.get_active_dossier_ids())
        + list(Dossier.get_inactive_dossier_ids())
    )
    if not wetsvoorstel_ids:
        # With an empty id list the exclude() below matches every dossier,
        # so a failed or empty lookup would wipe the whole table.
        logging.getLogger(__name__).error(
            'No wetsvoorstel ids found, skip deleting dossiers')
        return
    Dossier.objects.exclude(dossier_id__in=wetsvoorstel_ids).delete()
def do(self):
    """Delete every stored dossier whose id is not in the dossier id list.

    The ids to keep come from ``Dossier.get_dossier_ids()``.

    Nothing is deleted when no ids are found at all.
    """
    import logging

    wetsvoorstel_ids = Dossier.get_dossier_ids()
    if not wetsvoorstel_ids:
        # With an empty id list the exclude() below matches every dossier,
        # so a failed or empty lookup would wipe the whole table.
        logging.getLogger(__name__).error(
            'No wetsvoorstel ids found, skip deleting dossiers')
        return
    Dossier.objects.exclude(dossier_id__in=wetsvoorstel_ids).delete()
def create_or_update_dossier(dossier_id):
    """Recreate the dossier with the given id from scraped data and return it.

    Any stored ``Dossier`` with this id is deleted and a new one is created
    with the scraped url and decision. For every search result of the
    dossier a ``Document`` is created, together with its categories,
    submitters and, depending on the metadata, a kamerstuk and/or agenda.
    Eerste Kamer and Staatscourant results, results without a document id
    and documents that already exist are skipped. Finally the votings are
    created and the derived fields of the new dossier are set.

    A document is attached to the new dossier unless its metadata names a
    different main dossier id; in that case it is attached to that dossier,
    which is created when it does not exist yet.
    """
    # NOTE(review): block nesting was reconstructed from a flattened source;
    # confirm the dossier category update belongs inside the kamerstuk branch.
    logger.info('BEGIN - dossier id: ' + str(dossier_id))
    Dossier.objects.filter(dossier_id=dossier_id).delete()
    dossier_url = scraper.dossiers.search_dossier_url(dossier_id)
    decision = scraper.dossiers.get_dossier_decision(dossier_url)
    dossier_new = Dossier.objects.create(
        dossier_id=dossier_id,
        is_active=Dossier.is_active_id(dossier_id),
        url=dossier_url,
        decision=decision
    )
    search_results = scraper.documents.search_politieknl_dossier(dossier_id)
    for result in search_results:
        # skip eerste kamer documents, first focus on the tweede kamer
        # TODO: handle eerste kamer documents
        if 'eerste kamer' in result['publisher'].lower():
            logger.info('skipping Eerste Kamer document')
            continue
        # skip documents of some types and/or sources, no models implemented yet
        # TODO: handle all document types
        if 'Staatscourant' in result['type']:
            logger.info('Staatscourant, skip for now')
            continue

        document_id, content_html, title = scraper.documents.get_document_id_and_content(result['page_url'])
        if not document_id:
            logger.error('No document id found for url: ' + result['page_url'] + ' - will not create document')
            continue

        metadata = scraper.documents.get_metadata(document_id)

        # prefer the publication date from the metadata, fall back to the search result
        if metadata['date_published']:
            date_published = metadata['date_published']
        else:
            date_published = result['date_published']

        if 'submitter' not in metadata:
            metadata['submitter'] = 'undefined'

        # Default to the new dossier on every iteration, so the variable is
        # never unbound or left over from the previous search result.
        dossier_for_document = dossier_new
        if 'dossier_id' in metadata:
            main_dossier_id = metadata['dossier_id'].split(';')[0].strip()
            main_dossier_id = main_dossier_id.split('-')[0]  # remove rijkswetdossier id, for example 34158-(R2048)
            if main_dossier_id != '' and str(main_dossier_id) != str(dossier_id):
                dossier_for_document, _ = Dossier.objects.get_or_create(dossier_id=main_dossier_id)

        documents = Document.objects.filter(document_id=document_id)
        if documents.exists():
            logger.warning('document with id: ' + document_id + ' already exists, skip creating document.')
            continue

        content_html = update_document_html_links(content_html)

        document = Document.objects.create(
            dossier=dossier_for_document,
            document_id=document_id,
            title_full=metadata['title_full'],
            title_short=metadata['title_short'],
            publication_type=metadata['publication_type'],
            publisher=metadata['publisher'],
            date_published=date_published,
            source_url=result['page_url'],
            content_html=content_html,
        )
        category_list = get_categories(text=metadata['category'], category_class=CategoryDocument, sep_char='|')
        document.categories.add(*category_list)

        submitters = metadata['submitter'].split('|')
        for submitter in submitters:
            create_submitter(document, submitter, date_published)

        if metadata['is_kamerstuk']:
            is_attachement = "Bijlage" in result['type']
            # NOTE(review): the existence check uses dossier_id while the
            # kamerstuk is created for dossier_for_document - confirm intent.
            if not Kamerstuk.objects.filter(id_main=dossier_id, id_sub=metadata['id_sub']).exists():
                create_kamerstuk(document, dossier_for_document.dossier_id, title, metadata, is_attachement)
                category_list = get_categories(text=metadata['category'], category_class=CategoryDossier, sep_char='|')
                dossier_for_document.categories.add(*category_list)

        if metadata['is_agenda']:
            create_agenda(document, metadata)

    create_votings(dossier_id)
    dossier_new.set_derived_fields()
    logger.info('END - dossier id: ' + str(dossier_id))
    return dossier_new