def send_validations(node=None):
    """Queue a validation report job on the ``reports`` queue.

    Creates a new ``ValidationReportTask`` record, reads the job timeout
    from the ``TasksConfig`` singleton and enqueues ``validation_run`` with
    the new task as its only positional argument.

    Args:
        node: Forwarded untouched to ``validation_run`` as the ``node``
            keyword argument. Defaults to ``None``.
    """
    report_task = models.ValidationReportTask.objects.create()
    job_timeout = TasksConfig.get_solo().validation_timeout
    enqueue_job_with_timeout(
        'reports',
        validation_run,
        job_timeout,
        args=(report_task,),
        kwargs={'node': node},
    )
def test_federation_validation_takes_value_from_config(
        self, mock_validation):
    """The ``federation_url_check`` setting must reach the validation call.

    With the flag saved as ``False`` in ``TasksConfig``, ``mock_validation``
    (a mock injected from outside this block) is expected to have been
    called with ``broken_links=False``.
    """
    target_node = Node.objects.get(catalog_id='id3')
    sample_catalog = DataJson(self.get_sample('missing_dataset_title.json'))

    # Disable the URL check before the report is generated.
    tasks_config = TasksConfig.get_solo()
    tasks_config.federation_url_check = False
    tasks_config.save()

    report = get_catalog_report(sample_catalog)
    sort_datasets_by_condition(target_node, report)

    mock_validation.assert_called_with(broken_links=False)
def indicators_run(_node=None):
    """Queue the indicators generation job on the ``indicators`` queue.

    The django_datajsonar task framework passes a node as a parameter in
    every case. This task does not make use of that parameter for now.

    Args:
        _node: Ignored.
    """
    generation_task = IndicatorsGenerationTask.objects.create()
    job_timeout = TasksConfig.get_solo().indicators_timeout
    enqueue_job_with_timeout(
        'indicators',
        generate_indicators,
        job_timeout,
        args=(generation_task,),
    )
def generate_indicators(task):
    """Compute and store indicators, then mark ``task`` as finished.

    Steps, in order:

    1. Generate indicators for the catalogs of every indexable ``Node``
       against the central catalog, and save both the per-catalog and the
       network (``'RED'``) results.
    2. Generate indicators for the catalogs of every enabled
       ``HarvestingNode`` and save them with ``harvesting_nodes=True``.
    3. Create the default table columns when none exist, call ``.delay()``
       on ``write_time_series_files`` and ``zip_indicators_csv``, and set
       the task status to ``FINISHED`` with the current local time.

    Args:
        task: The task record passed to the load/save helpers and updated
            at the end (an ``IndicatorsGenerationTask`` when called through
            ``indicators_run``).
    """
    link_timeout = TasksConfig.get_solo().url_check_timeout
    data_json = DataJson(url_check_timeout=link_timeout)
    node_catalogs = load_catalogs(task, Node.objects.filter(indexable=True))

    # Keyword options shared by both indicator runs below.
    indicator_options = {
        'identifier_search': True,
        'broken_links': TasksConfig.get_solo().indicators_url_check,
        'broken_links_threads': TasksConfig.get_solo().url_check_threads,
    }

    # Use the configured central node when there is one; otherwise fall
    # back to the CENTRAL constant.
    try:
        central_node = CentralNode.objects.get()
        central_catalog = urljoin(central_node.node.url, 'data.json')
    except (CentralNode.DoesNotExist, AttributeError):
        central_catalog = CENTRAL

    node_indicators, network_indicators = \
        data_json.generate_catalogs_indicators(
            node_catalogs, central_catalog, **indicator_options)
    save_indicators(node_indicators, task)
    save_network_indics(network_indicators, 'RED', task)

    harvesting_catalogs = load_catalogs(
        task, HarvestingNode.objects.filter(enabled=True), harvesting=True)
    harvesting_indicators, _ = data_json.generate_catalogs_indicators(
        harvesting_catalogs, **indicator_options)
    save_indicators(harvesting_indicators, task, harvesting_nodes=True)

    # Create the default columns if they do not exist yet.
    if TableColumn.objects.count() == 0:
        init_columns()

    write_time_series_files.delay()
    zip_indicators_csv.delay()

    # Reload the task before updating it, then record completion.
    task.refresh_from_db()
    task.status = IndicatorsGenerationTask.FINISHED
    task.finished = timezone.localtime()
    task.save()
def generate_email(self, node=None):
    """Build the validation e-mail for the catalog of ``node``.

    Args:
        node: Node whose catalog is validated. When falsy, no mail is
            generated.

    Returns:
        ``None`` when ``node`` is falsy or when the validation status is
        ``'OK'`` (in which case an info message is recorded on
        ``self.report_task``). Otherwise the mail produced by
        ``self.render_templates``, with its subject set and an ``.xlsx``
        validation report attached.
    """
    if not node:
        # No staff mail is generated.
        return None

    link_timeout = TasksConfig.get_solo().url_check_timeout
    catalog = DataJson(
        node.catalog_url,
        catalog_format=node.catalog_format,
        verify_ssl=node.verify_ssl,
        url_check_timeout=link_timeout,
    )

    check_links = TasksConfig.get_solo().get_validation_config_for_node(node)
    link_threads = TasksConfig.get_solo().url_check_threads
    report = catalog.validate_catalog(
        only_errors=True,
        broken_links=check_links,
        broken_links_threads=link_threads,
    )
    validated_at = self._format_date(timezone.now())

    if report['status'] == 'OK':
        valid_msg = "Catálogo {} válido.".format(node.catalog_id)
        self.report_task.info(self.report_task, valid_msg)
        return None

    mail = self.render_templates({
        'validation_time': validated_at,
        'status': report['status'],
        'catalog': report['error']['catalog'],
        'dataset_list': report['error']['dataset'],
    })
    mail.subject = '[{}] Validacion de catálogo {}: {}'.format(
        settings.ENV_TYPE, node.catalog_id, validated_at)

    # Export the report to a temporary .xlsx file and attach its contents.
    with NamedTemporaryFile(suffix='.xlsx') as report_file:
        catalog.validate_catalog(export_path=report_file.name)
        attachment_name = 'reporte_validacion_{}.xlsx'.format(
            node.catalog_id)
        mail.attach(attachment_name, report_file.read())

    return mail
def federate_catalog(node, portal_url, apikey, task_id):
    """Validate the catalog of ``node`` and harvest its valid datasets.

    The outcome is always reported the same way: the message is recorded
    on the ``FederationTask``, logged as a warning and returned.

    Args:
        node: Node whose catalog is federated.
        portal_url: Passed to ``harvest_catalog_to_ckan`` as the portal URL.
        apikey: Passed to ``harvest_catalog_to_ckan`` as the API key.
        task_id: Primary key of the ``FederationTask`` to report on.

    Returns:
        The message text. It describes an unreachable catalog, the harvest
        results, or the error raised while harvesting.
    """
    task = FederationTask.objects.get(pk=task_id)

    def _report(text):
        # Record the message on the task, log it and hand it back.
        FederationTask.info(task, text)
        LOGGER.warning(text)
        return text

    catalog = get_catalog_from_node(node)
    catalog_id = node.catalog_id
    msg = f"Catálogo: {node.catalog_id}\n"

    if not catalog:
        msg += UNREACHABLE_CATALOG.format(node.catalog_id)
        return _report(msg)

    catalog.generate_distribution_ids()
    check_links = TasksConfig.get_solo().federation_url_check
    link_threads = TasksConfig.get_solo().url_check_threads
    catalog_report = catalog.validate_catalog(
        broken_links=check_links,
        broken_links_threads=link_threads,
    )
    valid, invalid, missing = sort_datasets_by_condition(node, catalog_report)

    try:
        harvested_ids, federation_errors = harvest_catalog_to_ckan(
            catalog, portal_url, apikey, catalog_id, list(valid),
            origin_tz=node.timezone,
            dst_tz=task.harvesting_node.timezone)
        msg += generate_task_log(
            catalog_report, catalog_id, invalid, missing,
            harvested_ids, federation_errors)
        return _report(msg)
    except Exception as e:
        # Any failure above is turned into an error message, not raised.
        msg += TASK_ERROR.format(catalog_id, list(valid), e)
        return _report(msg)
def get_catalog_from_node(node):
    """Return a ``DataJson`` for ``node``, or ``None`` if none can be built.

    The catalog is first read from ``node.catalog_url``. If that fails for
    any reason, the JSON text stored in ``node.catalog`` is used instead.

    Args:
        node: Object exposing ``catalog_url``, ``catalog_format``,
            ``verify_ssl`` and ``catalog``.

    Returns:
        A ``DataJson`` instance, or ``None`` when the URL could not be read
        and the stored catalog is missing, malformed or empty.
    """
    try:
        url_check_timeout = TasksConfig.get_solo().url_check_timeout
        return DataJson(node.catalog_url,
                        catalog_format=node.catalog_format,
                        verify_ssl=node.verify_ssl,
                        url_check_timeout=url_check_timeout)
    except Exception:
        # Deliberate best-effort: whatever went wrong reading the URL, fall
        # back to the copy of the catalog stored on the node.
        try:
            stored_catalog = json.loads(node.catalog)
        except (TypeError, ValueError):
            # A missing (None) or malformed stored catalog means there is
            # nothing to fall back to; callers treat None as "unreachable".
            return None
        if not stored_catalog:
            return None
        return DataJson(stored_catalog)
def setUpTestData(cls):
    """Create the nodes and indicator fixtures shared by the test case.

    Stores on ``cls`` the loaded catalogs, the indicators computed directly
    through ``DataJson`` and a bare ``DataJson`` instance, then runs the
    ``indicadores`` management command with ``CENTRAL`` patched to the
    ``full_data.json`` sample.
    """
    HarvestingNode.objects.create(
        name='aName', url='harvest_url', apikey='apikey', enabled=True)

    indexable_nodes = (
        ('id1', 'full_data.json'),
        ('id2', 'minimum_data.json'),
    )
    for catalog_id, sample_name in indexable_nodes:
        Node.objects.create(catalog_id=catalog_id,
                            catalog_url=cls.get_sample(sample_name),
                            indexable=True)

    indexer_nodes = (
        ('idx1', 'indexador1', 'catalogo_justicia.json'),
        ('idx2', 'indexador2', 'full_data.json'),
    )
    for catalog_id, indexer_name, sample_name in indexer_nodes:
        HarvestingNode.objects.create(
            catalog_id=catalog_id,
            name=indexer_name,
            url=cls.get_sample(sample_name),
            apikey='apikey',
            enabled=True)

    generation_task = IndicatorsGenerationTask.objects.create()
    cls.catalogs = load_catalogs(generation_task, Node.objects.all())
    # These should be loaded from their path, not as a URL, hence
    # harvesting=False (not passed here; presumably the default).
    cls.indexing_catalogs = load_catalogs(
        generation_task, HarvestingNode.objects.all())

    central_catalog = DataJson(cls.get_sample('full_data.json'))
    cls.indicators, cls.network_indicators = \
        DataJson().generate_catalogs_indicators(
            cls.catalogs,
            central_catalog=central_catalog,
            identifier_search=True,
            broken_links=True)
    cls.indexing_indicators, _ = \
        DataJson().generate_catalogs_indicators(
            cls.indexing_catalogs,
            identifier_search=True,
            broken_links=True)

    tasks_config = TasksConfig.get_solo()
    tasks_config.indicators_url_check = True
    tasks_config.save()

    cls.dj = DataJson()
    central_target = 'monitoreo.apps.dashboard.indicators_tasks.CENTRAL'
    with patch(central_target, cls.get_sample('full_data.json')):
        call_command('indicadores')
def get_catalog_report(datajson):
    """Validate ``datajson`` using the configured federation URL check.

    Args:
        datajson: Object exposing ``validate_catalog``.

    Returns:
        Whatever ``datajson.validate_catalog`` returns when called with
        ``broken_links`` set to ``TasksConfig``'s ``federation_url_check``.
    """
    tasks_config = TasksConfig.get_solo()
    return datajson.validate_catalog(
        broken_links=tasks_config.federation_url_check)
def setUpTestData(cls):
    """Store the ``TasksConfig`` singleton and one indexable test node."""
    cls.tasks_config = TasksConfig.get_solo()
    node_fields = {
        'catalog_url': 'http://test.catalog.com',
        'indexable': True,
        'catalog_id': 'test_catalog',
    }
    cls.node = Node.objects.create(**node_fields)