def send_validations(node=None):
    """Create a validation report task and enqueue ``validation_run`` for it.

    The job goes to the ``'reports'`` queue with the timeout read from
    ``TasksConfig.validation_timeout``.

    Args:
        node: forwarded untouched to ``validation_run`` as its ``node``
            keyword argument.
    """
    report_task = models.ValidationReportTask.objects.create()
    job_timeout = TasksConfig.get_solo().validation_timeout
    enqueue_job_with_timeout(
        'reports',
        validation_run,
        job_timeout,
        args=(report_task,),
        kwargs=dict(node=node),
    )
 def test_federation_validation_takes_value_from_config(
         self, mock_validation):
     # Checks that the catalog validation is invoked with
     # ``broken_links=False`` once ``federation_url_check`` is turned off.
     target_node = Node.objects.get(catalog_id='id3')
     sample_catalog = DataJson(
         self.get_sample('missing_dataset_title.json'))

     # Persist the flag so the code under test reads it from the config.
     tasks_config = TasksConfig.get_solo()
     tasks_config.federation_url_check = False
     tasks_config.save()

     report = get_catalog_report(sample_catalog)
     sort_datasets_by_condition(target_node, report)

     mock_validation.assert_called_with(broken_links=False)
Example #3
0
def indicators_run(_node=None):
    """Create an indicators generation task and enqueue its job.

    The django_datajsonar task framework passes a node as a parameter in
    every case; this task does not use that parameter for now.
    """
    generation_task = IndicatorsGenerationTask.objects.create()
    job_timeout = TasksConfig.get_solo().indicators_timeout
    enqueue_job_with_timeout(
        'indicators',
        generate_indicators,
        job_timeout,
        args=(generation_task,),
    )
Example #4
0
def generate_indicators(task):
    """Compute and store indicators for indexable nodes and harvesting nodes.

    Steps, in order:
      1. Load the catalogs of the indexable nodes and compute their
         indicators against the central catalog; save both the per-catalog
         and the network (``'RED'``) indicators.
      2. Load the catalogs of the enabled harvesting nodes and save their
         indicators with ``harvesting_nodes=True``.
      3. Create the default table columns when none exist.
      4. Trigger the time-series and CSV-zip jobs via ``.delay()``.
      5. Mark ``task`` as finished.

    Args:
        task: the ``IndicatorsGenerationTask`` being executed.
    """
    link_timeout = TasksConfig.get_solo().url_check_timeout
    indicator_builder = DataJson(url_check_timeout=link_timeout)
    node_catalogs = load_catalogs(task, Node.objects.filter(indexable=True))
    check_links = TasksConfig.get_solo().indicators_url_check
    link_threads = TasksConfig.get_solo().url_check_threads

    # Fall back to the CENTRAL constant when no central node is configured
    # (or it lacks the attributes needed to build its data.json URL).
    try:
        central_catalog = urljoin(
            CentralNode.objects.get().node.url, 'data.json')
    except (CentralNode.DoesNotExist, AttributeError):
        central_catalog = CENTRAL

    # Options shared by both indicator computations.
    shared_options = {
        'identifier_search': True,
        'broken_links': check_links,
        'broken_links_threads': link_threads,
    }

    node_indicators, network_indicators = \
        indicator_builder.generate_catalogs_indicators(
            node_catalogs, central_catalog, **shared_options)
    save_indicators(node_indicators, task)
    save_network_indics(network_indicators, 'RED', task)

    harvester_catalogs = load_catalogs(
        task, HarvestingNode.objects.filter(enabled=True), harvesting=True)
    harvester_indicators, _ = \
        indicator_builder.generate_catalogs_indicators(
            harvester_catalogs, **shared_options)
    save_indicators(harvester_indicators, task, harvesting_nodes=True)

    # Create the default columns if they do not exist.
    if not TableColumn.objects.count():
        init_columns()

    write_time_series_files.delay()
    zip_indicators_csv.delay()

    # Reload before writing so the final save works on the stored row.
    task.refresh_from_db()
    task.status = IndicatorsGenerationTask.FINISHED
    task.finished = timezone.localtime()
    task.save()
Example #5
0
    def generate_email(self, node=None):
        """Build the validation report mail for ``node``.

        Args:
            node: the node whose catalog is validated. When falsy, no mail
                is produced.

        Returns:
            The rendered mail, with the subject set and an ``.xlsx``
            validation report attached, or ``None`` when ``node`` is falsy
            or the catalog validates with status ``'OK'``.
        """
        if not node:
            # No staff mail is generated.
            return None

        link_timeout = TasksConfig.get_solo().url_check_timeout
        catalog = DataJson(
            node.catalog_url,
            catalog_format=node.catalog_format,
            verify_ssl=node.verify_ssl,
            url_check_timeout=link_timeout,
        )
        check_links = \
            TasksConfig.get_solo().get_validation_config_for_node(node)
        link_threads = TasksConfig.get_solo().url_check_threads
        report = catalog.validate_catalog(
            only_errors=True,
            broken_links=check_links,
            broken_links_threads=link_threads,
        )
        validated_at = self._format_date(timezone.now())

        if report['status'] == 'OK':
            # A valid catalog only leaves a note on the report task.
            self.report_task.info(
                self.report_task,
                "Catálogo {} válido.".format(node.catalog_id))
            return None

        errors = report['error']
        mail = self.render_templates({
            'validation_time': validated_at,
            'status': report['status'],
            'catalog': errors['catalog'],
            'dataset_list': errors['dataset'],
        })
        mail.subject = '[{}] Validacion de catálogo {}: {}'.format(
            settings.ENV_TYPE, node.catalog_id, validated_at)

        # Export the report to a temporary spreadsheet and attach its
        # content; the file is removed when the ``with`` block exits.
        with NamedTemporaryFile(suffix='.xlsx') as xlsx_file:
            catalog.validate_catalog(export_path=xlsx_file.name)
            attachment_name = 'reporte_validacion_{}.xlsx'.format(
                node.catalog_id)
            mail.attach(attachment_name, xlsx_file.read())

        return mail
def federate_catalog(node, portal_url, apikey, task_id):
    """Harvest the valid datasets of ``node``'s catalog into a CKAN portal.

    Every outcome (unreachable catalog, successful harvest, or an error
    raised while harvesting) is recorded on the federation task and sent to
    the logger as a warning.

    Args:
        node: node whose catalog is federated.
        portal_url: passed to ``harvest_catalog_to_ckan``.
        apikey: passed to ``harvest_catalog_to_ckan``.
        task_id: primary key of the ``FederationTask`` to log into.

    Returns:
        The log message that was recorded.
    """
    def _record(text):
        # Store the message on the task, mirror it to the logger and hand
        # it back so callers can ``return _record(...)``.
        FederationTask.info(task, text)
        LOGGER.warning(text)
        return text

    task = FederationTask.objects.get(pk=task_id)
    catalog = get_catalog_from_node(node)
    catalog_id = node.catalog_id
    log = f"Catálogo: {node.catalog_id}\n"

    if not catalog:
        log += UNREACHABLE_CATALOG.format(node.catalog_id)
        return _record(log)

    catalog.generate_distribution_ids()
    check_links = TasksConfig.get_solo().federation_url_check
    link_threads = TasksConfig.get_solo().url_check_threads
    catalog_report = catalog.validate_catalog(
        broken_links=check_links,
        broken_links_threads=link_threads)
    valid, invalid, missing = sort_datasets_by_condition(node, catalog_report)

    try:
        harvested_ids, federation_errors = harvest_catalog_to_ckan(
            catalog,
            portal_url,
            apikey,
            catalog_id,
            list(valid),
            origin_tz=node.timezone,
            dst_tz=task.harvesting_node.timezone)
        log += generate_task_log(catalog_report, catalog_id, invalid,
                                 missing, harvested_ids, federation_errors)
        return _record(log)
    except Exception as error:
        # Task boundary: any failure is turned into a logged error message
        # instead of propagating.
        log += TASK_ERROR.format(catalog_id, list(valid), error)
        return _record(log)
def get_catalog_from_node(node):
    """Build a ``DataJson`` catalog for ``node``.

    The catalog is first read from ``node.catalog_url``. If that fails for
    any reason, the JSON text stored in ``node.catalog`` is used instead.

    Args:
        node: object exposing ``catalog_url``, ``catalog_format``,
            ``verify_ssl`` and ``catalog``.

    Returns:
        A ``DataJson`` instance, or ``None`` when the URL cannot be read and
        the stored copy is missing, empty or not valid JSON.
    """
    try:
        url_check_timeout = TasksConfig.get_solo().url_check_timeout
        return DataJson(node.catalog_url,
                        catalog_format=node.catalog_format,
                        verify_ssl=node.verify_ssl,
                        url_check_timeout=url_check_timeout)
    except Exception:
        # Deliberately broad: whatever went wrong reading the live catalog,
        # fall back to the copy stored on the node.
        try:
            stored_catalog = json.loads(node.catalog)
        except (TypeError, ValueError):
            # ``node.catalog`` is None (TypeError) or not valid JSON
            # (JSONDecodeError is a ValueError): there is nothing usable,
            # so report "no catalog" instead of crashing the caller.
            return None

        if not stored_catalog:
            return None
        return DataJson(stored_catalog)
    def setUpTestData(cls):
        # Fixture: one plain harvesting node, two indexable nodes and two
        # harvesting nodes backed by sample catalogs; then the expected
        # indicators are computed directly and the 'indicadores' command is
        # run with CENTRAL patched to a sample file.
        HarvestingNode.objects.create(
            name='aName', url='harvest_url', apikey='apikey', enabled=True)

        for node_id, sample in (('id1', 'full_data.json'),
                                ('id2', 'minimum_data.json')):
            Node.objects.create(catalog_id=node_id,
                                catalog_url=cls.get_sample(sample),
                                indexable=True)

        for harvester_id, label, sample in (
                ('idx1', 'indexador1', 'catalogo_justicia.json'),
                ('idx2', 'indexador2', 'full_data.json')):
            HarvestingNode.objects.create(
                catalog_id=harvester_id,
                name=label,
                url=cls.get_sample(sample),
                apikey='apikey',
                enabled=True)

        generation_task = IndicatorsGenerationTask.objects.create()
        cls.catalogs = load_catalogs(generation_task, Node.objects.all())
        # They should be loaded by path, not as a URL, so ``harvesting`` is
        # left at its default here.
        cls.indexing_catalogs = load_catalogs(
            generation_task, HarvestingNode.objects.all())

        central_catalog = DataJson(cls.get_sample('full_data.json'))
        node_results = DataJson().generate_catalogs_indicators(
            cls.catalogs,
            central_catalog=central_catalog,
            identifier_search=True,
            broken_links=True)
        cls.indicators, cls.network_indicators = node_results

        indexing_results = DataJson().generate_catalogs_indicators(
            cls.indexing_catalogs,
            identifier_search=True,
            broken_links=True)
        cls.indexing_indicators, _ = indexing_results

        # Enable URL checking before the command runs.
        tasks_config = TasksConfig.get_solo()
        tasks_config.indicators_url_check = True
        tasks_config.save()

        cls.dj = DataJson()
        central_sample = cls.get_sample('full_data.json')
        with patch('monitoreo.apps.dashboard.indicators_tasks.CENTRAL',
                   central_sample):
            call_command('indicadores')
def get_catalog_report(datajson):
    """Validate ``datajson`` using the configured federation URL check.

    Args:
        datajson: catalog object exposing ``validate_catalog``.

    Returns:
        Whatever ``datajson.validate_catalog`` returns when called with
        ``broken_links`` set to ``TasksConfig.federation_url_check``.
    """
    return datajson.validate_catalog(
        broken_links=TasksConfig.get_solo().federation_url_check)
Example #10
0
 def setUpTestData(cls):
     # Shared fixtures: the tasks configuration singleton and one
     # indexable node pointing at a dummy catalog URL.
     cls.tasks_config = TasksConfig.get_solo()
     node_fields = {
         'catalog_url': 'http://test.catalog.com',
         'indexable': True,
         'catalog_id': 'test_catalog',
     }
     cls.node = Node.objects.create(**node_fields)