Esempio n. 1
0
 def setUp(self):
     """Load both sample catalogs and keep one dataset and one distribution
     from each of them as fixtures."""
     series_catalog = DataJson(self.get_sample('time_series_data.json'))
     complete_catalog = DataJson(self.get_sample('full_data.json'))

     # Fixtures taken from the time series sample.
     self.ts_dataset = series_catalog.datasets[0]
     self.ts_distribution = series_catalog.distributions[1]

     # Fixtures taken from the full sample.
     self.non_ts_datasets = complete_catalog.datasets[0]
     self.non_ts_distribution = complete_catalog.distributions[0]
def generate_indicators(task):
    """Generate and store indicators for ``task``, then mark it finished.

    Steps, in order:

    1. Load the catalogs of the indexable nodes and compute their indicators
       against the central catalog (the ``data.json`` under the central
       node's URL, or ``CENTRAL`` when no usable central node exists).
    2. Save the per-catalog indicators and the network indicators.
    3. Do the same for the enabled harvesting nodes (per-catalog indicators
       only; the network indicators of that run are discarded).
    4. Create the default table columns when there are none.
    5. Call ``.delay()`` on the time series and CSV zip tasks.
    6. Refresh ``task``, set its status to ``FINISHED`` and stamp the
       finish time with the local time.
    """
    indicator_builder = DataJson()
    node_catalogs = load_catalogs(task, Node.objects.filter(indexable=True))

    try:
        central_catalog = urljoin(
            CentralNode.objects.get().node.url, 'data.json')
    except (CentralNode.DoesNotExist, AttributeError):
        # No central node row, or it lacks the attributes needed for a URL.
        central_catalog = CENTRAL

    node_indicators, network_indicators = \
        indicator_builder.generate_catalogs_indicators(
            node_catalogs, central_catalog, identifier_search=True)
    save_indicators(node_indicators, task)
    save_network_indics(network_indicators, 'RED', task)

    enabled_harvesters = HarvestingNode.objects.filter(enabled=True)
    harvester_catalogs = load_catalogs(
        task, enabled_harvesters, harvesting=True)
    harvester_indicators, _unused_network = \
        indicator_builder.generate_catalogs_indicators(
            harvester_catalogs, identifier_search=True)
    save_indicators(harvester_indicators, task, harvesting_nodes=True)

    # Create the default columns if they do not exist yet.
    if TableColumn.objects.count() == 0:
        init_columns()

    write_time_series_files.delay()
    zip_indicators_csv.delay()

    task.refresh_from_db()
    task.status = IndicatorsGenerationTask.FINISHED
    task.finished = timezone.localtime()
    task.save()
Esempio n. 3
0
 def test_get_dataset_lists_return_correct_ids(self):
     """The valid set follows identifier changes and the indexable flag."""
     renamed_id = '99db6631-d1c9-470b-a73e-c62daa32c777'
     untouched_id = '99db6631-d1c9-470b-a73e-c62daa32c420'
     node1 = Node.objects.get(catalog_id='id1')
     datajson = DataJson(self.get_sample('full_data.json'))

     # Initially both datasets are reported as valid.
     valid, _, _ = sort_datasets_by_condition(node1, datajson)
     self.assertSetEqual({renamed_id, untouched_id}, valid)

     # Rename one dataset both in the database and in the catalog.
     db_dataset = Dataset.objects.get(
         catalog__identifier='id1', identifier=renamed_id)
     db_dataset.identifier = 'new_identifier'
     db_dataset.save()
     catalog_dataset = datajson.get_dataset(identifier=renamed_id)
     catalog_dataset['identifier'] = 'new_identifier'

     valid, _, _ = sort_datasets_by_condition(node1, datajson)
     self.assertSetEqual({'new_identifier', untouched_id}, valid)

     # Once the renamed dataset stops being indexable it is no longer valid.
     db_dataset = Dataset.objects.get(
         catalog__identifier='id1', identifier='new_identifier')
     db_dataset.indexable = False
     db_dataset.save()

     valid, _, _ = sort_datasets_by_condition(node1, datajson)
     self.assertSetEqual({untouched_id}, valid)
Esempio n. 4
0
 def test_unindexable_datasets_dont_get_harvested(self, mock_harvest):
     """With every dataset unindexable, each catalog is harvested with an
     empty dataset list."""
     Dataset.objects.all().update(indexable=False)
     mock_harvest.return_value = ([], {})
     federation_run()

     # (sample file, catalog id) for each expected harvest call.
     expected_harvests = (
         ('full_data.json', 'id1'),
         ('minimum_data.json', 'id2'),
         ('missing_dataset_title.json', 'id3'),
     )
     for sample_name, catalog_id in expected_harvests:
         mock_harvest.assert_any_call(
             DataJson(self.get_sample(sample_name)),
             'harvest_url',
             'apikey',
             catalog_id,
             [],
             origin_tz=DEFAULT_TIMEZONE,
             dst_tz=DEFAULT_TIMEZONE)
    def setUpTestData(cls):
        """Create the shared fixtures: environment type, sender address, two
        indexable nodes with one admin each, a report task with its
        generator, and an errors-only validation report."""
        # Mock environment.
        settings.ENV_TYPE = 'tst'

        email_config = DynamicEmailConfiguration.get_solo()
        email_config.from_email = '*****@*****.**'
        email_config.save()

        # Mock nodes.
        errors_sample = cls.get_sample('several_assorted_errors.json')
        cls.node1 = Node.objects.create(
            catalog_id='id1', catalog_url=errors_sample, indexable=True)
        full_sample = cls.get_sample('full_data.json')
        cls.node2 = Node.objects.create(
            catalog_id='id2', catalog_url=full_sample, indexable=True)

        # One non-staff admin per node.
        cls.node1.admins.create(
            username='******',
            password='******',
            email='*****@*****.**',
            is_staff=False)
        cls.node2.admins.create(
            username='******',
            password='******',
            email='*****@*****.**',
            is_staff=False)

        report_task = ValidationReportTask.objects.create()
        cls.report_task = report_task
        cls.validation_report_generator = ValidationReportGenerator(
            report_task)

        errors_catalog = DataJson(
            cls.get_sample('several_assorted_errors.json'))
        cls.report = errors_catalog.validate_catalog(only_errors=True)
Esempio n. 6
0
    def generate_email(self, node=None):
        """Build the validation report mail for ``node``.

        Returns ``None`` when ``node`` is falsy, or when the catalog's
        validation status is ``'OK'`` (an info message is then recorded
        through the report task). Otherwise returns the rendered mail with
        its subject set and, attached, the contents of a temporary ``.xlsx``
        file written by ``validate_catalog(export_path=...)``.
        """
        if not node:
            # No staff mail is generated.
            return None

        node_catalog = DataJson(
            node.catalog_url, catalog_format=node.catalog_format)
        report = node_catalog.validate_catalog(only_errors=True)
        timestamp = self._format_date(timezone.now())

        status = report['status']
        if status == 'OK':
            self.report_task.info(
                self.report_task,
                "Catálogo {} válido.".format(node.catalog_id))
            return None

        errors = report['error']
        mail = self.render_templates({
            'validation_time': timestamp,
            'status': status,
            'catalog': errors['catalog'],
            'dataset_list': errors['dataset'],
        })
        mail.subject = u'[{}] Validacion de catálogo {}: {}'.format(
            settings.ENV_TYPE, node.catalog_id, timestamp)

        # The report is exported to a temporary file and read back so it can
        # be attached to the mail.
        with NamedTemporaryFile(suffix='.xlsx') as report_file:
            node_catalog.validate_catalog(export_path=report_file.name)
            attachment_name = 'reporte_validacion_{}.xlsx'.format(
                node.catalog_id)
            mail.attach(attachment_name, report_file.read())

        return mail
 def test_xlsx_write_missing_optional_fields_and_themes(self):
     """A minimal catalog written to XLSX and read back has no 'theme' on
     its first dataset and no 'field' on its first distribution."""
     sample_path = os.path.join(self.SAMPLES_DIR, "minimum_data.json")
     with NamedTemporaryFile(suffix='.xlsx') as xlsx_file:
         DataJson(sample_path).to_xlsx(xlsx_file.name)
         # Read back while the temporary file still exists.
         reloaded = DataJson(xlsx_file.name)

     first_dataset = reloaded.datasets[0]
     first_distribution = reloaded.distributions[0]
     self.assertTrue('theme' not in first_dataset)
     self.assertTrue('field' not in first_distribution)
Esempio n. 8
0
def get_distribution_metadata(resource_id, package_id):
    """Return the distribution identified by ``resource_id``.

    The data.json contents are HTML-unescaped, parsed as JSON and wrapped in
    a ``DataJson`` before the lookup. ``package_id`` is accepted but not used
    by this function.
    """
    # 'datajson_actions' is imported inside the function to avoid a circular
    # dependency with 'config_controller'.
    import ckanext.gobar_theme.lib.datajson_actions as datajson_actions

    raw_contents = datajson_actions.get_data_json_contents()
    unescaped_contents = HTMLParser().unescape(raw_contents)
    catalog = DataJson(json.loads(unescaped_contents))
    return catalog.get_distribution(resource_id)
Esempio n. 9
0
 def test_get_dataset_does_not_return_invalid_datasets(self):
     """A dataset is reported as invalid until a title is set on it, after
     which it is reported as valid."""
     dataset_id = '99db6631-d1c9-470b-a73e-c62daa32c777'
     node = Node.objects.get(catalog_id='id3')
     datajson = DataJson(self.get_sample('missing_dataset_title.json'))

     valid, invalid, _ = sort_datasets_by_condition(node, datajson)
     self.assertSetEqual(set(), valid)
     self.assertSetEqual({dataset_id}, invalid)

     # Set a title on the dataset and sort again.
     datajson.get_dataset(identifier=dataset_id)['title'] = 'aTitle'

     valid, invalid, _ = sort_datasets_by_condition(node, datajson)
     self.assertSetEqual({dataset_id}, valid)
     self.assertSetEqual(set(), invalid)
Esempio n. 10
0
def get_catalog_from_node(node):
    """Return a ``DataJson`` for ``node``, or ``None`` if none can be built.

    The catalog is first read from ``node.catalog_url`` (using
    ``node.catalog_format``). If that fails for any reason, the JSON text
    stored in ``node.catalog`` is used instead.

    Returns ``None`` when the stored copy is missing, is not valid JSON, or
    decodes to an empty value.
    """
    try:
        return DataJson(node.catalog_url,
                        catalog_format=node.catalog_format)
    except Exception:
        # Deliberately broad: any failure reading the catalog from its URL
        # falls back to the copy stored on the node.
        try:
            stored_catalog = json.loads(node.catalog)
        except (TypeError, ValueError):
            # node.catalog is not a string (e.g. None) or is not valid JSON:
            # there is no usable fallback.
            return None
        return DataJson(stored_catalog) if stored_catalog else None
def get_catalog_from_node(node):
    """Return a ``DataJson`` for ``node``, or ``None`` if none can be built.

    The catalog is first read from ``node.catalog_url`` using the node's
    ``catalog_format`` and ``verify_ssl`` settings and the
    ``url_check_timeout`` taken from ``TasksConfig``. If that fails for any
    reason, the JSON text stored in ``node.catalog`` is used instead.

    Returns ``None`` when the stored copy is missing, is not valid JSON, or
    decodes to an empty value.
    """
    try:
        url_check_timeout = TasksConfig.get_solo().url_check_timeout
        return DataJson(node.catalog_url,
                        catalog_format=node.catalog_format,
                        verify_ssl=node.verify_ssl,
                        url_check_timeout=url_check_timeout)
    except Exception:
        # Deliberately broad: any failure reading the catalog from its URL
        # falls back to the copy stored on the node.
        try:
            stored_catalog = json.loads(node.catalog)
        except (TypeError, ValueError):
            # node.catalog is not a string (e.g. None) or is not valid JSON:
            # there is no usable fallback.
            return None
        return DataJson(stored_catalog) if stored_catalog else None
Esempio n. 12
0
 def test_dataset_list_returns_empty_if_no_related_datasets(self):
     """An unsaved node with catalog id 'id4' yields an empty valid set."""
     unsaved_node = Node(
         catalog_id='id4',
         catalog_url=self.get_sample('full_data.json'),
         indexable=True)
     catalog = DataJson(self.get_sample('full_data.json'))

     valid, _, _ = sort_datasets_by_condition(unsaved_node, catalog)

     self.assertSetEqual(set(), valid)
Esempio n. 13
0
 def test_get_dataset_does_not_return_missing_datasets(self):
     """A dataset removed from the catalog is reported as missing rather
     than valid."""
     node = Node.objects.get(catalog_id='id1')
     datajson = DataJson(self.get_sample('full_data.json'))
     # Drop the first dataset from the catalog.
     datajson.datasets.pop(0)

     valid, _, missing = sort_datasets_by_condition(node, datajson)

     remaining_id = '99db6631-d1c9-470b-a73e-c62daa32c420'
     removed_id = '99db6631-d1c9-470b-a73e-c62daa32c777'
     self.assertSetEqual({remaining_id}, valid)
     self.assertSetEqual({removed_id}, missing)
 def setUp(cls):
     """Ensure the working directories exist and create an empty DataJson."""
     for directory in (cls.SAMPLES_DIR, cls.RESULTS_DIR, cls.TEMP_DIR):
         ensure_dir_exists(directory)
     cls.dj = DataJson()
     # unittest display settings.
     cls.maxDiff, cls.longMessage = None, True
Esempio n. 15
0
 def test_no_title_nor_identifier_catalog(self):
     """Indicators fall back to 'no-title' and 'no-id' for a catalog that
     has neither a title nor an identifier."""
     sample_path = os.path.join(
         self.SAMPLES_DIR, "missing_catalog_title.json")
     catalog = DataJson(sample_path)
     del catalog['identifier']

     catalog_indicators, _network = \
         self.dj.generate_catalogs_indicators(catalog)[0:2]
     first_indicators = catalog_indicators[0]

     assert_equal(first_indicators['title'], 'no-title')
     assert_equal(first_indicators['identifier'], 'no-id')
Esempio n. 16
0
 def test_federation_validation_takes_value_from_config(
         self, mock_validation):
     """With ``federation_url_check`` set to False in the tasks config,
     validation is called with ``broken_links=False``."""
     tasks_config = TasksConfig.get_solo()
     tasks_config.federation_url_check = False
     tasks_config.save()

     node = Node.objects.get(catalog_id='id3')
     datajson = DataJson(self.get_sample('missing_dataset_title.json'))
     report = get_catalog_report(datajson)
     sort_datasets_by_condition(node, report)

     mock_validation.assert_called_with(broken_links=False)
Esempio n. 17
0
 def setUp(cls):
     """Load the full sample catalog twice (as DataJson and through
     ``readers.read_catalog``) and start an HTTP mocker."""
     cls.dj = DataJson(cls.get_sample("full_data.json"))
     cls.catalog = readers.read_catalog(cls.get_sample("full_data.json"))
     cls.maxDiff, cls.longMessage = None, True

     http_mocker = requests_mock.Mocker()
     cls.requests_mock = http_mocker
     http_mocker.start()
     # GET requests are registered with real_http=True; HEAD requests are
     # answered with status 200.
     http_mocker.get(requests_mock.ANY, real_http=True)
     http_mocker.head(requests_mock.ANY, status_code=200)
Esempio n. 18
0
 def test_invalid_datasets_dont_get_harvested(self, mock_harvest):
     """The catalog with a missing dataset title is harvested with an empty
     dataset list."""
     mock_harvest.return_value = ([], {})
     federation_run()

     expected_catalog = DataJson(
         self.get_sample('missing_dataset_title.json'))
     no_datasets = []
     mock_harvest.assert_any_call(
         expected_catalog,
         'harvest_url',
         'apikey',
         'id3',
         no_datasets,
         origin_tz=DEFAULT_TIMEZONE,
         dst_tz=DEFAULT_TIMEZONE)
Esempio n. 19
0
 def test_indexable_datasets_get_harvested(self, mock_harvest):
     """The minimal catalog is harvested with its one dataset identifier."""
     mock_harvest.return_value = ([], {})
     federation_run()

     expected_catalog = DataJson(self.get_sample('minimum_data.json'))
     expected_datasets = ['99db6631-d1c9-470b-a73e-c62daa32c777']
     mock_harvest.assert_any_call(
         expected_catalog,
         'harvest_url',
         'apikey',
         'id2',
         expected_datasets,
         origin_tz=DEFAULT_TIMEZONE,
         dst_tz=DEFAULT_TIMEZONE)
    def test_read_written_xlsx_catalog(self):
        """read_catalog can read an XLSX created by write_xlsx_catalog."""
        source_path = os.path.join(self.SAMPLES_DIR, "catalogo_justicia.json")
        xlsx_path = os.path.join(self.TEMP_DIR, "xlsx_catalog.xlsx")

        source_catalog = DataJson(source_path)
        pydatajson.writers.write_xlsx_catalog(source_catalog, xlsx_path)

        # Reading the written file back must not raise NonParseableCatalog.
        try:
            pydatajson.readers.read_xlsx_catalog(xlsx_path)
        except NonParseableCatalog:
            self.fail("No se pudo leer archivo XLSX")
Esempio n. 21
0
 def test_federation_run_receives_node_tz_as_origin_tz(self, mock_harvest):
     """The node's timezone is passed to the harvest call as ``origin_tz``."""
     node_timezone = "Africa/Abidjan"
     node = Node.objects.get(catalog_id='id2')
     node.timezone = node_timezone
     node.save()

     mock_harvest.return_value = ([], {})
     federation_run()

     expected_catalog = DataJson(self.get_sample('minimum_data.json'))
     mock_harvest.assert_any_call(
         expected_catalog,
         'harvest_url',
         'apikey',
         'id2',
         ['99db6631-d1c9-470b-a73e-c62daa32c777'],
         origin_tz=node_timezone,
         dst_tz=DEFAULT_TIMEZONE)
Esempio n. 22
0
 def test_federation_validation_is_false_by_default(self, mock_validation):
     """Without touching the config, validation is called with
     ``broken_links=False``."""
     node = Node.objects.get(catalog_id='id3')
     sample_path = self.get_sample('missing_dataset_title.json')
     report = get_catalog_report(DataJson(sample_path))

     sort_datasets_by_condition(node, report)

     mock_validation.assert_called_with(broken_links=False)
    def test_read_write_both_formats_yields_the_same(self):
        """A catalog saved as JSON and as XLSX reads back as the same catalog.

        Runs once for each source format ('xlsx' and 'json') of the
        ``catalogo_justicia`` sample.
        """
        saved_json = os.path.join(self.TEMP_DIR, "saved_catalog.json")
        saved_xlsx = os.path.join(self.TEMP_DIR, "saved_catalog.xlsx")
        other_xlsx = os.path.join(self.TEMP_DIR, "otro.xlsx")

        for suffix in ['xlsx', 'json']:
            catalog = DataJson(
                os.path.join(self.SAMPLES_DIR, "catalogo_justicia." + suffix))
            catalog.to_json(saved_json)
            catalog.to_xlsx(saved_xlsx)

            # Read each saved file back. The JSON copy must come from the
            # JSON file; loading the XLSX twice would only compare the XLSX
            # output with itself.
            catalog_json = DataJson(saved_json)
            catalog_xlsx = DataJson(saved_xlsx)
            self.assertEqual(catalog_json, catalog_xlsx)

            # The to_xlsx() call generates the indexes on the original
            # catalog; applying it to the reloaded catalogs should leave
            # them equal to the original.
            catalog_xlsx.to_xlsx(other_xlsx)
            catalog_json.to_xlsx(other_xlsx)

            self.assertEqual(catalog_json, catalog)
            self.assertEqual(catalog_xlsx, catalog)
Esempio n. 24
0
 def setUpClass(cls):
     """Build the expected catalog from the sample file and the catalog
     under test from ``read_ckan_catalog``."""
     sample_name = 'full_data.json'
     expected_catalog = DataJson(get_sample(sample_name))
     cls.expected_dj = expected_catalog
     ckan_catalog = read_ckan_catalog(sample_name)
     cls.dj = DataJson(ckan_catalog)
Esempio n. 25
0
 def __init__(self, sample):
     """Store a DataJson built from the sample resolved by ``get_sample``."""
     sample_location = get_sample(sample)
     self.data_json = DataJson(sample_location)
Esempio n. 26
0
 def setUp(cls):
     """Load the full sample catalog both as a DataJson and through
     ``readers.read_catalog``."""
     datajson_source = cls.get_sample("full_data.json")
     cls.dj = DataJson(datajson_source)
     reader_source = cls.get_sample("full_data.json")
     cls.catalog = readers.read_catalog(reader_source)
     # unittest display settings.
     cls.maxDiff, cls.longMessage = None, True