def setUp(self):
    ts_catalog = DataJson(self.get_sample('time_series_data.json'))
    full_catalog = DataJson(self.get_sample('full_data.json'))
    self.ts_dataset = ts_catalog.datasets[0]
    self.non_ts_datasets = full_catalog.datasets[0]
    self.ts_distribution = ts_catalog.distributions[1]
    self.non_ts_distribution = full_catalog.distributions[0]
def generate_indicators(task):
    data_json = DataJson()
    catalogs = load_catalogs(task, Node.objects.filter(indexable=True))
    try:
        central_node = CentralNode.objects.get()
        central_catalog = urljoin(central_node.node.url, 'data.json')
    except (CentralNode.DoesNotExist, AttributeError):
        central_catalog = CENTRAL
    indics, network_indics = data_json.generate_catalogs_indicators(
        catalogs, central_catalog, identifier_search=True)
    save_indicators(indics, task)
    save_network_indics(network_indics, 'RED', task)
    federator_catalogs = load_catalogs(
        task, HarvestingNode.objects.filter(enabled=True), harvesting=True)
    federator_indics, _ = data_json.generate_catalogs_indicators(
        federator_catalogs, identifier_search=True)
    save_indicators(federator_indics, task, harvesting_nodes=True)
    # Create the default columns if they don't exist yet
    if not TableColumn.objects.count():
        init_columns()
    write_time_series_files.delay()
    zip_indicators_csv.delay()
    task.refresh_from_db()
    task.status = IndicatorsGenerationTask.FINISHED
    task.finished = timezone.localtime()
    task.save()
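# A minimal usage sketch for generate_indicators() above, assuming the task
# row is created the same way the tests in this suite create theirs and that
# this runs inside a worker with Django configured. The wrapper name
# run_indicators_generation is hypothetical.
def run_indicators_generation():
    # Track the run with a fresh task row; generate_indicators() marks it
    # FINISHED itself once the CSV/zip jobs have been enqueued.
    task = IndicatorsGenerationTask.objects.create()
    generate_indicators(task)
    return task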
def test_get_dataset_lists_return_correct_ids(self):
    node1 = Node.objects.get(catalog_id='id1')
    datajson = DataJson(self.get_sample('full_data.json'))
    valid, _, _ = sort_datasets_by_condition(node1, datajson)
    self.assertSetEqual(
        {
            '99db6631-d1c9-470b-a73e-c62daa32c777',
            '99db6631-d1c9-470b-a73e-c62daa32c420'
        },
        valid)
    dataset = Dataset.objects.get(
        catalog__identifier='id1',
        identifier='99db6631-d1c9-470b-a73e-c62daa32c777')
    dataset.identifier = 'new_identifier'
    dataset.save()
    dataset = datajson.get_dataset(
        identifier='99db6631-d1c9-470b-a73e-c62daa32c777')
    dataset['identifier'] = 'new_identifier'
    valid, _, _ = sort_datasets_by_condition(node1, datajson)
    self.assertSetEqual(
        {'new_identifier', '99db6631-d1c9-470b-a73e-c62daa32c420'}, valid)
    dataset = Dataset.objects.get(catalog__identifier='id1',
                                  identifier='new_identifier')
    dataset.indexable = False
    dataset.save()
    valid, _, _ = sort_datasets_by_condition(node1, datajson)
    self.assertSetEqual({'99db6631-d1c9-470b-a73e-c62daa32c420'}, valid)
def test_unindexable_datasets_dont_get_harvested(self, mock_harvest):
    Dataset.objects.all().update(indexable=False)
    mock_harvest.return_value = ([], {})
    federation_run()
    mock_harvest.assert_any_call(
        DataJson(self.get_sample('full_data.json')),
        'harvest_url', 'apikey', 'id1', [],
        origin_tz=DEFAULT_TIMEZONE, dst_tz=DEFAULT_TIMEZONE)
    mock_harvest.assert_any_call(
        DataJson(self.get_sample('minimum_data.json')),
        'harvest_url', 'apikey', 'id2', [],
        origin_tz=DEFAULT_TIMEZONE, dst_tz=DEFAULT_TIMEZONE)
    mock_harvest.assert_any_call(
        DataJson(self.get_sample('missing_dataset_title.json')),
        'harvest_url', 'apikey', 'id3', [],
        origin_tz=DEFAULT_TIMEZONE, dst_tz=DEFAULT_TIMEZONE)
def setUpTestData(cls):
    # set mock env
    settings.ENV_TYPE = 'tst'
    config = DynamicEmailConfiguration.get_solo()
    config.from_email = '*****@*****.**'
    config.save()
    # set mock nodes
    cls.node1 = Node.objects.create(
        catalog_id='id1',
        catalog_url=cls.get_sample('several_assorted_errors.json'),
        indexable=True)
    cls.node2 = Node.objects.create(
        catalog_id='id2',
        catalog_url=cls.get_sample('full_data.json'),
        indexable=True)
    cls.node1.admins.create(username='******', password='******',
                            email='*****@*****.**', is_staff=False)
    cls.node2.admins.create(username='******', password='******',
                            email='*****@*****.**', is_staff=False)
    cls.report_task = ValidationReportTask.objects.create()
    cls.validation_report_generator = ValidationReportGenerator(
        cls.report_task)
    catalog = DataJson(cls.get_sample('several_assorted_errors.json'))
    cls.report = catalog.validate_catalog(only_errors=True)
def generate_email(self, node=None):
    if not node:
        # Does not generate the staff email
        return None
    catalog = DataJson(node.catalog_url, catalog_format=node.catalog_format)
    validation = catalog.validate_catalog(only_errors=True)
    validation_time = self._format_date(timezone.now())
    if validation['status'] == 'OK':
        msg = "Catálogo {} válido.".format(node.catalog_id)
        self.report_task.info(self.report_task, msg)
        return None
    context = {
        'validation_time': validation_time,
        'status': validation['status'],
        'catalog': validation['error']['catalog'],
        'dataset_list': validation['error']['dataset']
    }
    mail = self.render_templates(context)
    subject = u'[{}] Validacion de catálogo {}: {}'.format(
        settings.ENV_TYPE, node.catalog_id, validation_time)
    mail.subject = subject
    with NamedTemporaryFile(suffix='.xlsx') as tmpfile:
        catalog.validate_catalog(export_path=tmpfile.name)
        mail.attach('reporte_validacion_{}.xlsx'.format(node.catalog_id),
                    tmpfile.read())
    return mail
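# A minimal sketch of how generate_email() is meant to be driven, mirroring
# the fixtures from setUpTestData() above; the .send() call assumes
# render_templates() returns a standard Django email message object.
generator = ValidationReportGenerator(ValidationReportTask.objects.create())
mail = generator.generate_email(node=Node.objects.get(catalog_id='id1'))
if mail is not None:
    # None means the catalog validated cleanly (or no node was given)
    mail.send()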
def test_xlsx_write_missing_optional_fields_and_themes(self):
    with NamedTemporaryFile(suffix='.xlsx') as tempfile:
        catalog = DataJson(
            os.path.join(self.SAMPLES_DIR, "minimum_data.json"))
        catalog.to_xlsx(tempfile.name)
        written_datajson = DataJson(tempfile.name)
        written_dataset = written_datajson.datasets[0]
        written_distribution = written_datajson.distributions[0]
        self.assertNotIn('theme', written_dataset)
        self.assertNotIn('field', written_distribution)
def get_distribution_metadata(resource_id, package_id):
    # Import 'datajson_actions' inside the function to avoid a circular
    # dependency with 'config_controller'
    import ckanext.gobar_theme.lib.datajson_actions as datajson_actions
    json_dict = datajson_actions.get_data_json_contents()
    parser = HTMLParser()
    json_dict = parser.unescape(json_dict)
    json_dict = json.loads(json_dict)
    datajson = DataJson(json_dict)
    dist = datajson.get_distribution(resource_id)
    return dist
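# A minimal usage sketch for the CKAN helper above; the resource and package
# ids are placeholders, not real identifiers, and downloadURL is a standard
# data.json distribution field.
distribution = get_distribution_metadata('some-resource-id',
                                         'some-package-id')
if distribution:
    download_url = distribution.get('downloadURL')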
def test_get_dataset_does_not_return_invalid_datasets(self):
    node = Node.objects.get(catalog_id='id3')
    datajson = DataJson(self.get_sample('missing_dataset_title.json'))
    valid, invalid, _ = sort_datasets_by_condition(node, datajson)
    self.assertSetEqual(set(), valid)
    self.assertSetEqual({'99db6631-d1c9-470b-a73e-c62daa32c777'}, invalid)
    dataset = datajson.get_dataset(
        identifier='99db6631-d1c9-470b-a73e-c62daa32c777')
    dataset['title'] = 'aTitle'
    valid, invalid, _ = sort_datasets_by_condition(node, datajson)
    self.assertSetEqual({'99db6631-d1c9-470b-a73e-c62daa32c777'}, valid)
    self.assertSetEqual(set(), invalid)
def get_catalog_from_node(node):
    try:
        catalog = DataJson(node.catalog_url,
                           catalog_format=node.catalog_format)
        return catalog
    except Exception:
        # Fall back to the catalog JSON stored on the node itself
        dictionary = json.loads(node.catalog)
        if dictionary:
            catalog = DataJson(dictionary)
            return catalog
        return None
def get_catalog_from_node(node):
    try:
        url_check_timeout = TasksConfig.get_solo().url_check_timeout
        catalog = DataJson(node.catalog_url,
                           catalog_format=node.catalog_format,
                           verify_ssl=node.verify_ssl,
                           url_check_timeout=url_check_timeout)
        return catalog
    except Exception:
        # Fall back to the catalog JSON stored on the node itself
        dictionary = json.loads(node.catalog)
        if dictionary:
            catalog = DataJson(dictionary)
            return catalog
        return None
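# A minimal sketch contrasting the two get_catalog_from_node() variants
# above: both try the node's URL first and fall back to the JSON stored on
# the node, but only the second honours verify_ssl and the configured
# url_check_timeout. 'id1' refers to the sample node created in the
# fixtures above.
node = Node.objects.get(catalog_id='id1')
catalog = get_catalog_from_node(node)
if catalog is None:
    # Neither the URL nor the stored copy could be parsed as a catalog
    raise ValueError(
        'node {} has no readable catalog'.format(node.catalog_id))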
def test_dataset_list_returns_empty_if_no_related_datasets(self):
    new_node = Node(catalog_id='id4',
                    catalog_url=self.get_sample('full_data.json'),
                    indexable=True)
    valid, _, _ = sort_datasets_by_condition(
        new_node, DataJson(self.get_sample('full_data.json')))
    self.assertSetEqual(set(), valid)
def test_get_dataset_does_not_return_missing_datasets(self):
    node = Node.objects.get(catalog_id='id1')
    datajson = DataJson(self.get_sample('full_data.json'))
    datajson.datasets.pop(0)
    valid, _, missing = sort_datasets_by_condition(node, datajson)
    self.assertSetEqual({'99db6631-d1c9-470b-a73e-c62daa32c420'}, valid)
    self.assertSetEqual({'99db6631-d1c9-470b-a73e-c62daa32c777'}, missing)
def setUp(cls):
    ensure_dir_exists(cls.SAMPLES_DIR)
    ensure_dir_exists(cls.RESULTS_DIR)
    ensure_dir_exists(cls.TEMP_DIR)
    cls.dj = DataJson()
    cls.maxDiff = None
    cls.longMessage = True
def test_no_title_nor_identifier_catalog(self):
    catalog = DataJson(
        os.path.join(self.SAMPLES_DIR, "missing_catalog_title.json"))
    del catalog['identifier']
    indics = self.dj.generate_catalogs_indicators(catalog)[0][0]
    assert_equal(indics['title'], 'no-title')
    assert_equal(indics['identifier'], 'no-id')
def test_federation_validation_takes_value_from_config(
        self, mock_validation):
    node = Node.objects.get(catalog_id='id3')
    datajson = DataJson(self.get_sample('missing_dataset_title.json'))
    config = TasksConfig.get_solo()
    config.federation_url_check = False
    config.save()
    catalog_report = get_catalog_report(datajson)
    sort_datasets_by_condition(node, catalog_report)
    mock_validation.assert_called_with(broken_links=False)
def setUp(cls):
    cls.dj = DataJson(cls.get_sample("full_data.json"))
    cls.catalog = readers.read_catalog(
        cls.get_sample("full_data.json"))
    cls.maxDiff = None
    cls.longMessage = True
    cls.requests_mock = requests_mock.Mocker()
    cls.requests_mock.start()
    cls.requests_mock.get(requests_mock.ANY, real_http=True)
    cls.requests_mock.head(requests_mock.ANY, status_code=200)
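# A matching teardown sketch, assuming the Mocker started in the setUp()
# above should be stopped once each test finishes; this mirrors the suite's
# cls-style signature and is not the project's actual teardown.
def tearDown(cls):
    cls.requests_mock.stop()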
def test_invalid_datasets_dont_get_harvested(self, mock_harvest):
    mock_harvest.return_value = ([], {})
    federation_run()
    mock_harvest.assert_any_call(
        DataJson(self.get_sample('missing_dataset_title.json')),
        'harvest_url', 'apikey', 'id3', [],
        origin_tz=DEFAULT_TIMEZONE, dst_tz=DEFAULT_TIMEZONE)
def test_indexable_datasets_get_harvested(self, mock_harvest):
    mock_harvest.return_value = ([], {})
    federation_run()
    mock_harvest.assert_any_call(
        DataJson(self.get_sample('minimum_data.json')),
        'harvest_url', 'apikey', 'id2',
        ['99db6631-d1c9-470b-a73e-c62daa32c777'],
        origin_tz=DEFAULT_TIMEZONE, dst_tz=DEFAULT_TIMEZONE)
def test_read_written_xlsx_catalog(self):
    """read_catalog can read an XLSX file created by write_xlsx_catalog."""
    original_catalog = DataJson(
        os.path.join(self.SAMPLES_DIR, "catalogo_justicia.json"))
    tmp_xlsx = os.path.join(self.TEMP_DIR, "xlsx_catalog.xlsx")
    pydatajson.writers.write_xlsx_catalog(original_catalog, tmp_xlsx)
    try:
        pydatajson.readers.read_xlsx_catalog(tmp_xlsx)
    except NonParseableCatalog:
        self.fail("Could not read the XLSX file")
def test_federation_run_receives_node_tz_as_origin_tz(self, mock_harvest):
    node = Node.objects.get(catalog_id='id2')
    node.timezone = "Africa/Abidjan"
    node.save()
    mock_harvest.return_value = ([], {})
    federation_run()
    mock_harvest.assert_any_call(
        DataJson(self.get_sample('minimum_data.json')),
        'harvest_url', 'apikey', 'id2',
        ['99db6631-d1c9-470b-a73e-c62daa32c777'],
        origin_tz="Africa/Abidjan", dst_tz=DEFAULT_TIMEZONE)
def test_federation_validation_is_false_by_default(self, mock_validation):
    node = Node.objects.get(catalog_id='id3')
    datajson = DataJson(self.get_sample('missing_dataset_title.json'))
    catalog_report = get_catalog_report(datajson)
    sort_datasets_by_condition(node, catalog_report)
    mock_validation.assert_called_with(broken_links=False)
def test_read_write_both_formats_yields_the_same(self):
    for suffix in ['xlsx', 'json']:
        catalog = DataJson(
            os.path.join(self.SAMPLES_DIR, "catalogo_justicia." + suffix))
        catalog.to_json(os.path.join(self.TEMP_DIR, "saved_catalog.json"))
        catalog.to_xlsx(os.path.join(self.TEMP_DIR, "saved_catalog.xlsx"))
        catalog_json = DataJson(
            os.path.join(self.TEMP_DIR, "saved_catalog.json"))
        catalog_xlsx = DataJson(
            os.path.join(self.TEMP_DIR, "saved_catalog.xlsx"))
        self.assertEqual(catalog_json, catalog_xlsx)
        # the to_xlsx() call generates the indices in the original catalog;
        # applying it to the generated catalogs should leave them equal to
        # the original
        catalog_xlsx.to_xlsx(os.path.join(self.TEMP_DIR, "otro.xlsx"))
        catalog_json.to_xlsx(os.path.join(self.TEMP_DIR, "otro.xlsx"))
        self.assertEqual(catalog_json, catalog)
        self.assertEqual(catalog_xlsx, catalog)
def setUpClass(cls):
    cls.expected_dj = DataJson(get_sample('full_data.json'))
    cls.dj = DataJson(read_ckan_catalog('full_data.json'))
def __init__(self, sample):
    self.data_json = DataJson(get_sample(sample))
def setUp(cls):
    cls.dj = DataJson(cls.get_sample("full_data.json"))
    cls.catalog = readers.read_catalog(cls.get_sample("full_data.json"))
    cls.maxDiff = None
    cls.longMessage = True