Example #1
0
    def test_multiple_indices_returns_present_if_distribution_is_present(self):
        """If several time indices exist, the one with present=True wins.

        BUG FIX: the test name had a typo ("presesnt"); corrected so the
        suite reads consistently. Test discovery only needs the `test_`
        prefix, so no caller breaks.
        """
        # Non-present time index: must be skipped by the repository lookup.
        self.distribution.field_set \
            .create(metadata=json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX}), present=False)
        # Present time index: the expected result.
        index = self.distribution.field_set \
            .create(metadata=json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX}), present=True)

        self.assertEqual(DistributionRepository(self.distribution).get_time_index_series(), index)
 def error_log(self, node=None):
     """Return a newline-joined log of error messages from errored distributions.

     When *node* is given, only distributions whose catalog matches the
     node's catalog_id are included.
     """
     errored = DistributionRepository.get_all_errored()
     if node:
         errored = errored.filter(dataset__catalog__identifier=node.catalog_id)
     messages = errored.values_list('error_msg', flat=True)
     return "\n".join(messages)
Example #3
0
    def test_multiple_indices_returns_last_if_distribution_is_not_present(self):
        """With no present time index, the last created one is returned."""
        self.distribution.field_set \
            .create(metadata=json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX}),
                    present=False, title='serie_nopresent_1')
        expected = self.distribution.field_set \
            .create(metadata=json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX}),
                    present=False, title='serie_nopresent_2')

        self.distribution.present = False
        found = DistributionRepository(self.distribution).get_time_index_series()
        self.assertEqual(found, expected)
Example #4
0
 def update_distribution_indexation_metadata(self, distribution):
     """Flag every present non-index series as available and store the
     distribution's periodicity in its enhanced metadata.
     """
     time_index = DistributionRepository(distribution).get_time_index_series()
     present_series = SeriesRepository.get_present_series(distribution=distribution)
     for field in present_series.exclude(id=time_index.id):
         field.enhanced_meta.update_or_create(key=meta_keys.AVAILABLE, value='true')
     # Additional metadata computed for each series
     periodicity = get_distribution_time_index_periodicity(time_index)
     distribution.enhanced_meta.update_or_create(
         key=meta_keys.PERIODICITY, defaults={'value': periodicity})
    def test_get_errored_distributions(self):
        """Distributions flagged with error=True show up in get_all_errored."""
        parse_catalog('test_catalog',
                      os.path.join(SAMPLES_DIR, 'test_catalog.json'))
        catalog_distributions = Distribution.objects.filter(
            dataset__catalog__identifier='test_catalog')
        catalog_distributions.update(error=True, error_msg="Error!")

        first_errored = DistributionRepository.get_all_errored().first()
        self.assertEqual(first_errored.error_msg, "Error!")
    def test_get_time_index(self):
        """The field whose metadata tags it as time index is returned."""
        metadata = json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX})
        time_index_field = Mock(metadata=metadata)
        distribution = Mock()
        distribution.field_set.all.return_value = [time_index_field]

        found = DistributionRepository(distribution).get_time_index_series()
        self.assertEqual(found, time_index_field)
    def test_get_node(self):
        """get_node() resolves the Node by the distribution's catalog identifier."""
        distribution = Mock()
        distribution.dataset.catalog.identifier = 'test_node'

        node = Node(catalog_id='test_node')
        with patch(
                'series_tiempo_ar_api.libs.datajsonar_repositories.distribution_repository.Node'
        ) as fake_node:
            fake_node.objects.get.return_value = node
            self.assertEqual(
                DistributionRepository(distribution).get_node(), node)
            # BUG FIX: `called_with` is not a Mock assertion method — it
            # auto-creates a truthy child mock, so the original assertTrue
            # could never fail, and it ran *before* get_node() was called.
            fake_node.objects.get.assert_called_with(catalog_id='test_node')
    def test_read_csv_as_dataframe(self):
        """The injected csv_reader is called with the distribution and the
        title of the time index field."""
        time_index_title = 'indice_tiempo'
        metadata = json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX})
        time_index_field = Mock(metadata=metadata, title=time_index_title)

        distribution = Mock()
        distribution.field_set.all.return_value = [time_index_field]

        csv_reader = Mock()
        repository = DistributionRepository(distribution, csv_reader=csv_reader)
        repository.read_csv_as_time_series_dataframe()
        csv_reader.assert_called_with(distribution, time_index_title)
 def write_distribution(self, distribution: Distribution,
                        writer: csv.writer):
     """Write every series of *distribution* to *writer*.

     Failures are logged and reported on the task instead of propagating,
     so one broken distribution cannot abort the whole dump generation.
     """
     # noinspection PyBroadException
     try:
         fields = distribution.field_set.all()
         fields = {field.title: field.identifier for field in fields}
         periodicity = meta_keys.get(distribution, meta_keys.PERIODICITY)
         index_col = DistributionRepository(
             distribution).get_time_index_series().title
         df = DistributionCsvReader(distribution, index_col).read()
         df.apply(self.write_serie, args=(periodicity, fields, writer))
     except Exception as e:
         # BUG FIX: the tag bracket in the message was never closed
         # ("[tag Error ..." → "[tag] Error ...").
         msg = f'[{self.tag}] Error en la distribución {distribution.identifier}: {e.__class__}: {e}'
         GenerateDumpTask.info(self.task, msg)
         logger.warning(msg)
Example #10
0
    def generate_es_actions(self, distribution):
        """Build the Elasticsearch indexing actions for each series of the
        distribution's dataframe; empty list when there is nothing to index."""
        time_index = DistributionRepository(distribution).get_time_index_series()
        frame = init_df(distribution, time_index)

        if not frame.columns.any():
            logger.warning(strings.NO_SERIES,
                           distribution.identifier,
                           distribution.dataset.catalog.identifier)
            return []

        per_column = [process_column(frame[column], self.index_name)
                      for column in list(frame.columns)]

        # Flatten one level: if each column produced a list, join them all
        if isinstance(per_column[0], list):
            actions = reduce(lambda left, right: left + right, per_column)
        else:
            actions = per_column
        self.add_catalog_keyword(actions, distribution)
        return actions
 def generate_context(self, node):
     """Assemble the template context for the report.

     Includes errored distributions (optionally restricted to *node*'s
     catalog), run metadata, queries, and one entry per indicator type.
     """
     errors = DistributionRepository.get_all_errored().order_by(
         'dataset__catalog__identifier', 'identifier')
     if node:
         errors = errors.filter(dataset__catalog__identifier=node.catalog_id)
     context = {
         'distribution_errors': errors,
         'finish_time': self._format_date(self.task.finished),
         'queries': self.get_queries(),
         'node': node,
     }
     for indicator, _ in Indicator.TYPE_CHOICES:
         context[indicator] = self._get_indicator_value(indicator, node=node)
     return context
Example #12
0
def update_distribution_metadata(changed, distribution_model):
    """Recompute and persist enhanced metadata for every series of a distribution.

    For each column of the distribution's time-series dataframe the enhanced
    metadata is recalculated and swapped in atomically (stale rows deleted,
    new rows bulk-created in one transaction). Distribution-level keys
    (last data hash, changed flag) are refreshed afterwards, then popularity
    metadata is updated and duplicated fields removed.

    Args:
        changed: whether the distribution's data changed this run; stored
            stringified under the CHANGED meta key.
        distribution_model: the Distribution whose metadata is refreshed.
    """
    time_index = DistributionRepository(
        distribution_model).get_time_index_series()
    df = init_df(distribution_model, time_index)

    periodicity = get_distribution_time_index_periodicity(time_index)
    new_metadata = []
    metas_to_delete = []
    field_content_type = ContentType.objects.get_for_model(Field)
    for serie in list(df.columns):
        # Per-series metadata computed from the column values and periodicity
        meta = calculate_enhanced_meta(df[serie], periodicity)

        field = distribution_model.field_set.get(identifier=serie,
                                                 present=True)
        for meta_key, value in meta.items():
            new_metadata.append(
                Metadata(content_type=field_content_type,
                         object_id=field.id,
                         key=meta_key,
                         value=value))

        # Collect stale rows for the keys being rewritten, for one bulk delete.
        # NOTE(review): this filter matches on object_id only (no content_type);
        # presumably object ids don't collide across models here — confirm.
        metas_to_delete.extend(
            Metadata.objects.filter(object_id=field.id,
                                    key__in=list(meta.keys())).values_list(
                                        'id', flat=True))
    # Delete-then-create inside a single transaction so readers never observe
    # a partially updated metadata set.
    with transaction.atomic():
        Metadata.objects.filter(id__in=metas_to_delete).delete()
        Metadata.objects.bulk_create(new_metadata)

    distribution_model.enhanced_meta.update_or_create(
        key=meta_keys.LAST_HASH,
        defaults={'value': distribution_model.data_hash})
    distribution_model.enhanced_meta.update_or_create(
        key=meta_keys.CHANGED, defaults={'value': str(changed)})
    update_popularity_metadata(distribution_model)
    remove_duplicated_fields(distribution_model)
Example #13
0
 def test_get_data_json(self):
     """The data.json built for the distribution exposes distributions."""
     repository = DistributionRepository(self.distribution)
     self.assertTrue(repository.get_data_json().get_distributions())
 def test_get_time_index_none_exists(self):
     """Looking up the time index of a field-less distribution runs through."""
     empty_distribution = Mock()
     empty_distribution.field_set.all.return_value = []
     DistributionRepository(empty_distribution).get_time_index_series()
Example #15
0
    def test_get_time_index(self):
        """A field created with time-index metadata is found as the time index."""
        metadata = json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX})
        time_index_field = self.distribution.field_set.create(metadata=metadata)

        found = DistributionRepository(self.distribution).get_time_index_series()
        self.assertEqual(found, time_index_field)
 def test_get_data_json(self, repository, fake_node):
     """get_data_json() resolves the node and passes it to the repository."""
     distribution = Mock()
     node = Node(catalog_id='test_node')
     fake_node.objects.get.return_value = node
     DistributionRepository(distribution).get_data_json()
     # BUG FIX: `called_with` is not a Mock assertion method — it auto-creates
     # a truthy child mock, so the original assertTrue could never fail.
     repository.assert_called_with(node)
Example #17
0
 def test_get_time_index_none_exists(self):
     """get_time_index_series on a distribution without one runs through."""
     repository = DistributionRepository(self.distribution)
     repository.get_time_index_series()
Example #18
0
 def test_get_node(self):
     """The repository resolves the node the distribution belongs to."""
     resolved = DistributionRepository(self.distribution).get_node()
     self.assertEqual(resolved, self.node)
Example #19
0
    def test_non_present_time_index(self):
        """A non-present time index field does not break the lookup."""
        metadata = json.dumps({constants.SPECIAL_TYPE: constants.TIME_INDEX})
        self.distribution.field_set.create(metadata=metadata, present=False)

        DistributionRepository(self.distribution).get_time_index_series()
Example #20
0
    def test_get_errored_distributions_is_empty_if_all_ok(self):
        """With no distribution flagged as errored, get_all_errored is empty."""
        catalog_path = os.path.join(SAMPLES_DIR, 'test_catalog.json')
        parse_catalog('test_catalog', catalog_path)

        self.assertFalse(DistributionRepository.get_all_errored())
Example #21
0
 def test_read_csv_as_dataframe(self):
     """Reading a parsed distribution's csv yields a dataframe with columns."""
     parse_catalog('test_catalog', os.path.join(SAMPLES_DIR, 'test_catalog.json'))
     last_distribution = Distribution.objects.last()
     frame = DistributionRepository(last_distribution).read_csv_as_time_series_dataframe()
     self.assertTrue(list(frame.columns))