Example no. 1
    def test_first_time_distribution_indexable(self, *_):
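        # A distribution indexed for the first time should be flagged as changed.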
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)

        distribution = Distribution.objects.get(identifier='212.1')

        self.assertEqual(distribution.enhanced_meta.get(key=meta_keys.CHANGED).value, 'True')
Example no. 2
    def test_format_is_passed_to_data_json(self, data_json, *_):
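        # The node's catalog_format should be forwarded to the data_json mock as a keyword argument.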
        read_datajson(self.task, whitelist=True)
        self.node.catalog_format = 'xlsx'
        index_catalog(self.node, self.mgmt_task)

        self.assertEqual(data_json.call_args[1]['catalog_format'],
                         self.node.catalog_format)
Example no. 3
    def _index(self,
               catalog_id,
               catalog_url,
               periodicity='R/P1D',
               set_availables=True,
               set_error=False,
               set_present=True):
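        # Helper: creates a node, reads its catalog, optionally attaches availability metadata
        # to every field, then indexes the catalog metadata and reports whether it succeeded.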
        node = Node.objects.create(
            catalog_id=catalog_id,
            catalog_url=os.path.join(SAMPLES_DIR, catalog_url),
            indexable=True,
        )

        read_datajson(self.task, whitelist=True, read_local=True)
        if set_availables:
            for field in datajsonar_Field.objects.all():
                field.enhanced_meta.create(key=meta_keys.AVAILABLE,
                                           value='true')
                field.enhanced_meta.create(key=meta_keys.HITS_90_DAYS,
                                           value='0')
                field.enhanced_meta.create(key=meta_keys.PERIODICITY,
                                           value=periodicity)

        datajsonar_Field.objects.update(error=set_error, present=set_present)

        index_ok = CatalogMetadataIndexer(node, self.meta_task,
                                          self.fake_index._name).index()
        if index_ok:
            connections.get_connection().indices.forcemerge()
        return index_ok
Example no. 4
    def test_error_distribution_logs(self, *_):
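        # A distribution missing its downloadURL should be reported in the read task's logs.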
        catalog = os.path.join(SAMPLES_DIR, 'distribution_missing_downloadurl.json')
        self.node.catalog_url = catalog
        self.node.save()
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)

        self.assertGreater(len(ReadDataJsonTask.objects.get(id=self.task.id).logs), 10)
Example no. 5
    def test_index_same_series_different_catalogs(self, *_):
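        # Reading and indexing twice must not create duplicate Field rows for the same series.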
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)

        count = Field.objects.filter(identifier='212.1_PSCIOS_ERN_0_0_25').count()

        self.assertEqual(count, 1)
Example no. 6
    def handle(self, *args, **options):
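        # Skip the run if a read task is already in progress; otherwise create a task and start the read.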
        if ReadDataJsonTask.objects.filter(status=ReadDataJsonTask.RUNNING):
            logger.info(u'Ya está corriendo una indexación')  # "An indexing run is already in progress"
            return

        task = ReadDataJsonTask()
        task.save()

        read_datajson(task, whitelist=options['whitelist'])
Example no. 7
    def test_dont_index_same_distribution_twice(self, *_):
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)

        distribution = Distribution.objects.get(identifier='212.1')

        # The distribution is marked as not indexable until its data changes
        self.assertEqual(distribution.enhanced_meta.get(key=meta_keys.CHANGED).value, 'False')
Example no. 8
    def test_read(self):
        identifier = 'test_id'
        Node(catalog_id=identifier,
             catalog_url=os.path.join(dir_path, 'sample_data.json'),
             indexable=True).save()
        task = ReadDataJsonTask()
        task.save()
        read_datajson(task, whitelist=True)
        self.assertTrue(
            Field.objects.filter(
                distribution__dataset__catalog__identifier=identifier))
Example no. 9
    def test_custom_validation_options(self, *_):
        # Every validation will fail
        config = DistributionValidatorConfig.get_solo()
        config.max_field_title_len = 0
        config.save()

        read_datajson(self.task, whitelist=True)
        index_catalog(self.node, self.mgmt_task)

        distribution = Distribution.objects.get(identifier='212.1')
        self.assertTrue(distribution.error)
Example no. 10
    def test_read_datajson_one_node_only_calls_task_for_that_node(
            self, index_catalog):
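        # When the task is bound to a single node, only that node's catalog should be queued for indexing.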
        Node(catalog_id='one_catalog',
             catalog_url='http://one_url.com',
             indexable=True).save()
        node = Node.objects.create(catalog_id='other_catalog',
                                   catalog_url='http://other_url.com',
                                   indexable=True)

        task = ReadDataJsonTask.objects.create(node=node)
        read_datajson(task)
        self.assertEqual(index_catalog.delay.call_count, 1)
Example no. 11
    def test_read_datajson_several_nodes_call_index_catalog_once_per_node(
            self, index_catalog):
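        # With no node bound to the task, each indexable node gets its own index_catalog call.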
        Node(catalog_id='one_catalog',
             catalog_url='http://one_url.com',
             indexable=True).save()
        Node(catalog_id='other_catalog',
             catalog_url='http://other_url.com',
             indexable=True).save()

        task = ReadDataJsonTask.objects.create()
        read_datajson(task)
        self.assertEqual(index_catalog.delay.call_count, 2)
Example no. 12
def parse_catalog(catalog_id, catalog_path, node=None):
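    # Test helper: ensures a Node exists, stores the serialized catalog on it and runs a read task.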
    if not node:
        node = Node.objects.create(catalog_id=catalog_id,
                                   catalog_url=catalog_path,
                                   indexable=True)
    catalog = DataJson(node.catalog_url)
    node.catalog = json.dumps(catalog)
    node.save()
    task = ReadDataJsonTask()
    task.save()
    read_datajson(task, whitelist=True)
    return node
Example no. 13
    def test_index_YYYY_distribution(self, *_):
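        # A catalog whose time index uses the YYYY date format should still be indexed and marked as changed.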
        catalog = os.path.join(SAMPLES_DIR, 'single_data_yyyy.json')
        self.node.catalog_url = catalog
        self.node.save()

        read_datajson(self.task, whitelist=True)
        index_catalog(self.node, self.mgmt_task)

        distribution = Distribution.objects.get(identifier='102.1')

        self.assertEqual(
            distribution.enhanced_meta.get(key=meta_keys.CHANGED).value,
            'True')
Example no. 14
    def test_significant_figures(self, *_):
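        # The number of significant figures of the series should be stored as enhanced field metadata.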
        Catalog.objects.all().delete()
        catalog = os.path.join(SAMPLES_DIR, 'ipc_data.json')
        self.node.catalog_url = catalog
        self.node.save()

        read_datajson(self.task, whitelist=True)
        index_catalog(self.node, self.mgmt_task)

        field = Field.objects.get(
            identifier='serie_inflacion')  # Taken from the data.json
        self.assertEqual(
            field.enhanced_meta.get(key='significant_figures').value, '4')
Example no. 15
    def test_index_same_distribution_if_data_changed(self, *_):
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)
        new_catalog = os.path.join(SAMPLES_DIR, 'full_ts_data_changed.json')
        self.node.catalog_url = new_catalog
        self.node.save()
        read_datajson(self.task, whitelist=True, read_local=True)
        index_catalog(self.node, self.mgmt_task, read_local=True)

        distribution = Distribution.objects.get(identifier='212.1')

        # The distribution was indexed again; it is marked as indexable
        self.assertEqual(distribution.enhanced_meta.get(key=meta_keys.CHANGED).value, 'True')
Example no. 16
    def _index_catalog(self, catalog_path):
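        # Helper: reads the catalog and reindexes every distribution with parallel_bulk mocked out.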
        Node.objects.create(catalog_id='test_catalog',
                            catalog_url=catalog_path,
                            indexable=True)
        task = ReadDataJsonTask.objects.create()

        read_datajson(task, whitelist=True)
        with mock.patch(
                'series_tiempo_ar_api.libs.indexing.indexer.distribution_indexer.parallel_bulk'
        ):
            distributions = Distribution.objects.all()

            for distribution in distributions:
                DistributionIndexer('some_index').reindex(distribution)
Example no. 17
    def _index(self, catalog_id, catalog_url, set_availables=True):
        node = Node.objects.create(
            catalog_id=catalog_id,
            catalog_url=os.path.join(SAMPLES_DIR, catalog_url),
            indexable=True,
        )

        read_datajson(self.task, whitelist=True, read_local=True)
        if set_availables:
            for field in datajsonar_Field.objects.all():
                field.enhanced_meta.create(key=meta_keys.AVAILABLE,
                                           value='true')

        CatalogMetadataIndexer(node, self.meta_task, self.FakeField).index()
        self.elastic.indices.forcemerge()
Example no. 18
    def _index(self, catalog_id, catalog_url, set_availables=True):
        node = Node.objects.create(
            catalog_id=catalog_id,
            catalog_url=os.path.join(SAMPLES_DIR, catalog_url),
            indexable=True,
        )

        read_datajson(self.task, whitelist=True, read_local=True)
        if set_availables:
            for field in datajsonar_Field.objects.all():
                field.enhanced_meta.create(key=meta_keys.AVAILABLE, value='true')

        index_ok = CatalogMetadataIndexer(node, self.meta_task, fake_index._name).index()
        if index_ok:
            connections.get_connection().indices.forcemerge()
        return index_ok
Example no. 19
def index_catalog(catalog_id, catalog_path, index, node=None):
    """Indexa un catálogo. Útil para tests"""
    if not node:
        node = Node(catalog_id=catalog_id,
                    catalog_url=catalog_path,
                    indexable=True)

    catalog = DataJson(node.catalog_url)
    node.catalog = json.dumps(catalog)
    node.save()
    task = ReadDataJsonTask()
    task.save()

    read_datajson(task, read_local=True, whitelist=True)
    for distribution in Distribution.objects.filter(
            dataset__catalog__identifier=catalog_id):
        DistributionIndexer(index=index).run(distribution)
    ElasticInstance.get().indices.forcemerge(index=index)
Example no. 20
    def read_data(self, catalog_path):
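        # Helper: registers the catalog as an indexable node and runs the read task over it.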
        Node.objects.create(catalog_id='test_catalog', catalog_url=catalog_path, indexable=True)
        task = ReadDataJsonTask.objects.create()

        read_datajson(task, whitelist=True)