def setUp(self):
    """Prepare the Elasticsearch client, task rows and fake index for a test.

    Creates one ``ReadDataJsonTask`` and one ``IndexMetadataTask`` row,
    registers ``FakeField`` on the module-level ``fake_index``, creates that
    index and calls ``FakeField.init`` with the shared client.
    """
    es_client = ElasticInstance.get()
    self.elastic = es_client

    self.task = ReadDataJsonTask.objects.create()
    self.meta_task = IndexMetadataTask.objects.create()

    fake_index.doc_type(self.FakeField)
    fake_index.create()
    self.FakeField.init(using=es_client)
Beispiel #2
0
class MetadataConfig(AppConfig):
    """App configuration for the metadata app.

    NOTE: every statement in this class body runs when the class is defined
    (i.e. when this module is imported), so ``ElasticInstance.init`` is
    called as a side effect of the import.
    """
    name = 'series_tiempo_ar_api.apps.metadata'

    # Elasticsearch configuration taken from the project settings.
    es_configurations = settings.ES_CONFIGURATION
    es_urls = es_configurations["ES_URLS"]
    # Options of the "default" entry under "CONNECTIONS".
    client_options = es_configurations["CONNECTIONS"]["default"]
    # Initialise the shared ElasticInstance with those URLs and options.
    ElasticInstance.init(es_urls, client_options)
Beispiel #3
0
def init_index(index_name):
    """Return the ``Index`` named *index_name*, creating it when missing.

    The analyzer is attached through ``add_analyzer`` before the existence
    check, and ``Metadata.init`` is always called against *index_name*,
    whether or not the index had to be created.
    """
    es_client = ElasticInstance.get()
    metadata_index = Index(index_name, using=es_client)
    add_analyzer(metadata_index)

    already_exists = metadata_index.exists()
    if not already_exists:
        metadata_index.create()

    Metadata.init(using=es_client, index=index_name)
    return metadata_index
Beispiel #4
0
def index_catalog(catalog_id, catalog_path, index, node=None):
    """Index a catalog. Useful for tests.

    When *node* is falsy, a new indexable ``Node`` is built from
    *catalog_id* and *catalog_path*.  The catalog at ``node.catalog_url`` is
    loaded, stored on the node as JSON and saved; a fresh
    ``ReadDataJsonTask`` is then run through ``read_datajson``.  Every
    ``Distribution`` belonging to *catalog_id* is indexed into *index*, and
    finally the index is force-merged.
    """
    node = node or Node(catalog_id=catalog_id,
                        catalog_url=catalog_path,
                        indexable=True)

    node.catalog = json.dumps(DataJson(node.catalog_url))
    node.save()

    read_task = ReadDataJsonTask()
    read_task.save()
    read_datajson(read_task, read_local=True, whitelist=True)

    distributions = Distribution.objects.filter(
        dataset__catalog__identifier=catalog_id)
    for distribution in distributions:
        # A new indexer is built for each distribution, as before.
        DistributionIndexer(index=index).run(distribution)

    ElasticInstance.get().indices.forcemerge(index=index)
Beispiel #5
0
 def __init__(self, index, series_id, rep_mode, args, collapse_agg=None):
     """Store the parameters of a single-series query and build its search.

     A copy of *args* is kept, not the object passed in.  When
     *collapse_agg* is falsy, the value stored is the default found in
     ``constants.API_DEFAULT_VALUES`` under ``constants.PARAM_COLLAPSE_AGG``.
     ``init_search`` is called last, once every other attribute is set.
     """
     self.index = index
     self.elastic = ElasticInstance.get()
     self.series_id = series_id
     self.rep_mode = rep_mode
     self.args = args.copy()

     if not collapse_agg:
         collapse_agg = constants.API_DEFAULT_VALUES[
             constants.PARAM_COLLAPSE_AGG]
     self.collapse_agg = collapse_agg

     self.search = self.init_search()
Beispiel #6
0
 def __init__(self, task, index=settings.TS_INDEX, output_directory=constants.DUMP_DIR):
     """Keep the task, target index and output directory, then load fields.

     ``fields`` and ``catalog_themes`` start as empty dicts and the shared
     Elasticsearch client is stored before ``init_fields_dict`` is called.
     """
     # Empty containers first; they do not depend on the arguments.
     self.fields = {}
     self.catalog_themes = {}

     self.task = task
     self.index = index
     self.output_directory = output_directory

     self.elastic = ElasticInstance.get()
     self.init_fields_dict()
def scheduler():
    """Generate the report for the latest read task and force-merge the index.

    Returns immediately when there is no ``ReadDataJsonTask`` at all or when
    the latest one is already ``FINISHED``.  Otherwise the report for that
    task is generated, but only if the ``indexing`` queue has no jobs, and
    ``settings.TS_INDEX`` is force-merged when that index exists.
    """
    task = ReadDataJsonTask.objects.last()
    # ``last()`` yields None on an empty table; without this guard the
    # attribute access on ``task`` would raise AttributeError.
    if task is None or task.status == task.FINISHED:
        return

    if not get_queue('indexing').jobs:
        ReportGenerator(task).generate()

    elastic = ElasticInstance.get()
    if elastic.indices.exists(index=settings.TS_INDEX):
        elastic.indices.forcemerge(index=settings.TS_INDEX)
 def __init__(self, node: Node, task: IndexMetadataTask, doc_type):
     """Store the node, task and doc type, then load field and theme data.

     The fields-metadata cache is initialised through
     ``init_fields_meta_cache``; the catalog at ``node.catalog_url`` is then
     read and its ``themeTaxonomy`` entry is passed to ``get_themes``.

     Raises:
         ValueError: if reading the catalog or processing its themes fails.
             The original exception is attached as ``__cause__``.
     """
     self.node = node
     self.task = task
     self.doc_type = doc_type
     self.elastic = ElasticInstance.get()
     self.fields_meta = {}
     self.init_fields_meta_cache()
     try:
         data_json = DataJson(node.catalog_url)
         themes = data_json['themeTaxonomy']
         self.themes = self.get_themes(themes)
     except Exception as exc:
         # Chain explicitly so the traceback shows the real failure as the
         # direct cause of this ValueError.
         raise ValueError("Error de lectura de los themes del catálogo") from exc
    def setUpClass(cls):
        """Recreate the test index, index a sample catalog and dump it to CSV.

        Any existing index named ``cls.index`` is deleted and created again
        with ``INDEX_CREATION_BODY``.  The sample catalog is indexed through
        ``index_catalog`` and a saved ``CSVDumpTask`` is used to run
        ``CSVDumpGenerator`` into ``cls.directory``.
        """
        super(ViewTests, cls).setUpClass()

        indices = ElasticInstance.get().indices
        if indices.exists(cls.index):
            indices.delete(cls.index)
        indices.create(cls.index, body=INDEX_CREATION_BODY)

        cls.catalog_id = 'csv_dump_test_catalog'
        sample_path = os.path.join(samples_dir, 'distribution_daily_periodicity.json')
        index_catalog(cls.catalog_id, sample_path, cls.index)

        cls.task = CSVDumpTask()
        cls.task.save()

        generator = CSVDumpGenerator(cls.task,
                                     index=cls.index,
                                     output_directory=cls.directory)
        generator.generate()
Beispiel #10
0
    def __init__(self, node: Node, task: IndexMetadataTask, index: str):
        """Store the node, task and index name, then load field and theme data.

        If no index named *index* exists yet, it is created through
        ``init_index``.  The fields-metadata cache is then initialised, the
        catalog at ``node.catalog_url`` is read and its ``themeTaxonomy``
        entry is passed to ``get_themes``.

        Raises:
            ValueError: if reading the catalog or processing its themes
                fails.  The original exception is attached as ``__cause__``.
        """
        self.node = node
        self.task = task
        self.index_name = index
        self.elastic: Elasticsearch = ElasticInstance.get()

        if not self.elastic.indices.exists(self.index_name):
            init_index(self.index_name)

        self.fields_meta = {}
        self.init_fields_meta_cache()
        try:
            data_json = DataJson(node.catalog_url)
            themes = data_json['themeTaxonomy']
            self.themes = self.get_themes(themes)
        except Exception as exc:
            # Chain explicitly so the traceback shows the real failure as
            # the direct cause of this ValueError.
            raise ValueError("Error de lectura de los themes del catálogo") from exc
Beispiel #11
0
    def __init__(self, index):
        """Set up an empty query against *index*.

        args:
            index (str): Elasticsearch index the queries run against.
        """
        self.index = index
        self.series = []
        # NOTE(review): this instantiates ElasticInstance directly instead of
        # calling ElasticInstance.get() -- confirm both yield the same client.
        self.elastic = ElasticInstance()
        self.data = None
        self.count = None

        # Parameters that must be stored and read several times; each one
        # starts at its API default.
        defaults = constants.API_DEFAULT_VALUES
        stored_params = (
            constants.PARAM_START,
            constants.PARAM_LIMIT,
            constants.PARAM_SORT,
        )
        self.args = {param: defaults[param] for param in stored_params}
Beispiel #12
0
def tseries_index(name: str) -> Index:
    """Return the ``Index`` called *name*, creating and mapping it if needed.

    The index is bound to the shared Elasticsearch client.  When it does not
    exist it is created and ``constants.MAPPING`` is put for
    ``settings.TS_DOC_TYPE``.  After saving, the mapping is read back and
    put again if the doc type has no ``raw_value`` property.
    """
    index = Index(name, using=ElasticInstance.get())

    # Sets the upper bound on the number of values in one response. When
    # filtering by series, this is the maximum number of values a single
    # time series can have.
    index.settings(max_result_window=settings.MAX_SERIES_VALUES)

    if not index.exists():
        index.create()
        index.put_mapping(doc_type=settings.TS_DOC_TYPE,
                          body=constants.MAPPING)

    index.save()
    # Update the mapping
    mapping = index.get_mapping(doc_type=settings.TS_DOC_TYPE)

    doc_properties = mapping[name]['mappings'][
        settings.TS_DOC_TYPE]['properties']
    # A missing (or falsy) 'raw_value' property triggers a re-put of the
    # mapping; presumably this upgrades indices created with an older
    # mapping -- confirm.
    if not doc_properties.get('raw_value'):
        index.put_mapping(doc_type=settings.TS_DOC_TYPE,
                          body=constants.MAPPING)

    return index
Beispiel #13
0
def get_fields_meta_index():
    """Return the ``constants.FIELDS_INDEX`` index with the analyzer added.

    The ``Index`` object is only built and passed to ``add_analyzer`` here;
    nothing in this function creates it on the server.
    """
    es_client = ElasticInstance.get()
    index = Index(constants.FIELDS_INDEX, using=es_client)
    add_analyzer(index)
    return index
Beispiel #14
0
 def tearDownClass(cls):
     """Delete the test index, then every Catalog and Node row."""
     super(CSVDumpCommandTests, cls).tearDownClass()

     indices = ElasticInstance.get().indices
     indices.delete(cls.index)

     for model in (Catalog, Node):
         model.objects.all().delete()
Beispiel #15
0
 def tearDownClass(cls):
     """Delete the test index and every Node row."""
     # NOTE(review): the parent tearDownClass is not invoked here -- confirm
     # that this is intentional.
     indices = ElasticInstance.get().indices
     indices.delete(cls.index)
     Node.objects.all().delete()
Beispiel #16
0
 class Meta:
     # Options container; presumably read by the enclosing document class
     # -- confirm against the class that declares it.
     dynamic = MetaField('strict')
     doc_type = constants.METADATA_DOC_TYPE
     # The client is resolved once, when this class body is executed.
     using = ElasticInstance.get()
Beispiel #17
0
 def setUpClass(cls):
     """Delete all Catalog rows and store an ElasticInstance on the class."""
     # Runs before the parent setUpClass.
     Catalog.objects.all().delete()
     # NOTE(review): instantiates ElasticInstance directly instead of calling
     # ElasticInstance.get() -- confirm both yield the same client.
     cls.elastic = ElasticInstance()
     super(IndexerTests, cls).setUpClass()
Beispiel #18
0
 def tearDownClass(cls):
     """Delete the test index, then every DumpFile and Node row."""
     super(ViewTests, cls).tearDownClass()

     indices = ElasticInstance.get().indices
     indices.delete(cls.index)

     for model in (DumpFile, Node):
         model.objects.all().delete()
Beispiel #19
0
#! coding: utf-8
from django.conf import settings

from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from .helpers import setup_database
from .support.generate_data import get_generator

# Elasticsearch client fetched once at import time; setup() and teardown()
# below both use it.
elastic = ElasticInstance.get()


def setup():
    """Generate the test data unless ``settings.TEST_INDEX`` already exists.

    When the index is missing, the data generator is run and
    ``setup_database`` is called afterwards.
    """
    if elastic.indices.exists(settings.TEST_INDEX):
        return

    get_generator().run()
    setup_database()


def teardown():
    """Delete the ``settings.TEST_INDEX`` index."""
    elastic.indices.delete(settings.TEST_INDEX)
Beispiel #20
0
    def execute(self):
        """Run the metadata search and return the response dictionary.

        If validation leaves anything in ``self.errors``, the errors are
        stored under the ``errors`` key of ``self.response`` and that
        dictionary is returned without running a search.  Otherwise
        ``self.response`` is rebuilt with this shape:

        {
            "data": [
                {
                    "field": {
                        "id": ..., "description": ..., "title": ...,
                        "frequency": ..., "time_index_start": ...,
                        "time_index_end": ..., "units": ...
                    },
                    "dataset": {
                        "title": ..., "publisher": {"name": ...},
                        "source": ..., "theme": ...
                    }
                }
            ],
            "count": <total number of hits>
        }

        plus the requested limit and offset, stored under the
        ``constants.PARAM_LIMIT`` and ``constants.PARAM_OFFSET`` keys.
        Attributes missing from a hit are reported as ``None``.
        """

        def read(hit, attribute):
            # Attribute of the hit, or None when the hit does not have it.
            return getattr(hit, attribute, None)

        def to_date(value):
            # Reduce a truthy value to its date; leave falsy values alone.
            return value.date() if value else value

        self.validate()

        if self.errors:
            self.response['errors'] = self.errors
            return self.response

        client = ElasticInstance.get()
        search = Metadata.search(using=client, index=constants.METADATA_ALIAS)

        text_query = self.args.get(constants.PARAM_QUERYSTRING)
        if text_query is not None:
            search = search.query('match', all=text_query)

        # Pagination window: ``limit`` results starting at ``offset``.
        offset = self.args[constants.PARAM_OFFSET]
        limit = self.args[constants.PARAM_LIMIT]
        search = search[offset:limit + offset]

        for arg, field in constants.FILTER_ARGS.items():
            search = self.add_filters(search, arg, field)

        result = search.execute()
        self.response = {
            'data': [],
            'count': result.hits.total
        }
        rows = self.response['data']
        for hit in result:
            start_date = to_date(read(hit, 'start_date'))
            end_date = to_date(read(hit, 'end_date'))

            field_info = {
                'id': read(hit, 'id'),
                'description': read(hit, 'description'),
                'title': read(hit, 'title'),
                'frequency': read(hit, 'periodicity'),
                'time_index_start': start_date,
                'time_index_end': end_date,
                'units': read(hit, 'units'),
            }
            dataset_info = {
                'title': read(hit, 'dataset_title'),
                'publisher': {
                    'name': read(hit, 'dataset_publisher_name'),
                },
                'source': read(hit, 'dataset_source'),
                'theme': read(hit, 'dataset_theme'),
            }
            rows.append({'field': field_info, 'dataset': dataset_info})

        self.response[constants.PARAM_LIMIT] = self.args[constants.PARAM_LIMIT]
        self.response[constants.PARAM_OFFSET] = self.args[constants.PARAM_OFFSET]

        return self.response
Beispiel #21
0
 def __init__(self, task):
     """Keep *task* and the shared Elasticsearch client on the instance."""
     self.task = task
     self.elastic: Elasticsearch = ElasticInstance.get()
from elasticsearch_dsl import Index
from django.test import TestCase
from django_datajsonar.tasks import read_datajson
from django_datajsonar.models import ReadDataJsonTask, Node, Field as datajsonar_Field

from series_tiempo_ar_api.apps.metadata.indexer.catalog_meta_indexer import CatalogMetadataIndexer
from series_tiempo_ar_api.apps.metadata.indexer.doc_types import Field
from series_tiempo_ar_api.apps.metadata.indexer.index import add_analyzer
from series_tiempo_ar_api.apps.metadata.models import IndexMetadataTask
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from series_tiempo_ar_api.apps.management import meta_keys
# The 'samples' directory located next to this file.
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')

fake = faker.Faker()

# Index named after a random word and bound to the shared Elasticsearch
# client; built once at import time.
fake_index = Index(fake.word(), using=ElasticInstance.get())
add_analyzer(fake_index)


class IndexerTests(TestCase):
    """Test case that creates the fake index before each test."""

    class FakeField(Field):
        # ``Field`` subclass whose Meta points at the fake index.
        class Meta:
            index = fake_index._name

    def setUp(self):
        """Create the task rows and the fake index used by a test.

        ``FakeField`` is registered on ``fake_index``, the index is created
        and ``FakeField.init`` is called with the shared client.
        """
        es_client = ElasticInstance.get()
        self.elastic = es_client

        self.task = ReadDataJsonTask.objects.create()
        self.meta_task = IndexMetadataTask.objects.create()

        fake_index.doc_type(self.FakeField)
        fake_index.create()
        self.FakeField.init(using=es_client)
def delete_metadata(fields: list):
    """Delete the metadata documents whose ``id`` matches one of *fields*.

    A ``terms`` filter on ``id`` is built from the ``identifier`` attribute
    of every element of *fields* and run as a delete against
    ``constants.METADATA_ALIAS``.  The result of that delete call is
    returned.
    """
    identifiers = [field.identifier for field in fields]
    search = Search(using=ElasticInstance.get(), index=constants.METADATA_ALIAS)
    return search.filter('terms', id=identifiers).delete()
Beispiel #24
0
import faker
from elasticsearch_dsl import Index, Search
from django.test import TestCase
from django_datajsonar.tasks import read_datajson
from django_datajsonar.models import ReadDataJsonTask, Node, Field as datajsonar_Field

from series_tiempo_ar_api.apps.metadata.indexer.catalog_meta_indexer import CatalogMetadataIndexer
from series_tiempo_ar_api.apps.metadata.indexer.index import add_analyzer
from series_tiempo_ar_api.apps.metadata.models import IndexMetadataTask
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from series_tiempo_ar_api.apps.management import meta_keys
# The 'samples' directory located next to this file.
SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')

fake = faker.Faker()

# Index named after a random lowercased string (``max_chars=50``) and bound
# to the shared Elasticsearch client; built once at import time.
fake_index = Index(fake.pystr(max_chars=50).lower(), using=ElasticInstance.get())
add_analyzer(fake_index)


class IndexerTests(TestCase):

    def setUp(self):
        """Create one row of each task model and keep the shared client."""
        self.task = ReadDataJsonTask.objects.create()
        self.meta_task = IndexMetadataTask.objects.create()
        self.elastic = ElasticInstance.get()

    def test_index(self):
        index_ok = self._index(catalog_id='test_catalog', catalog_url='single_distribution.json')
        search = Search(
            index=fake_index._name,
            using=self.elastic
Beispiel #25
0
 def setUp(self):
     """Create one row of each task model and keep the shared client."""
     self.task = ReadDataJsonTask.objects.create()
     self.meta_task = IndexMetadataTask.objects.create()
     self.elastic = ElasticInstance.get()
Beispiel #26
0
 def tearDownClass(cls):
     """Delete the test index and every Node row."""
     super(CSVTest, cls).tearDownClass()

     indices = ElasticInstance.get().indices
     indices.delete(cls.index)

     Node.objects.all().delete()
Beispiel #27
0
 def setUpClass(cls):
     """Store an ElasticInstance on the class, then run the parent setup."""
     # NOTE(review): instantiates ElasticInstance directly instead of calling
     # ElasticInstance.get() -- confirm both yield the same client.
     cls.elastic = ElasticInstance()
     super(IndexerTests, cls).setUpClass()
Beispiel #28
0
 def __init__(self, index: str):
     """Keep the index name, the shared client and the ``tseries_index`` result.

     ``self.index`` holds whatever ``tseries_index(index)`` returns, while
     ``self.index_name`` keeps the plain name.
     """
     self.index_name = index
     self.elastic: Elasticsearch = ElasticInstance.get()
     self.index = tseries_index(index)
Beispiel #29
0
 def tearDown(self):
     """Delete the test index, then every Catalog, DumpFile and Node row."""
     indices = ElasticInstance.get().indices
     indices.delete(self.index)

     for model in (Catalog, DumpFile, Node):
         model.objects.all().delete()
Beispiel #30
0
 def tearDownClass(cls):
     """Run the parent teardown, then delete the test index."""
     super(SQLGeneratorTests, cls).tearDownClass()

     indices = ElasticInstance.get().indices
     indices.delete(cls.index)