class MetadataConfig(AppConfig):
    name = 'series_tiempo_ar_api.apps.metadata'

    es_configurations = settings.ES_CONFIGURATION
    es_urls = es_configurations["ES_URLS"]
    client_options = es_configurations["CONNECTIONS"]["default"]
    ElasticInstance.init(es_urls, client_options)

def init_index(index_name):
    elastic_instance = ElasticInstance.get()
    index = Index(index_name, using=elastic_instance)
    add_analyzer(index)
    if not index.exists():
        index.create()

    Metadata.init(using=elastic_instance, index=index_name)
    return index

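# Illustrative usage sketch (not from the source): bootstrap the metadata
# index before indexing documents into it. Reusing constants.FIELDS_INDEX
# (seen in get_fields_meta_index below) as the index name is an assumption.
meta_index = init_index(constants.FIELDS_INDEX)
assert meta_index.exists()
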
def index_catalog(catalog_id, catalog_path, index, node=None):
    """Indexes a catalog. Useful for tests."""
    if not node:
        node = Node(catalog_id=catalog_id, catalog_url=catalog_path, indexable=True)

    catalog = DataJson(node.catalog_url)
    node.catalog = json.dumps(catalog)
    node.save()
    task = ReadDataJsonTask()
    task.save()

    read_datajson(task, read_local=True, whitelist=True)
    for distribution in Distribution.objects.filter(dataset__catalog__identifier=catalog_id):
        DistributionIndexer(index=index).run(distribution)

    ElasticInstance.get().indices.forcemerge(index=index)

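# Illustrative usage sketch for tests (the catalog id, sample path and index
# name are assumptions): index a one-distribution sample catalog, then run
# queries against it. Force-merging makes the documents searchable at once.
import os

SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')
index_catalog('test_catalog',
              os.path.join(SAMPLES_DIR, 'single_distribution.json'),
              index='test_index')
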
def __init__(self, index, series_id, rep_mode, args, collapse_agg=None):
    self.index = index
    self.elastic = ElasticInstance.get()
    self.series_id = series_id
    self.rep_mode = rep_mode
    self.args = args.copy()
    self.collapse_agg = collapse_agg or constants.API_DEFAULT_VALUES[constants.PARAM_COLLAPSE_AGG]
    self.search = self.init_search()

def __init__(self, task, index=settings.TS_INDEX, output_directory=constants.DUMP_DIR):
    self.task = task
    self.index = index
    self.output_directory = output_directory
    self.fields = {}
    self.catalog_themes = {}
    self.elastic = ElasticInstance.get()
    self.init_fields_dict()

def scheduler():
    task = ReadDataJsonTask.objects.last()
    if task.status == task.FINISHED:
        return

    if not get_queue('indexing').jobs:
        ReportGenerator(task).generate()
        elastic = ElasticInstance.get()
        if elastic.indices.exists(index=settings.TS_INDEX):
            elastic.indices.forcemerge(index=settings.TS_INDEX)

def __init__(self, node: Node, task: IndexMetadataTask, doc_type):
    self.node = node
    self.task = task
    self.doc_type = doc_type
    self.elastic = ElasticInstance.get()
    self.fields_meta = {}
    self.init_fields_meta_cache()
    try:
        data_json = DataJson(node.catalog_url)
        themes = data_json['themeTaxonomy']
        self.themes = self.get_themes(themes)
    except Exception as exc:
        raise ValueError("Error reading the catalog's themes") from exc

def setUpClass(cls):
    super(ViewTests, cls).setUpClass()
    es_client = ElasticInstance.get()
    if es_client.indices.exists(cls.index):
        es_client.indices.delete(cls.index)
    es_client.indices.create(cls.index, body=INDEX_CREATION_BODY)

    cls.catalog_id = 'csv_dump_test_catalog'
    path = os.path.join(samples_dir, 'distribution_daily_periodicity.json')
    index_catalog(cls.catalog_id, path, cls.index)
    cls.task = CSVDumpTask()
    cls.task.save()

    gen = CSVDumpGenerator(cls.task, index=cls.index, output_directory=cls.directory)
    gen.generate()

def __init__(self, node: Node, task: IndexMetadataTask, index: str):
    self.node = node
    self.task = task
    self.index_name = index
    self.elastic: Elasticsearch = ElasticInstance.get()
    if not self.elastic.indices.exists(self.index_name):
        init_index(self.index_name)

    self.fields_meta = {}
    self.init_fields_meta_cache()
    try:
        data_json = DataJson(node.catalog_url)
        themes = data_json['themeTaxonomy']
        self.themes = self.get_themes(themes)
    except Exception as exc:
        raise ValueError("Error reading the catalog's themes") from exc

def __init__(self, index):
    """
    args:
        index (str): Elasticsearch index to run the queries against.
    """
    self.index = index
    self.series = []
    self.elastic = ElasticInstance()
    self.data = None
    self.count = None

    # Parameters that must be stored and accessed several times
    self.args = {
        constants.PARAM_START: constants.API_DEFAULT_VALUES[constants.PARAM_START],
        constants.PARAM_LIMIT: constants.API_DEFAULT_VALUES[constants.PARAM_LIMIT],
        constants.PARAM_SORT: constants.API_DEFAULT_VALUES[constants.PARAM_SORT],
    }

def tseries_index(name: str) -> Index:
    index = Index(name, using=ElasticInstance.get())

    # Sets the upper bound on the number of values in a response. When
    # filtering by series, this is the maximum number of values a single
    # time series can have.
    index.settings(max_result_window=settings.MAX_SERIES_VALUES)

    if not index.exists():
        index.create()
        index.put_mapping(doc_type=settings.TS_DOC_TYPE, body=constants.MAPPING)

    index.save()

    # Update the mapping
    mapping = index.get_mapping(doc_type=settings.TS_DOC_TYPE)
    doc_properties = mapping[name]['mappings'][settings.TS_DOC_TYPE]['properties']
    if not doc_properties.get('raw_value'):
        index.put_mapping(doc_type=settings.TS_DOC_TYPE, body=constants.MAPPING)

    return index

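# Illustrative usage sketch: ensure the time series index exists with its
# mapping before instantiating an indexer. settings.TS_INDEX is the default
# index name used elsewhere in this codebase.
ts_index = tseries_index(settings.TS_INDEX)
assert ts_index.exists()
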
def get_fields_meta_index():
    fields_meta = Index(constants.FIELDS_INDEX, using=ElasticInstance.get())
    add_analyzer(fields_meta)
    return fields_meta

def tearDownClass(cls):
    super(CSVDumpCommandTests, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)
    Catalog.objects.all().delete()
    Node.objects.all().delete()

def tearDownClass(cls):
    ElasticInstance.get().indices.delete(cls.index)
    Node.objects.all().delete()

class Meta:
    dynamic = MetaField('strict')
    doc_type = constants.METADATA_DOC_TYPE
    using = ElasticInstance.get()

def setUpClass(cls):
    Catalog.objects.all().delete()
    cls.elastic = ElasticInstance()
    super(IndexerTests, cls).setUpClass()

def tearDownClass(cls):
    super(ViewTests, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)
    DumpFile.objects.all().delete()
    Node.objects.all().delete()

#! coding: utf-8
from django.conf import settings

from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from .helpers import setup_database
from .support.generate_data import get_generator

elastic = ElasticInstance.get()


def setup():
    if not elastic.indices.exists(settings.TEST_INDEX):
        generator = get_generator()
        generator.run()

    setup_database()


def teardown():
    elastic.indices.delete(settings.TEST_INDEX)

def execute(self):
    """Runs the query. Returns a dict with the following format:
    {
        "limit": 20,
        "offset": 0,
        "count": 1,
        "data": [
            {
                "title": "foo",
                "description": "bar",
                "id": "if-foo",
            }
        ]
    }
    """
    self.validate()
    if self.errors:
        self.response['errors'] = self.errors
        return self.response

    es_client = ElasticInstance.get()
    search = Metadata.search(using=es_client, index=constants.METADATA_ALIAS)
    querystring = self.args.get(constants.PARAM_QUERYSTRING)
    if querystring is not None:
        search = search.query('match', all=querystring)

    offset = self.args[constants.PARAM_OFFSET]
    limit = self.args[constants.PARAM_LIMIT]
    search = search[offset:limit + offset]

    for arg, field in constants.FILTER_ARGS.items():
        search = self.add_filters(search, arg, field)

    response = search.execute()
    self.response = {
        'data': [],
        'count': response.hits.total,
    }

    for hit in response:
        start_date = getattr(hit, 'start_date', None)
        if start_date:
            start_date = start_date.date()
        end_date = getattr(hit, 'end_date', None)
        if end_date:
            end_date = end_date.date()

        self.response['data'].append({
            'field': {
                'id': getattr(hit, 'id', None),
                'description': getattr(hit, 'description', None),
                'title': getattr(hit, 'title', None),
                'frequency': getattr(hit, 'periodicity', None),
                'time_index_start': start_date,
                'time_index_end': end_date,
                'units': getattr(hit, 'units', None),
            },
            'dataset': {
                'title': getattr(hit, 'dataset_title', None),
                'publisher': {
                    'name': getattr(hit, 'dataset_publisher_name', None),
                },
                'source': getattr(hit, 'dataset_source', None),
                'theme': getattr(hit, 'dataset_theme', None),
            }
        })

    self.response[constants.PARAM_LIMIT] = self.args[constants.PARAM_LIMIT]
    self.response[constants.PARAM_OFFSET] = self.args[constants.PARAM_OFFSET]
    return self.response

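# Illustrative usage sketch: the enclosing class is not shown in this snippet,
# so `FieldSearchQuery`, its constructor signature, and the 'q' argument key
# are assumptions here.
query = FieldSearchQuery(args={'q': 'exportaciones'})
response = query.execute()
print(response['count'], len(response['data']))
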
def __init__(self, task):
    self.elastic: Elasticsearch = ElasticInstance.get()
    self.task = task

import os

import faker
from elasticsearch_dsl import Index

from django.test import TestCase
from django_datajsonar.tasks import read_datajson
from django_datajsonar.models import ReadDataJsonTask, Node, Field as datajsonar_Field

from series_tiempo_ar_api.apps.metadata.indexer.catalog_meta_indexer import CatalogMetadataIndexer
from series_tiempo_ar_api.apps.metadata.indexer.doc_types import Field
from series_tiempo_ar_api.apps.metadata.indexer.index import add_analyzer
from series_tiempo_ar_api.apps.metadata.models import IndexMetadataTask
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from series_tiempo_ar_api.apps.management import meta_keys

SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')
fake = faker.Faker()

fake_index = Index(fake.word(), using=ElasticInstance.get())
add_analyzer(fake_index)


class IndexerTests(TestCase):

    class FakeField(Field):
        class Meta:
            index = fake_index._name

    def setUp(self):
        self.elastic = ElasticInstance.get()
        self.task = ReadDataJsonTask.objects.create()
        self.meta_task = IndexMetadataTask.objects.create()
        fake_index.doc_type(self.FakeField)
        fake_index.create()
        self.FakeField.init(using=self.elastic)

def delete_metadata(fields: list):
    es_instance = ElasticInstance.get()
    search = Search(using=es_instance, index=constants.METADATA_ALIAS)
    return search.filter('terms', id=[field.identifier for field in fields]).delete()

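# Illustrative usage sketch: purge metadata documents for fields removed from
# a catalog. The `present=False` filter on django_datajsonar's Field model is
# a hypothetical criterion, not taken from the source.
stale_fields = list(datajsonar_Field.objects.filter(present=False))
delete_metadata(stale_fields)
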
import os

import faker
from elasticsearch_dsl import Index, Search

from django.test import TestCase
from django_datajsonar.tasks import read_datajson
from django_datajsonar.models import ReadDataJsonTask, Node, Field as datajsonar_Field

from series_tiempo_ar_api.apps.metadata.indexer.catalog_meta_indexer import CatalogMetadataIndexer
from series_tiempo_ar_api.apps.metadata.indexer.index import add_analyzer
from series_tiempo_ar_api.apps.metadata.models import IndexMetadataTask
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from series_tiempo_ar_api.apps.management import meta_keys

SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')
fake = faker.Faker()

fake_index = Index(fake.pystr(max_chars=50).lower(), using=ElasticInstance.get())
add_analyzer(fake_index)


class IndexerTests(TestCase):

    def setUp(self):
        self.elastic = ElasticInstance.get()
        self.task = ReadDataJsonTask.objects.create()
        self.meta_task = IndexMetadataTask.objects.create()

    def test_index(self):
        index_ok = self._index(catalog_id='test_catalog', catalog_url='single_distribution.json')
        search = Search(index=fake_index._name, using=self.elastic)

def tearDownClass(cls):
    super(CSVTest, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)
    Node.objects.all().delete()

def setUpClass(cls):
    cls.elastic = ElasticInstance()
    super(IndexerTests, cls).setUpClass()

def __init__(self, index: str):
    self.elastic: Elasticsearch = ElasticInstance.get()
    self.index_name = index
    self.index = tseries_index(index)

def tearDown(self):
    ElasticInstance.get().indices.delete(self.index)
    Catalog.objects.all().delete()
    DumpFile.objects.all().delete()
    Node.objects.all().delete()

def tearDownClass(cls):
    super(SQLGeneratorTests, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)