class MetadataConfig(AppConfig):
    name = 'series_tiempo_ar_api.apps.metadata'

    es_configurations = settings.ES_CONFIGURATION
    es_urls = es_configurations["ES_URLS"]
    client_options = es_configurations["CONNECTIONS"]["default"]
    ElasticInstance.init(es_urls, client_options)

def init_index(index_name):
    elastic_instance = ElasticInstance.get()
    index = Index(index_name, using=elastic_instance)
    add_analyzer(index)
    if not index.exists():
        index.create()

    Metadata.init(using=elastic_instance, index=index_name)
    return index

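# Illustrative usage sketch (not from the source): bootstrap the metadata
# index before indexing documents into it. Reusing constants.FIELDS_INDEX
# (seen in get_fields_meta_index below) as the index name is an assumption.
meta_index = init_index(constants.FIELDS_INDEX)
assert meta_index.exists()
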
def index_catalog(catalog_id, catalog_path, index, node=None):
    """Indexes a catalog. Useful for tests."""
    if not node:
        node = Node(catalog_id=catalog_id, catalog_url=catalog_path, indexable=True)

    catalog = DataJson(node.catalog_url)
    node.catalog = json.dumps(catalog)
    node.save()
    task = ReadDataJsonTask()
    task.save()

    read_datajson(task, read_local=True, whitelist=True)
    for distribution in Distribution.objects.filter(dataset__catalog__identifier=catalog_id):
        DistributionIndexer(index=index).run(distribution)

    ElasticInstance.get().indices.forcemerge(index=index)

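# Illustrative usage sketch for tests (the catalog id, sample path and index
# name are assumptions): index a one-distribution sample catalog, then run
# queries against it. Force-merging makes the documents searchable at once.
import os

SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')
index_catalog('test_catalog',
              os.path.join(SAMPLES_DIR, 'single_distribution.json'),
              index='test_index')
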
def __init__(self, index, series_id, rep_mode, args, collapse_agg=None):
    self.index = index
    self.elastic = ElasticInstance.get()
    self.series_id = series_id
    self.rep_mode = rep_mode
    self.args = args.copy()
    self.collapse_agg = collapse_agg or constants.API_DEFAULT_VALUES[constants.PARAM_COLLAPSE_AGG]
    self.search = self.init_search()

def __init__(self, task, index=settings.TS_INDEX, output_directory=constants.DUMP_DIR):
    self.task = task
    self.index = index
    self.output_directory = output_directory
    self.fields = {}
    self.catalog_themes = {}
    self.elastic = ElasticInstance.get()
    self.init_fields_dict()

def scheduler():
    task = ReadDataJsonTask.objects.last()
    if task.status == task.FINISHED:
        return

    if not get_queue('indexing').jobs:
        ReportGenerator(task).generate()
        elastic = ElasticInstance.get()
        if elastic.indices.exists(index=settings.TS_INDEX):
            elastic.indices.forcemerge(index=settings.TS_INDEX)

def __init__(self, node: Node, task: IndexMetadataTask, doc_type):
    self.node = node
    self.task = task
    self.doc_type = doc_type
    self.elastic = ElasticInstance.get()
    self.fields_meta = {}
    self.init_fields_meta_cache()
    try:
        data_json = DataJson(node.catalog_url)
        themes = data_json['themeTaxonomy']
        self.themes = self.get_themes(themes)
    except Exception as exc:
        raise ValueError("Error reading the catalog's themes") from exc

def setUpClass(cls):
    super(ViewTests, cls).setUpClass()
    es_client = ElasticInstance.get()
    if es_client.indices.exists(cls.index):
        es_client.indices.delete(cls.index)
    es_client.indices.create(cls.index, body=INDEX_CREATION_BODY)

    cls.catalog_id = 'csv_dump_test_catalog'
    path = os.path.join(samples_dir, 'distribution_daily_periodicity.json')
    index_catalog(cls.catalog_id, path, cls.index)
    cls.task = CSVDumpTask()
    cls.task.save()

    gen = CSVDumpGenerator(cls.task, index=cls.index, output_directory=cls.directory)
    gen.generate()

def __init__(self, node: Node, task: IndexMetadataTask, index: str):
    self.node = node
    self.task = task
    self.index_name = index
    self.elastic: Elasticsearch = ElasticInstance.get()
    if not self.elastic.indices.exists(self.index_name):
        init_index(self.index_name)

    self.fields_meta = {}
    self.init_fields_meta_cache()
    try:
        data_json = DataJson(node.catalog_url)
        themes = data_json['themeTaxonomy']
        self.themes = self.get_themes(themes)
    except Exception as exc:
        raise ValueError("Error reading the catalog's themes") from exc

def __init__(self, index):
    """
    args:
        index (str): Elasticsearch index to run the queries against.
    """
    self.index = index
    self.series = []
    self.elastic = ElasticInstance()
    self.data = None
    self.count = None

    # Parameters that must be stored and accessed several times
    self.args = {
        constants.PARAM_START: constants.API_DEFAULT_VALUES[constants.PARAM_START],
        constants.PARAM_LIMIT: constants.API_DEFAULT_VALUES[constants.PARAM_LIMIT],
        constants.PARAM_SORT: constants.API_DEFAULT_VALUES[constants.PARAM_SORT],
    }

def tseries_index(name: str) -> Index:
    index = Index(name, using=ElasticInstance.get())

    # Sets the upper bound on the number of values in a response. When
    # filtering by series, this is the maximum number of values a single
    # time series can have.
    index.settings(max_result_window=settings.MAX_SERIES_VALUES)

    if not index.exists():
        index.create()
        index.put_mapping(doc_type=settings.TS_DOC_TYPE, body=constants.MAPPING)

    index.save()

    # Update the mapping
    mapping = index.get_mapping(doc_type=settings.TS_DOC_TYPE)
    doc_properties = mapping[name]['mappings'][settings.TS_DOC_TYPE]['properties']
    if not doc_properties.get('raw_value'):
        index.put_mapping(doc_type=settings.TS_DOC_TYPE, body=constants.MAPPING)

    return index

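# Illustrative usage sketch: ensure the time series index exists with its
# mapping before instantiating an indexer. settings.TS_INDEX is the default
# index name used elsewhere in this codebase.
ts_index = tseries_index(settings.TS_INDEX)
assert ts_index.exists()
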
def get_fields_meta_index():
    fields_meta = Index(constants.FIELDS_INDEX, using=ElasticInstance.get())
    add_analyzer(fields_meta)
    return fields_meta

def tearDownClass(cls):
    super(CSVDumpCommandTests, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)
    Catalog.objects.all().delete()
    Node.objects.all().delete()

def tearDownClass(cls):
    ElasticInstance.get().indices.delete(cls.index)
    Node.objects.all().delete()

class Meta:
    dynamic = MetaField('strict')
    doc_type = constants.METADATA_DOC_TYPE
    using = ElasticInstance.get()

def setUpClass(cls):
    Catalog.objects.all().delete()
    cls.elastic = ElasticInstance()
    super(IndexerTests, cls).setUpClass()

def tearDownClass(cls):
    super(ViewTests, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)
    DumpFile.objects.all().delete()
    Node.objects.all().delete()

#! coding: utf-8
from django.conf import settings

from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from .helpers import setup_database
from .support.generate_data import get_generator

elastic = ElasticInstance.get()


def setup():
    if not elastic.indices.exists(settings.TEST_INDEX):
        generator = get_generator()
        generator.run()

    setup_database()


def teardown():
    elastic.indices.delete(settings.TEST_INDEX)

def execute(self):
    """Runs the query. Returns a dict with the following format:
    {
        "limit": 20,
        "offset": 0,
        "count": 1,
        "data": [
            {
                "title": "foo",
                "description": "bar",
                "id": "if-foo",
            }
        ]
    }
    """
    self.validate()
    if self.errors:
        self.response['errors'] = self.errors
        return self.response

    es_client = ElasticInstance.get()
    search = Metadata.search(using=es_client, index=constants.METADATA_ALIAS)
    querystring = self.args.get(constants.PARAM_QUERYSTRING)
    if querystring is not None:
        search = search.query('match', all=querystring)

    offset = self.args[constants.PARAM_OFFSET]
    limit = self.args[constants.PARAM_LIMIT]
    search = search[offset:limit + offset]

    for arg, field in constants.FILTER_ARGS.items():
        search = self.add_filters(search, arg, field)

    response = search.execute()
    self.response = {
        'data': [],
        'count': response.hits.total,
    }

    for hit in response:
        start_date = getattr(hit, 'start_date', None)
        if start_date:
            start_date = start_date.date()
        end_date = getattr(hit, 'end_date', None)
        if end_date:
            end_date = end_date.date()

        self.response['data'].append({
            'field': {
                'id': getattr(hit, 'id', None),
                'description': getattr(hit, 'description', None),
                'title': getattr(hit, 'title', None),
                'frequency': getattr(hit, 'periodicity', None),
                'time_index_start': start_date,
                'time_index_end': end_date,
                'units': getattr(hit, 'units', None),
            },
            'dataset': {
                'title': getattr(hit, 'dataset_title', None),
                'publisher': {
                    'name': getattr(hit, 'dataset_publisher_name', None),
                },
                'source': getattr(hit, 'dataset_source', None),
                'theme': getattr(hit, 'dataset_theme', None),
            }
        })

    self.response[constants.PARAM_LIMIT] = self.args[constants.PARAM_LIMIT]
    self.response[constants.PARAM_OFFSET] = self.args[constants.PARAM_OFFSET]
    return self.response

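# Illustrative usage sketch: the enclosing class is not shown in this snippet,
# so `FieldSearchQuery`, its constructor signature, and the 'q' argument key
# are assumptions here.
query = FieldSearchQuery(args={'q': 'exportaciones'})
response = query.execute()
print(response['count'], len(response['data']))
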
def __init__(self, task):
    self.elastic: Elasticsearch = ElasticInstance.get()
    self.task = task

import os

import faker
from elasticsearch_dsl import Index

from django.test import TestCase
from django_datajsonar.tasks import read_datajson
from django_datajsonar.models import ReadDataJsonTask, Node, Field as datajsonar_Field

from series_tiempo_ar_api.apps.metadata.indexer.catalog_meta_indexer import CatalogMetadataIndexer
from series_tiempo_ar_api.apps.metadata.indexer.doc_types import Field
from series_tiempo_ar_api.apps.metadata.indexer.index import add_analyzer
from series_tiempo_ar_api.apps.metadata.models import IndexMetadataTask
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from series_tiempo_ar_api.apps.management import meta_keys

SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')
fake = faker.Faker()

fake_index = Index(fake.word(), using=ElasticInstance.get())
add_analyzer(fake_index)


class IndexerTests(TestCase):

    class FakeField(Field):
        class Meta:
            index = fake_index._name

    def setUp(self):
        self.elastic = ElasticInstance.get()
        self.task = ReadDataJsonTask.objects.create()
        self.meta_task = IndexMetadataTask.objects.create()
        fake_index.doc_type(self.FakeField)
        fake_index.create()
        self.FakeField.init(using=self.elastic)

def delete_metadata(fields: list):
    es_instance = ElasticInstance.get()
    search = Search(using=es_instance, index=constants.METADATA_ALIAS)
    return search.filter('terms', id=[field.identifier for field in fields]).delete()

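# Illustrative usage sketch: purge metadata documents for fields removed from
# a catalog. The `present=False` filter on django_datajsonar's Field model is
# a hypothetical criterion, not taken from the source.
stale_fields = list(datajsonar_Field.objects.filter(present=False))
delete_metadata(stale_fields)
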
import os

import faker
from elasticsearch_dsl import Index, Search

from django.test import TestCase
from django_datajsonar.tasks import read_datajson
from django_datajsonar.models import ReadDataJsonTask, Node, Field as datajsonar_Field

from series_tiempo_ar_api.apps.metadata.indexer.catalog_meta_indexer import CatalogMetadataIndexer
from series_tiempo_ar_api.apps.metadata.indexer.index import add_analyzer
from series_tiempo_ar_api.apps.metadata.models import IndexMetadataTask
from series_tiempo_ar_api.libs.indexing.elastic import ElasticInstance
from series_tiempo_ar_api.apps.management import meta_keys

SAMPLES_DIR = os.path.join(os.path.dirname(__file__), 'samples')
fake = faker.Faker()

fake_index = Index(fake.pystr(max_chars=50).lower(), using=ElasticInstance.get())
add_analyzer(fake_index)


class IndexerTests(TestCase):

    def setUp(self):
        self.elastic = ElasticInstance.get()
        self.task = ReadDataJsonTask.objects.create()
        self.meta_task = IndexMetadataTask.objects.create()

    def test_index(self):
        index_ok = self._index(catalog_id='test_catalog', catalog_url='single_distribution.json')
        search = Search(index=fake_index._name, using=self.elastic)

def tearDownClass(cls):
    super(CSVTest, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)
    Node.objects.all().delete()

def setUpClass(cls):
    cls.elastic = ElasticInstance()
    super(IndexerTests, cls).setUpClass()

def __init__(self, index: str):
    self.elastic: Elasticsearch = ElasticInstance.get()
    self.index_name = index
    self.index = tseries_index(index)

def tearDown(self):
    ElasticInstance.get().indices.delete(self.index)
    Catalog.objects.all().delete()
    DumpFile.objects.all().delete()
    Node.objects.all().delete()

def tearDownClass(cls):
    super(SQLGeneratorTests, cls).tearDownClass()
    ElasticInstance.get().indices.delete(cls.index)