コード例 #1
0
def reverse_func(apps, schema_editor):
    """Migration reverse: reload the base ontology, strip node/edge ontology
    URIs back to their bare class/property names, and remove the base Arches
    concept from the search index.

    Arguments:
    apps -- migration app registry; used to fetch historical models
    schema_editor -- unused, required by the Django migration API
    """
    extensions = [os.path.join(settings.ONTOLOGY_PATH, x) for x in settings.ONTOLOGY_EXT]
    management.call_command('load_ontology', source=os.path.join(settings.ONTOLOGY_PATH, settings.ONTOLOGY_BASE),
        version=settings.ONTOLOGY_BASE_VERSION, ontology_name=settings.ONTOLOGY_BASE_NAME, id=settings.ONTOLOGY_BASE_ID, extensions=','.join(extensions), verbosity=0)

    Node = apps.get_model("models", "Node")
    Edge = apps.get_model("models", "Edge")

    # keep only the fragment after the last '/' of each ontology URI
    for node in Node.objects.all():
        node.ontologyclass = str(node.ontologyclass).split('/')[-1]
        node.save()

    for edge in Edge.objects.all():
        edge.ontologyproperty = str(edge.ontologyproperty).split('/')[-1]
        edge.save()

    # remove index for base Arches concept
    se = SearchEngineFactory().create()
    query = Query(se, start=0, limit=10000)
    query.add_query(Term(field='conceptid', term='00000000-0000-0000-0000-000000000001'))
    query.delete(index='concepts')

    try:
        DValueType = apps.get_model("models", "DValueType")
        DValueType.objects.get(valuetype='identifier').delete()
    except Exception:
        # BUGFIX: narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; the delete stays best-effort.
        pass
コード例 #2
0
 def append_search_filters(self, value, node, query, request):
     """Append a boolean Term filter for *node* to *query*.

     ``value['val']`` is expected to be ``'t'`` (true) or ``'f'``/other
     (false); an empty string means "no filter". A missing ``'val'`` key is
     treated the same as no filter.
     """
     try:
         if value['val'] != '':
             # BUGFIX: `except KeyError, e` is Python 2-only syntax
             # (SyntaxError on Python 3); also simplified the redundant
             # `True if ... else False`.
             term = value['val'] == 't'
             query.must(Term(field='tiles.data.%s' % (str(node.pk)), term=term))
     except KeyError:
         # 'val' missing from the filter payload -- nothing to append
         pass
コード例 #3
0
ファイル: concept.py プロジェクト: webjunkie/arches
 def delete_index(self):
     """Remove this concept's documents from the 'concept_labels' index and
     purge its terms from the search engine."""
     search_engine = SearchEngineFactory().create()
     delete_query = Query(search_engine, start=0, limit=10000)
     delete_query.add_query(Term(field='id', term=self.id))
     delete_query.delete(index='concept_labels')
     search_engine.delete_terms(self.id)
コード例 #4
0
ファイル: 0005_4_0_1.py プロジェクト: waqashamid/arches
def reverse_func(apps, schema_editor):
    """Migration reverse: strip ontology URIs on nodes/edges back to bare
    names and drop the base Arches concept from the search index.

    Arguments:
    apps -- migration app registry; used to fetch historical models
    schema_editor -- unused, required by the Django migration API
    """

    node_model = apps.get_model("models", "Node")
    edge_model = apps.get_model("models", "Edge")

    # Keep only the fragment after the final slash of each ontology URI.
    for node_row in node_model.objects.all():
        node_row.ontologyclass = str(node_row.ontologyclass).split("/")[-1]
        node_row.save()

    for edge_row in edge_model.objects.all():
        edge_row.ontologyproperty = str(edge_row.ontologyproperty).split("/")[-1]
        edge_row.save()

    # remove index for base Arches concept
    search_engine = SearchEngineFactory().create()
    concept_query = Query(search_engine, start=0, limit=10000)
    concept_query.add_query(
        Term(field="conceptid", term="00000000-0000-0000-0000-000000000001"))
    concept_query.delete(index="concepts")

    try:
        value_type_model = apps.get_model("models", "DValueType")
        value_type_model.objects.get(valuetype="identifier").delete()
    except Exception:
        # best-effort cleanup; the row may already be gone
        pass
コード例 #5
0
 def delete_concept_values_index(concepts_to_delete):
     """Remove every concept in *concepts_to_delete* (a dict keyed by
     concept id) from the 'strings' index."""
     se = SearchEngineFactory().create()
     # BUGFIX: dict.itervalues() was removed in Python 3; values() iterates
     # identically for this read-only loop.
     for concept in concepts_to_delete.values():
         query = Query(se, start=0, limit=10000)
         term = Term(field='conceptid', term=concept.id)
         query.add_query(term)
         query.delete(index='strings', doc_type='concept')
コード例 #6
0
ファイル: concept.py プロジェクト: webjunkie/arches
 def delete_concept_values_index(concepts_to_delete):
     """Remove every concept in *concepts_to_delete* (a dict keyed by
     concept id) from the 'concept_labels' index, and purge each of the
     concept's value terms from the search engine."""
     se = SearchEngineFactory().create()
     # BUGFIX: dict.itervalues() was removed in Python 3; values() iterates
     # identically for this read-only loop.
     for concept in concepts_to_delete.values():
         query = Query(se, start=0, limit=10000)
         term = Term(field='conceptid', term=concept.id)
         query.add_query(term)
         query.delete(index='concept_labels')
         for conceptvalue in concept.values:
             se.delete_terms(conceptvalue.id)
コード例 #7
0
def index_resources_by_type(resource_types, clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Indexes all resources of a given type(s)

    Arguments:
    resource_types -- array of graph ids that represent resource types

    Keyword Arguments:
    clear_index -- set to True to remove all the resources of the types passed in from the index before the reindexing operation
    index_name -- only applies to custom indexes and if given will try and just refresh the data in that index
    batch_size -- the number of records to index as a group, the larger the number to more memory required

    Return: the 'Passed'/'Failed' status string of the last default-index
    reindex performed ('' if none was)
    """

    # BUGFIX: `status` was initialized twice (redundant duplicate removed).
    status = ''
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # map node id -> datatype once, so per-resource indexing avoids a DB hit
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}

    for resource_type in resource_types:
        start = datetime.now()
        resources = Resource.objects.filter(graph_id=str(resource_type))
        graph_name = models.GraphModel.objects.get(graphid=str(resource_type)).name
        print("Indexing resource type '{0}'".format(graph_name))

        if index_name is None:
            q = Query(se=se)
            term = Term(field='graph_id', term=str(resource_type))
            q.add_query(term)
            if clear_index:
                q.delete(index='resources', refresh=True)

            # two bulk indexers: one for resource docs, one for their terms
            with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer:
                with se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
                    for resource in resources:
                        document, terms = resource.get_documents_to_index(fetchTiles=True, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
                        doc_indexer.add(index='resources', id=document['resourceinstanceid'], data=document)
                        for term in terms:
                            term_indexer.add(index='terms', id=term['_id'], data=term['_source'])

            # compare DB count against indexed count (q still filters on graph_id)
            result_summary = {'database': len(resources), 'indexed': se.count(index='resources', body=q.dsl)}
            status = 'Passed' if result_summary['database'] == result_summary['indexed'] else 'Failed'
            print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(status, graph_name, result_summary['database'], result_summary['indexed'], (datetime.now()-start).seconds))

            # also refresh any registered custom indexes for this type
            for index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
                es_index = import_class_from_string(index['module'])(index['name'])
                es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

        else:
            # custom-index-only refresh; `status` is not updated on this path
            es_index = get_index(index_name)
            es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

    return status
コード例 #8
0
    def bulk_index(self,
                   resources=None,
                   resource_type=None,
                   graph_name=None,
                   clear_index=True):
        """
        Indexes a list of documents in bulk to Elastic Search

        Arguments:
        None

        Keyword Arguments:
        resources -- the list of resource instances to index
        resource_type -- the type of resources being indexed
        graph_name -- the name of the graph model that represents the resources being indexed
        clear_index -- True(default) to remove all index records of type "resource_type" before indexing,
            assumes that a field called "graph_id" exists on the indexed documents

        Return: None
        """

        start = datetime.now()
        q = Query(se=self.se)
        if clear_index:
            term = Term(field="graph_id", term=str(resource_type))
            q.add_query(term)
            q.delete(index=self.index_name, refresh=True)

        # fresh match-all query, used only to count docs before/after indexing
        q = Query(se=self.se)
        count_before = self.se.count(index=self.index_name, body=q.dsl)

        result_summary = {"database": len(resources), "indexed": 0}
        with self.se.BulkIndexer(batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                                 refresh=True) as indexer:
            for resource in resources:
                tiles = list(
                    models.TileModel.objects.filter(resourceinstance=resource))
                document, doc_id = self.get_documents_to_index(resource, tiles)
                # BUGFIX: previously tested the builtin `id` (always non-None)
                # instead of the returned `doc_id`, so documents with a null
                # id were still submitted to the indexer.
                if document is not None and doc_id is not None:
                    indexer.add(index=self.index_name,
                                id=doc_id,
                                data=document)

        result_summary["indexed"] = self.se.count(index=self.index_name,
                                                  body=q.dsl) - count_before
        status = "Passed" if result_summary["database"] == result_summary[
            "indexed"] else "Failed"
        print("Custom Index - %s:" % self.index_name)
        print(
            "    Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds"
            .format(status, graph_name, result_summary["database"],
                    result_summary["indexed"],
                    (datetime.now() - start).seconds))
コード例 #9
0
ファイル: map.py プロジェクト: globaldigitalheritage/arches
 def get_resource_bounds(node):
     """Return the geo-bounds aggregation for all resources of *node*'s
     graph, or None when the index reports no bounds."""
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(
         GeoBoundsAgg(field='points.point', name='bounds'))
     bounds_query.add_query(
         Term(field='graph_id', term=str(node.graph.graphid)))
     results = bounds_query.search(index='resources')
     bounds_agg = results['aggregations']['bounds']
     # the agg only carries a 'bounds' key when matching geometries exist
     return bounds_agg['bounds'] if 'bounds' in bounds_agg else None
コード例 #10
0
ファイル: map.py プロジェクト: msrivastava/arches
 def get_resource_bounds(node):
     """Return the geo-bounds aggregation for all resources of *node*'s
     graph, or None when the index reports no bounds."""
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(
         GeoBoundsAgg(field="points.point", name="bounds"))
     bounds_query.add_query(
         Term(field="graph_id", term=str(node.graph.graphid)))
     results = bounds_query.search(index=RESOURCES_INDEX)
     bounds_agg = results["aggregations"]["bounds"]
     # the agg only carries a "bounds" key when matching geometries exist
     return bounds_agg["bounds"] if "bounds" in bounds_agg else None
コード例 #11
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """Add a graph-id Bool filter, built from this component's request
        parameters, to the search query."""
        graph_filter = Bool()
        params = self.request.GET.get(details['componentname'], '')

        for type_filter in JSONDeserializer().deserialize(params):
            graph_term = Term(field='graph_id',
                              term=str(type_filter['graphid']))
            # inverted filters exclude the graph; all others require it
            if type_filter['inverted'] is True:
                graph_filter.must_not(graph_term)
            else:
                graph_filter.must(graph_term)

        search_results_object['query'].add_query(graph_filter)
コード例 #12
0
ファイル: url.py プロジェクト: franckcalard/arches
    def append_search_filters(self, value, node, query, request):
        # Match the label in the same manner as a String datatype
        """Append URL-datatype search clauses for *node* to *query*.

        ``value`` is expected to carry ``op`` (an operator string that may
        contain "~", "=", and/or "!") and ``val`` (the search text); missing
        keys are treated as "no filter".
        """

        try:
            if value["val"] != "":
                match_type = "phrase_prefix" if "~" in value["op"] else "phrase"
                if "~" in value["op"]:
                    # "contains"-style match against the analyzed url field
                    match_query = Match(
                        field="tiles.data.%s.url" % (str(node.pk)),
                        query=value["val"],
                        type=match_type,
                    )
                if "=" in value["op"]:
                    # exact match against the keyword sub-field; note this
                    # overrides the "~" match when both operators are present
                    match_query = Term(field="tiles.data.%s.url.keyword" % (str(node.pk)), term=value["val"])
                # NOTE(review): if "op" contains neither "~" nor "=",
                # match_query is never bound and the lines below raise
                # NameError (not caught by the KeyError handler) -- confirm
                # callers always supply one of the two operators.
                if "!" in value["op"]:
                    # negated: exclude matches but still require the node's
                    # data to exist on the tile
                    query.must_not(match_query)
                    query.filter(Exists(field="tiles.data.%s" % (str(node.pk))))
                else:
                    query.must(match_query)
        except KeyError as e:
            # missing "val"/"op" -> contribute no filter for this node
            pass
コード例 #13
0
def index_resources_by_type(resource_types,
                            clear_index=True,
                            batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                            quiet=False):
    """
    Indexes all resources of a given type(s)

    Arguments:
    resource_types -- array of graph ids that represent resource types

    Keyword Arguments:
    clear_index -- set to True to remove all the resources of the types passed in from the index before the reindexing operation
    batch_size -- the number of records to index as a group, the larger the number to more memory required
    quiet -- Silences the status bar output during certain operations, use in celery operations for example

    """

    status = ""
    datatype_factory = DataTypeFactory()
    # map each node id to its datatype once, up front, so the per-resource
    # indexing below avoids a DB lookup per node
    node_datatypes = {
        str(nodeid): datatype
        for nodeid, datatype in models.Node.objects.values_list(
            "nodeid", "datatype")
    }
    # allow a single graph id to be passed as a bare string
    if isinstance(resource_types, str):
        resource_types = [resource_types]

    for resource_type in resource_types:
        start = datetime.now()
        resources = Resource.objects.filter(graph_id=str(resource_type))
        graph_name = models.GraphModel.objects.get(
            graphid=str(resource_type)).name
        print("Indexing resource type '{0}'".format(graph_name))

        q = Query(se=se)
        term = Term(field="graph_id", term=str(resource_type))
        q.add_query(term)
        if clear_index:
            q.delete(index=RESOURCES_INDEX, refresh=True)

        # two bulk indexers: one for resource documents, one for their terms
        # NOTE(review): `se` and `pyprind` come from module scope -- this
        # function assumes both are configured/importable
        with se.BulkIndexer(batch_size=batch_size,
                            refresh=True) as doc_indexer:
            with se.BulkIndexer(batch_size=batch_size,
                                refresh=True) as term_indexer:
                if quiet is False:
                    # no progress bar for 0/1 resources
                    bar = pyprind.ProgBar(
                        len(resources),
                        bar_char="█") if len(resources) > 1 else None
                for resource in resources:
                    if quiet is False and bar is not None:
                        bar.update(item_id=resource)
                    document, terms = resource.get_documents_to_index(
                        fetchTiles=True,
                        datatype_factory=datatype_factory,
                        node_datatypes=node_datatypes)
                    doc_indexer.add(index=RESOURCES_INDEX,
                                    id=document["resourceinstanceid"],
                                    data=document)
                    for term in terms:
                        term_indexer.add(index=TERMS_INDEX,
                                         id=term["_id"],
                                         data=term["_source"])

        # compare DB count against indexed count (q still filters on graph_id)
        result_summary = {
            "database": len(resources),
            "indexed": se.count(index=RESOURCES_INDEX, body=q.dsl)
        }
        status = "Passed" if result_summary["database"] == result_summary[
            "indexed"] else "Failed"
        print(
            "Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds"
            .format(status, graph_name, result_summary["database"],
                    result_summary["indexed"],
                    (datetime.now() - start).seconds))
    return status
コード例 #14
0
ファイル: search.py プロジェクト: k-int/arches
def build_search_results_dsl(request):
    """Build the Elasticsearch DSL for a resource search from the request's
    GET parameters (term, spatial, temporal and advanced filters, paging,
    provisional-edit visibility).

    Returns a dict with the assembled ``Query`` under 'query' and the
    GeoJSON of any spatial search buffer under 'search_buffer' (None when
    no spatial filter was applied).
    """
    term_filter = request.GET.get('termFilter', '')
    spatial_filter = JSONDeserializer().deserialize(
        request.GET.get('mapFilter', '{}'))
    include_provisional = get_provisional_type(request)

    export = request.GET.get('export', None)
    mobile_download = request.GET.get('mobiledownload', None)
    page = 1 if request.GET.get('page') == '' else int(
        request.GET.get('page', 1))
    temporal_filter = JSONDeserializer().deserialize(
        request.GET.get('temporalFilter', '{}'))
    advanced_filters = JSONDeserializer().deserialize(
        request.GET.get('advanced', '[]'))
    search_buffer = None
    se = SearchEngineFactory().create()

    # page size depends on whether this is an export, a mobile download,
    # or a regular paged search
    if export != None:
        limit = settings.SEARCH_EXPORT_ITEMS_PER_PAGE
    elif mobile_download != None:
        limit = settings.MOBILE_DOWNLOAD_RESOURCE_LIMIT
    else:
        limit = settings.SEARCH_ITEMS_PER_PAGE

    query = Query(se, start=limit * int(page - 1), limit=limit)
    search_query = Bool()

    nested_agg = NestedAgg(path='points', name='geo_aggs')
    nested_agg_filter = FiltersAgg(name='inner')

    if include_provisional == True:
        nested_agg_filter.add_filter(
            Terms(field='points.provisional', terms=['false', 'true']))

    else:
        provisional_resource_filter = Bool()

        if include_provisional == False:
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['false', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['false']))

        elif include_provisional == 'only provisional':
            provisional_resource_filter.filter(
                Terms(field='provisional', terms=['true', 'partial']))
            nested_agg_filter.add_filter(
                Terms(field='points.provisional', terms=['true']))

        search_query.must(provisional_resource_filter)

    nested_agg_filter.add_aggregation(
        GeoHashGridAgg(field='points.point',
                       name='grid',
                       precision=settings.HEX_BIN_PRECISION))
    nested_agg_filter.add_aggregation(
        GeoBoundsAgg(field='points.point', name='bounds'))
    nested_agg.add_aggregation(nested_agg_filter)
    query.add_aggregation(nested_agg)

    permitted_nodegroups = get_permitted_nodegroups(request.user)

    if term_filter != '':
        for term in JSONDeserializer().deserialize(term_filter):
            term_query = Bool()
            provisional_term_filter = Bool()
            if term['type'] == 'term' or term['type'] == 'string':
                string_filter = Bool()
                if term['type'] == 'term':
                    string_filter.must(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase'))
                elif term['type'] == 'string':
                    string_filter.should(
                        Match(field='strings.string',
                              query=term['value'],
                              type='phrase_prefix'))
                    string_filter.should(
                        Match(field='strings.string.folded',
                              query=term['value'],
                              type='phrase_prefix'))

                if include_provisional == False:
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    string_filter.must_not(
                        Match(field='strings.provisional',
                              query='false',
                              type='phrase'))

                string_filter.filter(
                    Terms(field='strings.nodegroup_id',
                          terms=permitted_nodegroups))
                nested_string_filter = Nested(path='strings',
                                              query=string_filter)
                if term['inverted']:
                    search_query.must_not(nested_string_filter)
                else:
                    search_query.must(nested_string_filter)
                    # need to set min_score because the query returns results with score 0 and those have to be removed, which I don't think it should be doing
                    query.min_score('0.01')
            elif term['type'] == 'concept':
                concept_ids = _get_child_concepts(term['value'])
                conceptid_filter = Bool()
                conceptid_filter.filter(
                    Terms(field='domains.conceptid', terms=concept_ids))
                conceptid_filter.filter(
                    Terms(field='domains.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='true',
                              type='phrase'))
                elif include_provisional == 'only provisional':
                    conceptid_filter.must_not(
                        Match(field='domains.provisional',
                              query='false',
                              type='phrase'))

                nested_conceptid_filter = Nested(path='domains',
                                                 query=conceptid_filter)
                if term['inverted']:
                    search_query.must_not(nested_conceptid_filter)
                else:
                    search_query.filter(nested_conceptid_filter)

    if 'features' in spatial_filter:

        if len(spatial_filter['features']) > 0:
            feature_geom = spatial_filter['features'][0]['geometry']
            feature_properties = {}
            if 'properties' in spatial_filter['features'][0]:
                feature_properties = spatial_filter['features'][0][
                    'properties']
            buffer = {'width': 0, 'unit': 'ft'}
            if 'buffer' in feature_properties:
                buffer = feature_properties['buffer']
            search_buffer = _buffer(feature_geom, buffer['width'],
                                    buffer['unit'])
            feature_geom = JSONDeserializer().deserialize(search_buffer.json)
            geoshape = GeoShape(field='geometries.geom.features.geometry',
                                type=feature_geom['type'],
                                coordinates=feature_geom['coordinates'])

            invert_spatial_search = False
            if 'inverted' in feature_properties:
                invert_spatial_search = feature_properties['inverted']

            spatial_query = Bool()
            if invert_spatial_search == True:
                spatial_query.must_not(geoshape)
            else:
                spatial_query.filter(geoshape)

            # get the nodegroup_ids that the user has permission to search
            spatial_query.filter(
                Terms(field='geometries.nodegroup_id',
                      terms=permitted_nodegroups))

            if include_provisional == False:
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                spatial_query.filter(
                    Terms(field='geometries.provisional', terms=['true']))

            search_query.filter(Nested(path='geometries', query=spatial_query))

    if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
        now = str(datetime.utcnow())
        start_date = ExtendedDateFormat(temporal_filter['fromDate'])
        end_date = ExtendedDateFormat(temporal_filter['toDate'])
        date_nodeid = str(
            temporal_filter['dateNodeId']
        ) if 'dateNodeId' in temporal_filter and temporal_filter[
            'dateNodeId'] != '' else None
        query_inverted = False if 'inverted' not in temporal_filter else temporal_filter[
            'inverted']

        temporal_query = Bool()

        if query_inverted:
            # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
            # eg: less than START_DATE OR greater than END_DATE
            inverted_date_query = Bool()
            inverted_date_ranges_query = Bool()

            if start_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', lt=start_date.lower))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', lt=start_date.lower))
            if end_date.is_valid():
                inverted_date_query.should(
                    Range(field='dates.date', gt=end_date.upper))
                inverted_date_ranges_query.should(
                    Range(field='date_ranges.date_range', gt=end_date.upper))

            date_query = Bool()
            date_query.filter(inverted_date_query)
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))

            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(inverted_date_ranges_query)
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))

                elif include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        else:
            date_query = Bool()
            date_query.filter(
                Range(field='dates.date',
                      gte=start_date.lower,
                      lte=end_date.upper))
            date_query.filter(
                Terms(field='dates.nodegroup_id', terms=permitted_nodegroups))

            if include_provisional == False:
                date_query.filter(
                    Terms(field='dates.provisional', terms=['false']))
            elif include_provisional == 'only provisional':
                date_query.filter(
                    Terms(field='dates.provisional', terms=['true']))

            if date_nodeid:
                date_query.filter(Term(field='dates.nodeid', term=date_nodeid))
            else:
                date_ranges_query = Bool()
                date_ranges_query.filter(
                    Range(field='date_ranges.date_range',
                          gte=start_date.lower,
                          lte=end_date.upper,
                          relation='intersects'))
                date_ranges_query.filter(
                    Terms(field='date_ranges.nodegroup_id',
                          terms=permitted_nodegroups))

                if include_provisional == False:
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional',
                              terms=['false']))
                if include_provisional == 'only provisional':
                    date_ranges_query.filter(
                        Terms(field='date_ranges.provisional', terms=['true']))

                temporal_query.should(
                    Nested(path='date_ranges', query=date_ranges_query))
            temporal_query.should(Nested(path='dates', query=date_query))

        search_query.filter(temporal_query)

    datatype_factory = DataTypeFactory()
    if len(advanced_filters) > 0:
        advanced_query = Bool()
        grouped_query = Bool()
        grouped_queries = [grouped_query]
        for index, advanced_filter in enumerate(advanced_filters):
            tile_query = Bool()
            # BUGFIX: dict.iteritems() was removed in Python 3; items()
            # iterates identically for this read-only loop.
            for key, val in advanced_filter.items():
                if key != 'op':
                    node = models.Node.objects.get(pk=key)
                    if request.user.has_perm('read_nodegroup', node.nodegroup):
                        datatype = datatype_factory.get_instance(node.datatype)
                        datatype.append_search_filters(val, node, tile_query,
                                                       request)
            nested_query = Nested(path='tiles', query=tile_query)
            # an 'or' filter starts a new group; groups are OR'd together below
            if advanced_filter['op'] == 'or' and index != 0:
                grouped_query = Bool()
                grouped_queries.append(grouped_query)
            grouped_query.must(nested_query)
        for grouped_query in grouped_queries:
            advanced_query.should(grouped_query)
        search_query.must(advanced_query)

    query.add_query(search_query)
    if search_buffer != None:
        search_buffer = search_buffer.geojson
    return {'query': query, 'search_buffer': search_buffer}
コード例 #15
0
ファイル: time_filter.py プロジェクト: legiongis/arches
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        search_query = Bool()
        querysting_params = self.request.GET.get(details["componentname"], "")
        temporal_filter = JSONDeserializer().deserialize(querysting_params)
        if "fromDate" in temporal_filter and "toDate" in temporal_filter:
            # now = str(datetime.utcnow())
            start_date = ExtendedDateFormat(temporal_filter["fromDate"])
            end_date = ExtendedDateFormat(temporal_filter["toDate"])
            date_nodeid = (str(temporal_filter["dateNodeId"])
                           if "dateNodeId" in temporal_filter
                           and temporal_filter["dateNodeId"] != "" else None)
            query_inverted = False if "inverted" not in temporal_filter else temporal_filter[
                "inverted"]

            temporal_query = Bool()

            if query_inverted:
                # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
                # eg: less than START_DATE OR greater than END_DATE
                inverted_date_query = Bool()
                inverted_date_ranges_query = Bool()

                if start_date.is_valid():
                    inverted_date_query.should(
                        Range(field="dates.date", lt=start_date.lower))
                    inverted_date_ranges_query.should(
                        Range(field="date_ranges.date_range",
                              lt=start_date.lower))
                if end_date.is_valid():
                    inverted_date_query.should(
                        Range(field="dates.date", gt=end_date.upper))
                    inverted_date_ranges_query.should(
                        Range(field="date_ranges.date_range",
                              gt=end_date.upper))

                date_query = Bool()
                date_query.filter(inverted_date_query)
                date_query.filter(
                    Terms(field="dates.nodegroup_id",
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    date_query.filter(
                        Terms(field="dates.provisional", terms=["false"]))

                elif include_provisional == "only provisional":
                    date_query.filter(
                        Terms(field="dates.provisional", terms=["true"]))

                if date_nodeid:
                    date_query.filter(
                        Term(field="dates.nodeid", term=date_nodeid))
                else:
                    date_ranges_query = Bool()
                    date_ranges_query.filter(inverted_date_ranges_query)
                    date_ranges_query.filter(
                        Terms(field="date_ranges.nodegroup_id",
                              terms=permitted_nodegroups))

                    if include_provisional is False:
                        date_ranges_query.filter(
                            Terms(field="date_ranges.provisional",
                                  terms=["false"]))

                    elif include_provisional == "only provisional":
                        date_ranges_query.filter(
                            Terms(field="date_ranges.provisional",
                                  terms=["true"]))

                    temporal_query.should(
                        Nested(path="date_ranges", query=date_ranges_query))
                temporal_query.should(Nested(path="dates", query=date_query))

            else:
                date_query = Bool()
                date_query.filter(
                    Range(field="dates.date",
                          gte=start_date.lower,
                          lte=end_date.upper))
                date_query.filter(
                    Terms(field="dates.nodegroup_id",
                          terms=permitted_nodegroups))

                if include_provisional is False:
                    date_query.filter(
                        Terms(field="dates.provisional", terms=["false"]))
                elif include_provisional == "only provisional":
                    date_query.filter(
                        Terms(field="dates.provisional", terms=["true"]))

                if date_nodeid:
                    date_query.filter(
                        Term(field="dates.nodeid", term=date_nodeid))
                else:
                    date_ranges_query = Bool()
                    date_ranges_query.filter(
                        Range(field="date_ranges.date_range",
                              gte=start_date.lower,
                              lte=end_date.upper,
                              relation="intersects"))
                    date_ranges_query.filter(
                        Terms(field="date_ranges.nodegroup_id",
                              terms=permitted_nodegroups))

                    if include_provisional is False:
                        date_ranges_query.filter(
                            Terms(field="date_ranges.provisional",
                                  terms=["false"]))
                    if include_provisional == "only provisional":
                        date_ranges_query.filter(
                            Terms(field="date_ranges.provisional",
                                  terms=["true"]))

                    temporal_query.should(
                        Nested(path="date_ranges", query=date_ranges_query))
                temporal_query.should(Nested(path="dates", query=date_query))

            search_query.filter(temporal_query)

            search_results_object["query"].add_query(search_query)
Code example #16
0
    def append_dsl(self, search_results_object, permitted_nodegroups,
                   include_provisional):
        """Append a temporal (date / date-range) filter to the search DSL.

        Reads this component's GET parameter (a JSON object with
        'fromDate' and 'toDate', plus optional 'dateNodeId' and
        'inverted' keys) and, when both endpoints are present, adds a
        Bool filter over the nested ``dates`` and ``date_ranges``
        documents to ``search_results_object['query']``.

        Arguments:
        search_results_object -- dict holding the ES Query under 'query'
        permitted_nodegroups -- nodegroup ids the current user may search
        include_provisional -- False to exclude provisional tiles,
            'only provisional' to return only them, anything else to
            include both
        """
        search_query = Bool()
        querysting_params = self.request.GET.get(details['componentname'], '')
        temporal_filter = JSONDeserializer().deserialize(querysting_params)
        if 'fromDate' in temporal_filter and 'toDate' in temporal_filter:
            start_date = ExtendedDateFormat(temporal_filter['fromDate'])
            end_date = ExtendedDateFormat(temporal_filter['toDate'])
            date_nodeid = str(
                temporal_filter['dateNodeId']
            ) if 'dateNodeId' in temporal_filter and temporal_filter[
                'dateNodeId'] != '' else None
            query_inverted = temporal_filter.get('inverted', False)

            def apply_common_filters(query, path):
                # Every clause is scoped to nodegroups the user is
                # permitted to see and honors the provisional-tile
                # visibility setting.  The two provisional cases are
                # mutually exclusive, hence elif.
                query.filter(
                    Terms(field='%s.nodegroup_id' % path,
                          terms=permitted_nodegroups))
                if include_provisional is False:
                    query.filter(
                        Terms(field='%s.provisional' % path,
                              terms=['false']))
                elif include_provisional == 'only provisional':
                    query.filter(
                        Terms(field='%s.provisional' % path,
                              terms=['true']))

            temporal_query = Bool()

            if query_inverted:
                # inverted date searches need to use an OR clause and are generally more complicated to structure (can't use ES must_not)
                # eg: less than START_DATE OR greater than END_DATE
                inverted_date_query = Bool()
                inverted_date_ranges_query = Bool()

                if start_date.is_valid():
                    inverted_date_query.should(
                        Range(field='dates.date', lt=start_date.lower))
                    inverted_date_ranges_query.should(
                        Range(field='date_ranges.date_range',
                              lt=start_date.lower))
                if end_date.is_valid():
                    inverted_date_query.should(
                        Range(field='dates.date', gt=end_date.upper))
                    inverted_date_ranges_query.should(
                        Range(field='date_ranges.date_range',
                              gt=end_date.upper))

                date_query = Bool()
                date_query.filter(inverted_date_query)
                apply_common_filters(date_query, 'dates')

                if date_nodeid:
                    # restrict to one specific date node; range documents
                    # are not searched in that case
                    date_query.filter(
                        Term(field='dates.nodeid', term=date_nodeid))
                else:
                    date_ranges_query = Bool()
                    date_ranges_query.filter(inverted_date_ranges_query)
                    apply_common_filters(date_ranges_query, 'date_ranges')

                    temporal_query.should(
                        Nested(path='date_ranges', query=date_ranges_query))
                temporal_query.should(Nested(path='dates', query=date_query))

            else:
                date_query = Bool()
                date_query.filter(
                    Range(field='dates.date',
                          gte=start_date.lower,
                          lte=end_date.upper))
                apply_common_filters(date_query, 'dates')

                if date_nodeid:
                    date_query.filter(
                        Term(field='dates.nodeid', term=date_nodeid))
                else:
                    date_ranges_query = Bool()
                    # 'intersects' matches any stored range overlapping
                    # the requested span
                    date_ranges_query.filter(
                        Range(field='date_ranges.date_range',
                              gte=start_date.lower,
                              lte=end_date.upper,
                              relation='intersects'))
                    apply_common_filters(date_ranges_query, 'date_ranges')

                    temporal_query.should(
                        Nested(path='date_ranges', query=date_ranges_query))
                temporal_query.should(Nested(path='dates', query=date_query))

            search_query.filter(temporal_query)

            search_results_object['query'].add_query(search_query)
Code example #17
0
 def delete_index(self):
     """Remove this concept's documents from the search index.

     Issues a delete-by-query against the 'strings' index (doc type
     'concept') for every document whose ``id`` field equals ``self.id``.
     """
     search_engine = SearchEngineFactory().create()
     delete_query = Query(search_engine, start=0, limit=10000)
     delete_query.add_query(Term(field='id', term=self.id))
     delete_query.delete(index='strings', doc_type='concept')