Exemple #1
0
def index_resources(clear_index=True,
                    batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                    quiet=False):
    """
    Reindex every resource model's instances from the database.

    Keyword Arguments:
    clear_index -- when True, the terms index is emptied here before reindexing; the flag is also forwarded to index_resources_by_type
    batch_size -- number of records indexed per group; larger values need more memory
    quiet -- forwarded to index_resources_by_type to silence status output (e.g. when run from celery)

    """

    if clear_index:
        # No clauses are added to the query before deleting
        # (presumably this matches every document in the index).
        Query(se=se).delete(index=TERMS_INDEX)

    resource_graphs = models.GraphModel.objects.filter(isresource=True)
    # The system settings graph is excluded from the reindex.
    user_graphs = resource_graphs.exclude(
        graphid=settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID)
    graph_ids = user_graphs.values_list("graphid", flat=True)

    index_resources_by_type(graph_ids,
                            clear_index=clear_index,
                            batch_size=batch_size,
                            quiet=quiet)
Exemple #2
0
def clear_resources():
    """
    Remove resource data from both the search indexes and the database.

    Empties the terms, resources and resource-relations indexes, deletes every
    Resource row except the one whose id is settings.RESOURCE_INSTANCE_ID, and
    truncates the resource_x_resource table. Counts are printed before and
    after each database step.
    """
    engine = SearchEngineFactory().create()
    wipe_query = Query(engine)
    for index_name in (TERMS_INDEX, RESOURCES_INDEX, RESOURCE_RELATIONS_INDEX):
        wipe_query.delete(index=index_name)

    # Lazy queryset: every count()/delete() below issues its own database query.
    deletable = Resource.objects.exclude(
        resourceinstanceid=settings.RESOURCE_INSTANCE_ID)
    print("deleting", deletable.count(), "resources")
    deletable.delete()
    print(deletable.count(), "resources remaining")

    relations = models.ResourceXResource.objects
    print("deleting", relations.count(), "resource relationships")
    db_cursor = connection.cursor()
    db_cursor.execute("TRUNCATE public.resource_x_resource CASCADE;")
    print(relations.count(), "resource relationships remaining")
Exemple #3
0
def find_overlapping(request):
    """
    Ajax view: list indexed resources that overlap a newly created geometry.

    The geometry is read from the ``geom`` GET parameter (as SRID 4326),
    buffered by ``settings.METER_RADIUS`` (1000 when that setting is missing
    or falsy) and used as a nested geo-shape filter against the ``entity``
    index.

    Arguments:
    request -- the incoming HTTP request; ``request.GET['geom']`` holds the geometry string

    Returns a JSONResponse wrapping a list of dicts with the keys ``id``,
    ``type`` and ``primaryname``, one per search hit.
    """
    geom_string = request.GET.get('geom', '')
    geom = GEOSGeometry(geom_string, srid=4326)

    # getattr() keeps the documented 1 km default working even when
    # METER_RADIUS is not defined in settings at all.
    mindistance = getattr(settings, 'METER_RADIUS', None) or 1000

    # Buffer in SRID 3857 (metre units), then return to 4326 for the query.
    geom.transform(3857)
    buffered_geom = geom.buffer(mindistance)
    buffered_geom.transform(4326)

    se = SearchEngineFactory().create()
    query = Query(se)
    boolfilter = Bool()
    geoshape = GeoShape(field='geometries.value',
                        type=buffered_geom.geom_type,
                        coordinates=buffered_geom.coords)
    nested = Nested(path='geometries', query=geoshape)
    boolfilter.must(nested)
    query.add_filter(boolfilter)
    results = query.search(index='entity', doc_type='')

    overlaps = [{
        'id': hit['_id'],
        'type': hit['_type'],
        'primaryname': hit['_source']['primaryname']
    } for hit in results['hits']['hits']]
    return JSONResponse(overlaps)
Exemple #4
0
def reverse_func(apps, schema_editor):
    """
    Reverse step of the migration.

    Shortens every Node.ontologyclass and Edge.ontologyproperty to the text
    after its last "/", removes the base concept
    (00000000-0000-0000-0000-000000000001) from the "concepts" index and, on a
    best-effort basis, deletes the "identifier" DValueType row.

    Arguments:
    apps -- historical app registry used to look up the models
    schema_editor -- not used by this function
    """

    node_model = apps.get_model("models", "Node")
    edge_model = apps.get_model("models", "Edge")

    for node in node_model.objects.all():
        # Keep only the trailing path segment of the stored value.
        node.ontologyclass = str(node.ontologyclass).rsplit("/", 1)[-1]
        node.save()

    for edge in edge_model.objects.all():
        edge.ontologyproperty = str(edge.ontologyproperty).rsplit("/", 1)[-1]
        edge.save()

    # Drop the index entries of the base Arches concept.
    engine = SearchEngineFactory().create()
    concept_query = Query(engine, start=0, limit=10000)
    base_concept = Term(field="conceptid",
                        term="00000000-0000-0000-0000-000000000001")
    concept_query.add_query(base_concept)
    concept_query.delete(index="concepts")

    try:
        value_type_model = apps.get_model("models", "DValueType")
        value_type_model.objects.get(valuetype="identifier").delete()
    except Exception:
        # Best effort: any failure here (missing model or row included) is ignored.
        pass
Exemple #5
0
        def get_relations(resourceinstanceid,
                          start,
                          limit,
                          resourceinstance_graphid=None):
            """
            Search the resource-relations index for relations that have
            ``resourceinstanceid`` on either end.

            Arguments:
            resourceinstanceid -- value handed to the Terms filters on both id fields
            start -- offset passed to the Query
            limit -- page size passed to the Query

            Keyword Arguments:
            resourceinstance_graphid -- when truthy, a relation must also match this value on its from- or to-graph id field

            Returns the raw result of Query.search.
            """
            relation_query = Query(se, start=start, limit=limit)

            either_end = Bool()
            for id_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
                either_end.should(
                    Terms(field=id_field, terms=resourceinstanceid))

            if resourceinstance_graphid:
                either_graph = Bool()
                for graph_field in ("resourceinstancefrom_graphid",
                                    "resourceinstanceto_graphid"):
                    either_graph.should(
                        Terms(field=graph_field,
                              terms=resourceinstance_graphid))
                either_end.must(either_graph)

            relation_query.add_query(either_end)
            return relation_query.search(index=RESOURCE_RELATIONS_INDEX)
Exemple #6
0
    def delete(self, user=None, note=''):
        """
        Deletes a single resource and any related indexed data

        Removes the ResourceXResource rows reported by get_related_resources
        (first 1000 only), the matching documents in the 'strings' index, the
        resource's own document in the 'resource' index, records a 'delete'
        edit and finally deletes the database row.

        Keyword Arguments:
        user -- passed to save_edit; defaults to an empty dict when omitted
        note -- accepted for interface compatibility but not used; the edit note is always self.displayname

        """

        if user is None:
            # A fresh dict per call replaces the old shared mutable default ({}).
            user = {}

        se = SearchEngineFactory().create()
        related_resources = self.get_related_resources(lang="en-US",
                                                       start=0,
                                                       limit=1000,
                                                       page=0)
        for rr in related_resources['resource_relationships']:
            models.ResourceXResource.objects.get(pk=rr['resourcexid']).delete()

        # Find and remove every term document that belongs to this resource.
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(
            Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
        query.add_query(bool_query)
        results = query.search(index='strings',
                               doc_type='term')['hits']['hits']
        for result in results:
            se.delete(index='strings', doc_type='term', id=result['_id'])
        se.delete(index='resource',
                  doc_type=str(self.graph_id),
                  id=self.resourceinstanceid)

        self.save_edit(edit_type='delete', user=user, note=self.displayname)
        super(Resource, self).delete()
Exemple #7
0
def index_resources(clear_index=True,
                    index_name=None,
                    batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Reindex all resources found in the database.

    Keyword Arguments:
    clear_index -- when True (and no index_name is given) the terms index is emptied first; the flag is also forwarded to index_resources_by_type
    index_name -- only relevant to custom indexes; when given, it is forwarded so that just that index is refreshed
    batch_size -- number of records indexed per group; larger values need more memory

    """

    wipe_terms = clear_index and index_name is None
    if wipe_terms:
        terms_query = Query(se=se)
        terms_query.delete(index=TERMS_INDEX)

    # Ids of every resource graph except the system settings graph.
    graph_ids = (
        models.GraphModel.objects
        .filter(isresource=True)
        .exclude(graphid=settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID)
        .values_list("graphid", flat=True)
    )
    index_resources_by_type(graph_ids,
                            clear_index=clear_index,
                            index_name=index_name,
                            batch_size=batch_size)
Exemple #8
0
    def delete(self, *args, **kwargs):
        """
        Delete this tile, or record a provisional delete for it.

        Every tile in self.tiles is deleted first (with the same request).
        If the requesting user is in the "Resource Reviewer" group, or owns the
        provisional edit, the tile's term documents are removed from the
        "terms" index, the edit is logged, the row is deleted and the parent
        resource is reindexed. Otherwise a provisional "delete" edit is applied
        and the tile is saved.

        Keyword Arguments (popped from kwargs before delegating):
        request -- the HTTP request, used to identify the user; may be omitted
        provisional_edit_log_details -- forwarded to save_edit

        """
        se = SearchEngineFactory().create()
        request = kwargs.pop("request", None)
        provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
        for tile in self.tiles:
            tile.delete(*args, request=request, **kwargs)
        try:
            user = request.user
            user_is_reviewer = user.groups.filter(name="Resource Reviewer").exists()
        except AttributeError:  # no user
            user = None
            # Previously this name stayed unbound here and the check below
            # raised UnboundLocalError. With no user there is nobody to hold
            # a provisional edit, so the call is treated as authorised.
            # NOTE(review): confirm this policy for request-less callers.
            user_is_reviewer = True

        if user_is_reviewer is True or self.user_owns_provisional(user):
            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
            query.add_query(bool_query)
            results = query.search(index="terms")["hits"]["hits"]

            for result in results:
                se.delete(index="terms", id=result["_id"])

            self.__preDelete(request)
            # Use the resolved user: request may be None on this path.
            self.save_edit(
                user=user, edit_type="tile delete", old_value=self.data, provisional_edit_log_details=provisional_edit_log_details
            )
            super(Tile, self).delete(*args, **kwargs)
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()

        else:
            self.apply_provisional_edit(user, data={}, action="delete")
            super(Tile, self).save(*args, **kwargs)
Exemple #9
0
 def get_relations(resourceinstanceid, start, limit):
     """
     Search the "resource_relations" index for relations that have
     ``resourceinstanceid`` on either the from- or the to-side.

     ``start`` and ``limit`` are passed to the Query; the raw search result
     is returned.
     """
     relation_query = Query(se, start=start, limit=limit)
     either_side = Bool()
     for id_field in ("resourceinstanceidfrom", "resourceinstanceidto"):
         either_side.should(Terms(field=id_field, terms=resourceinstanceid))
     relation_query.add_query(either_side)
     return relation_query.search(index="resource_relations")
Exemple #10
0
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Look up the preferred label of a concept in the "concepts" index.

    Selection order: a label whose language equals ``lang`` is returned
    immediately; otherwise the last label sharing ``lang``'s base language
    (the part before "-") wins, then a label in settings.LANGUAGE_CODE, then
    the last label seen. With no labels at all, a dict of empty strings is
    returned.
    """
    fallback = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    label_query = Query(se)
    criteria = Bool()
    criteria.must(Match(field="type", query="prefLabel", type="phrase"))
    criteria.filter(Terms(field="conceptid", terms=[conceptid]))
    label_query.add_query(criteria)
    hits = label_query.search(index="concepts")["hits"]["hits"]

    chosen = None
    for hit in hits:
        label = hit["_source"]
        # Remember the most recent label as the last-resort answer.
        fallback = label
        label_language = label["language"]
        if label_language == lang:
            return label
        if label_language.split("-")[0] == lang.split("-")[0]:
            chosen = label
        if chosen is None and label_language == settings.LANGUAGE_CODE:
            chosen = label

    if chosen is None:
        return fallback
    return chosen
Exemple #11
0
def get_related_resources(resourceid, lang, limit=1000, start=0):
    """
    Collect the relationships of a resource and the resources on the other end.

    Arguments:
    resourceid -- id matched against entityid1 / entityid2 in the 'resource_relations' index
    lang -- language passed to get_preflabel_from_valueid for each relationship type

    Keyword Arguments:
    limit -- page size passed to the Query
    start -- offset passed to the Query

    Returns a dict with the keys 'resource_relationships' (relation documents,
    each given an added 'preflabel'), 'related_resources' (documents fetched
    from the 'entity' index) and 'total' (the hit total reported by the search).
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()

    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']

    entityids = set()
    for relation in resource_relations['hits']['hits']:
        source = relation['_source']
        source['preflabel'] = get_preflabel_from_valueid(source['relationshiptype'], lang)
        ret['resource_relationships'].append(source)
        entityids.add(source['entityid1'])
        entityids.add(source['entityid2'])

    # discard() cannot raise KeyError when resourceid is absent from the set.
    entityids.discard(resourceid)

    # Only look documents up when there is something to fetch.
    if entityids:
        related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))
        if related_resources:
            for resource in related_resources['docs']:
                ret['related_resources'].append(resource['_source'])

    return ret
Exemple #12
0
    def test_bulk_delete(self):
        """
        Bulk-delete documents by query and check what is left in the index.

        Indexes ten 'prefLabel' and ten 'altLabel' documents into 'test',
        deletes everything matching type 'altLabel', then expects ten
        documents to remain.

        """

        se = SearchEngineFactory().create()

        for i in range(10):
            # One prefLabel (id i) followed by one altLabel (id i + 100).
            documents = (
                {'id': i, 'type': 'prefLabel', 'value': 'test pref label'},
                {'id': i + 100, 'type': 'altLabel', 'value': 'test alt label'},
            )
            for document in documents:
                se.index_data(index='test', body=document, idfield='id', refresh=True)

        time.sleep(1)

        delete_query = Query(se, start=0, limit=100)
        delete_query.add_query(Match(field='type', query='altLabel'))
        delete_query.delete(index='test', refresh=True)

        remaining = se.count(index='test')
        self.assertEqual(remaining, 10)
Exemple #13
0
def get_preflabel_from_conceptid(conceptid, lang):
    """
    Look up the preferred label of a concept in the 'concept_labels' index.

    Arguments:
    conceptid -- id of the concept whose label is wanted
    lang -- preferred language code (e.g. "en-US")

    Returns the label document whose language equals ``lang`` when there is
    one; otherwise the last label sharing ``lang``'s base language, then a
    label in settings.LANGUAGE_CODE, then the last label seen. With no labels
    at all, a dict of empty strings is returned.
    """
    ret = None
    default = {
        "category": "",
        "conceptid": "",
        "language": "",
        "value": "",
        "type": "",
        "id": ""
    }
    se = SearchEngineFactory().create()
    query = Query(se)
    terms = Terms(field='conceptid', terms=[conceptid])
    match = Match(field='type', query='preflabel', type='phrase')
    query.add_filter(terms)
    query.add_query(match)
    preflabels = query.search(index='concept_labels')['hits']['hits']
    for preflabel in preflabels:
        label = preflabel['_source']
        # The most recent label doubles as the last-resort answer.
        default = label
        # get the label in the preferred language, otherwise get the label in the default language
        if label['language'] == lang:
            return label
        if label['language'].split('-')[0] == lang.split('-')[0]:
            ret = label
        if label['language'] == settings.LANGUAGE_CODE and ret is None:
            ret = label
    return default if ret is None else ret
Exemple #14
0
 def delete_index(self):
     """
     Remove this object's search-index entries: every 'concept_labels'
     document whose conceptid phrase-matches self.conceptid, followed by the
     terms stored for self.id.
     """
     engine = SearchEngineFactory().create()
     label_query = Query(engine, start=0, limit=10000)
     label_query.add_query(
         Match(field='conceptid', query=self.conceptid, type='phrase'))
     label_query.delete(index='concept_labels')
     engine.delete_terms(self.id)
Exemple #15
0
def search_results(request):
    """
    Run a resource search assembled by the search filters named in the request.

    Each GET parameter name (plus the always-present 'search-results') is
    resolved to a search filter, which appends its DSL to a shared Query. The
    query is run against the 'resources' index and the filters then get a
    chance to post-process the results.

    Returns a JSONResponse with the results, any extra keys the filters left in
    the shared object, 'reviewer', 'timestamp' and 'total_results'. Returns a
    500 JSONResponse if a filter raises while building the query, and
    HttpResponseNotFound if the search returns None.
    """
    se = SearchEngineFactory().create()
    search_results_object = {'query': Query(se)}

    include_provisional = get_provisional_type(request)
    permitted_nodegroups = get_permitted_nodegroups(request.user)

    search_filter_factory = SearchFilterFactory(request)
    try:
        requested_filters = list(request.GET.items())
        requested_filters.append(('search-results', ''))
        for filter_type, querystring in requested_filters:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.append_dsl(search_results_object,
                                         permitted_nodegroups,
                                         include_provisional)
    except Exception as err:
        return JSONResponse(err, status=500)

    dsl = search_results_object.pop('query', None)
    # Fields requested back from the index for every hit.
    for field_name in ('graph_id', 'root_ontology_class', 'resourceinstanceid',
                       'points', 'geometries', 'displayname',
                       'displaydescription', 'map_popup',
                       'provisional_resource'):
        dsl.include(field_name)
    if request.GET.get('tiles', None) is not None:
        dsl.include('tiles')

    results = dsl.search(index='resources')
    if results is None:
        return HttpResponseNotFound(
            _("There was an error retrieving the search results"))

    # allow filters to modify the results
    post_filters = list(request.GET.items())
    post_filters.append(('search-results', ''))
    for filter_type, querystring in post_filters:
        search_filter = search_filter_factory.get_filter(filter_type)
        if search_filter:
            search_filter.post_search_hook(search_results_object, results,
                                           permitted_nodegroups)

    ret = {'results': results}
    # Whatever the filters left in the shared object is passed through as-is.
    ret.update(search_results_object)

    ret['reviewer'] = request.user.groups.filter(
        name='Resource Reviewer').exists()
    ret['timestamp'] = datetime.now()
    ret['total_results'] = dsl.count(index='resources')

    return JSONResponse(ret)
Exemple #16
0
    def delete(self, user=None, note=""):
        """
        Deletes a single resource and any related indexed data

        Deletion is permitted when no user is given, when the user is a
        resource reviewer, or when none of the resource's tiles holds any data.
        When permitted, related ResourceXResource rows are deleted (and the
        resource on the other end reindexed), this resource's documents are
        removed from the terms and resources indexes, a "delete" edit is
        logged on a best-effort basis and the database row is removed.

        Keyword Arguments:
        user -- the user requesting the deletion; omitted or {} means no permission check
        note -- accepted for interface compatibility but not used; the edit note is always self.displayname

        Returns True when the resource was deleted, False when deletion was
        not permitted. Raises ModelInactiveError if the resource's graph is
        not active.

        """
        import logging

        if user is None:
            # A fresh dict per call replaces the old shared mutable default ({}).
            user = {}

        graph = models.GraphModel.objects.get(graphid=self.graph_id)
        if graph.isactive is False:
            message = _("This model is not yet active; unable to delete.")
            raise ModelInactiveError(message)

        permit_deletion = True
        if user != {} and user_is_resource_reviewer(user) is False:
            # Non-reviewers may only delete resources whose tiles hold no data
            # (the original code labels this state "provisional").
            tiles = models.TileModel.objects.filter(resourceinstance=self)
            permit_deletion = sum(len(t.data) for t in tiles) == 0

        if permit_deletion is True:
            related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000, page=0)
            for rr in related_resources["resource_relationships"]:
                # delete any related resource entries, also reindex the resource that references this resource that's being deleted
                try:
                    resourceXresource = models.ResourceXResource.objects.get(pk=rr["resourcexid"])
                    resource_to_reindex = (
                        resourceXresource.resourceinstanceidfrom_id
                        if resourceXresource.resourceinstanceidto_id == self.resourceinstanceid
                        else resourceXresource.resourceinstanceidto_id
                    )
                    resourceXresource.delete(deletedResourceId=self.resourceinstanceid)
                    res = Resource.objects.get(pk=resource_to_reindex)
                    res.load_tiles()
                    res.index()
                except ObjectDoesNotExist:
                    # The row is already gone from the database: drop the stale index entry.
                    se.delete(index=RESOURCE_RELATIONS_INDEX, id=rr["resourcexid"])

            query = Query(se)
            bool_query = Bool()
            bool_query.filter(Terms(field="resourceinstanceid", terms=[self.resourceinstanceid]))
            query.add_query(bool_query)
            results = query.search(index=TERMS_INDEX)["hits"]["hits"]
            for result in results:
                se.delete(index=TERMS_INDEX, id=result["_id"])
            se.delete(index=RESOURCES_INDEX, id=self.resourceinstanceid)

            try:
                self.save_edit(edit_type="delete", user=user, note=self.displayname)
            except Exception:
                # Still best effort (a failed edit log must not block the
                # deletion), but the failure is now logged instead of being
                # swallowed, and KeyboardInterrupt/SystemExit propagate.
                logging.getLogger(__name__).exception(
                    "Unable to record the delete edit for resource %s", self.resourceinstanceid
                )
            super(Resource, self).delete()

        return permit_deletion
Exemple #17
0
 def delete_concept_values_index(concepts_to_delete):
     """
     Remove the search-index entries of each concept in ``concepts_to_delete``.

     For every concept, the 'concept_labels' documents whose conceptid equals
     the concept's id are deleted, followed by the terms of each of the
     concept's values.

     Arguments:
     concepts_to_delete -- mapping whose values are concept objects exposing ``id`` and ``values``
     """
     se = SearchEngineFactory().create()
     # values() exists on both Python 2 and 3 mappings; itervalues() is Python 2 only.
     for concept in concepts_to_delete.values():
         query = Query(se, start=0, limit=10000)
         term = Term(field='conceptid', term=concept.id)
         query.add_query(term)
         query.delete(index='concept_labels')
         for conceptvalue in concept.values:
             se.delete_terms(conceptvalue.id)
Exemple #18
0
 def get_relations(resourceinstanceid, start, limit):
     """
     Search 'resource_relations' (doc_type 'all') for relations in which
     ``resourceinstanceid`` is the from- or the to-instance.

     ``start`` and ``limit`` page the query; the raw search result is returned.
     """
     relation_query = Query(se, limit=limit, start=start)
     from_clause = Terms(field='resourceinstanceidfrom',
                         terms=resourceinstanceid)
     to_clause = Terms(field='resourceinstanceidto', terms=resourceinstanceid)

     either_end = Bool()
     either_end.should(from_clause)
     either_end.should(to_clause)

     relation_query.add_query(either_end)
     return relation_query.search(index='resource_relations', doc_type='all')
Exemple #19
0
def build_search_terms_dsl(request):
    """
    Build, without running it, the term-suggestion query for the ``q`` GET
    parameter.

    The lower-cased search text is matched three ways, all as "should"
    clauses with AUTO fuzziness: phrase-prefix on 'term', phrase-prefix on
    'term.folded' and a plain match on 'term.folded'. The result size is
    capped at settings.SEARCH_DROPDOWN_LENGTH.

    Returns the prepared Query.
    """
    se = SearchEngineFactory().create()
    search_text = request.GET.get('q', '').lower()
    dropdown_query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)

    prefix_on_term = Match(field='term', query=search_text, type='phrase_prefix', fuzziness='AUTO')
    prefix_on_folded = Match(field='term.folded', query=search_text, type='phrase_prefix', fuzziness='AUTO')
    fuzzy_on_folded = Match(field='term.folded', query=search_text, fuzziness='AUTO')

    any_match = Bool()
    for clause in (prefix_on_term, prefix_on_folded, fuzzy_on_folded):
        any_match.should(clause)

    dropdown_query.add_query(any_match)
    return dropdown_query
Exemple #20
0
def search_terms(request):
    """
    Suggest search terms and concepts for the text in the ``q`` GET parameter.

    The 'strings' index is queried with three fuzzy "should" matches on
    'value' / 'value.folded'. Only aggregations are read from the response:
    values are bucketed by 'value.raw' (ordered by max score), each with
    sub-buckets for top concept -> concept id, and for nodegroup id.

    Arguments:
    request -- the HTTP request; reads ``q`` and ``lang`` (default settings.LANGUAGE_CODE)

    Returns a JSONResponse wrapping a list of suggestion dicts with the keys
    'type' ('concept' or 'term'), 'context', 'context_label', 'id', 'text' and
    'value'. The list is empty when the search yields no aggregations.
    """
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    # limit=0: no hits are requested, only the aggregations are used below.
    query = Query(se, start=0, limit=0)

    boolquery = Bool()
    boolquery.should(Match(field='value', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='value.folded', query=searchString.lower(), fuzziness='AUTO'))
    query.add_query(boolquery)

    base_agg = Aggregation(name='value_agg', type='terms', field='value.raw', size=settings.SEARCH_DROPDOWN_LENGTH, order={"max_score": "desc"})
    nodegroupid_agg = Aggregation(name='nodegroupid', type='terms', field='nodegroupid')
    top_concept_agg = Aggregation(name='top_concept', type='terms', field='top_concept')
    conceptid_agg = Aggregation(name='conceptid', type='terms', field='conceptid')
    max_score_agg = MaxAgg(name='max_score', script='_score')

    top_concept_agg.add_aggregation(conceptid_agg)
    base_agg.add_aggregation(max_score_agg)
    base_agg.add_aggregation(top_concept_agg)
    base_agg.add_aggregation(nodegroupid_agg)
    query.add_aggregation(base_agg)

    # The old fallback ({'hits': ...}) had no 'aggregations' key, so an empty
    # or failed search raised KeyError below; read the buckets defensively.
    results = query.search(index='strings') or {}
    buckets = results.get('aggregations', {}).get('value_agg', {}).get('buckets', [])

    i = 0
    ret = []
    for result in buckets:
        top_concepts = result['top_concept']['buckets']
        if len(top_concepts) > 0:
            for top_concept in top_concepts:
                top_concept_id = top_concept['key']
                top_concept_label = get_preflabel_from_conceptid(top_concept['key'], lang)['value']
                for concept in top_concept['conceptid']['buckets']:
                    ret.append({
                        'type': 'concept',
                        'context': top_concept_id,
                        'context_label': top_concept_label,
                        'id': i,
                        'text': result['key'],
                        'value': concept['key']
                    })
                # The id advances once per top concept, so all concepts under
                # the same top concept share one id.
                i = i + 1
        else:
            ret.append({
                'type': 'term',
                'context': '',
                'context_label': '',
                'id': i,
                'text': result['key'],
                'value': result['key']
            })
            i = i + 1

    return JSONResponse(ret)
Exemple #21
0
def index_resources_by_type(resource_types, clear_index=True, index_name=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Index every resource belonging to the given resource type(s).

    Arguments:
    resource_types -- iterable of graph ids, one per resource type

    Keyword Arguments:
    clear_index -- when True, the documents of each given type are removed from the 'resources' index before reindexing; also forwarded to custom indexes
    index_name -- only relevant to custom indexes; when given, only that index is refreshed and the standard indexes are left untouched
    batch_size -- number of records indexed per group; larger values need more memory

    Returns the status ('Passed' or 'Failed') of the last resource type that
    went through the standard indexes, or '' if none did.

    """

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = dict(
        (str(nodeid), datatype)
        for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')
    )

    status = ''
    for resource_type in resource_types:
        started_at = datetime.now()
        graph_id = str(resource_type)
        resources = Resource.objects.filter(graph_id=graph_id)
        graph_name = models.GraphModel.objects.get(graphid=graph_id).name
        print("Indexing resource type '{0}'".format(graph_name))

        if index_name is not None:
            # Only the named custom index is refreshed for this type.
            es_index = get_index(index_name)
            es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)
            continue

        # Query selecting this type's documents; used for the optional clear
        # and again for the verification count further down.
        type_query = Query(se=se)
        type_query.add_query(Term(field='graph_id', term=graph_id))
        if clear_index:
            type_query.delete(index='resources', refresh=True)

        with se.BulkIndexer(batch_size=batch_size, refresh=True) as doc_indexer, \
                se.BulkIndexer(batch_size=batch_size, refresh=True) as term_indexer:
            for resource in resources:
                document, terms = resource.get_documents_to_index(fetchTiles=True, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
                doc_indexer.add(index='resources', id=document['resourceinstanceid'], data=document)
                for term_doc in terms:
                    term_indexer.add(index='terms', id=term_doc['_id'], data=term_doc['_source'])

        # Compare the database row count with what the index now reports.
        result_summary = {'database': len(resources), 'indexed': se.count(index='resources', body=type_query.dsl)}
        counts_match = result_summary['database'] == result_summary['indexed']
        status = 'Passed' if counts_match else 'Failed'
        print("Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds".format(status, graph_name, result_summary['database'], result_summary['indexed'], (datetime.now()-started_at).seconds))

        for index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
            es_index = import_class_from_string(index['module'])(index['name'])
            es_index.bulk_index(resources=resources, resource_type=resource_type, graph_name=graph_name, clear_index=clear_index)

    return status
Exemple #22
0
def index_resource_relations(clear_index=True,
                             batch_size=settings.BULK_IMPORT_BATCH_SIZE):
    """
    Index every row of public.resource_x_resource into the resource-relations index.

    Keyword Arguments:
    clear_index -- when True, the resource-relations index is emptied before reindexing
    batch_size -- number of records indexed per group; larger values need more memory

    Prints a status line comparing the cursor's row count with the number of
    documents counted in the index afterwards.

    """

    started_at = datetime.now()
    print("Indexing resource to resource relations")

    cursor = connection.cursor()
    if clear_index:
        Query(se=se).delete(index=RESOURCE_RELATIONS_INDEX)

    # Document keys, listed in the same order as the SELECT columns below.
    doc_keys = (
        "resourcexid",
        "notes",
        "datestarted",
        "dateended",
        "relationshiptype",
        "resourceinstanceidfrom",
        "resourceinstancefrom_graphid",
        "resourceinstanceidto",
        "resourceinstanceto_graphid",
        "modified",
        "created",
        "inverserelationshiptype",
        "tileid",
        "nodeid",
    )

    with se.BulkIndexer(batch_size=batch_size,
                        refresh=True) as resource_relations_indexer:
        sql = """
            SELECT resourcexid, notes, datestarted, dateended, relationshiptype, resourceinstanceidfrom, resourceinstancefrom_graphid,
            resourceinstanceidto, resourceinstanceto_graphid, modified, created, inverserelationshiptype, tileid, nodeid
            FROM public.resource_x_resource
        """

        cursor.execute(sql)
        for row in cursor.fetchall():
            doc = dict(zip(doc_keys, row))
            resource_relations_indexer.add(index=RESOURCE_RELATIONS_INDEX,
                                           id=doc["resourcexid"],
                                           data=doc)

    index_count = se.count(index=RESOURCE_RELATIONS_INDEX)
    outcome = "Passed" if cursor.rowcount == index_count else "Failed"
    print("Status: {0}, In Database: {1}, Indexed: {2}, Took: {3} seconds".
          format(outcome, cursor.rowcount, index_count,
                 (datetime.now() - started_at).seconds))
Exemple #23
0
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Return features from the 'maplayers' index as a GeoJSON FeatureCollection.

    Arguments:
    request -- the HTTP request; reads the GET parameters 'geom', 'bbox' (unused), 'limit' and 'entityid'

    Keyword Arguments:
    entitytypeid -- when not 'all', it is passed to the search as doc_type
    get_centroids -- when True, each feature's geometry is replaced by its centroid and most properties are dropped

    If 'entityid' is supplied (comma separated), exactly those documents are
    fetched by id and returned. Otherwise the index is searched; users whose
    username is 'anonymous' only receive features whose 'ewstatus' equals
    settings.PUBLISHED_LABEL. An empty search result yields an empty JSON
    object.
    """
    geom_param = request.GET.get('geom', None)

    bbox = request.GET.get('bbox', '')  # read but not used below
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {"type": "FeatureCollection", "features": []}
    features = geojson_collection['features']

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if entityids != '':
        # Explicit ids requested: fetch each document directly.
        for entityid in entityids.split(','):
            features.append(
                se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)
    if not data:
        return JSONResponse({})

    # Properties removed from every feature when only centroids are wanted.
    centroid_mode_drops = ('extent', 'elements', 'entitytypeid',
                           'constructions', 'centroid', 'ewstatus', 'address',
                           'designations', 'primaryname', 'resource_type')
    is_anonymous = request.user.username == 'anonymous'

    for item in data['hits']['hits']:
        feature = item['_source']
        properties = feature['properties']

        # If the user is not authenticated, only published features are shown
        # (this probably needs more work - maybe they should see only their own?).
        if is_anonymous and properties['ewstatus'] != settings.PUBLISHED_LABEL:
            continue

        if get_centroids:
            feature['geometry'] = properties['centroid']
            for key in centroid_mode_drops:
                properties.pop(key, None)
        elif geom_param is not None:
            # Use the property named by 'geom' as the feature's geometry.
            feature['geometry'] = properties[geom_param]
            properties.pop('extent', None)
            properties.pop(geom_param, None)
        else:
            properties.pop('extent', None)
            properties.pop('centroid', None)
        features.append(feature)

    return JSONResponse(geojson_collection)
Exemple #24
0
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection built from the 'maplayers' index

    Arguments:
    request -- the request whose 'geom', 'limit' and 'entityid' GET parameters are read

    Keyword Arguments:
    entitytypeid -- restricts the search to this doc_type unless it is 'all'
    get_centroids -- set to True to return only a Point feature (centroid coordinates and id) for each hit

    GET parameters:
    geom -- name of the property that becomes each feature's geometry (ignored when get_centroids is True)
    limit -- handed to Query as the result limit, defaults to settings.MAP_LAYER_FEATURE_LIMIT
    entityid -- comma separated ids; when supplied each id is fetched directly and every other option is ignored

    """

    geom_param = request.GET.get('geom', None)
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {"type": "FeatureCollection", "features": []}

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    if entityids != '':
        # Explicit ids short-circuit the search: every document is fetched on its own
        for entityid in entityids.split(','):
            geojson_collection['features'].append(
                se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid

    if get_centroids:
        # If we are just fetching the centroids, we can do a slightly optimised query by having elasticsearch pull out relevant fields
        args['fields'] = [
            'properties.centroid.coordinates', 'type', '_source.id'
        ]
        data = query.search(**args)
        geojson_collection['features'] = [{
            "geometry": {
                "type": "Point",
                "coordinates":
                item['fields']['properties.centroid.coordinates']
            },
            "type": "Feature",
            "id": item['_id']
        } for item in data['hits']['hits']]
        return JSONResponse(geojson_collection)

    # We need the full data for each record
    data = query.search(**args)
    for item in data['hits']['hits']:
        feature = item['_source']
        properties = feature['properties']
        if geom_param is not None:
            # Promote the requested property to the feature geometry; popping it
            # first keeps this correct even when geom_param is 'extent'
            feature['geometry'] = properties.pop(geom_param)
            properties.pop('extent', None)
        else:
            properties.pop('extent', None)
            properties.pop('centroid', None)
        geojson_collection['features'].append(feature)

    return JSONResponse(geojson_collection)
Exemple #25
0
 def get_resource_bounds(node):
     """
     Returns the 'bounds' entry of a geo bounds aggregation over 'points.point',
     searched in the 'resource' index with the node's graph_id as doc_type, or
     None when the aggregation result carries no 'bounds' entry
     """
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(
         GeoBoundsAgg(field='points.point', name='bounds'))
     response = bounds_query.search(index='resource',
                                    doc_type=[str(node.graph_id)])
     aggregation = response['aggregations']['bounds']
     if 'bounds' in aggregation:
         return aggregation['bounds']
     return None
Exemple #26
0
    def delete_index(self, resourceinstanceid=None):
        """
        Removes a resource from the search indexes and reindexes the resources that reference it

        Keyword Arguments:
        resourceinstanceid -- id of the resource instance to purge, falls back to self.resourceinstanceid when omitted
        """

        target_id = str(self.resourceinstanceid if resourceinstanceid is None else resourceinstanceid)

        # drop every term document that belongs to the resource
        owned_terms = Bool()
        owned_terms.filter(Terms(field="resourceinstanceid", terms=[target_id]))
        terms_query = Query(se)
        terms_query.add_query(owned_terms)
        terms_query.delete(index=TERMS_INDEX)

        # drop every relation document that has the resource on either side
        either_side = Bool()
        either_side.should(Terms(field="resourceinstanceidto", terms=[target_id]))
        either_side.should(Terms(field="resourceinstanceidfrom", terms=[target_id]))
        relations_query = Query(se)
        relations_query.add_query(either_side)
        relations_query.delete(index=RESOURCE_RELATIONS_INDEX)

        # refresh the index entry of every resource whose nested "ids" mention the resource
        mentions = Bool()
        mentions.filter(Nested(path="ids", query=Terms(field="ids.id", terms=[target_id])))
        referencing_query = Query(se)
        referencing_query.add_query(mentions)
        for hit in referencing_query.search(index=RESOURCES_INDEX)["hits"]["hits"]:
            referencing_resource = Resource.objects.get(pk=hit["_id"])
            referencing_resource.load_tiles()
            referencing_resource.index()

        # finally remove the resource's own index entry
        se.delete(index=RESOURCES_INDEX, id=target_id)
Exemple #27
0
    def delete(self, user={}, note=''):
        """
        Deletes this resource together with its relationships and its 'terms' and 'resources' index entries

        Keyword Arguments:
        user -- the user requesting the deletion; the default {} skips the permission check.
                Members of the 'Resource Reviewer' group may always delete, any other user
                only when none of the resource's tiles hold data
        note -- accepted but not used; the edit log note is the resource's displayname

        Returns True when the resource was deleted, False when the deletion was not permitted.
        Raises ModelInactiveError when the resource's graph has isactive set to False.

        """

        graph = models.GraphModel.objects.get(graphid=self.graph_id)
        if graph.isactive is False:
            message = _('This model is not yet active; unable to delete.')
            raise ModelInactiveError(message)

        # Allowed unless a non-reviewer asks to delete a resource whose tiles hold data
        permit_deletion = True
        if user != {}:
            is_reviewer = user.groups.filter(name='Resource Reviewer').exists()
            if is_reviewer is False:
                resource_tiles = models.TileModel.objects.filter(
                    resourceinstance=self)
                permit_deletion = sum(
                    len(tile.data) for tile in resource_tiles) == 0

        if not permit_deletion:
            return permit_deletion

        se = SearchEngineFactory().create()

        # remove the relationship records first
        relations = self.get_related_resources(lang="en-US",
                                               start=0,
                                               limit=1000,
                                               page=0)
        for relation in relations['resource_relationships']:
            relationship = models.ResourceXResource.objects.get(
                pk=relation['resourcexid'])
            relationship.delete()

        # then every indexed term that points at this resource
        owned_terms = Bool()
        owned_terms.filter(
            Terms(field='resourceinstanceid',
                  terms=[self.resourceinstanceid]))
        terms_query = Query(se)
        terms_query.add_query(owned_terms)
        for hit in terms_query.search(index='terms')['hits']['hits']:
            se.delete(index='terms', id=hit['_id'])

        # and the resource's own index entry
        se.delete(index='resources', id=self.resourceinstanceid)

        # log the edit before the database row disappears
        self.save_edit(edit_type='delete',
                       user=user,
                       note=self.displayname)
        super(Resource, self).delete()

        return permit_deletion
Exemple #28
0
 def get_resource_bounds(node):
     """
     Returns the 'bounds' entry of a geo bounds aggregation over "points.point",
     searched in RESOURCES_INDEX with a term query on the node's graph id, or
     None when the aggregation result carries no 'bounds' entry
     """
     bounds_query = Query(se, start=0, limit=0)
     bounds_query.add_query(Bool())
     bounds_query.add_aggregation(
         GeoBoundsAgg(field="points.point", name="bounds"))
     graph_term = Term(field="graph_id", term=str(node.graph.graphid))
     bounds_query.add_query(graph_term)
     response = bounds_query.search(index=RESOURCES_INDEX)
     aggregation = response["aggregations"]["bounds"]
     if "bounds" in aggregation:
         return aggregation["bounds"]
     return None
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """
    Searches the 'resource_relations' index for relations whose 'entityid1' matches resourceid

    Arguments:
    resourceid -- passed as the terms of the 'entityid1' filter

    Keyword Arguments:
    lang -- accepted but not used here
    limit -- the maximum number of relations to fetch
    start -- the offset of the first relation to fetch

    Returns a dict with 'resource_relationships' (the '_source' of every hit),
    'related_resources' (always an empty list) and 'total' (the reported hit total).

    """
    se = SearchEngineFactory().create()

    relation_query = Query(se, limit=limit, start=start)
    entity_filter = Terms(field='entityid1', terms=resourceid)
    relation_query.add_filter(entity_filter.dsl, operator='or')
    hits = relation_query.search(index='resource_relations',
                                 doc_type='all')['hits']

    ret = {'resource_relationships': [], 'related_resources': []}
    ret['total'] = hits['total']
    ret['resource_relationships'].extend(
        hit['_source'] for hit in hits['hits'])
    return ret
Exemple #30
0
def get_search_contexts(request):
    """
    Returns the concept id and context found for every entry of settings.SEARCH_TERMS

    The result maps each term's 'context_key' to a dict that maps the term's
    'text_key' to {'conceptid': ..., 'context': ...}; both values are '' when
    no search hit matches the term's text and context label. Consecutive terms
    that share a 'context_label' are collected in the same inner dict.

    The result is cached under 'search_contexts' for 86400 seconds and the
    cached copy is returned without searching while it is available.

    Arguments:
    request -- the request; its 'lang' GET parameter (default settings.LANGUAGE_CODE)
               is passed to get_preflabel_from_conceptid

    """
    cached_context = cache.get('search_contexts')
    if cached_context is not None:
        return cached_context

    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    search_context = {}
    context_label1 = '-'
    # Bound up front so that a first term labelled '-' cannot hit an unassigned name
    value = {}
    for search_term in settings.SEARCH_TERMS:
        searchString1 = search_term['text']
        # Debug output, written as a call so it parses on Python 2 and 3 alike
        print(searchString1)
        lowered = searchString1.lower()

        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=lowered, fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')

        conceptid1 = ''
        context1 = ''
        for result1 in results1['hits']['hits']:
            source = result1['_source']
            prefLabel = get_preflabel_from_conceptid(source['context'], lang)
            source['options']['context_label'] = prefLabel['value']
            # No break: when several hits match, the last one wins
            if (prefLabel['value'] == search_term['context_label'] and source['term'] == search_term['text']):
                conceptid1 = source['options']['conceptid']
                context1 = source['context']
        result = {'conceptid': conceptid1, 'context': context1}

        # A change of label starts a fresh group of terms
        if context_label1 != search_term['context_label']:
            value = {}
        print(result)
        value[search_term['text_key']] = result
        search_context[search_term['context_key']] = value
        context_label1 = search_term['context_label']

    cache.set('search_contexts', search_context, 86400)
    return search_context