def prepare_resource_relations_index(self, create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource_relations
    index used to track related resources.

    create -- when True, also creates the index in Elasticsearch
    Returns the index settings dict.
    """
    # entity ids and the relationship type are stored verbatim so they can
    # be filtered on exactly; notes remain analyzed free text
    properties = {
        'resourcexid': {'type': 'long'},
        'notes': {'type': 'string'},
    }
    for exact_match_field in ('relationshiptype', 'entityid2', 'entityid1'):
        properties[exact_match_field] = {'type': 'string', 'index': 'not_analyzed'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        # ignore=400 makes re-creation of an existing index a no-op
        se.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
def resource_manager(request, resourcetypeid='', form_id='default', resourceid=''):
    """
    Main view for creating, editing, and deleting a resource instance.

    GET renders the requested form, POST saves submitted form data and
    reindexes the resource, DELETE removes the resource and all of its
    relationships from both the database and the search indexes.
    """
    # load an existing resource by id, or start a new one of the given type
    if resourceid != '':
        resource = Resource(resourceid)
    elif resourcetypeid != '':
        resource = Resource({'entitytypeid': resourcetypeid})

    if form_id == 'default':
        form_id = resource.form_groups[0]['forms'][0]['id']

    form = resource.get_form(form_id)

    if request.method == 'DELETE':
        resource.delete_index()
        se = SearchEngineFactory().create()
        # remove each relationship document from the index and the database
        # before deleting the resource itself
        realtionships = resource.get_related_resources(return_entities=False)
        for realtionship in realtionships:
            se.delete(index='resource_relations', doc_type='all', id=realtionship.resourcexid)
            realtionship.delete()
        resource.delete()
        return JSONResponse({ 'success': True })

    if request.method == 'POST':
        data = JSONDeserializer().deserialize(request.POST.get('formdata', {}))
        form.update(data, request.FILES)

        with transaction.atomic():
            # drop the stale index entry before saving so the reindex is clean
            if resourceid != '':
                resource.delete_index()
            resource.save(user=request.user)
            resource.index()
            resourceid = resource.entityid

            return redirect('resource_manager', resourcetypeid=resourcetypeid, form_id=form_id, resourceid=resourceid)

    # overall date range used to initialize the time filter widget
    min_max_dates = models.Dates.objects.aggregate(Min('val'), Max('val'))

    if request.method == 'GET':
        if form != None:
            lang = request.GET.get('lang', settings.LANGUAGE_CODE)
            form.load(lang)
            return render(request, 'resource-manager.htm', {
                'form': form,
                'formdata': JSONSerializer().serialize(form.data),
                'form_template': 'views/forms/' + form_id + '.htm',
                'form_id': form_id,
                'resourcetypeid': resourcetypeid,
                'resourceid': resourceid,
                'main_script': 'resource-manager',
                'active_page': 'ResourceManger',
                'resource': resource,
                'resource_name': resource.get_primary_name(),
                'resource_type_name': resource.get_type_name(),
                'form_groups': resource.form_groups,
                'min_date': min_max_dates['val__min'].year if min_max_dates['val__min'] != None else 0,
                # NOTE(review): the condition below tests val__min while the
                # value read is val__max -- looks like it should test
                # val__max; confirm before changing
                'max_date': min_max_dates['val__max'].year if min_max_dates['val__min'] != None else 1,
                'timefilterdata': JSONSerializer().serialize(Concept.get_time_filter_data()),
            })
        else:
            return HttpResponseNotFound('<h1>Arches form not found.</h1>')
def delete_index(self):
    """
    Removes this concept's label documents from the concept_labels index
    and its entries from the term index.
    """
    search_engine = SearchEngineFactory().create()
    # phrase-match on the conceptid so every label of this concept is removed
    label_query = Query(search_engine, start=0, limit=10000)
    label_query.add_query(Match(field='conceptid', query=self.conceptid, type='phrase'))
    label_query.delete(index='concept_labels')
    search_engine.delete_terms(self.id)
def index_resources():
    """
    Deletes any existing indices from elasticsearch related to resources
    and then indexes all resources from the database, printing a per-type
    summary of database counts vs. indexed counts.
    """
    result_summary = {}
    se = SearchEngineFactory().create()

    # clear existing indexes
    for index_type in ['resource_relations', 'entity', 'resource', 'maplayers']:
        se.delete_index(index=index_type)
    # remove only the resource-derived terms (docs with no conceptid option);
    # concept-derived terms are handled by the concept indexer
    se.delete(index='term', body='{"query":{"bool":{"must":[{"constant_score":{"filter":{"missing":{"field":"value.options.conceptid"}}}}],"must_not":[],"should":[]}}}')
    Resource().prepare_term_index(create=True)

    cursor = connection.cursor()
    cursor.execute("""select entitytypeid from data.entity_types where isresource = TRUE""")
    resource_types = cursor.fetchall()
    Resource().prepare_resource_relations_index(create=True)

    # one mapping per resource type in the shared 'entity' index
    for resource_type in resource_types:
        Resource().prepare_search_index(resource_type[0], create=True)

    index_resources_by_type(resource_types, result_summary)

    # refresh so the count below sees everything just indexed
    se.es.indices.refresh(index='entity')
    for resource_type in resource_types:
        result_summary[resource_type[0]]['indexed'] = se.es.count(index="entity", doc_type=resource_type[0])['count']

    print '\nResource Index Results:'
    for k, v in result_summary.iteritems():
        status = 'Passed' if v['database'] == v['indexed'] else 'failed'
        print "Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}".format(status, k, v['database'], v['indexed'])
def get_scheme_id(self):
    """
    Looks up this label's concept scheme in the concept_labels index.
    Returns a Concept for the scheme, or None when the label is not indexed.
    """
    search_engine = SearchEngineFactory().create()
    hit = search_engine.search(index='concept_labels', id=self.id)
    if not hit['found']:
        return None
    # the doc_type of a label document is the id of its scheme
    return Concept(hit['_type'])
def get_related_resources(self, lang='en-US', limit=1000, start=0):
    """
    Returns an object that lists the related resources, the relationship types,
    and a reference to the current resource
    """
    ret = {
        'resource_instance': self,
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    # a relation matches when this instance appears on either end
    bool_filter = Bool()
    bool_filter.should(Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
    bool_filter.should(Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
    query.add_query(bool_filter)
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']
    instanceids = set()
    for relation in resource_relations['hits']['hits']:
        # resolve the relationshiptype valueid to a human readable label
        relation['_source']['preflabel'] = get_preflabel_from_valueid(relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        instanceids.add(relation['_source']['resourceinstanceidto'])
        instanceids.add(relation['_source']['resourceinstanceidfrom'])
    if len(instanceids) > 0:
        # drop self so only the other end of each relation is fetched
        instanceids.remove(str(self.resourceinstanceid))

    related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def index_resources_by_type(resource_types, result_summary):
    """
    Collects and indexes all resources of the given types, recording the
    database count per type in result_summary, then mirrors every
    RelatedResource row into the resource_relations index.
    """
    for resource_type in resource_types:
        resources = archesmodels.Entities.objects.filter(entitytypeid = resource_type)
        print "Indexing {0} {1} resources".format(len(resources), resource_type[0])
        result_summary[resource_type[0]] = {'database':len(resources), 'indexed':0}
        errors = []
        for resource in resources:
            try:
                resource = Resource().get(resource.entityid)
                resource.index()
            except Exception as e:
                # NOTE(review): exception instances compare by identity, so
                # this de-duplication almost never suppresses anything --
                # each raise produces a distinct object; confirm intent
                if e not in errors:
                    errors.append(e)
        if len(errors) > 0:
            print errors[0], ':', len(errors)

    se = SearchEngineFactory().create()
    # mirror every database relationship into the resource_relations index
    related_resource_records = archesmodels.RelatedResource.objects.all()
    for related_resource_record in related_resource_records:
        se.index_data(index='resource_relations', doc_type='all', body=model_to_dict(related_resource_record), idfield='resourcexid')

    return result_summary
def get_preflabel_from_valueid(valueid, lang):
    """
    Resolves a value id (e.g. a relationship type) to its concept's
    preferred label in the requested language.

    Returns None implicitly when no document with that id exists in the
    concept_labels index.
    """
    se = SearchEngineFactory().create()
    concept_label = se.search(index='concept_labels', id=valueid)
    if concept_label['found']:
        # two-step lookup: valueid -> owning conceptid -> preferred label
        return get_preflabel_from_conceptid(get_concept_label_from_valueid(valueid)['conceptid'], lang)
def get_scheme_id(self):
    """
    Fetches this concept document from the strings index and returns a
    Concept built from its recorded top concept, or None when not indexed.
    """
    search_engine = SearchEngineFactory().create()
    hit = search_engine.search(index='strings', doc_type='concept', id=self.id)
    if not hit['found']:
        return None
    return Concept(hit['top_concept'])
def prepare_search_index(self, resource_type_id, create=False):
    """
    Creates the Elasticsearch settings and mappings needed to search
    resources of the given type, extending the base class mapping with a
    date_groups sub-mapping.

    resource_type_id -- the entity type id used as the ES doc_type
    create -- when True, creates the 'entity' index (or, if it already
    exists, just adds the mapping for this resource type)
    """
    index_settings = super(Resource, self).prepare_search_index(resource_type_id, create=False)
    index_settings['mappings'][resource_type_id]['properties']['date_groups'] = {
        'properties': {
            'conceptid': {'type': 'string', 'index': 'not_analyzed'}
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='entity', body=index_settings)
        except Exception:
            # BUG FIX: was a bare "except:", which also swallowed
            # SystemExit/KeyboardInterrupt. The index presumably already
            # exists, so fall back to adding just this type's mapping.
            index_settings = index_settings['mappings']
            se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource_relations
    index used to track related resources.

    create -- when True, also creates the index in Elasticsearch
    Returns the index settings dict.
    """
    # every field except the free-text notes is an exact-match keyword
    field_types = {
        'resourcexid': 'keyword',
        'notes': 'text',
        'relationshiptype': 'keyword',
        'resourceinstanceidfrom': 'keyword',
        'resourceinstanceidto': 'keyword',
        'created': 'keyword',
        'modified': 'keyword',
    }
    properties = dict((name, {'type': es_type}) for name, es_type in field_types.items())
    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        # ignore=400 makes re-creation of an existing index a no-op
        se.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
def related_resources(request, resourceid):
    """
    GET returns the resources related to resourceid, filtered by the
    caller's allowed types and anonymity; DELETE (edit group only) removes
    a single relationship from the database and the search index.
    """
    ## get allowed resource types based on permissions
    allowedtypes = get_allowed_types(request)

    is_anon = False
    if request.user.username == "anonymous":
        is_anon = True

    if request.method == 'GET':
        lang = request.GET.get('lang', settings.LANGUAGE_CODE)
        start = request.GET.get('start', 0)
        resources = get_related_resources(resourceid, lang, start=start, limit=15, allowedtypes=allowedtypes, is_anon=is_anon)
        return JSONResponse(resources, indent=4)

    if 'edit' in request.user.user_groups and request.method == 'DELETE':
        se = SearchEngineFactory().create()
        data = JSONDeserializer().deserialize(request.body)
        entityid1 = data.get('entityid1')
        entityid2 = data.get('entityid2')
        resourcexid = data.get('resourcexid')
        # NOTE: 'realtionshiptype' (sic) is the key the client actually sends
        realtionshiptype = data.get('realtionshiptype')
        resource = Resource(entityid1)
        resource.delete_resource_relationship(entityid2, realtionshiptype)
        # remove the relationship document from the index as well
        se.delete(index='resource_relations', doc_type='all', id=resourcexid)
        return JSONResponse({ 'success': True })
def get_related_resources(resourceid, lang, limit=1000, start=0):
    """
    Queries the resource_relations index for every relation touching
    resourceid and returns the relations (with resolved preferred labels)
    plus the entity documents of the related resources.
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    # a relation matches when this resource appears on either side
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']
    entityids = set()
    for relation in resource_relations['hits']['hits']:
        relation['_source']['preflabel'] = get_preflabel_from_valueid(relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        entityids.add(relation['_source']['entityid1'])
        entityids.add(relation['_source']['entityid2'])
    if len(entityids) > 0:
        # exclude the resource itself; only fetch the other ends
        entityids.remove(resourceid)

    related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def save(self):
    """
    Persists this resource-to-resource relation and mirrors it into the
    resource_relations Elasticsearch index, stamping created/modified.
    """
    # imported locally to avoid a circular import at module load time
    from arches.app.search.search_engine_factory import SearchEngineFactory
    search_engine = SearchEngineFactory().create()
    if not self.created:
        self.created = datetime.datetime.now()
    self.modified = datetime.datetime.now()
    search_engine.index_data(
        index='resource_relations',
        doc_type='all',
        body=model_to_dict(self),
        idfield='resourcexid')
    super(ResourceXResource, self).save()
def get_resource_names(self, nodevalue):
    """
    Returns the set of display names for the resources referenced by
    nodevalue (looked up one by one in the resource index).
    """
    resource_names = set([])
    # BUG FIX: removed an unused `es = Elasticsearch()` instantiation
    # (all lookups go through the factory-created engine) and a leftover
    # debug print of each resourceid
    se = SearchEngineFactory().create()
    id_list = self.get_id_list(nodevalue)
    for resourceid in id_list:
        resource_document = se.search(index='resource', doc_type='_all', id=resourceid)
        resource_names.add(resource_document['_source']['displayname'])
    return resource_names
def get_related_resources(resourceid, lang, limit=1000, start=0, allowedtypes=[], is_anon=False):
    """
    Queries the resource_relations index for relations touching resourceid,
    then filters out related resources the caller may not see (types not in
    allowedtypes, and protected resources for anonymous users) along with
    the relationships that point at them.

    NOTE(review): allowedtypes uses a mutable default argument; safe only
    as long as callers never mutate it -- confirm.
    """
    ret = {
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    query.add_filter(Terms(field='entityid2', terms=resourceid).dsl, operator='or')
    resource_relations = query.search(index='resource_relations', doc_type="all")
    entityids = set()
    for relation in resource_relations['hits']['hits']:
        relation['_source']['preflabel'] = get_preflabel_from_valueid(relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        entityids.add(relation['_source']['entityid1'])
        entityids.add(relation['_source']['entityid2'])
    if len(entityids) > 0:
        entityids.remove(resourceid)

    # can't figure why passing allowed types to doc_type param doesn't work,
    # so filter is carried out later
    related_resources = se.search(index='entity', doc_type='_all', id=list(entityids))

    filtered_ids = []
    if related_resources:
        for resource in related_resources['docs']:
            if not resource['_type'] in allowedtypes:
                filtered_ids.append(resource['_source']['entityid'])
                continue
            if is_anon:
                # filter out protected resources if user is anonymous
                # (this is basically a subset of the get_protected_entityids below
                # they should be combined probably)
                from search import get_protection_conceptids
                protect_id = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)
                conceptids = [d['conceptid'] for d in resource['_source']['domains']]
                if protect_id in conceptids:
                    filtered_ids.append(resource['_source']['entityid'])
                    continue
            ret['related_resources'].append(resource['_source'])

    if len(filtered_ids) > 0:
        # remove all relationships in ret that match a filtered id (this lc is yuge but I think concise)
        filtered_relationships = [rel for rel in ret['resource_relationships'] if not rel['entityid1'] in filtered_ids and not rel['entityid2'] in filtered_ids]
        # update ret values
        ret['resource_relationships'] = filtered_relationships

    # total reflects the post-filter relationship count
    ret['total'] = len(ret['resource_relationships'])

    return ret
def index(self, scheme=None):
    """
    Indexes this value into the 'strings' index under doc_type 'concept'.
    Only values whose category is 'label' are indexed.

    scheme -- the concept scheme this label belongs to; when omitted it is
    looked up via get_scheme_id()
    Raises Exception when no scheme can be determined for the label.
    """
    if self.category == 'label':
        se = SearchEngineFactory().create()
        data = JSONSerializer().serializeToPython(self)
        if scheme == None:
            scheme = self.get_scheme_id()
        if scheme == None:
            raise Exception('Index of label failed. Index type (scheme id) could not be derived from the label.')
        # record the scheme so searches can be scoped to a concept tree
        data['top_concept'] = scheme.id
        se.index_data('strings', 'concept', data, 'id')
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection of map layer features, optionally
    restricted to one entity type or to specific entity ids, and optionally
    reduced to centroids with heavy properties stripped.
    """
    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)
    args = { 'index': 'maplayers' }
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid
    if entityids != '':
        # explicit id lookup bypasses the search query entirely
        for entityid in entityids.split(','):
            geojson_collection['features'].append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)
    if not data:
        return JSONResponse({})

    for item in data['hits']['hits']:
        # If the user is not authenticated, show only published features.
        # (Translated from Slovenian; the original note adds: this probably
        # needs more work -- maybe users should only see their own???)
        # NOTE(review): the double negation below is equivalent to
        # username == 'anonymous' -- confirm that is the intent
        if (not request.user.username != 'anonymous'):
            if (item['_source']['properties']['ewstatus'] != settings.PUBLISHED_LABEL):
                continue
        if get_centroids:
            # replace the full geometry with its centroid and strip the
            # heavy per-feature properties to keep the payload small
            item['_source']['geometry'] = item['_source']['properties']['centroid']
            #item['_source'].pop('properties', None)
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop('elements', None)
            item['_source']['properties'].pop('entitytypeid', None)
            item['_source']['properties'].pop('constructions', None)
            item['_source']['properties'].pop('centroid', None)
            item['_source']['properties'].pop('ewstatus', None)
            item['_source']['properties'].pop('address', None)
            item['_source']['properties'].pop('designations', None)
            item['_source']['properties'].pop('primaryname', None)
            item['_source']['properties'].pop('resource_type', None)
        elif geom_param != None:
            # serve an alternate geometry property as the feature geometry
            item['_source']['geometry'] = item['_source']['properties'][geom_param]
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop(geom_param, None)
        else:
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop('centroid', None)
        geojson_collection['features'].append(item['_source'])

    return JSONResponse(geojson_collection)
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection of map layer features, optionally
    restricted to one entity type or to specific entity ids; features of
    protected resources are hidden from anonymous users.
    """
    data = []
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')
    geojson_collection = {
        "type": "FeatureCollection",
        "features": []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)
    args = { 'index': 'maplayers' }
    if entitytypeid != 'all':
        args['doc_type'] = entitytypeid
    if entityids != '':
        # explicit id lookup bypasses the search query entirely
        for entityid in entityids.split(','):
            geojson_collection['features'].append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    data = query.search(**args)

    # if anonymous user, get list of protected entity ids to be excluded from map
    protected = []
    if request.user.username == 'anonymous':
        protected = get_protected_entityids()

    # BUG FIX: removed three leftover debug print statements (the protected
    # id list and a per-feature dump of every hidden document)
    for item in data['hits']['hits']:
        if item['_id'] in protected:
            continue
        if get_centroids:
            # centroids only: swap geometry and drop all properties
            item['_source']['geometry'] = item['_source']['properties']['centroid']
            item['_source'].pop('properties', None)
        elif geom_param != None:
            # serve an alternate geometry property as the feature geometry
            item['_source']['geometry'] = item['_source']['properties'][geom_param]
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop(geom_param, None)
        else:
            item['_source']['properties'].pop('extent', None)
            item['_source']['properties'].pop('centroid', None)
        geojson_collection['features'].append(item['_source'])

    return JSONResponse(geojson_collection)
def setUpClass(cls):
    """
    Resets the concept_labels and term search indexes and loads the test
    resource graph fixture before any test in this class runs.
    """
    search_engine = SearchEngineFactory().create()
    # drop both indexes first, then recreate them empty
    for index_name in ("concept_labels", "term"):
        search_engine.delete_index(index=index_name)
    for index_name in ("concept_labels", "term"):
        search_engine.create_index(index=index_name)
    management.call_command(
        "packages",
        operation="import_json",
        source="tests/fixtures/resource_graphs/archesv4_resource.json",
    )
def get(self, request, resourceid=None):
    """
    Returns the display descriptors for a resource instance pulled from the
    search index, or 404 when no resourceid is supplied.
    """
    if resourceid is None:
        return HttpResponseNotFound()
    se = SearchEngineFactory().create()
    source = se.search(index='resource', doc_type='_all', id=resourceid)['_source']
    resource = Resource.objects.get(pk=resourceid)
    descriptors = {
        'graphid': source['graph_id'],
        'graph_name': resource.graph.name,
    }
    # remaining descriptors come straight from the indexed document
    for field in ('displaydescription', 'map_popup', 'displayname', 'geometries'):
        descriptors[field] = source[field]
    return JSONResponse(descriptors)
def index(self): """ Indexes all the nessesary items values of a resource to support search """ se = SearchEngineFactory().create() datatype_factory = DataTypeFactory() node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')} document, terms = self.get_documents_to_index(datatype_factory=datatype_factory, node_datatypes=node_datatypes) se.index_data('resource', self.graph_id, JSONSerializer().serializeToPython(document), id=self.pk) for term in terms: se.index_data('strings', 'term', term['_source'], id=term['_id'])
def index(self): """ Indexes all the nessesary items values of a resource to support search """ if unicode(self.graph_id) != unicode(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID): se = SearchEngineFactory().create() datatype_factory = DataTypeFactory() node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')} document, terms = self.get_documents_to_index(datatype_factory=datatype_factory, node_datatypes=node_datatypes) document['root_ontology_class'] = self.get_root_ontology() se.index_data('resource', self.graph_id, JSONSerializer().serializeToPython(document), id=self.pk) for term in terms: se.index_data('strings', 'term', term['_source'], id=term['_id'])
def IndexConceptFixer(source): """ Simple utility to delete the ES index of a given list of conceptids """ with open(source, 'rb') as csvfile: reader = csv.DictReader(csvfile, delimiter= '|') se = SearchEngineFactory().create() for row in reader: try: conceptvalues = Values.objects.filter(conceptid = row['conceptid']) for conceptvalue in conceptvalues: se.delete_terms(conceptvalue.valueid) except: print "Concept Value %s does not exist" % row['conceptid']
def index_concepts_for_search(self):
    """
    Walks the whole concept tree, computes nested-set (left/right) bounds
    for every concept, and indexes the flattened result into the 'concept'
    index under doc_type 'all'.
    """
    # see http://sqlblog.com/blogs/adam_machanic/archive/2006/07/12/swinging-from-tree-to-tree-using-ctes-part-1-adjacency-to-nested-sets.aspx
    # Value of Lft for the root node is 1
    # Value of Rgt for the root node is 2 * (Number of nodes)
    # Value of Lft for any node is ((Number of nodes visited) * 2) - (Level of current node)
    # Value of Rgt for any node is (Lft value) + ((Number of subnodes) * 2) + 1
    # the concept tree can be deep; the default recursion limit is too low
    sys.setrecursionlimit(3000)
    se = SearchEngineFactory().create()
    se.create_mapping('concept', 'all', 'conceptid', 'string', 'not_analyzed')
    se.create_mapping('concept', 'all', 'labelid', 'string', 'not_analyzed')

    def _findNarrowerConcept(conceptid, ret=None, limit=200000, level=1):
        # recursively visits narrower concepts, accumulating each node's
        # labels and nested-set left/right bounds into ret
        returnobj = {'subnodes': 0}
        if ret == None: # the root node
            labels = archesmodels.Values.objects.filter(conceptid = conceptid)
            ret = {}
            nodesvisited = len(ret) + 1
            ret[conceptid] = {'labels': [], 'left': (nodesvisited*2)-level, 'right': 0}
            for label in labels:
                ret[conceptid]['labels'].append({'labelid': label.pk, 'label': label.value})
            level = level + 1
        conceptrealations = archesmodels.ConceptRelations.objects.filter(conceptidfrom = conceptid)
        for relation in conceptrealations:
            nodesvisited = len(ret) + 1
            labels = archesmodels.Values.objects.filter(conceptid = relation.conceptidto)
            ret[relation.conceptidto_id] = {'labels': [], 'left': (nodesvisited*2)-level, 'right': 0}
            for label in labels:
                ret[relation.conceptidto_id]['labels'].append({'labelid': label.pk, 'label': label.value})
            returnobj = _findNarrowerConcept(relation.conceptidto_id, ret=ret, level=level+1)
        subnodes = returnobj['subnodes']
        if subnodes == 0: # meaning we're at a leaf node
            ret[conceptid]['right'] = ret[conceptid]['left'] + 1
        else:
            ret[conceptid]['right'] = subnodes + 1
        return {'all_concepts': ret, 'subnodes': ret[conceptid]['right']}

    # hard-coded id of the root of the concept tree
    concepts = _findNarrowerConcept('00000000-0000-0000-0000-000000000003')
    all_concepts = []
    # flatten the id-keyed dict into the list shape the indexer expects
    for key, concept in concepts['all_concepts'].iteritems():
        all_concepts.append({'conceptid': key, 'labels': concept['labels'], 'left': concept['left'], 'right': concept['right']})

    self.index(all_concepts, 'concept', 'all', 'conceptid')
def related_resources(request, resourceid):
    """
    GET returns the resources related to resourceid; DELETE (edit group
    only) removes a single relationship from both the database and the
    resource_relations search index.
    """
    if request.method == 'GET':
        lang = request.GET.get('lang', settings.LANGUAGE_CODE)
        start = request.GET.get('start', 0)
        return JSONResponse(get_related_resources(resourceid, lang, start=start, limit=15), indent=4)

    if 'edit' in request.user.user_groups and request.method == 'DELETE':
        se = SearchEngineFactory().create()
        data = JSONDeserializer().deserialize(request.body)
        entityid1 = data.get('entityid1')
        entityid2 = data.get('entityid2')
        resourcexid = data.get('resourcexid')
        # NOTE: 'realtionshiptype' (sic) is the key the client actually sends
        realtionshiptype = data.get('realtionshiptype')
        resource = Resource(entityid1)
        resource.delete_resource_relationship(entityid2, realtionshiptype)
        # remove the relationship document from the index as well
        se.delete(index='resource_relations', doc_type='all', id=resourcexid)
        return JSONResponse({ 'success': True })
def index_concepts():
    """
    Collects all concepts and indexes both concepts and concept_labels,
    then verifies each concept landed in the index and prints a summary.
    """
    se = SearchEngineFactory().create()
    se.delete_index(index='concept_labels')
    # remove only the concept-derived terms (docs that DO carry a conceptid
    # option); resource-derived terms are left alone
    se.delete(index='term', body='{"query":{"bool":{"must_not":[{"constant_score":{"filter":{"missing":{"field":"value.options.conceptid"}}}}],"must":[],"should":[]}}}')
    Resource().prepare_term_index(create=True)

    print 'indexing concepts'
    start = datetime.now()

    cursor = connection.cursor()
    cursor.execute("""select conceptid from concepts.concepts""")
    conceptids = cursor.fetchall()
    for c in conceptids:
        # core/system concepts are never indexed
        if c[0] not in CORE_CONCEPTS:
            concept = Concept().get(id=c[0], include_subconcepts=True, include_parentconcepts=False, include=['label'])
            concept.index()

    end = datetime.now()
    duration = end - start
    print 'indexing concepts required', duration.seconds, 'seconds'

    # verification pass: check every non-core concept is searchable
    cursor = connection.cursor()
    sql = """
        select conceptid, conceptlabel from concepts.vw_concepts where conceptid not in ('%s')
    """ % ("','".join(CORE_CONCEPTS))
    cursor.execute(sql)
    concepts = cursor.fetchall()
    concept_index_results = {'count':len(concepts), 'passed':0, 'failed':0}
    for conceptid, conceptvalue in concepts:
        result = get_indexed_concepts(se, conceptid, conceptvalue)
        if result != 'passed':
            concept_index_results['failed'] += 1
        else:
            concept_index_results['passed'] += 1

    status = 'Passed' if concept_index_results['failed'] == 0 else 'Failed'
    print '\nConcept Index Results:'
    print "Status: {0}, In Database: {1}, Indexed: {2}".format(status, concept_index_results['count'], concept_index_results['passed'])
def get_protected_entityids():
    '''returns list of entity ids for protected resources'''
    from search import get_protection_conceptids
    protect_id = get_protection_conceptids(settings.PROTECTION_LEVEL_NODE)
    filtered_ids = []
    se = SearchEngineFactory().create()
    # for some reason doc_type must be specified with INFORMATION RESOURCE in
    # order for that type to be queried. right now this is ok, because it's
    # the only type with protection levels, but this is very strange.
    hits = se.search(index='entity', doc_type="INFORMATION_RESOURCE.E73")['hits']['hits']
    for hit in hits:
        domain_conceptids = [domain['conceptid'] for domain in hit['_source']['domains']]
        if protect_id in domain_conceptids:
            filtered_ids.append(hit['_source']['entityid'])
    return filtered_ids
def bulk_save(resources):
    """
    Saves and indexes a list of resources

    Arguments:
    resources -- a list of resource models
    """
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # nodeid -> datatype lookup built once for the whole batch
    node_datatypes = {str(nodeid): datatype for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')}
    tiles = []
    documents = []
    term_list = []

    # flatten out the nested tiles into a single array
    for resource in resources:
        for parent_tile in resource.tiles:
            for child_tile in parent_tile.tiles.itervalues():
                if len(child_tile) > 0:
                    resource.tiles.extend(child_tile)
            # children have been hoisted onto the resource; clear the nesting
            parent_tile.tiles = {}

        tiles.extend(resource.tiles)

    # need to save the models first before getting the documents for index
    Resource.objects.bulk_create(resources)
    TileModel.objects.bulk_create(tiles)

    for resource in resources:
        resource.save_edit(edit_type='create')
        # fetchTiles=False: tiles are already attached in memory above
        document, terms = resource.get_documents_to_index(fetchTiles=False, datatype_factory=datatype_factory, node_datatypes=node_datatypes)
        document['root_ontology_class'] = resource.get_root_ontology()
        documents.append(se.create_bulk_item(index='resource', doc_type=document['graph_id'], id=document['resourceinstanceid'], data=document))
        for term in terms:
            term_list.append(se.create_bulk_item(index='strings', doc_type='term', id=term['_id'], data=term['_source']))

    for tile in tiles:
        tile.save_edit(edit_type='tile create', new_value=tile.data)

    # bulk index the resources, tiles and terms
    se.bulk_index(documents)
    se.bulk_index(term_list)
def add_resource_relation(entityid1, entityid2, relationship_type_string):
    """
    Creates a RelatedResource row linking the two entities, resolving the
    relationship type by (case-insensitive) value match, and mirrors the
    relation into the resource_relations index. Failures are logged, not
    raised.
    """
    search_engine = SearchEngineFactory().create()
    try:
        logging.warning("finding relationship: %s", relationship_type_string)
        # resolve the human-readable type string to its Values row
        value = models.Values.objects.get(value__icontains=relationship_type_string)
        relationship = models.RelatedResource(
            entityid1=entityid1,
            entityid2=entityid2,
            relationshiptype=value.pk)
        relationship.save()
        search_engine.index_data(
            index='resource_relations',
            doc_type='all',
            body=model_to_dict(relationship),
            idfield='resourcexid')
        logging.warning("Added relationship")
    except Exception as e:
        # best-effort: relation creation failures are logged and swallowed
        logging.warning("Unable to create relation %s to %s. %s", entityid1, entityid2, e)
def delete(self, *args, **kwargs):
    """
    Deletes this tile, its child tiles, and any term documents derived
    from it, then reindexes the parent resource. Ordering matters here:
    children first, then index cleanup, then the edit-log entry, then the
    row itself.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)
    # delete child tiles first so their own cleanup runs
    for tiles in self.tiles.itervalues():
        for tile in tiles:
            tile.delete(*args, request=request, **kwargs)
    # remove any term documents generated from this tile
    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])
    self.__preDelete(request)
    # record the edit before the row disappears
    self.save_edit(user=request.user, edit_type='tile delete', old_value=self.data)
    super(Tile, self).delete(*args, **kwargs)
    # reindex so search no longer reflects the deleted tile
    resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
    resource.index()
def get(self, request, resourceid=None):
    """
    Returns the display descriptors for a resource instance, or 404 when
    the resource is missing or is the system settings resource.
    """
    if Resource.objects.filter(pk=resourceid).exclude(pk=settings.SYSTEM_SETTINGS_RESOURCE_ID).exists():
        try:
            resource = Resource.objects.get(pk=resourceid)
            search_engine = SearchEngineFactory().create()
            source = search_engine.search(index="resources", id=resourceid)["_source"]
            descriptors = {
                "graphid": source["graph_id"],
                "graph_name": resource.graph.name,
            }
            # remaining descriptors come straight from the indexed document
            for field in ("displaydescription", "map_popup", "displayname", "geometries"):
                descriptors[field] = source[field]
            return JSONResponse(descriptors)
        except Exception:
            # fall through to 404 on any lookup failure, but keep a record
            logger.exception(_("Failed to fetch resource instance descriptors"))
    return HttpResponseNotFound()
def build_search_terms_dsl(request):
    """
    Builds the term-autocomplete query for the search box from the 'q'
    request parameter.
    """
    se = SearchEngineFactory().create()
    searchString = request.GET.get('q', '')
    lowered = searchString.lower()
    query = Query(se, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
    boolquery = Bool()
    # match the raw term and its ascii-folded variant, as prefix phrases
    # and as a plain fuzzy match
    boolquery.should(Match(field='term', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='term.folded', query=lowered, type='phrase_prefix', fuzziness='AUTO'))
    boolquery.should(Match(field='term.folded', query=lowered, fuzziness='AUTO'))
    query.add_query(boolquery)
    return query
def get_related_resources(self, lang='en-US', limit=1000, start=0):
    """
    Returns an object that lists the related resources, the relationship types,
    and a reference to the current resource
    """
    ret = {
        'resource_instance': self,
        'resource_relationships': [],
        'related_resources': []
    }
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    # a relation matches when this instance appears on either end
    bool_filter = Bool()
    bool_filter.should(
        Terms(field='resourceinstanceidfrom', terms=self.resourceinstanceid))
    bool_filter.should(
        Terms(field='resourceinstanceidto', terms=self.resourceinstanceid))
    query.add_query(bool_filter)
    resource_relations = query.search(index='resource_relations', doc_type='all')
    ret['total'] = resource_relations['hits']['total']
    instanceids = set()
    for relation in resource_relations['hits']['hits']:
        # resolve the relationshiptype valueid to a human readable label
        relation['_source']['preflabel'] = get_preflabel_from_valueid(
            relation['_source']['relationshiptype'], lang)
        ret['resource_relationships'].append(relation['_source'])
        instanceids.add(relation['_source']['resourceinstanceidto'])
        instanceids.add(relation['_source']['resourceinstanceidfrom'])
    if len(instanceids) > 0:
        # drop self so only the other end of each relation is fetched
        instanceids.remove(str(self.resourceinstanceid))

    related_resources = se.search(index='resource', doc_type='_all', id=list(instanceids))
    if related_resources:
        for resource in related_resources['docs']:
            ret['related_resources'].append(resource['_source'])

    return ret
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource_relations
    index used to track related resources.

    create -- when True, also creates the index in Elasticsearch
    Returns the index settings dict.
    """
    # everything except the free-text notes is stored un-analyzed so the
    # ids and relationship type can be filtered on exactly
    properties = {'notes': {'type': 'string'}}
    for exact_match_field in ('resourcexid', 'relationshiptype',
                              'resourceinstanceidfrom', 'resourceinstanceidto'):
        properties[exact_match_field] = {'type': 'string', 'index': 'not_analyzed'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        # ignore=400 makes re-creation of an existing index a no-op
        se.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
def delete(self, *args, **kwargs):
    """
    Deletes this tile and its child tiles. Reviewers (and owners of the
    provisional edit) delete for real -- removing term documents, logging
    the edit, running datatype post-delete hooks, and reindexing the
    resource; other users only record a provisional "delete" edit.
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop("request", None)
    provisional_edit_log_details = kwargs.pop("provisional_edit_log_details", None)
    # delete child tiles first so their own cleanup runs
    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)
    try:
        user = request.user
        user_is_reviewer = user_is_resource_reviewer(user)
    except AttributeError:  # no user (e.g. system/CLI call): treat as reviewer
        user = None
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        # remove any term documents generated from this tile
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field="tileid", terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index=TERMS_INDEX)["hits"]["hits"]
        for result in results:
            se.delete(index=TERMS_INDEX, id=result["_id"])
        self.__preDelete(request)
        # record the edit before the row disappears
        self.save_edit(
            user=request.user,
            edit_type="tile delete",
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details
        )
        try:
            super(Tile, self).delete(*args, **kwargs)
            for nodeid, value in self.data.items():
                node = models.Node.objects.get(nodeid=nodeid)
                datatype = self.datatype_factory.get_instance(node.datatype)
                datatype.post_tile_delete(self, nodeid)
            resource = Resource.objects.get(resourceinstanceid=self.resourceinstance.resourceinstanceid)
            resource.index()
        except IntegrityError as err:
            # BUG FIX: was a bare "logger.error" attribute reference (never
            # called), which silently swallowed the error without logging
            logger.error(err)
    else:
        # non-reviewer without ownership: record a provisional delete only
        self.apply_provisional_edit(user, data={}, action="delete")
        super(Tile, self).save(*args, **kwargs)
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource-to-resource
    relations index (ES 6/7-style single ``_doc`` mapping) and, when requested,
    creates the index.

    Keyword Arguments:
    create -- when True, create the 'resource_relations' index in Elasticsearch

    Returns the mapping definition as a dict.
    """
    # every field except the free-text notes is an exact-match keyword
    keyword_fields = (
        "resourcexid",
        "relationshiptype",
        "resourceinstanceidfrom",
        "resourceinstanceidto",
        "created",
        "modified",
    )
    properties = {name: {"type": "keyword"} for name in keyword_fields}
    properties["notes"] = {"type": "text"}

    index_settings = {"mappings": {"_doc": {"properties": properties}}}

    if create:
        SearchEngineFactory().create().create_index(index="resource_relations", body=index_settings)

    return index_settings
def get(self, request, resourceid=None):
    """
    Returns summary display data for a single resource, pulled from its
    indexed Elasticsearch document plus the resource's graph name.
    Responds 404 when no resourceid is supplied.
    """
    if resourceid is None:
        return HttpResponseNotFound()

    se = SearchEngineFactory().create()
    source = se.search(index='resources', id=resourceid)['_source']
    resource = Resource.objects.get(pk=resourceid)

    payload = {
        'graphid': source['graph_id'],
        'graph_name': resource.graph.name,
        'displaydescription': source['displaydescription'],
        'map_popup': source['map_popup'],
        'displayname': source['displayname'],
        'geometries': source['geometries'],
    }
    return JSONResponse(payload)
def related_resources(request, resourceid):
    """
    GET: returns the first 15 related resources for the given resource.
    DELETE (users in the 'edit' group only): removes one resource
    relationship and its search-index document.
    """
    if request.method == 'GET':
        lang = request.GET.get('lang', settings.LANGUAGE_CODE)
        start = request.GET.get('start', 0)
        return JSONResponse(get_related_resources(resourceid, lang, start=start, limit=15), indent=4)

    if 'edit' in request.user.user_groups and request.method == 'DELETE':
        se = SearchEngineFactory().create()
        payload = JSONDeserializer().deserialize(request.body)
        from_entity_id = payload.get('entityid1')
        to_entity_id = payload.get('entityid2')
        relation_pk = payload.get('resourcexid')
        # NOTE: the misspelled 'realtionshiptype' key is part of the client
        # payload contract and must not be "fixed" here.
        relation_type = payload.get('realtionshiptype')
        resource = Resource(from_entity_id)
        resource.delete_resource_relationship(to_entity_id, relation_type)
        se.delete(index='resource_relations', doc_type='all', id=relation_pk)
        return JSONResponse({'success': True})
def index(self):
    """
    Indexes all the necessary values of a resource to support search.
    The system-settings resource model is never indexed.
    """
    # skip the singleton system-settings resource (Python 2 ``unicode`` kept)
    if unicode(self.graph_id) == unicode(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID):
        return

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # nodeid -> datatype lookup passed to the document builder
    node_datatypes = dict(
        (str(nodeid), datatype)
        for nodeid, datatype in models.Node.objects.values_list('nodeid', 'datatype')
    )

    document, terms = self.get_documents_to_index(
        datatype_factory=datatype_factory, node_datatypes=node_datatypes)
    document['root_ontology_class'] = self.get_root_ontology()

    se.index_data('resource', self.graph_id,
                  JSONSerializer().serializeToPython(document), id=self.pk)
    for term in terms:
        se.index_data('strings', 'term', term['_source'], id=term['_id'])
def index(self, scheme=None):
    """
    Indexes a concept label into the 'concept_labels' index and, for ordinary
    schemes, into the term index. Non-label values are ignored.

    Keyword Arguments:
    scheme -- the concept scheme to index under; derived from the label itself
              when not supplied

    Raises Exception when no scheme can be derived.
    """
    if self.category != 'label':
        return

    se = SearchEngineFactory().create()
    data = JSONSerializer().serializeToPython(self)
    if scheme is None:
        scheme = self.get_scheme_id()
    if scheme is None:
        raise Exception('Index of label failed. Index type (scheme id) could not be derived from the label.')

    se.create_mapping('concept_labels', scheme.id, fieldname='conceptid',
                      fieldtype='string', fieldindex='not_analyzed')
    se.index_data('concept_labels', scheme.id, data, 'id')

    # don't create terms for entity type concepts
    entity_type_schemes = ('00000000-0000-0000-0000-000000000003',
                           '00000000-0000-0000-0000-000000000004')
    if scheme.id not in entity_type_schemes:
        se.index_term(self.value, self.id, scheme.id, {'conceptid': self.conceptid})
def index(self):
    """
    Indexes all the necessary values of a resource to support search,
    including any configured custom Elasticsearch indexes. The
    system-settings resource model is never indexed.
    """
    if str(self.graph_id) == str(settings.SYSTEM_SETTINGS_RESOURCE_MODEL_ID):
        return

    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    node_datatypes = {
        str(nodeid): datatype
        for nodeid, datatype in models.Node.objects.values_list("nodeid", "datatype")
    }

    document, terms = self.get_documents_to_index(
        datatype_factory=datatype_factory, node_datatypes=node_datatypes)
    document["root_ontology_class"] = self.get_root_ontology()

    se.index_data(index="resources",
                  body=JSONSerializer().serializeToPython(document),
                  id=self.pk)
    for term in terms:
        se.index_data("terms", body=term["_source"], id=term["_id"])

    for custom_index in settings.ELASTICSEARCH_CUSTOM_INDEXES:
        es_index = import_class_from_string(custom_index["module"])(custom_index["name"])
        # NOTE(review): ``document`` is deliberately rebound here (as in the
        # original), so with more than one custom index each subsequent index
        # receives the previous custom document's "tiles" — confirm intended.
        document, doc_id = es_index.get_documents_to_index(self, document["tiles"])
        es_index.index_document(document=document, id=doc_id)
def prepare_search_index(self, resource_type_id, create=False):
    """
    Creates the settings and mappings in Elasticsearch to support resource
    search, extending the base mapping with a 'date_groups' property.

    Arguments:
    resource_type_id -- the entity type id the mapping is registered under

    Keyword Arguments:
    create -- when True, create the 'entity' index (or, if it already exists,
              just register the per-type mapping)

    Returns the (extended) mapping definition as a dict.
    """
    index_settings = super(Resource, self).prepare_search_index(resource_type_id, create=False)
    # exact-match concept ids for date-group facets
    index_settings['mappings'][resource_type_id]['properties']['date_groups'] = {
        'properties': {
            'conceptid': {'type': 'string', 'index': 'not_analyzed'}
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='entity', body=index_settings)
        except Exception:
            # BUG FIX: was a bare ``except:``, which also swallowed
            # SystemExit/KeyboardInterrupt. When the index already exists,
            # fall back to registering just the per-type mapping.
            index_settings = index_settings['mappings']
            se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)

    # return the settings for callers/parity with sibling prepare_* helpers
    return index_settings
def index_resources():
    """
    Deletes any existing indicies from elasticsearch related to resources
    and then indexes all resources from the database
    """
    result_summary = {}
    se = SearchEngineFactory().create()

    # clear existing indexes
    for index_type in ['resource_relations', 'entity', 'resource', 'maplayers']:
        se.delete_index(index=index_type)
    # remove only term docs missing value.options.conceptid (i.e. resource
    # terms); concept terms are left in place
    se.delete(index='term', body='{"query":{"bool":{"must":[{"constant_score":{"filter":{"missing":{"field":"value.options.conceptid"}}}}],"must_not":[],"should":[]}}}')
    Resource().prepare_term_index(create=True)

    cursor = connection.cursor()
    cursor.execute("""select entitytypeid from data.entity_types where isresource = TRUE""")
    resource_types = cursor.fetchall()
    Resource().prepare_resource_relations_index(create=True)

    # recreate one search mapping per resource type before reindexing
    for resource_type in resource_types:
        Resource().prepare_search_index(resource_type[0], create=True)

    # presumably fills result_summary with per-type 'database' counts as a
    # side effect — TODO confirm against index_resources_by_type
    index_resources_by_type(resource_types, result_summary)

    # make the freshly indexed documents visible before counting them
    se.es.indices.refresh(index='entity')
    for resource_type in resource_types:
        result_summary[resource_type[0]]['indexed'] = se.es.count(index="entity", doc_type=resource_type[0])['count']

    # report database vs. indexed counts per type (Python 2 print statements)
    print '\nResource Index Results:'
    for k, v in result_summary.iteritems():
        status = 'Passed' if v['database'] == v['indexed'] else 'failed'
        print "Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}".format(status, k, v['database'], v['indexed'])
def map_layers(request, entitytypeid='all', get_centroids=False):
    """
    Returns a GeoJSON FeatureCollection of indexed map-layer features.
    Specific entity ids may be requested via the 'entityid' querystring
    parameter; otherwise the maplayers index is searched (optionally scoped
    to one entity type). Geometry is reduced to centroids when
    get_centroids is True, or swapped for the geometry named by the 'geom'
    parameter when supplied.
    """
    geom_param = request.GET.get('geom', None)
    bbox = request.GET.get('bbox', '')  # read but currently unused
    limit = request.GET.get('limit', settings.MAP_LAYER_FEATURE_LIMIT)
    entityids = request.GET.get('entityid', '')

    geojson_collection = {"type": "FeatureCollection", "features": []}
    features = geojson_collection['features']

    se = SearchEngineFactory().create()
    query = Query(se, limit=limit)

    search_args = {'index': 'maplayers'}
    if entitytypeid != 'all':
        search_args['doc_type'] = entitytypeid

    # explicit entity ids bypass the search entirely
    if entityids != '':
        for entityid in entityids.split(','):
            features.append(se.search(index='maplayers', id=entityid)['_source'])
        return JSONResponse(geojson_collection)

    for item in query.search(**search_args)['hits']['hits']:
        source = item['_source']
        if get_centroids:
            # replace the geometry with its centroid and drop everything else
            source['geometry'] = source['properties']['centroid']
            source.pop('properties', None)
        elif geom_param is not None:
            # swap in the requested named geometry, trimming bulky fields
            source['geometry'] = source['properties'][geom_param]
            source['properties'].pop('extent', None)
            source['properties'].pop(geom_param, None)
        else:
            source['properties'].pop('extent', None)
            source['properties'].pop('centroid', None)
        features.append(source)

    return JSONResponse(geojson_collection)
def forwards_func(apps, schema_editor):
    """
    Migration: rebuilds the resource_relations index with the current
    mapping by round-tripping its documents through a temporary index
    (old 'all' doc type -> new '_doc').
    """
    se = SearchEngineFactory().create()
    prefix = settings.ELASTICSEARCH_PREFIX
    source_index = "%s_resource_relations" % prefix
    temp_index = "%s_resource_relations_temp" % prefix

    if not se.es.indices.exists(index=source_index):
        return

    # stage 1: copy existing docs into a temp index with the current mapping
    index_settings = prepare_resource_relations_index(create=False)
    se.create_index(index='resource_relations_temp', body=index_settings)
    se.es.reindex(
        body={
            "source": {"index": source_index, "type": "all"},
            "dest": {"index": temp_index, "type": "_doc"},
        },
        refresh=True,
        wait_for_completion=True,
    )

    # stage 2: recreate the real index and copy the docs back
    se.delete_index(index='resource_relations')
    prepare_resource_relations_index(create=True)
    se.es.reindex(
        body={
            "source": {"index": temp_index, "type": "_doc"},
            "dest": {"index": source_index, "type": "_doc"},
        },
        refresh=True,
        wait_for_completion=True,
    )
def test_bulk_delete(self):
    """ Test bulk deleting of documents in Elasticsearch """
    se = SearchEngineFactory().create()

    # seed 10 prefLabel docs and 10 altLabel docs
    for i in range(10):
        pref_doc = {
            'id': i,
            'type': 'prefLabel',
            'value': 'test pref label',
        }
        se.index_data(index='test', doc_type='test', body=pref_doc, idfield='id', refresh=True)
        alt_doc = {
            'id': i + 100,
            'type': 'altLabel',
            'value': 'test alt label',
        }
        se.index_data(index='test', doc_type='test', body=alt_doc, idfield='id', refresh=True)

    # bulk-delete every altLabel doc; only the 10 prefLabels should remain
    query = Query(se, start=0, limit=100)
    query.add_query(Match(field='type', query='altLabel'))
    query.delete(index='test', refresh=True)

    self.assertEqual(se.es.count(index='test', doc_type='test')['count'], 10)
def delete(self, user=None, note=''):
    """
    Deletes a single resource and any related indexed data

    Keyword Arguments:
    user -- the user requesting the delete; accepted for interface
            compatibility but not read in this implementation.
            BUG FIX: the default was a shared mutable ``{}`` (classic
            mutable-default pitfall); ``None`` is behavior-identical here
            because the value is never used.
    note -- edit-log note; accepted for interface compatibility, unused here
    """
    se = SearchEngineFactory().create()

    # remove every relationship record pointing at this resource
    related_resources = self.get_related_resources(lang="en-US", start=0, limit=1000)
    for rr in related_resources['resource_relationships']:
        models.ResourceXResource.objects.get(pk=rr['resourcexid']).delete()

    # remove this resource's term documents from the strings index
    query = Query(se)
    bool_query = Bool()
    bool_query.filter(Terms(field='resourceinstanceid', terms=[self.resourceinstanceid]))
    query.add_query(bool_query)
    results = query.search(index='strings', doc_type='term')['hits']['hits']
    for result in results:
        se.delete(index='strings', doc_type='term', id=result['_id'])

    # remove the resource document itself, log the edit, then delete the row
    se.delete(index='resource', doc_type=str(self.graph_id), id=self.resourceinstanceid)
    self.save_edit(edit_type='delete')
    super(Resource, self).delete()
def search_results(request):
    """
    Executes a resource search assembled from the request's querystring
    filters and returns the Elasticsearch results plus any extras the
    filters attach. (Python 2 code: note dict.items() concatenation and
    err.message below.)
    """
    se = SearchEngineFactory().create()
    search_results_object = {
        'query': Query(se)
    }

    include_provisional = get_provisional_type(request)
    permitted_nodegroups = get_permitted_nodegroups(request.user)
    search_filter_factory = SearchFilterFactory(request)
    try:
        # the synthetic ('search-results', '') entry guarantees the core
        # search-results filter always contributes its DSL
        for filter_type, querystring in request.GET.items() + [('search-results', '')]:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.append_dsl(search_results_object, permitted_nodegroups, include_provisional)
    except Exception as err:
        return JSONResponse(err.message, status=500)

    # limit the _source fields returned for each hit
    dsl = search_results_object.pop('query', None)
    dsl.include('graph_id')
    dsl.include('root_ontology_class')
    dsl.include('resourceinstanceid')
    dsl.include('points')
    dsl.include('geometries')
    dsl.include('displayname')
    dsl.include('displaydescription')
    dsl.include('map_popup')
    dsl.include('provisional_resource')
    if request.GET.get('tiles', None) is not None:
        dsl.include('tiles')

    results = dsl.search(index='resources')

    if results is not None:
        # allow filters to modify the results
        for filter_type, querystring in request.GET.items() + [('search-results', '')]:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.post_search_hook(search_results_object, results, permitted_nodegroups)
        ret = {}
        ret['results'] = results

        # pass through anything the filters left in search_results_object
        for key, value in search_results_object.items():
            ret[key] = value

        ret['reviewer'] = request.user.groups.filter(name='Resource Reviewer').exists()
        ret['timestamp'] = datetime.now()

        return JSONResponse(ret)
    else:
        return HttpResponseNotFound(_("There was an error retrieving the search results"))
def prepare_resource_relations_index(create=False):
    """
    Builds the Elasticsearch settings/mappings for the resource-to-resource
    relations index (keyword/text field types) and, when requested, creates
    the index and reindexes the relationship-types concept scheme.

    Keyword Arguments:
    create -- when True, create the 'resource_relations' index (HTTP 400
              "already exists" responses are ignored) and index the
              relationship-types concept

    Returns the mapping definition as a dict.
    """
    # id/type fields are exact-match keywords; notes stay full-text
    properties = {
        name: {'type': 'keyword'}
        for name in ('resourcexid', 'relationshiptype',
                     'resourceinstanceidfrom', 'resourceinstanceidto')
    }
    properties['notes'] = {'type': 'text'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        se = SearchEngineFactory().create()
        se.create_index(index='resource_relations', body=index_settings, ignore=400)
        # (re)index the resource-relationship-types concept scheme
        concept = Concept('00000000-0000-0000-0000-000000000007')
        concept.index()

    return index_settings
def delete(self, request, resourceid=None): lang = request.GET.get('lang', settings.LANGUAGE_CODE) se = SearchEngineFactory().create() req = dict(request.GET) ids_to_delete = req['resourcexids[]'] root_resourceinstanceid = req['root_resourceinstanceid'] for resourcexid in ids_to_delete: try: ret = models.ResourceXResource.objects.get(pk=resourcexid).delete() except: print 'resource relation does not exist' start = request.GET.get('start', 0) se.es.indices.refresh(index=se._add_prefix("resource_relations")) resource = Resource.objects.get(pk=root_resourceinstanceid[0]) page = 1 if request.GET.get('page') == '' else int(request.GET.get('page', 1)) related_resources = resource.get_related_resources(lang=lang, start=start, limit=1000, page=page) ret = [] if related_resources is not None: ret = self.paginate_related_resources(related_resources, page, request) return JSONResponse(ret, indent=4)
def delete(self, request, resourceid=None):
    """
    Deletes the resource relationships listed in the 'resourcexids[]'
    querystring parameter, refreshes the relations index, and returns the
    paginated related resources remaining for the root resource instance.
    """
    lang = request.GET.get("lang", settings.LANGUAGE_CODE)
    se = SearchEngineFactory().create()
    params = dict(request.GET)
    relation_ids = params["resourcexids[]"]
    root_resourceinstanceid = params["root_resourceinstanceid"]

    for relation_id in relation_ids:
        try:
            models.ResourceXResource.objects.get(pk=relation_id).delete()
        except ObjectDoesNotExist:
            logger.exception(_("Unable to delete. Relationship does not exist"))

    start = request.GET.get("start", 0)
    # make the deletions visible before re-querying related resources
    se.es.indices.refresh(index=se._add_prefix("resource_relations"))

    resource = Resource.objects.get(pk=root_resourceinstanceid[0])
    raw_page = request.GET.get("page", 1)
    page = 1 if raw_page == "" else int(raw_page)
    related_resources = resource.get_related_resources(lang=lang, start=start, limit=1000, page=page)

    if related_resources is None:
        return JSONResponse([], indent=4)
    return JSONResponse(self.paginate_related_resources(related_resources, page, request), indent=4)
def get_related_resources(resourceid, lang='en-US', limit=1000, start=0):
    """
    Looks up resource relationships where the given resource is the
    'entityid1' side.

    Keyword Arguments:
    lang -- accepted for interface compatibility; not used in this lookup
    limit/start -- paging controls passed to the search query

    Returns a dict with 'resource_relationships' (the matching relation
    documents), an empty 'related_resources' list, and 'total'.
    """
    se = SearchEngineFactory().create()
    query = Query(se, limit=limit, start=start)
    query.add_filter(Terms(field='entityid1', terms=resourceid).dsl, operator='or')
    hits = query.search(index='resource_relations', doc_type='all')['hits']

    return {
        'resource_relationships': [relation['_source'] for relation in hits['hits']],
        'related_resources': [],
        'total': hits['total'],
    }
def delete(self, *args, **kwargs):
    """
    Deletes this tile (and its child tiles) and removes its term documents
    from the search index. If the acting user is not a reviewer and does not
    own the provisional data, the delete is recorded as a provisional edit
    instead of a hard delete.

    Keyword Arguments (popped from kwargs):
    request -- the current request; used to resolve the acting user
    provisional_edit_log_details -- extra detail stored with the edit-log entry
    """
    se = SearchEngineFactory().create()
    request = kwargs.pop('request', None)
    provisional_edit_log_details = kwargs.pop(
        'provisional_edit_log_details', None)

    # delete child tiles first so their index/doc cleanup runs
    for tile in self.tiles:
        tile.delete(*args, request=request, **kwargs)

    try:
        user = request.user
        user_is_reviewer = request.user.groups.filter(
            name='Resource Reviewer').exists()
    except AttributeError:  # no user (system-initiated delete)
        user = None
        # BUG FIX: user_is_reviewer was never assigned on this path, causing
        # an UnboundLocalError at the check below whenever no request/user
        # was supplied. Treat system deletes as reviewer deletes (matches
        # the later version of this method).
        user_is_reviewer = True

    if user_is_reviewer is True or self.user_owns_provisional(user):
        # remove this tile's term documents from the strings index
        query = Query(se)
        bool_query = Bool()
        bool_query.filter(Terms(field='tileid', terms=[self.tileid]))
        query.add_query(bool_query)
        results = query.search(index='strings', doc_type='term')['hits']['hits']
        for result in results:
            se.delete(index='strings', doc_type='term', id=result['_id'])

        self.__preDelete(request)
        # NOTE(review): uses request.user directly — raises AttributeError if
        # request is None on this path; preserved from the original behavior.
        self.save_edit(
            user=request.user,
            edit_type='tile delete',
            old_value=self.data,
            provisional_edit_log_details=provisional_edit_log_details)
        super(Tile, self).delete(*args, **kwargs)
        resource = Resource.objects.get(
            resourceinstanceid=self.resourceinstance.resourceinstanceid)
        resource.index()
    else:
        # non-reviewer without ownership: record a provisional delete instead
        self.apply_provisional_edit(user, data={}, action='delete')
        super(Tile, self).save(*args, **kwargs)
def search_results(request):
    """
    Executes a resource search assembled from the request's querystring
    filters and returns the Elasticsearch results, a total-results count,
    and any extras the filters attach.
    """
    se = SearchEngineFactory().create()
    search_results_object = {"query": Query(se)}

    include_provisional = get_provisional_type(request)
    permitted_nodegroups = get_permitted_nodegroups(request.user)
    search_filter_factory = SearchFilterFactory(request)

    # the synthetic ("search-results", "") entry guarantees the core
    # search-results filter always runs
    filter_items = list(request.GET.items()) + [("search-results", "")]
    try:
        for filter_type, querystring in filter_items:
            search_filter = search_filter_factory.get_filter(filter_type)
            if search_filter:
                search_filter.append_dsl(search_results_object, permitted_nodegroups, include_provisional)
    except Exception as err:
        return JSONResponse(err, status=500)

    # limit the _source fields returned for each hit
    dsl = search_results_object.pop("query", None)
    for field in (
        "graph_id",
        "root_ontology_class",
        "resourceinstanceid",
        "points",
        "geometries",
        "displayname",
        "displaydescription",
        "map_popup",
        "provisional_resource",
    ):
        dsl.include(field)
    if request.GET.get("tiles", None) is not None:
        dsl.include("tiles")

    results = dsl.search(index="resources")
    if results is None:
        return HttpResponseNotFound(_("There was an error retrieving the search results"))

    # allow filters to modify the results
    for filter_type, querystring in filter_items:
        search_filter = search_filter_factory.get_filter(filter_type)
        if search_filter:
            search_filter.post_search_hook(search_results_object, results, permitted_nodegroups)

    ret = {"results": results}
    # pass through anything the filters left in search_results_object
    for key, value in list(search_results_object.items()):
        ret[key] = value
    ret["reviewer"] = request.user.groups.filter(name="Resource Reviewer").exists()
    ret["timestamp"] = datetime.now()
    ret["total_results"] = dsl.count(index="resources")
    return JSONResponse(ret)
def index_resource_relations(clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE): """ Indexes all resource to resource relation records Keyword Arguments: clear_index -- set to True to remove all the resources from the index before the reindexing operation batch_size -- the number of records to index as a group, the larger the number to more memory required """ start = datetime.now() print "Indexing resource to resource relations" cursor = connection.cursor() se = SearchEngineFactory().create() if clear_index: q = Query(se=se) q.delete(index='resource_relations') with se.BulkIndexer(batch_size=batch_size, refresh=True) as resource_relations_indexer: sql = """ SELECT resourcexid, resourceinstanceidfrom, notes, relationshiptype, resourceinstanceidto FROM public.resource_x_resource; """ cursor.execute(sql) for resource_relation in cursor.fetchall(): doc = { 'resourcexid': resource_relation[0], 'resourceinstanceidfrom': resource_relation[1], 'notes': resource_relation[2], 'relationshiptype': resource_relation[3], 'resourceinstanceidto': resource_relation[4] } resource_relations_indexer.add(index='resource_relations', id=doc['resourcexid'], data=doc) index_count = se.count(index='resource_relations') print "Status: {0}, In Database: {1}, Indexed: {2}, Took: {3} seconds".format('Passed' if cursor.rowcount == index_count else 'Failed', cursor.rowcount, index_count, (datetime.now()-start).seconds)
def bulk_save(resources):
    """
    Saves and indexes a list of resources

    Arguments:
    resources -- a list of resource models
    """
    se = SearchEngineFactory().create()
    datatype_factory = DataTypeFactory()
    # nodeid -> datatype lookup passed to the document builder
    node_datatypes = {
        str(nodeid): datatype
        for nodeid, datatype in models.Node.objects.values_list(
            'nodeid', 'datatype')
    }
    tiles = []
    documents = []
    term_list = []

    # flatten out the nested tiles into a single array
    # NOTE: resource.tiles is extended while being iterated; the newly
    # appended child tiles are visited by the outer loop too, but their own
    # .tiles dicts get cleared, so the walk terminates.
    for resource in resources:
        for parent_tile in resource.tiles:
            for child_tile in parent_tile.tiles.itervalues():
                if len(child_tile) > 0:
                    resource.tiles.extend(child_tile)
            parent_tile.tiles = {}
        tiles.extend(resource.tiles)

    # need to save the models first before getting the documents for index
    Resource.objects.bulk_create(resources)
    TileModel.objects.bulk_create(tiles)

    for resource in resources:
        resource.save_edit(edit_type='create')
        # fetchTiles=False: the (already flattened) in-memory tiles are used
        document, terms = resource.get_documents_to_index(
            fetchTiles=False, datatype_factory=datatype_factory,
            node_datatypes=node_datatypes)
        document['root_ontology_class'] = resource.get_root_ontology()
        documents.append(
            se.create_bulk_item(index='resource',
                                doc_type=document['graph_id'],
                                id=document['resourceinstanceid'],
                                data=document))
        for term in terms:
            term_list.append(
                se.create_bulk_item(index='strings', doc_type='term',
                                    id=term['_id'], data=term['_source']))

    for tile in tiles:
        tile.save_edit(edit_type='tile create', new_value=tile.data)

    # bulk index the resources, tiles and terms
    se.bulk_index(documents)
    se.bulk_index(term_list)
def index(self, scheme=None):
    """
    Indexes a concept label into the 'concept_labels' index and, unless the
    label belongs to an entity-type scheme or is a dropdown/entity label,
    into the term index as well. Non-label values are ignored.

    Keyword Arguments:
    scheme -- the concept scheme to index under; derived from the label
              itself when not supplied

    Raises Exception when no scheme can be derived.
    """
    if self.category != 'label':
        return

    se = SearchEngineFactory().create()
    data = JSONSerializer().serializeToPython(self)
    if scheme is None:
        scheme = self.get_scheme_id()
    if scheme is None:
        raise Exception('Index of label failed. Index type (scheme id) could not be derived from the label.')

    se.create_mapping('concept_labels', scheme.id, fieldname='conceptid',
                      fieldtype='string', fieldindex='not_analyzed')
    se.index_data('concept_labels', scheme.id, data, 'id')

    # dropdown labels and entity labels are excluded from the term index
    is_entity_or_dropdown = archesmodels.ConceptRelations.objects.filter(
        Q(relationtype='hasCollection') | Q(relationtype='hasEntity'),
        conceptidto=scheme.id).count() > 0

    # don't create terms for entity type concepts
    entity_type_schemes = ('00000000-0000-0000-0000-000000000003',
                           '00000000-0000-0000-0000-000000000004')
    if scheme.id not in entity_type_schemes and not is_entity_or_dropdown:
        se.index_term(self.value, self.id, scheme.id, {'conceptid': self.conceptid})
def get_search_contexts(request):
    """
    Resolves each entry in settings.SEARCH_TERMS to its concept id and
    context by querying the term index, grouping results by context_key /
    text_key. The assembled mapping is cached for 24 hours under the
    'search_contexts' cache key. (Python 2 code: print statements and the
    deprecated ``<>`` operator below; the commented-out prints are leftover
    debug output, some in Slovene.)
    """
    search_context = {}
    # serve from cache when available
    search_context = cache.get('search_contexts')
    if search_context is not None:
        #print 'Search_context iz cacha!'  # (Slovene: "search_context from cache!")
        return search_context
    lang = request.GET.get('lang', settings.LANGUAGE_CODE)
    se1 = SearchEngineFactory().create()
    context_label1 = '-'  # sentinel so the first term always starts a new group
    search_context = {}
    for search_term in settings.SEARCH_TERMS:
        searchString1 = search_term['text']
        print searchString1
        # fuzzy prefix search over the term index for this search string
        query1 = Query(se1, start=0, limit=settings.SEARCH_DROPDOWN_LENGTH)
        boolquery1 = Bool()
        boolquery1.should(Match(field='term', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=searchString1.lower(), type='phrase_prefix', fuzziness='AUTO'))
        boolquery1.should(Match(field='term.folded', query=searchString1.lower(), fuzziness='AUTO'))
        query1.add_query(boolquery1)
        results1 = query1.search(index='term', doc_type='value')
        conceptid1 = ''
        context1 = ''
        # keep the (last) hit whose context label and term text match exactly
        for result1 in results1['hits']['hits']:
            prefLabel = get_preflabel_from_conceptid(result1['_source']['context'], lang)
            result1['_source']['options']['context_label'] = prefLabel['value']
            if (prefLabel['value'] == search_term['context_label'] and result1['_source']['term'] == search_term['text']):
                conceptid1 = result1['_source']['options']['conceptid']
                context1 = result1['_source']['context']
                #print search_term['context_label'] + ': ' + conceptid1
                #print searchString1
                #print result1
        result = {'conceptid': conceptid1, 'context': context1}
        # start a fresh group whenever the context label changes
        # (``<>`` is the Python 2 inequality operator)
        if context_label1 <> search_term['context_label']:
            value = {}
        print result
        value[search_term['text_key']] = result
        #print value
        search_context[search_term['context_key']] = value
        #print search_context
        #print 'Iscem [' + search_term['context_label'] + '][' + search_term['text'] + ']'  # (Slovene: "searching for ...")
        #print value
        context_label1 = search_term['context_label']
    #print search_context
    #print search_context['Historical_Period']['BRONZE_AGE']
    #print 'Shranjujem search_context v cache'  # (Slovene: "saving search_context to cache")
    # cache for 24 hours
    cache.set('search_contexts', search_context, 86400)
    return search_context