def setUp(self):
        """Start the container, deploy r2dm, and create the service clients.

        When the module-level ``use_es`` flag is truthy, this additionally
        reads the ElasticSearch host/port from CFG, overrides the shard and
        replica settings in CFG, creates an ElasticSearch client and spawns
        the index bootstrap process with ``op='clean_bootstrap'``.
        """
        super(DiscoveryIntTest, self).setUp()

        self._start_container()
        # The cleanup hook is registered before the deployment is started.
        self.addCleanup(DiscoveryIntTest.es_cleanup)
        self.container.start_rel_from_url('res/deploy/r2dm.yml')

        self.discovery = DiscoveryServiceClient()
        self.catalog = CatalogManagementServiceClient()
        self.ims = IndexManagementServiceClient()
        self.rr = ResourceRegistryServiceClient()

        if not use_es:
            # Nothing else to prepare without ElasticSearch.
            return

        self.es_host = CFG.get_safe('server.elasticsearch.host', 'localhost')
        self.es_port = CFG.get_safe('server.elasticsearch.port', '9200')

        # One shard and zero replicas for both the indexes and the rivers.
        CFG.server.elasticsearch.shards = 1
        CFG.server.elasticsearch.replicas = 0
        CFG.server.elasticsearch.river_shards = 1
        CFG.server.elasticsearch.river_replicas = 0

        self.es = ep.ElasticSearch(host=self.es_host,
                                   port=self.es_port,
                                   timeout=10,
                                   verbose=True)

        # The bootstrap process receives a copy of CFG with the operation set.
        bootstrap_cfg = DotDict(CFG)
        bootstrap_cfg.op = 'clean_bootstrap'
        self.container.spawn_process(
            'index_bootstrap',
            'ion.processes.bootstrap.index_bootstrap',
            'IndexBootStrap',
            bootstrap_cfg
        )
Exemple #2
0
def autocomplete(request):
  """Return JSON autocomplete suggestions for the ``q`` GET parameter.

  Runs a wildcard query (``*q*``) on ``labels_orig`` in the
  ``thesaurus``/``terms`` index; if that yields fewer than two URIs, a second
  wildcard query on ``alt_labels_orig`` is added. Each distinct URI becomes a
  ``{'url': ..., 'value': ...}`` entry, where ``value`` is the preferred label
  in the active language.

  When ``q`` is missing or empty an empty JSON list is returned (previously
  this path raised ``NameError`` because ``results`` was never bound).
  """
  preferred_language = translation.get_language()
  results = []
  q = request.GET.get('q')
  if q:
    search = ep.ElasticSearch()
    search.size(10)
    pattern = "*" + q + "*"

    labels_q = ep.ElasticQuery().wildcard('labels_orig', pattern)
    labels_results = search.search_advanced('thesaurus', 'terms', labels_q)
    matching_uris = [res["_source"]["uri"] for res in labels_results["hits"]["hits"]]

    # Only fall back to alternative labels when the main labels gave < 2 hits.
    if len(matching_uris) < 2:
      alt_labels_q = ep.ElasticQuery().wildcard('alt_labels_orig', pattern)
      alt_labels_results = search.search_advanced('thesaurus', 'terms', alt_labels_q)
      matching_uris.extend(res["_source"]["uri"] for res in alt_labels_results["hits"]["hits"])

    # De-duplicate URIs before resolving their preferred labels.
    for u in set(matching_uris):
      results.append({'url': u, 'value': get_preferred_label(URIRef(u), preferred_language)})

  return HttpResponse(json.dumps(results), content_type='application/json')
Exemple #3
0
    def on_start(self):
        '''
        Read the ElasticSearch settings from CFG, create the client and run
        the operation named by the ``op`` config entry.

        Raises BadRequest when ``system.elasticsearch`` is not enabled or when
        ``op`` is neither 'index_bootstrap' nor 'clean_bootstrap'.
        '''
        if not self.CFG.get_safe('system.elasticsearch', False):
            message = 'Can not initialize indexes without ElasticSearch enabled.  Please enable system.elasticsearch.'
            log.error(message)
            raise BadRequest(message)

        self.sysname = get_sys_name().lower()

        cfg_value = self.CFG.get_safe

        # Connection settings
        self.es_host = cfg_value('server.elasticsearch.host', 'localhost')
        self.es_port = cfg_value('server.elasticsearch.port', '9200')

        # Shard / replica counts for the indexes and for the rivers
        self.index_shards = cfg_value('server.elasticsearch.shards', 5)
        self.index_replicas = cfg_value('server.elasticsearch.replicas', 1)
        self.river_shards = cfg_value('server.elasticsearch.river_shards', 5)
        self.river_replicas = cfg_value('server.elasticsearch.river_replicas', 1)

        self.es = ep.ElasticSearch(
            host=self.es_host,
            port=self.es_port,
            timeout=10
        )

        op = self.CFG.get('op', None)
        if op not in ('index_bootstrap', 'clean_bootstrap'):
            raise BadRequest('Operation Unknown')

        if op == 'index_bootstrap':
            self.index_bootstrap()
        else:
            self.clean_bootstrap()
Exemple #4
0
 def setUp(self):
     """Start the container, deploy r2dm, connect to ElasticSearch and wipe.

     Asserts that ``CFG.system.force_clean`` is truthy before calling
     ``self.wipe()``.
     """
     self._start_container()
     self.container.start_rel_from_url('res/deploy/r2dm.yml')

     es_cfg = CFG.server.elasticsearch
     self.es = ep.ElasticSearch(host=es_cfg.host, port=es_cfg.port)

     self.assertTrue(CFG.system.force_clean)
     self.wipe()
Exemple #5
0
    def query_term(self, source_id='', field='', value='', fuzzy=False, match=False, order=None, limit=0, offset=0, id_only=False):
        '''
        Elasticsearch term query against an index (or a view/catalog/collection).
        > discovery.query_term('indexID', 'name', '*', order={'name':'asc'}, limit=20, id_only=False)

        @param source_id  Resource id of the index (or composite source) to query
        @param field      Field to search; '*' is translated to '_all'
        @param value      Value to search for; a '*' in it selects a wildcard query
        @param fuzzy      Use a fuzzy_like_this query
        @param match      Use a match_phrase_prefix query (checked after fuzzy)
        @param order      Optional dict passed as keyword arguments to es.sort
        @param limit      Optional result size
        @param offset     Optional result offset
        @param id_only    Passed through to _results_from_response
        @throws BadRequest if ElasticSearch is not enabled
        '''
        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable system.elasticsearch to make queries.')

        validate_true(source_id, 'Unspecified source_id')
        validate_true(field, 'Unspecified field')
        validate_true(value, 'Unspecified value')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_term on all the results in the indexes.
        # fuzzy and match are forwarded as well, so the recursive calls build the same kind of query as a direct call would.
        # NOTE(review): assumes _multi passes its keyword arguments on to the callback - confirm against _multi.
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(self.query_term, source, field=field, value=value, fuzzy=fuzzy, match=match, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        if limit:
            es.size(limit)

        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        # Pick the query type: fuzzy > match (phrase prefix) > wildcard > plain field query.
        if fuzzy:
            query = ep.ElasticQuery.fuzzy_like_this(value, fields=[field])
        elif match:
            match_query = ep.ElasticQuery.match(field=field, query=value)
            # Reuse the body of the generated match query under match_phrase_prefix.
            query = {"match_phrase_prefix": match_query['match']}
        elif '*' in value:
            query = ep.ElasticQuery.wildcard(field=field, value=value)
        else:
            query = ep.ElasticQuery.field(field=field, query=value)

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
Exemple #6
0
    def es_cleanup():
        '''
        For every standard index plus the system's resources and events
        indexes, call river_couchdb_delete and index_delete on ElasticSearch.
        '''
        host = CFG.get_safe('server.elasticsearch.host', 'localhost')
        port = CFG.get_safe('server.elasticsearch.port', '9200')
        client = ep.ElasticSearch(host=host, port=port, timeout=10)

        sysname = get_sys_name().lower()
        targets = list(STD_INDEXES.keys())
        targets.extend([
            '%s_resources_index' % sysname,
            '%s_events_index' % sysname,
        ])

        for name in targets:
            # The river is deleted first, then the index of the same name.
            IndexManagementService._es_call(client.river_couchdb_delete, name)
            IndexManagementService._es_call(client.index_delete, name)
Exemple #7
0
    def query_time(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
        '''
        Range query on a time field of an index (or a view/catalog/collection).

        from_value / to_value are date strings; they are parsed with
        dateutil and converted to epoch milliseconds before querying.

        @param source_id  Resource id of the index (or composite source) to query
        @param field      Field to search; '*' is translated to '_all'
        @param from_value Optional lower bound as a string
        @param to_value   Optional upper bound as a string
        @param order      Optional dict passed as keyword arguments to es.sort
        @param limit      Optional result size
        @param offset     Optional result offset
        @param id_only    Passed through to _results_from_response
        @throws BadRequest if ElasticSearch is not enabled
        '''
        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

        if from_value is not None:
            validate_is_instance(from_value, basestring, '"From" is not a valid string (%s)' % from_value)

        if to_value is not None:
            validate_is_instance(to_value, basestring, '"To" is not a valid string')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_time on all the results in the indexes
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(self.query_time, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        if limit:
            es.size(limit)

        # The offset was previously accepted but never applied to the request;
        # apply it the same way query_term does.
        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        # Convert the date strings to epoch milliseconds.
        if from_value is not None:
            from_value = calendar.timegm(dateutil.parser.parse(from_value).timetuple()) * 1000

        if to_value is not None:
            to_value = calendar.timegm(dateutil.parser.parse(to_value).timetuple()) * 1000

        query = ep.ElasticQuery.range(
            field      = field,
            from_value = from_value,
            to_value   = to_value
        )

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
Exemple #8
0
    def query_geo_distance(self, source_id='', field='', origin=None, distance='', units='mi',order=None, limit=0, offset=0, id_only=False):
        '''
        Find documents within a distance of an origin point, sorted by distance.

        @param source_id  Resource id of the index (or composite source) to query
        @param field      Geo field to filter/sort on; '*' is translated to '_all'
        @param origin     Two-element list or tuple
        @param distance   Distance value; combined with units as '<distance><units>'
        @param units      Distance units suffix (default 'mi')
        @param order      Optional single-entry dict {field: direction} applied before the distance sort
        @param limit      Optional result size
        @param offset     Optional result offset
        @param id_only    Passed through to _results_from_response
        @throws BadRequest if ElasticSearch is not enabled
        '''
        validate_true(isinstance(origin,(tuple,list)) , 'Origin is not a list or tuple.')
        validate_true(len(origin)==2, 'Origin is not of the right size: (2)')

        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
        source = self.clients.resource_registry.read(source_id)

        # Forward every query option to the recursive calls; units, order, limit,
        # offset and id_only were previously dropped for composite sources.
        # NOTE(review): assumes _multi passes its keyword arguments on to the callback - confirm against _multi.
        iterate = self._multi(self.query_geo_distance, source=source, field=field, origin=origin, distance=distance, units=units, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index,ElasticSearchIndex, '%s does not refer to a valid index.' % index)

        sorts = ep.ElasticSort()
        # An empty dict carries no sort field, so it is skipped instead of raising IndexError.
        if isinstance(order, dict) and order:
            sort_field = list(order)[0]
            sorts.sort(sort_field, order[sort_field])
            es.sorted(sorts)

        if limit:
            es.size(limit)

        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        # The distance sort is added after any caller-supplied sort.
        sorts.geo_distance(field, origin, units)
        es.sorted(sorts)

        geo_filter = ep.ElasticFilter.geo_distance(field, origin, '%s%s' % (distance, units))
        es.filtered(geo_filter)

        query = ep.ElasticQuery.match_all()

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
Exemple #9
0
    def query_geo_bbox(self, source_id='', field='', top_left=None, bottom_right=None, order=None, limit=0, offset=0, id_only=False):
        '''
        Find documents whose geo field lies inside a bounding box.

        top_left and bottom_right must each be a two-element list or tuple.
        Composite sources are handled through _multi; otherwise a match_all
        query filtered by the bounding box is run against the index.
        Raises BadRequest when ElasticSearch is not enabled.
        '''
        validate_true(isinstance(top_left, (list,tuple)), 'Top Left is not a list or a tuple')
        validate_true(len(top_left)==2, 'Top Left is not of the right size: (2)')
        validate_true(isinstance(bottom_right, (list,tuple)), 'Bottom Right is not a list or a tuple')
        validate_true(len(bottom_right)==2, 'Bottom Right is not of the right size: (2)')

        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
        source = self.clients.resource_registry.read(source_id)

        multi_result = self._multi(
            self.query_geo_bbox,
            source=source,
            field=field,
            top_left=top_left,
            bottom_right=bottom_right,
            order=order,
            limit=limit,
            offset=offset,
            id_only=id_only
        )
        if multi_result is not None:
            return multi_result

        es_index = source
        validate_is_instance(es_index, ElasticSearchIndex, '%s does not refer to a valid index.' % es_index)

        sort_spec = ep.ElasticSort()
        if isinstance(order, dict):
            # Only the first key of the order dict is used.
            sort_field = order.keys()[0]
            direction = order[sort_field]
            sort_spec.sort(sort_field, direction)
            es.sorted(sort_spec)

        if limit:
            es.size(limit)
        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        bbox_filter = ep.ElasticFilter.geo_bounding_box(field, top_left, bottom_right)
        es.filtered(bbox_filter)

        everything = ep.ElasticQuery.match_all()
        es_response = IndexManagementService._es_call(es.search_index_advanced, es_index.index_name, everything)
        IndexManagementService._check_response(es_response)

        return self._results_from_response(es_response, id_only)
    def query_range(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
        '''
        Numeric range query against an index (or a view/catalog/collection).

        @param source_id  Resource id of the index (or composite source) to query
        @param field      Field to search; '*' is translated to '_all'
        @param from_value Lower bound (int or float, required)
        @param to_value   Upper bound (int or float, required)
        @param order      Optional dict passed as keyword arguments to es.sort
        @param limit      Optional result size
        @param offset     Optional result offset
        @param id_only    Passed through to _results_from_response
        @throws BadRequest if ElasticSearch is not enabled
        '''
        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

        validate_true(from_value is not None, 'from_value not specified')
        validate_true(isinstance(from_value, (int, float)), 'from_value is not a valid number')
        validate_true(to_value is not None, 'to_value not specified')
        validate_true(isinstance(to_value, (int, float)), 'to_value is not a valid number')
        validate_true(source_id, 'source_id not specified')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_range on all the results in the indexes
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(self.query_range, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        if limit:
            es.size(limit)

        # The offset was previously accepted but never applied to the request;
        # apply it the same way query_term does.
        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        query = ep.ElasticQuery().range(
            field      = field,
            from_value = from_value,
            to_value   = to_value
        )

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
Exemple #11
0
def search(request):
    """Render ``search.html`` with the student records matching POST ``text``.

    A query_string query is run on the ``student``/``info`` index. When it has
    at least one hit, the records are fetched with ``return_records`` and their
    ``_source`` documents are passed to the template as ``posts`` together
    with ``count``; otherwise the template receives an empty context.
    """
    es_query = ep.ElasticQuery().query_string(query=request.POST['text'])
    client = ep.ElasticSearch()

    context = {}
    probe = client.search_advanced('student', 'info', es_query)
    if len(probe['hits']['hits']) > 0:
        records = client.return_records('student', 'info', es_query)
        num = len(records)
        stu_list = [record['_source'] for record in records]
        # The context is only populated when at least one record came back.
        if stu_list:
            context = {'posts': stu_list, 'count': num}

    return render(request, 'search.html', context)
Exemple #12
0
def search(request):
  """Search the thesaurus for the ``q`` GET parameter and render a result page.

  Three match queries are run against the ``thesaurus``/``terms`` index, on
  ``labels``, ``alt_labels`` and ``_all`` in that order. Hits from the second
  and third query are only added if an identical entry is not already in the
  result list. Results are paginated 20 per page.

  Fixes over the previous version:
  * a request without ``q`` now renders an empty result page instead of
    returning ``None`` from the view;
  * ``PageNotAnInteger`` is caught around ``Paginator.page`` (the call that is
    expected to raise it) rather than around ``request.GET.get``, which never
    raises it. NOTE(review): assumes the project's Paginator raises
    PageNotAnInteger from ``page()`` - confirm for the pagination library in use.
  """
  preferred_language = translation.get_language()
  results = []
  q = request.GET.get('q')

  if q:
    es = ep.ElasticSearch()
    es.size(8000)

    def to_entry(hit):
      # Build the template entry for one ElasticSearch hit.
      uri = hit["_source"]["uri"]
      return {'pref_label': get_preferred_label(URIRef(uri), preferred_language), 'uri': uri}

    labels_q = ep.ElasticQuery().match('labels', q)
    labels_results = es.search_advanced('thesaurus', 'terms', labels_q)
    results.extend(to_entry(hit) for hit in labels_results["hits"]["hits"])

    # Alternative labels first, then the catch-all field; skip entries already listed.
    for field in ('alt_labels', '_all'):
      field_q = ep.ElasticQuery().match(field, q)
      field_results = es.search_advanced('thesaurus', 'terms', field_q)
      for hit in field_results["hits"]["hits"]:
        entry = to_entry(hit)
        if entry not in results:
          results.append(entry)

  p = Paginator(results, 20, request=request)
  try:
    paginated_results = p.page(request.GET.get('page', 1))
  except PageNotAnInteger:
    # Fall back to the first page on a non-numeric page parameter.
    paginated_results = p.page(1)

  return render(request, 'thesaurus/search.html', {'results': paginated_results })
Exemple #13
0
 def test_query(self):
     """A query_string search for 'anm' on student/info returns a truthy result."""
     es_query = ep.ElasticQuery().query_string(query='anm')
     client = ep.ElasticSearch()
     outcome = client.search_advanced('student', 'info', es_query)
     self.assertTrue(outcome)
Exemple #14
0
    def query_vertical_bounds(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
        '''
        Query documents by their geospatial_vertical_min / geospatial_vertical_max fields.

        The filter requires (min >= from_value OR max >= from_value) AND
        (min <= to_value OR max <= to_value).

        @param source_id  Resource id of the index (or composite source) to query
        @param field      Prefix of the vertical fields; '*' means no prefix
        @param from_value Optional lower bound (float)
        @param to_value   Optional upper bound (float)
        @param order      Optional dict passed as keyword arguments to es.sort
        @param limit      Optional result size, sent as 'size'
        @param offset     Optional result offset, sent as 'from'
        @param id_only    Passed through to _results_from_response
        '''
        if from_value is not None:
            validate_is_instance(from_value, float, '"From" is not a valid float (%s)' % from_value)

        if to_value is not None:
            validate_is_instance(to_value, float, '"To" is not a valid float')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_vertical_bounds
        # on all the results in the indexes. (This used to recurse into query_time, which expects string
        # bounds and builds a different query.)
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(self.query_vertical_bounds, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        # '*' addresses the top-level vertical fields; anything else is used as a prefix.
        if field == '*':
            vertical_min = 'geospatial_vertical_min'
            vertical_max = 'geospatial_vertical_max'
        else:
            vertical_min = '%s.geospatial_vertical_min' % field
            vertical_max = '%s.geospatial_vertical_max' % field

        def bound(field_name, operator, limit_value):
            # One range clause, e.g. {"range": {"x": {"gte": 1.0}}}
            return {"range": {field_name: {operator: limit_value}}}

        query = {
            "query": {"match_all": {}},
            "filter": {
                "and": [
                    {"or": [bound(vertical_min, "gte", from_value),
                            bound(vertical_max, "gte", from_value)]},
                    {"or": [bound(vertical_min, "lte", to_value),
                            bound(vertical_max, "lte", to_value)]},
                ]
            }
        }
        if limit:
            query['size'] = limit
        if offset:
            query['from'] = offset

        response = IndexManagementService._es_call(es.raw_query, '%s/_search' % index.index_name, method='POST', data=query, host=self.elasticsearch_host, port=self.elasticsearch_port)
        IndexManagementService._check_response(response)
        return self._results_from_response(response, id_only)
Exemple #15
0
    def query_time_bounds(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
        '''
        Query documents by their start_datetime / end_datetime fields.

        from_value / to_value are date strings; they are parsed with dateutil
        and converted to epoch milliseconds. The filter requires
        (start >= from_value OR end >= from_value) AND
        (start <= to_value OR end <= to_value).

        @param source_id  Resource id of the index (or composite source) to query
        @param field      Prefix of the datetime fields; '*' means no prefix
        @param from_value Optional lower bound as a string
        @param to_value   Optional upper bound as a string
        @param order      Optional dict passed as keyword arguments to es.sort
        @param limit      Optional result size, sent as 'size'
        @param offset     Optional result offset, sent as 'from'
        @param id_only    Passed through to _results_from_response
        '''
        if from_value is not None:
            validate_is_instance(from_value, basestring, '"From" is not a valid string (%s)' % from_value)

        if to_value is not None:
            validate_is_instance(to_value, basestring, '"To" is not a valid string')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_time_bounds
        # on all the results in the indexes. (This used to recurse into query_time, which builds a plain
        # range query on the field instead of the start/end bounds filter below.)
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(self.query_time_bounds, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        # '*' addresses the top-level datetime fields; anything else is used as a prefix.
        if field == '*':
            start_time = 'start_datetime'
            end_time = 'end_datetime'
        else:
            start_time = '%s.start_datetime' % field
            end_time = '%s.end_datetime' % field

        # Convert the date strings to epoch milliseconds.
        if from_value is not None:
            from_value = calendar.timegm(dateutil.parser.parse(from_value).timetuple()) * 1000

        if to_value is not None:
            to_value = calendar.timegm(dateutil.parser.parse(to_value).timetuple()) * 1000

        def bound(field_name, operator, limit_value):
            # One range clause, e.g. {"range": {"x": {"gte": 1000}}}
            return {"range": {field_name: {operator: limit_value}}}

        query = {
            "query": {"match_all": {}},
            "filter": {
                "and": [
                    {"or": [bound(start_time, "gte", from_value),
                            bound(end_time, "gte", from_value)]},
                    {"or": [bound(start_time, "lte", to_value),
                            bound(end_time, "lte", to_value)]},
                ]
            }
        }
        if limit:
            query['size'] = limit
        if offset:
            query['from'] = offset

        response = IndexManagementService._es_call(es.raw_query, '%s/_search' % index.index_name, method='POST', data=query, host=self.elasticsearch_host, port=self.elasticsearch_port)
        IndexManagementService._check_response(response)
        return self._results_from_response(response, id_only)