def create_dataset(self, name='', datastore_name='', view_name='', stream_id='', parameter_dict=None, spatial_domain=None, temporal_domain=None, parameter_dictionary_id='', description=''):
        """
        Register a new Dataset resource and create and save its coverage.

        @param name  Name stored on the dataset resource
        @param datastore_name  Datastore name; self.DEFAULT_DATASTORE is used when empty
        @param view_name  View name; self.DEFAULT_VIEW is used when empty
        @param stream_id  Optional stream; stored as primary_view_key and attached via add_stream
        @param parameter_dict  Parameter dictionary; replaced when parameter_dictionary_id is given
        @param spatial_domain  Required, must not be None
        @param temporal_domain  Required, must not be None
        @param parameter_dictionary_id  Optional id of a parameter dictionary resource from which
                                        parameter_dict is rebuilt
        @param description  Dataset description; also used as the coverage description when non-empty
        @retval dataset_id  Id returned by the resource registry
        """
        validate_true(parameter_dict or parameter_dictionary_id, 'A parameter dictionary must be supplied to register a new dataset.')
        validate_is_not_none(spatial_domain, 'A spatial domain must be supplied to register a new dataset.')
        validate_is_not_none(temporal_domain, 'A temporal domain must be supplied to register a new dataset.')

        if parameter_dictionary_id:
            # A registered dictionary takes precedence over any parameter_dict passed in.
            pd = self.read_parameter_dictionary(parameter_dictionary_id)
            pcs = self.read_parameter_contexts(parameter_dictionary_id, id_only=False)
            parameter_dict = self._merge_contexts([ParameterContext.load(i.parameter_context) for i in pcs], pd.temporal_context)
            parameter_dict = parameter_dict.dump()

        dataset                      = Dataset()
        dataset.description          = description
        dataset.name                 = name
        dataset.primary_view_key     = stream_id or None
        dataset.datastore_name       = datastore_name or self.DEFAULT_DATASTORE
        dataset.view_name            = view_name or self.DEFAULT_VIEW
        dataset.parameter_dictionary = parameter_dict
        dataset.temporal_domain      = temporal_domain
        dataset.spatial_domain       = spatial_domain
        dataset.registered           = False

        dataset_id, _ = self.clients.resource_registry.create(dataset)
        if stream_id:
            self.add_stream(dataset_id, stream_id)

        log.debug('creating dataset: %s', dataset_id)

        cov = self._create_coverage(dataset_id, description or dataset_id, parameter_dict, spatial_domain, temporal_domain)
        try:
            self._save_coverage(cov)
        finally:
            # Always release the coverage, even when saving fails.
            cov.close()

        return dataset_id
Beispiel #2
0
    def persist_data_stream(self,
                            stream_id='',
                            ingestion_configuration_id='',
                            dataset_id='',
                            config=None):
        """
        Begin persisting a stream to a dataset through an ingestion configuration.

        @param stream_id  Stream to persist (must be a string)
        @param ingestion_configuration_id  Ingestion configuration to read
        @param dataset_id  Target dataset (required)
        @param config  Optional configuration passed on to setup_queues; defaults to {}
        @retval dataset_id  The dataset_id that was passed in
        @throws BadRequest  if the stream is already being persisted
        """
        if not config:
            config = {}

        # Argument checks
        bad_stream_msg = 'stream_id %s is not a valid string' % stream_id
        validate_is_instance(stream_id, basestring, bad_stream_msg)
        validate_true(dataset_id, 'Clients must specify the dataset to persist')
        log.info('Persisting stream %s to dataset %s.', stream_id, dataset_id)

        ingestion_config = self.read_ingestion_configuration(ingestion_configuration_id)
        already_persisted = self.is_persisted(stream_id)
        if already_persisted:
            raise BadRequest('This stream is already being persisted')

        # Only flag the stream as persisted when queue setup reports success.
        queues_ready = self.setup_queues(ingestion_config, stream_id, dataset_id, config)
        if queues_ready:
            self.clients.pubsub_management.persist_stream(stream_id)

        return dataset_id
    def define_replay(self, dataset_id='', query=None, delivery_format=None, stream_id=''):
        ''' Set up a replay of a dataset onto the given stream.
        A process is created from the replay process definition, the replay is built by
        replay_data_process, and the replay is associated with stream_id.
        query:
          start_time: 0    The beginning timestamp
          end_time:   N    The ending timestamp
          parameters: []   The list of parameters which match the coverages parameters
          tdoa: slice()    The slice for the desired indices to be replayed
        Returns a (replay id, process id) pair.
        '''
        if not dataset_id:
            raise BadRequest('(Data Retriever Service %s): No dataset provided.' % self.name)
        validate_true(stream_id, 'No stream_id provided')

        registry = self.clients.resource_registry
        definition_ids, _ = registry.find_resources(restype=RT.ProcessDefinition, name=self.REPLAY_PROCESS, id_only=True)
        if len(definition_ids) == 0:
            raise BadRequest('No replay process defined.')

        pid = self.clients.process_dispatcher.create_process(process_definition_id=definition_ids[0])

        # Build the replay and record which process will run it.
        replay = self.replay_data_process(dataset_id, query, delivery_format, stream_id)
        replay.process_id = pid

        registry.update(replay)
        registry.create_association(replay._id, PRED.hasStream, stream_id)
        return replay._id, pid
Beispiel #4
0
    def create_parameter_context(self,
                                 name='',
                                 parameter_context=None,
                                 description='',
                                 parameter_type='',
                                 value_encoding='',
                                 unit_of_measure='',
                                 parameter_function_ids=None):
        """
        Create a ParameterContext resource, or return the id of an existing one with the
        same name whose stored context compares equal (per _compare_pc).

        @param name  Resource name (required when a new resource is created)
        @param parameter_context  Context definition; must be a dict
        @param description  Resource description
        @param parameter_type  Stored on the resource
        @param value_encoding  Stored on the resource
        @param unit_of_measure  Stored on the resource
        @param parameter_function_ids  Optional ids of parameter functions to associate
        @retval pc_id  Id of the existing or newly created resource
        """
        if not parameter_function_ids:
            parameter_function_ids = []
        registry = self.clients.resource_registry

        # Reuse an equivalent, already-registered context when there is one.
        existing, _ = registry.find_resources(restype=RT.ParameterContext, name=name, id_only=False)
        for candidate in existing:
            if candidate.name != name:
                continue
            if self._compare_pc(candidate.parameter_context, parameter_context):
                return candidate._id

        validate_true(name, 'Name field may not be empty')
        validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')

        resource = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
        resource.parameter_type = parameter_type
        resource.value_encoding = value_encoding
        resource.unit_of_measure = unit_of_measure
        pc_id, _ = registry.create(resource)

        for function_id in parameter_function_ids:
            # Reading the function first; presumably this fails for unknown ids - TODO confirm.
            self.read_parameter_function(function_id)
            registry.create_association(subject=pc_id, predicate=PRED.hasParameterFunction, object=function_id)

        return pc_id
Beispiel #5
0
    def launch_worker(self, queue_name, config):
        """
        Create and schedule an ingestion worker process for the given queue.

        @param queue_name  Queue name placed in the worker's process configuration
        @param config  Base configuration (may be None); wrapped in a DotDict
        """
        cfg = DotDict(config or {})
        get_setting = self.CFG.get_safe
        cfg.process.queue_name = queue_name
        cfg.process.buffer_limit = get_setting('service.ingestion_management.buffer_limit', 10)
        cfg.process.time_limit = get_setting('service.ingestion_management.time_limit', 10)

        registry = self.clients.resource_registry
        definition_ids, _ = registry.find_resources(restype=RT.ProcessDefinition, name='ingestion_worker_process', id_only=True)
        validate_true(len(definition_ids), 'No process definition for ingestion workers could be found')
        definition_id = definition_ids[0]

        worker_pid = self.clients.process_dispatcher.create_process(process_definition_id=definition_id)

        # Link every ExchangeName resource with this queue name to the new worker.
        queue_ids, _ = registry.find_resources(restype=RT.ExchangeName, name=queue_name, id_only=True)
        for queue_id in queue_ids:
            registry.create_association(queue_id, PRED.hasIngestionWorker, worker_pid)

        schedule = ProcessSchedule()
        schedule.restart_mode = ProcessRestartMode.ABNORMAL
        schedule.queueing_mode = ProcessQueueingMode.ALWAYS

        self.clients.process_dispatcher.schedule_process(process_definition_id=definition_id,
                                                         schedule=schedule,
                                                         process_id=worker_pid,
                                                         configuration=cfg)
    def create_parameter_context(self, name='', parameter_context=None, description='', reference_urls=None, parameter_type='', internal_name='', value_encoding='', code_report=None, units='', fill_value='', display_name='', parameter_function_id='', parameter_function_map=None, standard_name='', ooi_short_name='', precision=''):
        """
        Create a ParameterContext resource, or return the id of an existing one with the
        same name whose stored context compares equal (per _compare_pc).

        Defaults applied here: reference_urls -> [], internal_name -> name,
        code_report -> {}, precision -> '5'. All other arguments are stored as given.

        @param name  Resource name (required when a new resource is created)
        @param parameter_context  Context definition; must be a dict
        @param parameter_function_id  Optional parameter function to associate with the new resource
        @retval pc_id  Id of the existing or newly created resource
        """
        registry = self.clients.resource_registry

        # Reuse an equivalent, already-registered context when there is one.
        matches, _ = registry.find_resources(restype=RT.ParameterContext, name=name, id_only=False)
        for existing in matches:
            if existing.name == name and self._compare_pc(existing.parameter_context, parameter_context):
                return existing._id

        validate_true(name, 'Name field may not be empty')
        validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')

        resource = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
        field_values = (
            ('reference_urls', reference_urls or []),
            ('parameter_type', parameter_type),
            ('internal_name', internal_name or name),
            ('value_encoding', value_encoding),
            ('code_report', code_report or {}),
            ('units', units),
            ('fill_value', fill_value),
            ('display_name', display_name),
            ('parameter_function_id', parameter_function_id),
            ('parameter_function_map', parameter_function_map),
            ('standard_name', standard_name),
            ('ooi_short_name', ooi_short_name),
            ('precision', precision or '5'),
        )
        for field, value in field_values:
            setattr(resource, field, value)

        pc_id, _ = registry.create(resource)
        if not parameter_function_id:
            return pc_id

        self.read_parameter_function(parameter_function_id)
        registry.create_association(subject=pc_id, predicate=PRED.hasParameterFunction, object=parameter_function_id)
        return pc_id
    def deactivate_subscription(self, subscription_id=''):
        """
        Unbind a subscription's exchange name from its topics, streams and exchange points,
        then mark the subscription as not activated.

        @param subscription_id  Id of an active subscription
        """
        validate_true(self.subscription_is_active(subscription_id), 'Subscription is not active.')

        subscription = self.read_subscription(subscription_id)
        registry = self.clients.resource_registry

        streams, _ = registry.find_subjects(object=subscription_id, subject_type=RT.Stream, predicate=PRED.hasSubscription, id_only=False)
        topic_ids, _ = registry.find_objects(subject=subscription_id, predicate=PRED.hasTopic, id_only=True)

        # Gather every topic reported by _child_topics for the subscribed topics.
        topic_topology = set()
        for topic_id in topic_ids:
            topic_topology.update(self._child_topics(topic_id))

        if topic_topology:
            for topic in registry.read_mult(object_ids=list(topic_topology)):
                log.info('Topic %s -X-> %s', topic.name, subscription.exchange_name)
                binding = '#.%s.#' % self._sanitize(topic.name)
                self._unbind(topic.exchange_point, subscription.exchange_name, binding)

        for stream in streams:
            log.info('%s -X-> %s', stream.name, subscription.exchange_name)
            route = stream.stream_route
            self._unbind(route.exchange_point, subscription.exchange_name, route.routing_key)

        for xp in subscription.exchange_points:
            log.info('Exchange %s -X-> %s', xp, subscription.exchange_name)
            self._unbind(xp, subscription.exchange_name, '*')

        subscription.activated = False
        registry.update(subscription)
    def retrieve(self, dataset_id='', query=None, delivery_format=None, module='', cls='', kwargs=None):
        """
        Retrieve data for a dataset by running a ReplayProcess in-line.

        @param dataset_id  Dataset to read (required)
        @param query  Optional dict; 'start_time', 'end_time' and 'parameters' are read from it
        @param delivery_format  Optional; not otherwise used in this method
        @param module  With cls, selects a transform applied via _transform_data
        @param cls  With module, selects a transform applied via _transform_data
        @param kwargs  Optional arguments handed to _transform_data
        @retval The retrieved data, transformed when both module and cls are given
        """
        query = {} if query is None else query
        delivery_format = {} if delivery_format is None else delivery_format

        validate_is_instance(query, dict, 'Query was improperly formatted.')
        validate_true(dataset_id, 'No dataset provided')

        replayer = ReplayProcess()
        replayer.dataset = self.clients.dataset_management.read_dataset(dataset_id)
        replayer.dataset_id = dataset_id
        # Missing query keys become None on the replay instance.
        for key in ('start_time', 'end_time', 'parameters'):
            setattr(replayer, key, query.get(key))
        replayer.container = self.container

        data = replayer.execute_retrieve()

        if not (module and cls):
            return data
        return self._transform_data(data, module, cls, kwargs or {})
Beispiel #9
0
    def get_datastore(self, ds_name, profile=DataStore.DS_PROFILE.BASIC, config=None):
        """
        Return the datastore instance for a logical name, creating and caching it on first use.
        @param ds_name  Logical name of datastore (scoped via DatastoreManager.get_scoped_name)
        @param profile  One of known constants determining the use of the store
        @param config  Override config (not referenced in this method body)
        @retval The cached or newly created datastore instance
        """
        validate_true(ds_name, 'ds_name must be provided')

        cache = self._datastores
        if ds_name in cache:
            log.debug("get_datastore(): Found instance of store '%s'" % ds_name)
            return cache[ds_name]

        db_name = DatastoreManager.get_scoped_name(ds_name)

        log.info("get_datastore(): Create instance of store '%s' as database=%s" % (ds_name, db_name))
        store = DatastoreManager.get_datastore_instance(ds_name, profile)

        if store.datastore_exists(db_name):
            # NOTE: This may be expensive if called more than once per container
            # If views exist and are dropped and recreated
            store._define_views(profile=profile, keepviews=True)
        else:
            store.create_datastore(db_name, create_indexes=True, profile=profile)

        # Standard bookkeeping fields on the instance
        store.local_name = ds_name
        store.ds_profile = profile

        cache[ds_name] = store
        return store
 def create_parameter_function(self, name='', parameter_function=None, description=''):
     """
     Register a ParameterFunction resource and return its id.

     @param name  Resource name (required)
     @param parameter_function  Function definition; must be a dict, passed through numpy_walk before storing
     @param description  Resource description
     @retval pf_id  Id returned by the resource registry
     """
     validate_true(name, 'Name field may not be empty')
     validate_is_instance(parameter_function, dict, 'parameter_function field is not dictable.')

     walked = self.numpy_walk(parameter_function)
     resource = ParameterFunctionResource(name=name, parameter_function=walked, description=description)
     new_id, _ = self.clients.resource_registry.create(resource)
     return new_id
    def persist_data_stream(self, stream_id='', ingestion_configuration_id='', dataset_id=''):
        """
        Flag a stream as persisted and subscribe an ingestion queue to it.

        @param stream_id  Stream to persist (must be a string)
        @param ingestion_configuration_id  Ingestion configuration supplying queues and exchange point
        @param dataset_id  Target dataset (required)
        @retval dataset_id  The dataset_id that was passed in
        @throws BadRequest  if the stream is already being persisted
        """
        validate_is_instance(stream_id, basestring, 'stream_id %s is not a valid string' % stream_id)
        validate_true(dataset_id, 'Clients must specify the dataset to persist')

        ingestion_config = self.read_ingestion_configuration(ingestion_configuration_id)
        if self.is_persisted(stream_id):
            raise BadRequest('This stream is already being persisted')

        pubsub = self.clients.pubsub_management
        stream = pubsub.read_stream(stream_id)
        stream.persisted = True
        pubsub.update_stream(stream)

        # Queue selection is delegated to _determine_queue.
        queue = self._determine_queue(stream_id, ingestion_config.queues)

        subscription_id = pubsub.create_subscription(query=StreamQuery(stream_ids=[stream_id]),
                                                     exchange_name=queue.name,
                                                     exchange_point=ingestion_config.exchange_point)
        pubsub.activate_subscription(subscription_id=subscription_id)

        self.clients.resource_registry.create_association(subject=ingestion_configuration_id,
                                                          predicate=PRED.hasSubscription,
                                                          object=subscription_id)
        self._existing_dataset(stream_id, dataset_id)
        return dataset_id
 def add_stream(self, dataset_id='', stream_id=''):
     """
     Associate a stream with a dataset (dataset hasStream stream).

     @param dataset_id  Subject of the association (required)
     @param stream_id  Object of the association (required)
     """
     log.info('Adding stream %s to dataset %s', stream_id, dataset_id)
     have_both = dataset_id and stream_id
     validate_true(have_both, 'Clients must provide both the dataset_id and stream_id')
     registry = self.clients.resource_registry
     registry.create_association(subject=dataset_id, predicate=PRED.hasStream, object=stream_id)
    def create_parameter_context(self, name='', parameter_context=None, description='', reference_urls=None, parameter_type='', internal_name='', value_encoding='', code_report='', units='', fill_value='', display_name='', parameter_function_id='', parameter_function_map='', standard_name='', ooi_short_name='', precision='', visible=True):
        """
        Create a ParameterContext resource and return its id.

        Defaults applied here: reference_urls -> [], internal_name -> name,
        code_report -> '', precision -> '5'. All other arguments are stored as given.

        @param name  Resource name (required)
        @param parameter_context  Context definition; must be a dict, passed through numpy_walk before storing
        @param parameter_function_id  Optional parameter function to associate with the new resource
        @retval pc_id  Id returned by the resource registry
        """
        validate_true(name, 'Name field may not be empty')
        validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')

        walked_context = self.numpy_walk(parameter_context)
        resource = ParameterContextResource(name=name, parameter_context=walked_context, description=description)

        resource.reference_urls = reference_urls or []
        resource.parameter_type = parameter_type
        resource.internal_name = internal_name or name
        resource.value_encoding = value_encoding
        resource.code_report = code_report or ''
        resource.units = units
        resource.fill_value = fill_value
        resource.display_name = display_name
        resource.parameter_function_id = parameter_function_id
        resource.parameter_function_map = parameter_function_map
        resource.standard_name = standard_name
        resource.ooi_short_name = ooi_short_name
        resource.precision = precision or '5'
        resource.visible = visible

        registry = self.clients.resource_registry
        pc_id, _ = registry.create(resource)

        if not parameter_function_id:
            return pc_id

        self.read_parameter_function(parameter_function_id)
        registry.create_association(subject=pc_id, predicate=PRED.hasParameterFunction, object=parameter_function_id)
        return pc_id
    def launch_worker(self, queue_name):
        """
        Create and schedule an ingestion worker process for the given queue.

        @param queue_name  Queue name placed in the worker's process configuration
        """
        worker_cfg = DotDict()
        read_cfg = self.CFG.get_safe
        worker_cfg.process.queue_name = queue_name
        worker_cfg.process.buffer_limit = read_cfg("service.ingestion_management.buffer_limit", 10)
        worker_cfg.process.time_limit = read_cfg("service.ingestion_management.time_limit", 10)

        registry = self.clients.resource_registry
        found_definitions, _ = registry.find_resources(
            restype=RT.ProcessDefinition, name="ingestion_worker_process", id_only=True
        )
        validate_true(len(found_definitions), "No process definition for ingestion workers could be found")
        definition_id = found_definitions[0]

        worker_pid = self.clients.process_dispatcher.create_process(process_definition_id=definition_id)

        # Link every ExchangeName resource with this queue name to the new worker.
        exchange_name_ids, _ = registry.find_resources(restype=RT.ExchangeName, name=queue_name, id_only=True)
        for exchange_name_id in exchange_name_ids:
            registry.create_association(exchange_name_id, PRED.hasIngestionWorker, worker_pid)

        schedule = ProcessSchedule()
        schedule.restart_mode = ProcessRestartMode.ABNORMAL

        self.clients.process_dispatcher.schedule_process(
            process_definition_id=definition_id,
            schedule=schedule,
            process_id=worker_pid,
            configuration=worker_cfg,
        )
 def create_parameter_dictionary(self,
                                 name='',
                                 parameter_context_ids=None,
                                 temporal_context='',
                                 description=''):
     """
     Create a ParameterDictionary resource linked to the given parameter contexts, or
     return the id of an existing dictionary with the same name and the same contexts.

     @param name  Dictionary name (required when a new resource is created)
     @param parameter_context_ids  Ids of the parameter contexts to link; None means no contexts
     @param temporal_context  Stored on the resource
     @param description  Resource description
     @retval pd_res_id  Id of the existing or newly created dictionary
     @throws Conflict  if a dictionary with this name exists with a different set of contexts
     """
     # Normalise before use: the default None must behave like an empty list in the
     # comparison below, and the caller's list must not be re-ordered in place.
     parameter_context_ids = parameter_context_ids or []

     res, _ = self.clients.resource_registry.find_resources(
         restype=RT.ParameterDictionary, name=name, id_only=True)
     if res:
         context_ids, _ = self.clients.resource_registry.find_objects(
             subject=res[0],
             predicate=PRED.hasParameterContext,
             id_only=True)
         # Order-insensitive comparison on sorted copies.
         if sorted(context_ids) == sorted(parameter_context_ids):
             return res[0]
         raise Conflict(
             'A parameter dictionary with name %s already exists and has a different definition'
             % name)

     validate_true(name, 'Name field may not be empty.')
     pd_res = ParameterDictionaryResource(name=name,
                                          temporal_context=temporal_context,
                                          description=description)
     pd_res_id, _ = self.clients.resource_registry.create(pd_res)
     for pc_id in parameter_context_ids:
         self._link_pcr_to_pdr(pc_id, pd_res_id)
     return pd_res_id
 def stream_def_from_data_product(self, data_product_id=''):
     """
     Return the id of a stream definition reachable from a data product.

     Looks up the product's streams (hasStream), takes the last one returned, then looks up
     that stream's definitions (hasStreamDefinition) and returns the last one returned.

     @param data_product_id  Id of the data product
     @retval stream_def_id  Id of the stream definition
     """
     stream_ids, _ = self.clients.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, object_type=RT.Stream, id_only=True)
     validate_true(stream_ids, 'No stream found for this data product: %s' % data_product_id)
     stream_id = stream_ids.pop()

     stream_def_ids, _ = self.clients.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=True)
     # Report the stream that has no definition, not the (empty) result list.
     validate_true(stream_def_ids, 'No stream definition found for this stream: %s' % stream_id)
     stream_def_id = stream_def_ids.pop()
     return stream_def_id
 def create_parameter_dictionary(self, name='', parameter_context_ids=None, temporal_context='', description=''):
     """
     Create a ParameterDictionary resource and link the given parameter contexts to it.

     @param name  Dictionary name (required)
     @param parameter_context_ids  Ids of the parameter contexts to link; None means no contexts
     @param temporal_context  Stored on the resource
     @param description  Resource description
     @retval pd_res_id  Id returned by the resource registry
     """
     validate_true(name, 'Name field may not be empty.')
     if not parameter_context_ids:
         parameter_context_ids = []

     dictionary = ParameterDictionaryResource(name=name, temporal_context=temporal_context, description=description)
     dictionary_id, _ = self.clients.resource_registry.create(dictionary)

     for context_id in parameter_context_ids:
         self._link_pcr_to_pdr(context_id, dictionary_id)
     return dictionary_id
Beispiel #18
0
 def query_collection(self,collection_id='', id_only=False):
     """
     List the resources of a collection.

     @param collection_id  Collection to list (required)
     @param id_only  When true, return the resource ids instead of reading each resource
     @retval Resource ids, or the result of mapping resource_registry.read over them
     """
     validate_true(collection_id, 'Unspecified collection id')
     ids = self.clients.index_management.list_collection_resources(collection_id, id_only=True)
     if not id_only:
         return map(self.clients.resource_registry.read, ids)
     return ids
 def find_indexes(self, index_name='', filters=None):
     """
     Return the id of the first listed index whose name contains index_name.

     @param index_name  Substring to look for in index names (required)
     @param filters  Not used by this method
     @retval index_id, or None when no index name contains index_name
     """
     validate_true(index_name, "No index name provided")
     for candidate_name, candidate_id in self.list_indexes().iteritems():
         if index_name in candidate_name:
             return candidate_id
     return None
 def find_indexes(self, index_name='', filters=None):
     """
     Look up an index id by (partial) name.

     @param index_name  Substring to look for in index names (required)
     @param filters  Not used by this method
     @retval Id of the first index from list_indexes() whose name contains index_name, else None
     """
     validate_true(index_name,"No index name provided")
     known = self.list_indexes()
     hits = (found_id for label, found_id in known.iteritems() if index_name in label)
     return next(hits, None)
 def query_collection(self,collection_id='', id_only=False):
     """
     Return the contents of a collection, as ids or as resources read from the registry.

     @param collection_id  Collection to list (required)
     @param id_only  When true, return only the resource ids
     @retval Resource ids, or the result of mapping resource_registry.read over them
     """
     validate_true(collection_id, 'Unspecified collection id')
     member_ids = self.clients.index_management.list_collection_resources(collection_id, id_only=True)
     return member_ids if id_only else map(self.clients.resource_registry.read, member_ids)
    def create_view(self, view_name='', description='', fields=None, order=None, filters=''):
        """Creates a view which has the specified search fields, the order in which the search fields are presented
        to a query and a term filter.

        The view is associated with an existing catalog whose fields are a subset of the
        requested fields (the one with the fewest indexes, if that count is below
        self.heuristic_cutoff); otherwise a new catalog named '<view_name>_catalog' is created.

        @param view_name    str   Name of the view
        @param description  str   Simple descriptive sentence
        @param fields       list  Search fields (must be a non-empty list)
        @param order        list  Fields determining the order of precedence in which results are
                                  presented; every entry must also be in fields
        @param filters      str   Simple term filter
        @retval view_id     str
        @throws BadRequest  if a resource named view_name already exists, or an ordering field
                            is not one of the search fields
        """
        res, _ = self.clients.resource_registry.find_resources(name=view_name, id_only=True)
        if len(res) > 0:
            raise BadRequest('The view resource with name: %s, already exists.' % view_name)

        #======================
        # Arg Validations
        #======================
        validate_is_instance(fields, list, 'Specified fields must be a list.')
        validate_true(len(fields) > 0, 'Specified fields must not be empty.')
        if order is not None:
            validate_is_instance(order, list, 'Specified order must be a list of fields')
            for field in order:
                if field not in fields:
                    raise BadRequest('The specified ordering field was not part of the search fields.')

        fields = set(fields) # Convert fields to a set for aggregation across the catalogs
        #======================================================================================================
        # Catalog matching: pick the candidate catalog with the fewest indexes
        #======================================================================================================

        catalog_id = None
        best_weight = None
        best_catalog = None
        catalogs, _ = self.clients.resource_registry.find_resources(restype=RT.Catalog, id_only=False)
        for catalog in catalogs:
            if set(catalog.catalog_fields).issubset(fields):
                index_num = len(self.clients.catalog_management.list_indexes(catalog._id))
                # Compare on the index count only, so equal counts never fall back to
                # comparing catalog objects; the first catalog seen wins a tie.
                if best_catalog is None or index_num < best_weight:
                    best_weight, best_catalog = index_num, catalog

        if best_catalog is not None and best_weight < self.heuristic_cutoff:
            catalog_id = best_catalog._id

        if catalog_id is None:
            catalog_id = self.clients.catalog_management.create_catalog('%s_catalog'% view_name, keywords=list(fields))

        view_res = View(name=view_name, description=description)
        view_res.order = order
        view_res.filters = filters
        view_id, _ = self.clients.resource_registry.create(view_res)
        self.clients.resource_registry.create_association(subject=view_id, predicate=PRED.hasCatalog, object=catalog_id)
        return view_id
Beispiel #23
0
    def create_view(self, view_name='', description='', fields=None, order=None, filters=''):
        """Creates a view which has the specified search fields, the order in which the search fields are presented
        to a query and a term filter.

        An existing catalog is reused when its fields are a subset of the requested fields and
        it has the fewest indexes among such catalogs, provided that count is below
        self.heuristic_cutoff. Otherwise a catalog named '<view_name>_catalog' is created.

        @param view_name    str   Name of the view
        @param description  str   Simple descriptive sentence
        @param fields       list  Search fields (must be a non-empty list)
        @param order        list  Fields determining the order of precedence in which results are
                                  presented; every entry must also be in fields
        @param filters      str   Simple term filter
        @retval view_id     str
        @throws BadRequest  if a resource named view_name already exists, or an ordering field
                            is not one of the search fields
        """
        registry = self.clients.resource_registry
        res, _ = registry.find_resources(name=view_name, id_only=True)
        if len(res) > 0:
            raise BadRequest('The view resource with name: %s, already exists.' % view_name)

        #======================
        # Arg Validations
        #======================
        validate_is_instance(fields, list, 'Specified fields must be a list.')
        validate_true(len(fields) > 0, 'Specified fields must not be empty.')
        if order is not None:
            validate_is_instance(order, list, 'Specified order must be a list of fields')
            for field in order:
                if field not in fields:
                    raise BadRequest('The specified ordering field was not part of the search fields.')

        fields = set(fields) # Convert fields to a set for aggregation across the catalogs
        #======================================================================================================
        # Catalog matching: weigh each candidate catalog by its number of indexes
        #======================================================================================================

        candidates = [] # (index count, catalog) pairs
        catalog_id = None
        catalogs, _ = registry.find_resources(restype=RT.Catalog, id_only=False)
        for catalog in catalogs:
            if set(catalog.catalog_fields).issubset(fields):
                index_num = len(self.clients.catalog_management.list_indexes(catalog._id))
                candidates.append((index_num, catalog))
        if candidates:
            # Select on the weight alone so equal weights never compare catalog objects;
            # min() keeps the first candidate on a tie.
            weight, catalog = min(candidates, key=lambda pair: pair[0])
            if weight < self.heuristic_cutoff:
                catalog_id = catalog._id

        if catalog_id is None:
            catalog_id = self.clients.catalog_management.create_catalog('%s_catalog'% view_name, keywords=list(fields))

        view_res = View(name=view_name, description=description)
        view_res.order = order
        view_res.filters = filters
        view_id, _ = registry.create(view_res)
        registry.create_association(subject=view_id, predicate=PRED.hasCatalog, object=catalog_id)
        return view_id
    def create_stream(self, name='', exchange_point='', topic_ids=None, credentials=None, stream_definition_id='', description='', stream_name='', stream_type=''):
        """
        Create a Stream resource, derive its routing key, and associate it with its exchange
        point, an optional stream definition and any topics on the same exchange point.

        @param name  Stream name; a unique identifier is generated when empty
        @param exchange_point  Exchange point name, or the 32-character hex id of an
                               ExchangePoint resource (required)
        @param topic_ids  Optional topics; those on a different exchange point are skipped with a warning
        @param credentials  Stored on the stream route (not validated here)
        @param stream_definition_id  Optional stream definition to associate
        @param description  Resource description
        @param stream_name  Stored on the resource
        @param stream_type  Stored on the resource
        @retval (stream_id, stream_route)
        @throws Conflict  if a Stream named name already exists
        @throws BadRequest  if no ExchangePoint resource is found after creating the exchange
                            point, or the routing key is longer than 255 characters
        """
        # Argument Validation
        if name and self.clients.resource_registry.find_resources(restype=RT.Stream, name=name, id_only=True)[0]:
            raise Conflict("The named stream '%s' already exists on XP '%s'" % (name, exchange_point))
        validate_true(exchange_point, 'An exchange point must be specified')

        exchange_point_id = None
        # Anchor at the end as well: re.match only anchors the start, so an exchange point
        # *name* that merely begins with 32 hex characters must not be treated as an id.
        if re.match(r'[0-9a-f]{32}\Z', exchange_point):  # It's a uuid
            xp_obj = self.clients.exchange_management.read_exchange_point(exchange_point)
            exchange_point_id = exchange_point
            exchange_point = xp_obj.name
        else:
            self.container.ex_manager.create_xp(exchange_point)
            xp_objs, _ = self.clients.resource_registry.find_resources(restype=RT.ExchangePoint, name=exchange_point, id_only=True)
            if not xp_objs:
                raise BadRequest('failed to create an ExchangePoint: ' + exchange_point)
            exchange_point_id = xp_objs[0]

        topic_ids = topic_ids or []

        if not name:
            name = create_unique_identifier()

        # Get topic names and topics
        topic_names = []
        associated_topics = []
        for topic_id in topic_ids:
            topic = self.read_topic(topic_id)
            if topic.exchange_point == exchange_point:
                topic_names.append(self._sanitize(topic.name))
                associated_topics.append(topic_id)
            else:
                log.warning('Attempted to attach stream %s to topic %s with different exchange points', name, topic.name)

        stream = Stream(name=name, description=description)
        # Routing key layout: <sanitized stream name>.<topic names...>.stream
        routing_key = '.'.join([self._sanitize(name)] + topic_names + ['stream'])
        if len(routing_key) > 255:
            raise BadRequest('There are too many topics for this.')

        stream.stream_route.exchange_point = exchange_point
        stream.stream_route.routing_key = routing_key
        #@todo: validate credentials
        stream.stream_route.credentials = credentials
        stream.stream_name = stream_name
        stream.stream_type = stream_type

        stream_id, rev = self.clients.resource_registry.create(stream)

        self._associate_stream_with_exchange_point(stream_id, exchange_point_id)

        if stream_definition_id: #@Todo: what if the stream has no definition?!
            self._associate_stream_with_definition(stream_id, stream_definition_id)

        for topic_id in associated_topics:
            self._associate_topic_with_stream(topic_id, stream_id)

        log.info('Stream %s: %s', name, routing_key)

        return stream_id, stream.stream_route
Beispiel #25
0
    def create_subscription(self,
                            name='',
                            stream_ids=None,
                            exchange_points=None,
                            topic_ids=None,
                            exchange_name='',
                            credentials=None,
                            description=''):
        """Register a Subscription resource and declare the queue it uses.

        @param name             subscription name; a unique identifier is generated when empty
        @param stream_ids       list of stream ids to associate with the subscription
        @param exchange_points  list stored on the subscription as exchange_points
        @param topic_ids        list of topic ids to associate with the subscription
        @param exchange_name    queue name; falls back to name when empty
        @param credentials      accepted but not used by this method
        @param description      description stored on the resource
        @retval subscription_id id of the newly created Subscription
        """
        stream_ids = stream_ids or []
        exchange_points = exchange_points or []
        topic_ids = topic_ids or []

        # The queue name defaults to the caller-supplied name (before any
        # generated identifier is substituted for an empty name).
        exchange_name = exchange_name or name
        validate_true(exchange_name, 'Clients must provide an exchange name')
        sources = stream_ids or exchange_points or topic_ids
        log.info('Creating Subscription %s for %s <- %s', name, exchange_name,
                 sources)

        name = name or create_unique_identifier()

        # Only non-empty arguments are type-checked, in this fixed order.
        list_checks = (
            (stream_ids, 'stream ids must be in list format'),
            (exchange_points, 'exchange points must be in list format'),
            (topic_ids, 'topic ids must be in list format'),
        )
        for candidate, complaint in list_checks:
            if candidate:
                validate_is_instance(candidate, list, complaint)

        subscription = Subscription(name=name, description=description)
        subscription.exchange_name = exchange_name
        subscription.exchange_points = exchange_points

        subscription_id, _rev = self.clients.resource_registry.create(
            subscription)
        self.container.ex_manager.create_xn_queue(exchange_name)
        xn_ids, _ = self.clients.resource_registry.find_resources(
            restype=RT.ExchangeName, name=exchange_name, id_only=True)
        if xn_ids:
            # Link the first matching ExchangeName resource to the subscription.
            self.clients.resource_registry.create_association(
                xn_ids[0], PRED.hasSubscription, subscription_id)

        #---------------------------------
        # Associations
        #---------------------------------

        for sid in stream_ids:
            self._associate_stream_with_subscription(sid, subscription_id)

        for tid in topic_ids:
            self._associate_topic_with_subscription(tid, subscription_id)

        return subscription_id
 def remove_stream(self, dataset_id='', stream_id=''):
     """Delete every hasStream association between a dataset and a stream.

     @param dataset_id    id of the dataset (association subject)
     @param stream_id     id of the stream (association object)
     """
     log.info('Removing stream %s from dataset %s', stream_id, dataset_id)
     both_given = dataset_id and stream_id
     validate_true(both_given,
                   'Clients must provide both the dataset_id and stream_id')
     registry = self.clients.resource_registry
     links = registry.find_associations(subject=dataset_id,
                                        predicate=PRED.hasStream,
                                        object=stream_id)
     for link in links:
         registry.delete_association(link)
Beispiel #27
0
    def deactivate_subscription(self, subscription_id=''):
        """Unbind a subscription's queue from all of its sources and mark it inactive.

        Sources are: the topics associated with the subscription (plus
        whatever _child_topics returns for each), its directly associated
        streams, its exchange points, and the streams of its associated
        data products.

        @param subscription_id    id of the subscription to deactivate
        """
        validate_true(self.subscription_is_active(subscription_id),
                      'Subscription is not active.')

        subscription = self.read_subscription(subscription_id)
        registry = self.clients.resource_registry

        streams, _ = registry.find_objects(subject=subscription_id,
                                           object_type=RT.Stream,
                                           predicate=PRED.hasStream,
                                           id_only=False)
        topic_ids, _ = registry.find_objects(subject=subscription_id,
                                             predicate=PRED.hasTopic,
                                             id_only=True)
        data_product_ids, _ = registry.find_objects(
            subject=subscription_id,
            predicate=PRED.hasDataProduct,
            id_only=True)

        # Collect the de-duplicated set of topic ids reachable from each topic.
        topic_topology = set()
        for topic_id in topic_ids:
            topic_topology.update(self._child_topics(topic_id))

        if topic_topology:
            for topic in registry.read_mult(object_ids=list(topic_topology)):
                log.info('Topic %s -X-> %s', topic.name,
                         subscription.exchange_name)
                binding = '#.%s.#' % self._sanitize(topic.name)
                self._unbind(topic.exchange_point, subscription.exchange_name,
                             binding)

        for stream in streams:
            log.info('%s -X-> %s', stream.name, subscription.exchange_name)
            route = stream.stream_route
            self._unbind(route.exchange_point, subscription.exchange_name,
                         route.routing_key)

        for xp in subscription.exchange_points:
            log.info('Exchange %s -X-> %s', xp, subscription.exchange_name)
            self._unbind(xp, subscription.exchange_name, '*')

        for data_product_id in data_product_ids:
            product_streams, _ = registry.find_objects(
                subject=data_product_id,
                predicate=PRED.hasStream,
                id_only=False)
            for stream in product_streams:
                log.info('%s -X-> %s', stream.name, subscription.exchange_name)
                route = stream.stream_route
                self._unbind(route.exchange_point, subscription.exchange_name,
                             route.routing_key)

        subscription.activated = False
        registry.update(subscription)
Beispiel #28
0
    def test_validates_success(self):
        """A failed validate_true raises BadRequest with the given message and
        records this test module's name on the logger it uses."""
        import pyon.util.arg_check as ac
        fl = self._FakeLog()
        # Swap in the fake logger for the duration of the call only; leaving it
        # installed would leak the fake into every test that runs afterwards.
        original_log = ac.log
        ac.log = fl
        try:
            ac.validate_true(False, 'blah')
        except BadRequest as e:
            self.assertEqual(e.message, 'blah')
        finally:
            ac.log = original_log

        self.assertEqual(fl.name, "pyon.util.test.test_arg_check")
Beispiel #29
0
    def test_validates_success(self):
        """A failed validate_true raises BadRequest with the given message and
        records this test module's name on the logger it uses."""
        import pyon.util.arg_check as ac
        fl = self._FakeLog()
        # Swap in the fake logger for the duration of the call only; leaving it
        # installed would leak the fake into every test that runs afterwards.
        original_log = ac.log
        ac.log = fl
        try:
            ac.validate_true(False, 'blah')
        except BadRequest as e:
            self.assertEqual(e.message, 'blah')
        finally:
            ac.log = original_log

        self.assertEqual(fl.name, "pyon.util.test.test_arg_check")
Beispiel #30
0
    def create_dataset(self,
                       name='',
                       datastore_name='',
                       view_name='',
                       stream_id='',
                       parameter_dict=None,
                       spatial_domain=None,
                       temporal_domain=None,
                       parameter_dictionary_id='',
                       description=''):
        """Register a new Dataset resource and create and save its coverage.

        @param name                     dataset name
        @param datastore_name           datastore name; DEFAULT_DATASTORE when empty
        @param view_name                view name; DEFAULT_VIEW when empty
        @param stream_id                optional stream to attach; also stored as primary_view_key
        @param parameter_dict           dumped parameter dictionary; replaced when
                                        parameter_dictionary_id is given
        @param spatial_domain           required spatial domain
        @param temporal_domain          required temporal domain
        @param parameter_dictionary_id  id of a parameter dictionary resource to build
                                        the parameter dictionary from
        @param description              dataset description; also used as the coverage
                                        description (dataset id when empty)
        @retval dataset_id              id of the new Dataset resource
        """
        validate_true(
            parameter_dict or parameter_dictionary_id,
            'A parameter dictionary must be supplied to register a new dataset.'
        )
        validate_is_not_none(
            spatial_domain,
            'A spatial domain must be supplied to register a new dataset.')
        validate_is_not_none(
            temporal_domain,
            'A temporal domain must be supplied to register a new dataset.')

        if parameter_dictionary_id:
            # A dictionary id takes precedence over any parameter_dict passed in.
            pd = self.read_parameter_dictionary(parameter_dictionary_id)
            pcs = self.read_parameter_contexts(parameter_dictionary_id,
                                               id_only=False)
            parameter_dict = self._merge_contexts(
                [ParameterContext.load(i.parameter_context) for i in pcs],
                pd.temporal_context)
            parameter_dict = parameter_dict.dump()

        dataset = Dataset()
        dataset.description = description
        dataset.name = name
        dataset.primary_view_key = stream_id or None
        dataset.datastore_name = datastore_name or self.DEFAULT_DATASTORE
        dataset.view_name = view_name or self.DEFAULT_VIEW
        dataset.parameter_dictionary = parameter_dict
        dataset.temporal_domain = temporal_domain
        dataset.spatial_domain = spatial_domain
        dataset.registered = False

        dataset_id, _ = self.clients.resource_registry.create(dataset)
        if stream_id:
            self.add_stream(dataset_id, stream_id)

        log.debug('creating dataset: %s', dataset_id)

        cov = self._create_coverage(dataset_id, description or dataset_id,
                                    parameter_dict, spatial_domain,
                                    temporal_domain)
        try:
            self._save_coverage(cov)
        finally:
            # Always release the coverage, even when saving fails.
            cov.close()

        return dataset_id
    def query_owner(self, resource_id='', depth=0, id_only=False):
        """Find the resources reached by traversing from resource_id.

        @param resource_id    starting resource id (required)
        @param depth          when non-zero, use iterative_traverse with depth-1;
                              otherwise use reverse_traverse
        @param id_only        return the ids instead of the resource objects
        @retval               ids, or the objects read for those ids
        """
        validate_true(resource_id, 'Unspecified resource')
        if not depth:
            found_ids = self.reverse_traverse(resource_id)
        else:
            found_ids = self.iterative_traverse(resource_id, depth - 1)

        if not id_only:
            return self.clients.resource_registry.read_mult(found_ids)
        return found_ids
    def query_association(self, resource_id="", depth=0, id_only=False):
        """Find the resources reached by traversing from resource_id.

        @param resource_id    starting resource id (required)
        @param depth          when non-zero, use iterative_traverse with depth-1;
                              otherwise use traverse
        @param id_only        return the ids instead of the resource objects
        @retval               ids, or the objects read for those ids
        """
        validate_true(resource_id, "Unspecified resource")
        if not depth:
            found_ids = self.traverse(resource_id)
        else:
            found_ids = self.iterative_traverse(resource_id, depth - 1)

        if not id_only:
            return self.clients.resource_registry.read_mult(found_ids)
        return found_ids
    def query_association(self,resource_id='', depth=0, id_only=False):
        """Return the resources (or their ids) found by traversing from resource_id.

        A non-zero depth selects iterative_traverse (called with depth-1);
        a zero depth selects traverse.
        """
        validate_true(resource_id, 'Unspecified resource')

        walker_ids = (self.iterative_traverse(resource_id, depth - 1)
                      if depth else self.traverse(resource_id))
        if id_only:
            return walker_ids

        # Callers asked for full objects: resolve the ids through the registry.
        return self.clients.resource_registry.read_mult(walker_ids)
Beispiel #34
0
    def create_parameter_context(self,
                                 name='',
                                 parameter_context=None,
                                 description='',
                                 reference_urls=None,
                                 parameter_type='',
                                 internal_name='',
                                 value_encoding='',
                                 code_report='',
                                 units='',
                                 fill_value='',
                                 display_name='',
                                 parameter_function_id='',
                                 parameter_function_map='',
                                 standard_name='',
                                 ooi_short_name='',
                                 precision='',
                                 visible=True):
        """Create a ParameterContext resource, optionally linked to a parameter function.

        The name is sanitized so that every character outside [a-zA-Z0-9_]
        becomes an underscore; the sanitized name is also written into the
        parameter_context dict under 'name'.

        @param name                   required, non-empty name
        @param parameter_context      required dict describing the context
        @param parameter_function_id  optional; when given, the function is read
                                      first and a hasParameterFunction association
                                      is created after the resource
        @param internal_name          defaults to the sanitized name when empty
        @param precision              defaults to '5' when empty
        @retval pc_id                 id of the new resource

        The remaining keyword arguments are copied onto the resource as-is
        (reference_urls defaults to [], code_report to '').
        """
        validate_true(name, 'Name field may not be empty')

        validate_is_instance(parameter_context, dict,
                             'parameter_context field is not dictable.')

        if parameter_function_id:
            # Check the referenced function before persisting anything, so a
            # bad id cannot leave an orphaned ParameterContext behind.
            # NOTE(review): presumably read_parameter_function raises when the
            # id is unknown -- its return value was never used.
            self.read_parameter_function(parameter_function_id)

        name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
        parameter_context = self.numpy_walk(parameter_context)
        parameter_context['name'] = name
        pc_res = ParameterContextResource(name=name,
                                          parameter_context=parameter_context,
                                          description=description)
        pc_res.reference_urls = reference_urls or []
        pc_res.parameter_type = parameter_type
        pc_res.internal_name = internal_name or name
        pc_res.value_encoding = value_encoding
        pc_res.code_report = code_report or ''
        pc_res.units = units
        pc_res.fill_value = fill_value
        pc_res.display_name = display_name
        pc_res.parameter_function_id = parameter_function_id
        pc_res.parameter_function_map = parameter_function_map
        pc_res.standard_name = standard_name
        pc_res.ooi_short_name = ooi_short_name
        pc_res.precision = precision or '5'
        pc_res.visible = visible

        pc_id, ver = self.clients.resource_registry.create(pc_res)
        if parameter_function_id:
            self.clients.resource_registry.create_association(
                subject=pc_id,
                predicate=PRED.hasParameterFunction,
                object=parameter_function_id)

        return pc_id
    def query(self, query=None, id_only=True):
        """Run a structured-dict query and return what self.request produces for it.

        Query format definition:
        https://confluence.oceanobservatories.org/display/CIDev/Discovery+Service+Query+Format

        @param query    dict (must be truthy)
        @param id_only    bool
        @retval results    list
        """
        validate_true(query, 'Invalid query')

        results = self.request(query, id_only)
        return results
    def query_range(
        self, source_id="", field="", from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False
    ):
        """Search an index for entries whose field lies in [from_value, to_value].

        @param source_id     id of the resource to search (required)
        @param field         field to match; "*" is translated to "_all"
        @param from_value    lower bound, int or float (required)
        @param to_value      upper bound, int or float (required)
        @param order         optional dict passed to es.sort as keyword arguments
        @param limit         maximum number of results when non-zero
        @param offset        number of results to skip when non-zero
        @param id_only       passed to _results_from_response
        @retval              results built by _results_from_response, or the
                             result of _multi when it handles the source
        @throws BadRequest   when ElasticSearch is disabled
        """
        if not self.use_es:
            raise BadRequest("Can not make queries without ElasticSearch, enable in res/config/pyon.yml")

        validate_true(from_value is not None, "from_value not specified")
        validate_true(isinstance(from_value, (int, float)), "from_value is not a valid number")
        validate_true(to_value is not None, "to_value not specified")
        validate_true(isinstance(to_value, (int, float)), "to_value is not a valid number")
        validate_true(source_id, "source_id not specified")

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_range on all the results in the indexes
        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(
            self.query_range,
            source,
            field=field,
            from_value=from_value,
            to_value=to_value,
            order=order,
            limit=limit,
            offset=offset,
            id_only=id_only,
        )
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, "%s does not refer to a valid index." % source_id)
        if order:
            validate_is_instance(order, dict, "Order is incorrect.")
            es.sort(**order)

        if limit:
            es.size(limit)

        # The offset used to be accepted but silently ignored here; apply it
        # the same way the geo queries do.
        if offset:
            es.from_offset(offset)

        if field == "*":
            field = "_all"

        query = ep.ElasticQuery().range(field=field, from_value=from_value, to_value=to_value)

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
    def create_topic(self, name='', exchange_point='', parent_topic_id='', description=''):
        """Create a Topic resource, optionally as a sub-topic of an existing topic.

        @param name             topic name; a unique identifier is generated when empty
        @param exchange_point   required exchange point of the topic
        @param parent_topic_id  optional parent; must share the same exchange point
        @param description      description stored on the resource
        @retval topic_id        id of the new topic
        """
        validate_true(exchange_point, 'An exchange point must be provided for the topic')
        if not name:
            name = create_unique_identifier()
        new_topic = Topic(name=name, description=description, exchange_point=exchange_point)

        has_parent = bool(parent_topic_id)
        if has_parent:
            # Reject the request before anything is persisted.
            parent = self.read_topic(parent_topic_id)
            validate_equal(exchange_point, parent.exchange_point, 'Can not make a sub-topic with a different exchange point')

        topic_id, _rev = self.clients.resource_registry.create(new_topic)
        if has_parent:
            self._associate_topic_with_topic(parent_topic_id, topic_id)

        return topic_id
Beispiel #38
0
 def create_parameter_function(self,
                               name='',
                               parameter_function=None,
                               description=''):
     """Create a ParameterFunction resource and return its id.

     @param name                required, non-empty name
     @param parameter_function  required dict describing the function
     @param description         description stored on the resource
     @retval pf_id              id of the new resource
     """
     validate_true(name, 'Name field may not be empty')
     validate_is_instance(parameter_function, dict,
                          'parameter_function field is not dictable.')
     resource = ParameterFunctionResource(
         name=name,
         parameter_function=parameter_function,
         description=description)
     created = self.clients.resource_registry.create(resource)
     pf_id, _rev = created
     return pf_id
    def query_owner(self, resource_id='', depth=0, id_only=False):
        """Find the resources reached by traversing from resource_id.

        @param resource_id    starting resource id (required)
        @param depth          when non-zero, use iterative_traverse with depth-1;
                              otherwise use reverse_traverse
        @param id_only        return the ids instead of the resource objects
        @retval               ids, or the objects read for those ids
        """
        validate_true(resource_id, 'Unspecified resource')
        if depth:
            resource_ids = self.iterative_traverse(resource_id, depth - 1)
        else:
            resource_ids = self.reverse_traverse(resource_id)
        if id_only:
            return resource_ids

        # This conversion used to sit inside the `if id_only:` body after the
        # return, where it could never run. read_mult is given a real list.
        if not isinstance(resource_ids, list):
            resource_ids = list(resource_ids)
        resources = self.clients.resource_registry.read_mult(resource_ids)

        return resources
    def query(self, query=None, id_only=True, search_args=None):
        """Run a query given in structured dict format or internal datastore query format.

        Yields a list of resource or event objects, or only their IDs.
        search_args may carry parameterized values; it is forwarded to
        _discovery_request both as search_args and as query_params.
        Query format definition:
        https://confluence.oceanobservatories.org/display/CIDev/Discovery+Service+Query+Format

        @param query    dict (must be truthy)
        @param id_only    bool
        @param search_args dict
        @retval results    list
        """
        validate_true(query, 'Invalid query')

        results = self._discovery_request(query,
                                          id_only,
                                          search_args=search_args,
                                          query_params=search_args)
        return results
Beispiel #41
0
 def create_parameter_dictionary(self,
                                 name='',
                                 parameter_context_ids=None,
                                 temporal_context='',
                                 description=''):
     """Create a ParameterDictionary resource and link parameter contexts to it.

     @param name                   required, non-empty name
     @param parameter_context_ids  optional list of parameter context ids to link
     @param temporal_context       stored on the resource as temporal_context
     @param description            description stored on the resource
     @retval pd_res_id             id of the new resource
     """
     validate_true(name, 'Name field may not be empty.')
     resource = ParameterDictionaryResource(name=name,
                                            temporal_context=temporal_context,
                                            description=description)
     pd_res_id, _rev = self.clients.resource_registry.create(resource)
     # A missing/empty id list simply means nothing gets linked.
     for context_id in (parameter_context_ids or []):
         self._link_pcr_to_pdr(context_id, pd_res_id)
     return pd_res_id
    def query_association(self, resource_id='', depth=0, id_only=False):
        """Find the resources reached by traversing from resource_id.

        @param resource_id    starting resource id (required)
        @param depth          when non-zero, use iterative_traverse with depth-1;
                              otherwise use traverse
        @param id_only        return the ids exactly as the traversal produced them
        @retval               ids, or the objects read for those ids
        """
        validate_true(resource_id, 'Unspecified resource')
        if not depth:
            found_ids = self.traverse(resource_id)
        else:
            found_ids = self.iterative_traverse(resource_id, depth - 1)

        if id_only:
            return found_ids

        # read_mult is handed a real list, whatever the traversal returned.
        id_list = found_ids if isinstance(found_ids, list) else list(found_ids)
        return self.clients.resource_registry.read_mult(id_list)
    def query_geo_distance(
        self, source_id="", field="", origin=None, distance="", units="mi", order=None, limit=0, offset=0, id_only=False
    ):
        """Search an index for entries within a distance of an origin point.

        @param source_id    id of the resource to search
        @param field        geo field to match; "*" is translated to "_all"
        @param origin       two-element list or tuple
        @param distance     distance value; concatenated with units for the filter
        @param units        distance units (default "mi")
        @param order        optional dict; its first key/value is used as a sort
        @param limit        maximum number of results when non-zero
        @param offset       number of results to skip when non-zero
        @param id_only      passed to _results_from_response
        @retval             results built by _results_from_response, or the
                            result of _multi when it handles the source
        @throws BadRequest  when ElasticSearch is disabled
        """
        validate_true(isinstance(origin, (tuple, list)), "Origin is not a list or tuple.")
        validate_true(len(origin) == 2, "Origin is not of the right size: (2)")

        if not self.use_es:
            raise BadRequest("Can not make queries without ElasticSearch, enable in res/config/pyon.yml")

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
        source = self.clients.resource_registry.read(source_id)

        # Forward every query option, so the fanned-out calls keep the caller's
        # units, ordering, paging and id_only instead of falling back to defaults.
        iterate = self._multi(
            self.query_geo_distance,
            source=source,
            field=field,
            origin=origin,
            distance=distance,
            units=units,
            order=order,
            limit=limit,
            offset=offset,
            id_only=id_only,
        )
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, "%s does not refer to a valid index." % index)

        sorts = ep.ElasticSort()
        # An empty dict carries no sort field; skip it instead of failing on
        # the first-key lookup.
        if isinstance(order, dict) and order:
            sort_field = next(iter(order))
            value = order[sort_field]
            sorts.sort(sort_field, value)
            es.sorted(sorts)

        if limit:
            es.size(limit)

        if offset:
            es.from_offset(offset)

        if field == "*":
            field = "_all"

        sorts.geo_distance(field, origin, units)

        es.sorted(sorts)

        filter = ep.ElasticFilter.geo_distance(field, origin, "%s%s" % (distance, units))

        es.filtered(filter)

        query = ep.ElasticQuery.match_all()

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
    def create_ingestion_configuration(self,name='', exchange_point_id='', queues=None):
        """Create an IngestionConfiguration resource and return its id.

        @param name               name stored on the configuration
        @param exchange_point_id  stored on the configuration as exchange_point
        @param queues             non-empty list of IngestionQueue objects
        @retval config_id         id of the new resource
        """
        validate_is_instance(queues, list, 'The queues parameter is not a proper list.')
        validate_true(len(queues) > 0, 'Ingestion needs at least one queue to ingest from')
        for entry in queues:
            validate_is_instance(entry, IngestionQueue)

        configuration = IngestionConfiguration()
        configuration.queues = queues
        configuration.exchange_point = exchange_point_id
        configuration.name = name

        config_id, _rev = self.clients.resource_registry.create(configuration)
        return config_id
Beispiel #45
0
    def query_geo_distance(self, source_id='', field='', origin=None, distance='', units='mi',order=None, limit=0, offset=0, id_only=False):
        """Search an index for entries within a distance of an origin point.

        @param source_id    id of the resource to search
        @param field        geo field to match; '*' is translated to '_all'
        @param origin       two-element list or tuple
        @param distance     distance value; concatenated with units for the filter
        @param units        distance units (default 'mi')
        @param order        optional dict; its first key/value is used as a sort
        @param limit        maximum number of results when non-zero
        @param offset       number of results to skip when non-zero
        @param id_only      passed to _results_from_response
        @retval             results built by _results_from_response, or the
                            result of _multi when it handles the source
        @throws BadRequest  when ElasticSearch is disabled
        """
        validate_true(isinstance(origin, (tuple, list)), 'Origin is not a list or tuple.')
        validate_true(len(origin) == 2, 'Origin is not of the right size: (2)')

        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
        source = self.clients.resource_registry.read(source_id)

        # Forward every query option, so the fanned-out calls keep the caller's
        # units, ordering, paging and id_only instead of falling back to defaults.
        iterate = self._multi(self.query_geo_distance, source=source, field=field, origin=origin, distance=distance,
                              units=units, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % index)

        sorts = ep.ElasticSort()
        # An empty dict carries no sort field; skip it instead of failing on
        # the first-key lookup.
        if isinstance(order, dict) and order:
            sort_field = next(iter(order))
            value = order[sort_field]
            sorts.sort(sort_field, value)
            es.sorted(sorts)

        if limit:
            es.size(limit)

        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        sorts.geo_distance(field, origin, units)

        es.sorted(sorts)

        filter = ep.ElasticFilter.geo_distance(field, origin, '%s%s' % (distance, units))

        es.filtered(filter)

        query = ep.ElasticQuery.match_all()

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
 def create_parameter_context(self, name='', parameter_context=None, description='', parameter_type='', value_encoding='', unit_of_measure=''):
     """Return the id of a matching ParameterContext, creating one when none matches.

     An existing resource matches when it has the same name and _compare_pc
     accepts its parameter_context against the one supplied.

     @param name               required (validated only when no match is found)
     @param parameter_context  dict describing the context
     @param description        description stored on a newly created resource
     @param parameter_type     stored on a newly created resource
     @param value_encoding     stored on a newly created resource
     @param unit_of_measure    stored on a newly created resource
     @retval pc_id             id of the matching or newly created resource
     """
     registry = self.clients.resource_registry
     existing, _ = registry.find_resources(restype=RT.ParameterContext, name=name, id_only=False)
     for candidate in existing:
         if candidate.name != name:
             continue
         if self._compare_pc(candidate.parameter_context, parameter_context):
             return candidate._id

     validate_true(name, 'Name field may not be empty')
     validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')
     resource = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
     resource.parameter_type = parameter_type
     resource.value_encoding = value_encoding
     resource.unit_of_measure = unit_of_measure
     pc_id, _rev = registry.create(resource)

     return pc_id
    def find_collection(self, collection_name='', resource_ids=None):
        """Find collections by name and/or by the resources they contain.

        @param collection_name  name of Collection resources to look up
        @param resource_ids     list of resource ids; the subject of every
                                hasElement association pointing at one of them
                                is included
        @retval                 de-duplicated list of results (order not defined)

        At least one of collection_name / resource_ids must be given.
        """
        # None replaces the former shared mutable `[]` default.
        resource_ids = resource_ids or []
        validate_true(collection_name or resource_ids, 'You must specify either a name or a list of resources.')
        results = set()

        if collection_name:
            colls = self.clients.resource_registry.find_resources(name=collection_name, restype=RT.Collection, id_only=True)
            # find_resources returns a pair; the first element holds the ids.
            results.update(colls[0])

        for resource_id in resource_ids:
            assocs = self.clients.resource_registry.find_associations(object=resource_id, predicate=PRED.hasElement, id_only=False)
            results.update(assoc.s for assoc in assocs)

        return list(results)
Beispiel #48
0
    def find_collection(self, collection_name='', resource_ids=None):
        """Find collections by name and/or by the resources they contain.

        @param collection_name  name of Collection resources to look up
        @param resource_ids     list of resource ids; the subject of every
                                hasElement association pointing at one of them
                                is included
        @retval                 de-duplicated list of results (order not defined)

        At least one of collection_name / resource_ids must be given.
        """
        # None replaces the former shared mutable `[]` default.
        resource_ids = resource_ids or []
        validate_true(collection_name or resource_ids, 'You must specify either a name or a list of resources.')
        results = set()

        if collection_name:
            colls = self.clients.resource_registry.find_resources(name=collection_name, restype=RT.Collection, id_only=True)
            # find_resources returns a pair; the first element holds the ids.
            results.update(colls[0])

        for resource_id in resource_ids:
            assocs = self.clients.resource_registry.find_associations(object=resource_id, predicate=PRED.hasElement, id_only=False)
            results.update(assoc.s for assoc in assocs)

        return list(results)
    def create_subscription(self, name='', stream_ids=None, exchange_points=None, topic_ids=None, exchange_name='', credentials=None, description='', data_product_ids=None):
        """Register a Subscription resource and declare the queue it uses.

        @param name              subscription name; a unique identifier is generated when empty
        @param stream_ids        list of stream ids to associate with the subscription
        @param exchange_points   list stored on the subscription as exchange_points
        @param topic_ids         list of topic ids to associate with the subscription
        @param exchange_name     queue name; falls back to name when empty
        @param credentials       accepted but not used by this method
        @param description       description stored on the resource
        @param data_product_ids  list of data product ids to associate with the subscription
        @retval subscription_id  id of the newly created Subscription
        """
        # All list arguments default to None (not a shared mutable list) and
        # are normalised to fresh empty lists here.
        stream_ids       = stream_ids or []
        exchange_points  = exchange_points or []
        topic_ids        = topic_ids or []
        data_product_ids = data_product_ids or []

        exchange_name = exchange_name or name
        validate_true(exchange_name, 'Clients must provide an exchange name')
        log.info('Creating Subscription %s for %s <- %s', name, exchange_name, stream_ids or exchange_points or topic_ids)

        if not name:
            name = create_unique_identifier()

        if stream_ids:
            validate_is_instance(stream_ids, list, 'stream ids must be in list format')

        if exchange_points:
            validate_is_instance(exchange_points, list, 'exchange points must be in list format')

        if topic_ids:
            validate_is_instance(topic_ids, list, 'topic ids must be in list format')

        subscription = Subscription(name=name, description=description)
        subscription.exchange_points = exchange_points
        subscription.exchange_name   = exchange_name

        subscription_id, rev = self.clients.resource_registry.create(subscription)
        self.container.ex_manager.create_xn_queue(exchange_name)
        xn_ids, _ = self.clients.resource_registry.find_resources(restype=RT.ExchangeName, name=exchange_name, id_only=True)
        if xn_ids:
            # Link the first matching ExchangeName resource to the subscription.
            xn_id = xn_ids[0]
            self.clients.resource_registry.create_association(xn_id, PRED.hasSubscription, subscription_id)

        #---------------------------------
        # Associations
        #---------------------------------

        for stream_id in stream_ids:
            self._associate_stream_with_subscription(stream_id, subscription_id)

        for topic_id in topic_ids:
            self._associate_topic_with_subscription(topic_id, subscription_id)

        for data_product_id in data_product_ids:
            self._associate_data_product_with_subscription(data_product_id, subscription_id)

        return subscription_id
    def query_geo_bbox(
        self, source_id="", field="", top_left=None, bottom_right=None, order=None, limit=0, offset=0, id_only=False
    ):
        """Search an index for entries inside a bounding box.

        @param source_id     id of the resource to search
        @param field         geo field to match; "*" is translated to "_all"
        @param top_left      two-element list or tuple
        @param bottom_right  two-element list or tuple
        @param order         optional dict; its first key/value is used as a sort
        @param limit         maximum number of results when non-zero
        @param offset        number of results to skip when non-zero
        @param id_only       passed to _results_from_response
        @retval              results built by _results_from_response, or the
                             result of _multi when it handles the source
        @throws BadRequest   when ElasticSearch is disabled
        """
        validate_true(isinstance(top_left, (list, tuple)), "Top Left is not a list or a tuple")
        validate_true(len(top_left) == 2, "Top Left is not of the right size: (2)")
        validate_true(isinstance(bottom_right, (list, tuple)), "Bottom Right is not a list or a tuple")
        validate_true(len(bottom_right) == 2, "Bottom Right is not of the right size: (2)")

        if not self.use_es:
            raise BadRequest("Can not make queries without ElasticSearch, enable in res/config/pyon.yml")

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
        source = self.clients.resource_registry.read(source_id)

        # Let _multi fan the query out; None means the source is searched directly.
        forwarded = dict(
            source=source,
            field=field,
            top_left=top_left,
            bottom_right=bottom_right,
            order=order,
            limit=limit,
            offset=offset,
            id_only=id_only,
        )
        fanned_out = self._multi(self.query_geo_bbox, **forwarded)
        if fanned_out is not None:
            return fanned_out

        validate_is_instance(source, ElasticSearchIndex, "%s does not refer to a valid index." % source)

        sorts = ep.ElasticSort()
        if isinstance(order, dict):
            sort_field = order.keys()[0]
            sorts.sort(sort_field, order[sort_field])
            es.sorted(sorts)

        if limit:
            es.size(limit)

        if offset:
            es.from_offset(offset)

        search_field = "_all" if field == "*" else field

        box_filter = ep.ElasticFilter.geo_bounding_box(search_field, top_left, bottom_right)
        es.filtered(box_filter)

        match_everything = ep.ElasticQuery.match_all()

        response = IndexManagementService._es_call(es.search_index_advanced, source.index_name, match_everything)
        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
    def query_range(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
        """Search an index for entries whose field lies in [from_value, to_value].

        @param source_id     id of the resource to search (required)
        @param field         field to match; '*' is translated to '_all'
        @param from_value    lower bound, int or float (required)
        @param to_value      upper bound, int or float (required)
        @param order         optional dict passed to es.sort as keyword arguments
        @param limit         maximum number of results when non-zero
        @param offset        number of results to skip when non-zero
        @param id_only       passed to _results_from_response
        @retval              results built by _results_from_response, or the
                             result of _multi when it handles the source
        @throws BadRequest   when ElasticSearch is disabled
        """
        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

        validate_true(from_value is not None, 'from_value not specified')
        validate_true(isinstance(from_value, (int, float)), 'from_value is not a valid number')
        validate_true(to_value is not None, 'to_value not specified')
        validate_true(isinstance(to_value, (int, float)), 'to_value is not a valid number')
        validate_true(source_id, 'source_id not specified')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        # If source is a view, catalog or collection go through it and recursively call query_range on all the results in the indexes
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        iterate = self._multi(self.query_range, source, field=field, from_value=from_value, to_value=to_value, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        if limit:
            es.size(limit)

        # The offset used to be accepted but silently ignored here; apply it
        # the same way the geo queries do.
        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        query = ep.ElasticQuery().range(
            field      = field,
            from_value = from_value,
            to_value   = to_value
        )

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)
Beispiel #52
0
    def test_validations(self):
        '''Every arg_check validator must raise BadRequest when its condition is violated.'''
        import pyon.util.arg_check as arg_check

        # Simple truth / equality checks.
        self.assertRaises(BadRequest, arg_check.validate_true, False, 'test')
        self.assertRaises(BadRequest, arg_check.validate_equal, 3, 4, 'test')
        self.assertRaises(BadRequest, arg_check.validate_not_equal, 4, 4, 'test')
        self.assertRaises(BadRequest, arg_check.validate_false, True, 'test')

        # Identity checks: two distinct (but equal) lists are not the same object...
        first = list()
        second = list()
        self.assertRaises(BadRequest, arg_check.validate_is, first, second, 'test')

        # ...whereas an alias refers to the very same object.
        original = list()
        alias = original
        self.assertRaises(BadRequest, arg_check.validate_is_not, original, alias, 'test')

        missing = None
        self.assertRaises(BadRequest, arg_check.validate_is_not_none, missing, 'test')

        # Membership checks.
        without_two = list([1, 3])
        self.assertRaises(BadRequest, arg_check.validate_in, 2, without_two, 'test')

        with_two = list([1, 2, 3])
        self.assertRaises(BadRequest, arg_check.validate_not_in, 2, with_two, 'test')

        # Type checks.
        not_a_dict = list()
        self.assertRaises(BadRequest, arg_check.validate_is_instance, not_a_dict, dict, 'test')

        a_list = list()
        self.assertRaises(BadRequest, arg_check.validate_not_is_instance, a_list, list, 'test')
 def stream_def_from_data_product(self, data_product_id=''):
     '''
     Return the id of a stream definition reachable from a data product.

     Looks up the Stream objects associated with data_product_id via
     PRED.hasStream, takes the last one returned, then looks up the objects
     associated with that stream via PRED.hasStreamDefinition and returns the
     last id found.

     Validation fails (via validate_true) when the data product has no stream
     or the stream has no stream definition.
     '''
     stream_ids, _ = self.clients.resource_registry.find_objects(
         subject=data_product_id,
         predicate=PRED.hasStream,
         object_type=RT.Stream,
         id_only=True)
     validate_true(
         stream_ids,
         'No stream found for this data product: %s' % data_product_id)
     stream_id = stream_ids.pop()
     stream_def_ids, _ = self.clients.resource_registry.find_objects(
         subject=stream_id,
         predicate=PRED.hasStreamDefinition,
         id_only=True)
     # Report the stream that lacks a definition; the original message
     # interpolated stream_def_ids, which is always empty on this path.
     validate_true(
         stream_def_ids,
         'No stream definition found for this stream: %s' % stream_id)
     stream_def_id = stream_def_ids.pop()
     return stream_def_id
    def define_replay(self,
                      dataset_id='',
                      query=None,
                      delivery_format='',
                      stream_id=''):
        '''
        Set up a replay of a dataset onto the given stream.

        query may carry:
          start_time: 0    The beginning timestamp
          end_time:   N    The ending timestamp
          parameters: []   The list of parameters which match the coverages parameters
          tdoa: slice()    The slice for the desired indices to be replayed

        Returns a (replay id, process id) tuple.
        Raises BadRequest when no dataset is given or no replay process
        definition is registered; stream_id is checked with validate_true.
        '''
        if not dataset_id:
            raise BadRequest('(Data Retriever Service %s): No dataset provided.' % self.name)
        validate_true(stream_id, 'No stream_id provided')

        registry = self.clients.resource_registry

        # Locate the registered process definition used for replays.
        definition_ids, _ = registry.find_resources(restype=RT.ProcessDefinition,
                                                    name=self.REPLAY_PROCESS,
                                                    id_only=True)
        if len(definition_ids) == 0:
            raise BadRequest('No replay process defined.')

        pid = self.clients.process_dispatcher.create_process(
            process_definition_id=definition_ids[0])

        #--------------------------------------------------------------------------------
        # Build the replay resource for the requested type of replay
        #--------------------------------------------------------------------------------
        replay = self.replay_data_process(dataset_id, query, delivery_format, stream_id)
        replay.process_id = pid

        # Persist the process id on the replay and link it to its output stream.
        registry.update(replay)
        registry.create_association(replay._id, PRED.hasStream, stream_id)

        return replay._id, pid
    def create_collection(self, name='', resources=None):
        '''
        Create a Collection resource and associate each given resource with it.

        name      - name of the new collection; must not match an existing resource.
        resources - non-empty sequence of resources to attach via PRED.hasElement.

        Returns the id of the new collection.
        Raises BadRequest when the name is already taken or no resources are supplied.
        '''
        registry = self.clients.resource_registry

        # The name check comes first, as any resource with that name blocks creation.
        existing, _ = registry.find_resources(name=name)
        if len(existing):
            raise BadRequest('Resource with name %s already exists.' % name)

        if resources is None:
            raise BadRequest('No resources provided to make collection.')
        validate_true(len(resources) > 0, 'No resources provided to make collection.')

        collection_id, _ = registry.create(Collection(name=name))

        for member in resources:
            registry.create_association(subject=collection_id,
                                        predicate=PRED.hasElement,
                                        object=member)

        return collection_id
Beispiel #56
0
    def create_ingestion_configuration(self,
                                       name='',
                                       exchange_point_id='',
                                       queues=None):
        '''
        Register a new IngestionConfiguration resource.

        name              - name stored on the configuration.
        exchange_point_id - stored as the configuration's exchange_point.
        queues            - non-empty list of IngestionQueue objects.

        Returns the id of the created configuration.
        '''
        validate_is_instance(queues, list, 'The queues parameter is not a proper list.')
        validate_true(len(queues) > 0, 'Ingestion needs at least one queue to ingest from')
        for candidate in queues:
            validate_is_instance(candidate, IngestionQueue)

        config = IngestionConfiguration()
        config.name = name
        config.exchange_point = exchange_point_id
        config.queues = queues

        # Only the id is needed; the revision returned by create() is discarded.
        config_id, _ = self.clients.resource_registry.create(config)
        return config_id
Beispiel #57
0
    def create_topic(self,
                     name='',
                     exchange_point='',
                     parent_topic_id='',
                     description=''):
        '''
        Create a Topic resource, optionally as a sub-topic of an existing topic.

        name            - topic name; a unique identifier is generated when empty.
        exchange_point  - required exchange point for the topic.
        parent_topic_id - optional parent topic; it must share the exchange point.
        description     - stored on the topic.

        Returns the id of the new topic.
        '''
        validate_true(exchange_point, 'An exchange point must be provided for the topic')

        if not name:
            name = create_unique_identifier()

        new_topic = Topic(name=name, description=description, exchange_point=exchange_point)

        # A sub-topic has to live on the same exchange point as its parent.
        if parent_topic_id:
            parent = self.read_topic(parent_topic_id)
            validate_equal(exchange_point, parent.exchange_point,
                           'Can not make a sub-topic with a different exchange point')

        topic_id, _ = self.clients.resource_registry.create(new_topic)

        if not parent_topic_id:
            return topic_id

        self._associate_topic_with_topic(parent_topic_id, topic_id)
        return topic_id
Beispiel #58
0
    def query_term(self, source_id='', field='', value='', fuzzy=False, match=False, order=None, limit=0, offset=0, id_only=False):
        '''
        Elasticsearch term query against an index
        > discovery.query_term('indexID', 'name', '*', order={'name':'asc'}, limit=20, id_only=False)

        source_id - id of the resource to search; read from the resource registry.
                    If _multi() yields a result for it (view, catalog or
                    collection) that result is returned directly, otherwise the
                    resource must be an ElasticSearchIndex.
        field     - field to search; '*' is translated to '_all'.
        value     - value to look for; a '*' in the value selects a wildcard query.
        fuzzy     - build a fuzzy_like_this query (takes precedence over match).
        match     - build a match_phrase_prefix query.
        order     - optional dict, expanded as keyword arguments to es.sort().
        limit     - if non-zero, passed to es.size().
        offset    - if non-zero, passed to es.from_offset().
        id_only   - forwarded to _results_from_response().

        Raises BadRequest when ElasticSearch is disabled; the validate_* helpers
        reject missing or malformed arguments.
        '''
        if not self.use_es:
            raise BadRequest('Can not make queries without ElasticSearch, enable system.elasticsearch to make queries.')

        validate_true(source_id, 'Unspecified source_id')
        validate_true(field, 'Unspecified field')
        validate_true(value, 'Unspecified value')

        es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)

        source = self.clients.resource_registry.read(source_id)

        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
        # If source is a view, catalog or collection go through it and recursively call query_term on all the results in the indexes
        #- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
        # fuzzy and match are forwarded so nested queries use the same query
        # mode as a direct index query; they were previously dropped here.
        iterate = self._multi(self.query_term, source, field=field, value=value, fuzzy=fuzzy, match=match, order=order, limit=limit, offset=offset, id_only=id_only)
        if iterate is not None:
            return iterate

        index = source
        # Report the offending id (as query_range does) rather than the resource object.
        validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
        if order:
            validate_is_instance(order, dict, 'Order is incorrect.')
            es.sort(**order)

        if limit:
            es.size(limit)

        if offset:
            es.from_offset(offset)

        if field == '*':
            field = '_all'

        if fuzzy:
            query = ep.ElasticQuery.fuzzy_like_this(value, fields=[field])
        elif match:
            # Reuse the body of a plain match query under the match_phrase_prefix key.
            match_query = ep.ElasticQuery.match(field=field, query=value)
            query = {"match_phrase_prefix": match_query['match']}
        elif '*' in value:
            query = ep.ElasticQuery.wildcard(field=field, value=value)
        else:
            query = ep.ElasticQuery.field(field=field, query=value)

        response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)

        IndexManagementService._check_response(response)

        return self._results_from_response(response, id_only)