def _check_response(response):
    validate_is_instance(response, dict, "Malformed response from ElasticSearch (%s)" % response, ServerError)
    if 'error' in response:
        raise ServerError("ElasticSearch error: %s" % response['error'])
    if 'ok' in response and not response['ok']:
        # Can't determine a better exception to raise; add a new one perhaps?
        raise NotFound("ElasticSearch Response: %s" % response)
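
# Usage sketch (assumes the pyon exception classes above are importable): the
# helper accepts any dict without an 'error' key whose 'ok' flag is not False.
def _demo_check_response():
    for resp in [{'ok': True}, {'error': 'index missing'}, {'ok': False}]:
        try:
            _check_response(resp)
            print('accepted: %s' % resp)
        except Exception as e:
            print('rejected %s: %s' % (resp, type(e).__name__))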
def create_parameter_context(self, name='', parameter_context=None, description='',
                             parameter_type='', value_encoding='', unit_of_measure='',
                             parameter_function_ids=None):
    parameter_function_ids = parameter_function_ids or []
    res, _ = self.clients.resource_registry.find_resources(restype=RT.ParameterContext, name=name, id_only=False)
    if len(res):
        for r in res:
            if r.name == name and self._compare_pc(r.parameter_context, parameter_context):
                return r._id
    validate_true(name, 'Name field may not be empty')
    validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')
    pc_res = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
    pc_res.parameter_type = parameter_type
    pc_res.value_encoding = value_encoding
    pc_res.unit_of_measure = unit_of_measure
    pc_id, ver = self.clients.resource_registry.create(pc_res)
    for pfunc_id in parameter_function_ids:
        self.read_parameter_function(pfunc_id)
        self.clients.resource_registry.create_association(subject=pc_id, predicate=PRED.hasParameterFunction, object=pfunc_id)
    return pc_id
def update_index(self, index=None):
    if index is None:
        raise BadRequest("No index specified")
    validate_is_instance(index, Index, 'The specified index is not of type interface.objects.Index')
    return self.clients.resource_registry.update(index)
def retrieve(self, dataset_id='', query=None, delivery_format=None, module='', cls='', kwargs=None):
    if query is None:
        query = {}
    if delivery_format is None:
        delivery_format = {}

    validate_is_instance(query, dict, 'Query was improperly formatted.')
    validate_true(dataset_id, 'No dataset provided')

    replay_instance = ReplayProcess()
    replay_instance.dataset = self.clients.dataset_management.read_dataset(dataset_id)
    replay_instance.dataset_id = dataset_id
    replay_instance.start_time = query.get('start_time', None)
    replay_instance.end_time = query.get('end_time', None)
    replay_instance.parameters = query.get('parameters', None)
    replay_instance.container = self.container

    retrieve_data = replay_instance.execute_retrieve()

    if module and cls:
        return self._transform_data(retrieve_data, module, cls, kwargs or {})
    return retrieve_data
def persist_data_stream(self, stream_id='', ingestion_configuration_id='', dataset_id='', config=None):
    #--------------------------------------------------------------------------------
    # Validate that the method call was indeed valid
    #--------------------------------------------------------------------------------
    config = config or {}
    validate_is_instance(stream_id, basestring, 'stream_id %s is not a valid string' % stream_id)
    validate_true(dataset_id, 'Clients must specify the dataset to persist')
    log.info('Persisting stream %s to dataset %s.', stream_id, dataset_id)

    ingestion_config = self.read_ingestion_configuration(ingestion_configuration_id)
    if self.is_persisted(stream_id):
        raise BadRequest('This stream is already being persisted')

    #--------------------------------------------------------------------------------
    # Set up the stream subscriptions and associations for this stream and its ingestion_type
    #--------------------------------------------------------------------------------
    if self.setup_queues(ingestion_config, stream_id, dataset_id, config):
        self.clients.pubsub_management.persist_stream(stream_id)
    return dataset_id
def suspend_data_product_persistence(self, data_product_id=''):
    """Suspend data product data persistence into a data set, multiple options

    @param data_product_id    str
    @throws NotFound    object with specified id does not exist
    """
    #--------------------------------------------------------------------------------
    # Retrieve the data product object
    #--------------------------------------------------------------------------------
    data_product_obj = self.clients.resource_registry.read(data_product_id)
    validate_is_not_none(data_product_obj, 'Should not have been empty')
    validate_is_instance(data_product_obj, DataProduct)

    if data_product_obj.dataset_configuration_id is None:
        raise NotFound("Data Product %s dataset configuration does not exist" % data_product_id)

    #--------------------------------------------------------------------------------
    # Get the stream associated with this data product; if no stream then create one, if multiple streams then throw
    #streams = self.data_product.find_stemming_stream(data_product_id)
    #--------------------------------------------------------------------------------
    stream_id = self._get_stream_id(data_product_id)
    validate_is_not_none(stream_id, 'Data Product %s must have one stream associated' % str(data_product_id))

    ret = self.clients.ingestion_management.unpersist_data_stream(stream_id=stream_id,
                                                                  ingestion_configuration_id=data_product_obj.dataset_configuration_id)
def read_dataset(self, dataset_id=''):
    """
    @throws NotFound if resource does not exist.
    """
    retval = self.clients.resource_registry.read(dataset_id)
    validate_is_instance(retval, DataSet)
    return retval
def create_parameter_context(self, name='', parameter_context=None, description='', reference_urls=None,
                             parameter_type='', internal_name='', value_encoding='', code_report='',
                             units='', fill_value='', display_name='', parameter_function_id='',
                             parameter_function_map='', standard_name='', ooi_short_name='', precision='',
                             visible=True):
    validate_true(name, 'Name field may not be empty')
    validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')
    parameter_context = self.numpy_walk(parameter_context)
    pc_res = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
    pc_res.reference_urls = reference_urls or []
    pc_res.parameter_type = parameter_type
    pc_res.internal_name = internal_name or name
    pc_res.value_encoding = value_encoding
    pc_res.code_report = code_report or ''
    pc_res.units = units
    pc_res.fill_value = fill_value
    pc_res.display_name = display_name
    pc_res.parameter_function_id = parameter_function_id
    pc_res.parameter_function_map = parameter_function_map
    pc_res.standard_name = standard_name
    pc_res.ooi_short_name = ooi_short_name
    pc_res.precision = precision or '5'
    pc_res.visible = visible
    pc_id, ver = self.clients.resource_registry.create(pc_res)
    if parameter_function_id:
        self.read_parameter_function(parameter_function_id)
        self.clients.resource_registry.create_association(subject=pc_id, predicate=PRED.hasParameterFunction,
                                                          object=parameter_function_id)
    return pc_id
def persist_data_stream(self, stream_id='', ingestion_configuration_id='', dataset_id=''):
    # Figure out which MIME or xpath in the stream definition belongs where
    # Just going to use the first queue for now
    validate_is_instance(stream_id, basestring, 'stream_id %s is not a valid string' % stream_id)
    validate_true(dataset_id, 'Clients must specify the dataset to persist')

    ingestion_config = self.read_ingestion_configuration(ingestion_configuration_id)
    if self.is_persisted(stream_id):
        raise BadRequest('This stream is already being persisted')
    stream = self.clients.pubsub_management.read_stream(stream_id)
    stream.persisted = True
    self.clients.pubsub_management.update_stream(stream)

    ingestion_queue = self._determine_queue(stream_id, ingestion_config.queues)

    subscription_id = self.clients.pubsub_management.create_subscription(
        query=StreamQuery(stream_ids=[stream_id]),
        exchange_name=ingestion_queue.name,
        exchange_point=ingestion_config.exchange_point
    )

    self.clients.pubsub_management.activate_subscription(subscription_id=subscription_id)

    self.clients.resource_registry.create_association(
        subject=ingestion_configuration_id,
        predicate=PRED.hasSubscription,
        object=subscription_id
    )

    self._existing_dataset(stream_id, dataset_id)
    return dataset_id
def create_parameter_function(self, name='', parameter_function=None, description=''):
    validate_true(name, 'Name field may not be empty')
    validate_is_instance(parameter_function, dict, 'parameter_function field is not dictable.')
    parameter_function = self.numpy_walk(parameter_function)
    pf_res = ParameterFunctionResource(name=name, parameter_function=parameter_function, description=description)
    pf_id, ver = self.clients.resource_registry.create(pf_res)
    return pf_id
def read_stream(self, stream_id=''):
    stream = self.clients.resource_registry.read(stream_id)
    validate_is_instance(stream, Stream, 'The specified identifier does not correspond to a Stream resource')
    return stream
def execute(input=None, context=None, config=None, params=None, state=None):
    validate_is_instance(input, Number)
    return input + 1
def shift(vector):
    validate_is_instance(vector, list)
    N = len(vector)
    # Swaps the first and last elements in place rather than rotating the whole vector
    x = vector[0]
    vector[0] = vector[N - 1]
    vector[N - 1] = x
    return vector
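
# Quick check of this variant's behavior (illustrative only; note it mutates
# its argument): only the endpoints are swapped, not a full rotation.
print(shift([1, 2, 3, 4]))  # -> [4, 2, 3, 1]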
def create_parameter_context(self, name='', parameter_context=None, description='', reference_urls=None,
                             parameter_type='', internal_name='', value_encoding='', code_report=None,
                             units='', fill_value='', display_name='', parameter_function_id='',
                             parameter_function_map=None, standard_name='', ooi_short_name='', precision=''):
    res, _ = self.clients.resource_registry.find_resources(restype=RT.ParameterContext, name=name, id_only=False)
    if len(res):
        for r in res:
            if r.name == name and self._compare_pc(r.parameter_context, parameter_context):
                return r._id
    validate_true(name, 'Name field may not be empty')
    validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')
    pc_res = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
    pc_res.reference_urls = reference_urls or []
    pc_res.parameter_type = parameter_type
    pc_res.internal_name = internal_name or name
    pc_res.value_encoding = value_encoding
    pc_res.code_report = code_report or {}
    pc_res.units = units
    pc_res.fill_value = fill_value
    pc_res.display_name = display_name
    pc_res.parameter_function_id = parameter_function_id
    pc_res.parameter_function_map = parameter_function_map
    pc_res.standard_name = standard_name
    pc_res.ooi_short_name = ooi_short_name
    pc_res.precision = precision or '5'
    pc_id, ver = self.clients.resource_registry.create(pc_res)
    if parameter_function_id:
        self.read_parameter_function(parameter_function_id)
        self.clients.resource_registry.create_association(subject=pc_id, predicate=PRED.hasParameterFunction,
                                                          object=parameter_function_id)
    return pc_id
def create_view(self, view_name='', description='', fields=None, order=None, filters=''):
    """Creates a view which has the specified search fields, the order in which the search fields are
    presented to a query, and a term filter.
    @param view_name Name of the view
    @param description Simple descriptive sentence
    @param fields Search fields
    @param order List of fields to determine the order of precedence in which the results are presented
    @param filters Simple term filter

    @param view_name    str
    @param description    str
    @param fields    list
    @param order    list
    @param filters    str
    @retval view_id    str
    """
    res, _ = self.clients.resource_registry.find_resources(name=view_name, id_only=True)
    if len(res) > 0:
        raise BadRequest('The view resource with name: %s, already exists.' % view_name)

    #======================
    # Arg Validations
    #======================
    validate_is_instance(fields, list, 'Specified fields must be a list.')
    validate_true(len(fields) > 0, 'Specified fields may not be empty.')

    if order is not None:
        validate_is_instance(order, list, 'Specified order must be a list of fields')
        for field in order:
            if not field in fields:
                raise BadRequest('The specified ordering field was not part of the search fields.')

    fields = set(fields)  # Convert fields to a set for aggregation across the catalogs

    #======================================================================================================
    # Priority Queue Index Matching
    #======================================================================================================
    pq = []  # Priority queue for matching
    catalog_id = None
    catalogs, _ = self.clients.resource_registry.find_resources(restype=RT.Catalog, id_only=False)
    for catalog in catalogs:
        if set(catalog.catalog_fields).issubset(fields):
            index_num = len(self.clients.catalog_management.list_indexes(catalog._id))
            heapq.heappush(pq, (index_num, catalog))
    if pq:
        weight, catalog = heapq.heappop(pq)
        if weight < self.heuristic_cutoff:
            catalog_id = catalog._id

    if catalog_id is None:
        catalog_id = self.clients.catalog_management.create_catalog('%s_catalog' % view_name, keywords=list(fields))

    view_res = View(name=view_name, description=description)
    view_res.order = order
    view_res.filters = filters
    view_id, _ = self.clients.resource_registry.create(view_res)
    self.clients.resource_registry.create_association(subject=view_id, predicate=PRED.hasCatalog, object=catalog_id)
    return view_id
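
# A minimal sketch of the heapq matching used above: push (weight, item) pairs
# and pop the smallest weight first (names here are illustrative).
import heapq
pq = []
for weight, catalog in [(3, 'cat_a'), (1, 'cat_b'), (2, 'cat_c')]:
    heapq.heappush(pq, (weight, catalog))
print(heapq.heappop(pq))  # -> (1, 'cat_b')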
def query_term(self, source_id="", field="", value="", fuzzy=False, match=False, order=None, limit=0, offset=0, id_only=False):
    """
    Elasticsearch query against an index

    > discovery.query_index('indexID', 'name', '*', order={'name':'asc'}, limit=20, id_only=False)
    """
    if not self.use_es:
        raise BadRequest("Can not make queries without ElasticSearch, enable system.elasticsearch to make queries.")

    validate_true(source_id, "Unspecified source_id")
    validate_true(field, "Unspecified field")
    validate_true(value, "Unspecified value")

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
    source = self.clients.resource_registry.read(source_id)

    #---------------------------------------------------------------------------------------------------
    # If source is a view, catalog or collection, go through it and recursively call query_term on all
    # the results in the indexes
    #---------------------------------------------------------------------------------------------------
    iterate = self._multi(self.query_term, source, field=field, value=value, order=order, limit=limit,
                          offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, "%s does not refer to a valid index." % index)
    if order:
        validate_is_instance(order, dict, "Order is incorrect.")
        es.sort(**order)
    if limit:
        es.size(limit)
    if offset:
        es.from_offset(offset)
    if field == "*":
        field = "_all"

    if fuzzy:
        query = ep.ElasticQuery.fuzzy_like_this(value, fields=[field])
    elif match:
        match_query = ep.ElasticQuery.match(field=field, query=value)
        query = {"match_phrase_prefix": match_query["match"]}
    elif "*" in value:
        query = ep.ElasticQuery.wildcard(field=field, value=value)
    else:
        query = ep.ElasticQuery.field(field=field, query=value)

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)
    return self._results_from_response(response, id_only)
def query_geo_bbox(self, source_id="", field="", top_left=None, bottom_right=None, order=None, limit=0, offset=0, id_only=False):
    validate_true(isinstance(top_left, (list, tuple)), "Top Left is not a list or a tuple")
    validate_true(len(top_left) == 2, "Top Left is not of the right size: (2)")
    validate_true(isinstance(bottom_right, (list, tuple)), "Bottom Right is not a list or a tuple")
    validate_true(len(bottom_right) == 2, "Bottom Right is not of the right size: (2)")

    if not self.use_es:
        raise BadRequest("Can not make queries without ElasticSearch, enable in res/config/pyon.yml")
    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
    source = self.clients.resource_registry.read(source_id)

    iterate = self._multi(self.query_geo_bbox, source=source, field=field, top_left=top_left,
                          bottom_right=bottom_right, order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, "%s does not refer to a valid index." % index)

    sorts = ep.ElasticSort()
    if order is not None and isinstance(order, dict):
        sort_field = order.keys()[0]
        value = order[sort_field]
        sorts.sort(sort_field, value)
        es.sorted(sorts)

    if limit:
        es.size(limit)
    if offset:
        es.from_offset(offset)
    if field == "*":
        field = "_all"

    filter = ep.ElasticFilter.geo_bounding_box(field, top_left, bottom_right)
    es.filtered(filter)

    query = ep.ElasticQuery.match_all()

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)
    return self._results_from_response(response, id_only)
def shift(vector):
    validate_is_instance(vector, numpy.ndarray)
    N = len(vector)
    x = numpy.zeros_like(vector)
    for i in xrange(N):
        if i == 0:
            x[i] = vector[N - 1]
        else:
            x[i] = vector[i - 1]
    return x
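
# Usage sketch (assumes NumPy is importable): a right circular shift by one position.
import numpy
print(shift(numpy.array([1, 2, 3, 4])))  # -> [4 1 2 3]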
def shift(vector):
    validate_is_instance(vector, list)
    N = len(vector)
    x = list()
    for i in xrange(N):
        if i == 0:
            x.append(vector[N - 1])
        else:
            x.append(vector[i - 1])
    return x
def shift(vector):
    validate_is_instance(vector, list)
    N = len(vector)
    x = list()
    # Despite the name, this variant accumulates pairwise differences:
    # x[i] = N * vector[i] - sum(vector)
    for i in xrange(N):
        v = 0
        for j in xrange(N):
            v += vector[i] - vector[j]
        x.append(v)
    return x
def update_data_product(self, data_product=None):
    """
    @todo document this interface!!!

    @param data_product    DataProduct
    @throws NotFound    object with specified id does not exist
    """
    validate_is_instance(data_product, DataProduct)
    self.data_product.update_one(data_product)
def __init__(self, stream_id, stream_route):
    '''
    Creates a new StandaloneStreamPublisher
    @param stream_id    The stream identifier
    @param stream_route The StreamRoute to publish on.
    '''
    super(StandaloneStreamPublisher, self).__init__()
    self.stream_id = stream_id
    validate_is_instance(stream_route, StreamRoute, 'stream route is not valid')
    self.stream_route = stream_route
def query_range(self, source_id="", field="", from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    if not self.use_es:
        raise BadRequest("Can not make queries without ElasticSearch, enable in res/config/pyon.yml")

    if from_value is not None:
        validate_true(isinstance(from_value, int) or isinstance(from_value, float), "from_value is not a valid number")
    if to_value is not None:
        validate_true(isinstance(to_value, int) or isinstance(to_value, float), "to_value is not a valid number")
    validate_true(source_id, "source_id not specified")

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
    source = self.clients.resource_registry.read(source_id)

    #---------------------------------------------------------------------------------------------------
    # If source is a view, catalog or collection, go through it and recursively call query_range on all
    # the results in the indexes
    #---------------------------------------------------------------------------------------------------
    iterate = self._multi(self.query_range, source, field=field, from_value=from_value, to_value=to_value,
                          order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, "%s does not refer to a valid index." % source_id)
    if order:
        validate_is_instance(order, dict, "Order is incorrect.")
        es.sort(**order)
    if limit:
        es.size(limit)
    if field == "*":
        field = "_all"

    query = ep.ElasticQuery.range(field=field, from_value=from_value, to_value=to_value)
    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)
    return self._results_from_response(response, id_only)
def find_associations_mult(self, subjects, id_only=False):
    ds, datastore_name = self._get_datastore()
    validate_is_instance(subjects, list, 'subjects is not a list of resource_ids')
    view_args = dict(keys=subjects, include_docs=True)
    results = self.query_view(self._get_viewname("association", "by_bulk"), view_args)
    ids = [i['value'] for i in results]
    assocs = [i['doc'] for i in results]
    if id_only:
        return ids, assocs
    else:
        return self.read_mult(ids), assocs
def read_data_product(self, data_product_id=''):
    """
    method docstring
    """
    # Retrieve all metadata for a specific data product
    # Return data product resource
    data_product = self.data_product.read_one(data_product_id)
    validate_is_instance(data_product, DataProduct)
    return data_product
def add_indexes(self, catalog_id='', index_ids=None):
    """Add the specified indexes to the specified catalog
    @param index_ids    list
    @retval success    bool
    """
    validate_is_instance(index_ids, list, "A list of index IDs was not provided.")
    for index_id in index_ids:
        self.clients.resource_registry.create_association(subject=catalog_id, predicate=PRED.hasIndex, object=index_id)
    return True
def ingest(self, msg, stream_id):
    '''
    Actual ingestion mechanism
    '''
    if msg == {}:
        log.error('Received empty message from stream: %s', stream_id)
        return
    # Message validation
    validate_is_instance(msg, Granule, 'Incoming message is not compatible with this ingestion worker')
    granule = msg
    self.add_granule(stream_id, granule)
    self.persist_meta(stream_id, granule)
def create_parameter_context(self, name='', parameter_context=None, description='', reference_urls=None,
                             parameter_type='', internal_name='', value_encoding='', code_report='',
                             units='', fill_value='', display_name='', parameter_function_id='',
                             parameter_function_map='', standard_name='', ooi_short_name='', precision='',
                             visible=True):
    validate_true(name, 'Name field may not be empty')
    validate_is_instance(parameter_context, dict, 'parameter_context field is not dictable.')
    name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    parameter_context = self.numpy_walk(parameter_context)
    parameter_context['name'] = name
    pc_res = ParameterContextResource(name=name, parameter_context=parameter_context, description=description)
    pc_res.reference_urls = reference_urls or []
    pc_res.parameter_type = parameter_type
    pc_res.internal_name = internal_name or name
    pc_res.value_encoding = value_encoding
    pc_res.code_report = code_report or ''
    pc_res.units = units
    pc_res.fill_value = fill_value
    pc_res.display_name = display_name
    pc_res.parameter_function_id = parameter_function_id
    pc_res.parameter_function_map = parameter_function_map
    pc_res.standard_name = standard_name
    pc_res.ooi_short_name = ooi_short_name
    pc_res.precision = precision or '5'
    pc_res.visible = visible
    pc_id, ver = self.clients.resource_registry.create(pc_res)
    if parameter_function_id:
        self.read_parameter_function(parameter_function_id)
        self.clients.resource_registry.create_association(subject=pc_id,
                                                          predicate=PRED.hasParameterFunction,
                                                          object=parameter_function_id)
    return pc_id
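
# Sketch of the name normalization applied above: any character outside
# [a-zA-Z0-9_] becomes an underscore (stdlib re only; the sample name is illustrative).
import re
print(re.sub(r'[^a-zA-Z0-9_]', '_', 'sea-water temp (C)'))  # -> sea_water_temp__C_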
def create_parameter_function(self, name='', parameter_function=None, description=''):
    validate_true(name, 'Name field may not be empty')
    validate_is_instance(parameter_function, dict, 'parameter_function field is not dictable.')
    pf_res = ParameterFunctionResource(name=name, parameter_function=parameter_function, description=description)
    pf_id, ver = self.clients.resource_registry.create(pf_res)
    return pf_id
def __init__(self, process, exchange_name, callback=None):
    '''
    Creates a new StreamSubscriber which will listen on the specified queue (exchange_name).
    @param process       The Ion Process to attach to.
    @param exchange_name The subscribing queue name.
    @param callback      The callback to execute upon receipt of a packet.
    '''
    validate_is_instance(process, BaseService, 'No valid process was provided.')
    self.container = process.container
    self.xn = self.container.ex_manager.create_xn_queue(exchange_name)
    self.started = False
    self.callback = callback or process.call_process
    super(StreamSubscriber, self).__init__(from_name=self.xn, callback=self.preprocess)
def dct(vector):
    import numpy as np
    validate_is_instance(vector, np.ndarray)
    N = vector.size
    x = list()
    for k in xrange(N):
        if k == 0:
            x.append(float(np.sqrt(1. / N) * np.sum(vector)))
        else:
            v = np.vectorize(lambda n: np.cos(np.pi * k * (2. * n + 1.) / (2. * N)))
            x.append(float(np.sqrt(2. / N) * np.sum(v(np.arange(0, N)) * vector)))
    return x
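
# Usage sketch (assumes NumPy): this is the orthonormal DCT-II, so a constant
# input concentrates all energy in the k=0 coefficient.
import numpy as np
print(dct(np.array([1., 1., 1., 1.])))  # -> [2.0, 0.0, 0.0, 0.0] (up to rounding)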
def delete_data_product(self, data_product_id=''):
    # Check if this data product is associated to a producer
    #todo: convert to impl call
    producer_ids = self.data_product.find_stemming_data_producer(data_product_id)
    for producer_id in producer_ids:
        log.debug("DataProductManagementService:delete_data_product unassigning data producer: %s", producer_id)
        self.clients.data_acquisition_management.unassign_data_product(producer_id, data_product_id)

    #--------------------------------------------------------------------------------
    # remove stream associations
    #--------------------------------------------------------------------------------
    stream_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStream, RT.Stream, id_only=True)
    for stream_id in stream_ids:
        self.data_product.unlink_stream(data_product_id=data_product_id, stream_id=stream_id)

    #--------------------------------------------------------------------------------
    # remove dataset associations
    #--------------------------------------------------------------------------------
    dataset_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.DataSet, id_only=True)
    for dataset_id in dataset_ids:
        self.data_product.unlink_data_set(data_product_id=data_product_id, data_set_id=dataset_id)

    # # delete the hasOutputDataProduct associations link
    # dp_assocs = self.clients.resource_registry.find_associations(data_product_id, PRED.hasOutputProduct)
    # for dp_assoc in dp_assocs:
    #     self.clients.resource_registry.delete_association(dp_assoc)

    # # delete the hasInputDataProduct associations link
    # dp_assocs = self.clients.resource_registry.find_associations(data_product_id, PRED.hasInputProduct)
    # for dp_assoc in dp_assocs:
    #     self.clients.resource_registry.delete_association(dp_assoc)

    #--------------------------------------------------------------------------------
    # Delete the data product
    #--------------------------------------------------------------------------------
    data_product_obj = self.read_data_product(data_product_id)
    validate_is_instance(data_product_obj, DataProduct)
    if data_product_obj.lcstate != LCS.RETIRED:
        self.data_product.delete_one(data_product_id)
        #self.clients.resource_registry.delete(data_product_id)
        #self.clients.resource_registry.set_lifecycle_state(data_product_id, LCS.RETIRED)
    return
def build_granule(record_dictionary=None, **kwargs):
    """
    This method is a simple wrapper that knows how to produce a granule IonObject from a
    RecordDictionaryTool and a TaxonomyTool.

    A granule is a unit of information which conveys part of a coverage. A granule contains a
    record dictionary. The record dictionary is composed of named value sequences. We want the
    granule builder to have a dictionary-like behavior for building record dictionaries, using
    the taxonomy as a map from the name to the ordinal in the record dictionary.
    """
    if record_dictionary is None:
        raise StandardError('Must provide a record dictionary')
    validate_is_instance(record_dictionary, RecordDictionaryTool)
    return record_dictionary.to_granule(**kwargs)
def read_stream_definition(self, stream_definition_id='', stream_id=''):
    retval = None
    if stream_id and self.read_stream(stream_id):
        sds, assocs = self.clients.resource_registry.find_objects(subject=stream_id, predicate=PRED.hasStreamDefinition, id_only=False)
        if sds:
            retval = sds[0]
        else:
            raise NotFound('No Stream Definition is associated with this Stream')
    stream_definition = retval or self.clients.resource_registry.read(stream_definition_id)
    pdicts, _ = self.clients.resource_registry.find_objects(subject=stream_definition._id, object_type=RT.ParameterDictionaryResource, id_only=True)
    if len(pdicts):
        stream_definition.parameter_dictionary = DatasetManagementService.get_parameter_dictionary(pdicts[0]).dump()
    validate_is_instance(stream_definition, StreamDefinition)
    return stream_definition
def start_replay_agent(self, replay_id=''):
    res, _ = self.clients.resource_registry.find_resources(restype=RT.ProcessDefinition, name=self.REPLAY_PROCESS, id_only=True)
    if not len(res):
        raise BadRequest('No replay process defined.')
    process_definition_id = res[0]

    replay = self.clients.resource_registry.read(replay_id)
    validate_is_instance(replay, Replay)
    config = replay.config
    pid = replay.process_id

    self.clients.process_dispatcher.schedule_process(process_definition_id=process_definition_id,
                                                     process_id=pid,
                                                     configuration=config)
def __init__(self, stream_id, stream_route):
    '''
    Creates a new StandaloneStreamPublisher
    @param stream_id    The stream identifier
    @param stream_route The StreamRoute to publish on.
    '''
    super(StandaloneStreamPublisher, self).__init__()
    from pyon.container.cc import Container
    self.stream_id = stream_id
    validate_is_instance(stream_route, StreamRoute, 'stream route is not valid')
    self.stream_route = stream_route

    self.xp = Container.instance.ex_manager.create_xp(stream_route.exchange_point)
    self.xp_route = self.xp.create_route(stream_route.routing_key)
def query_geo_distance(self, source_id="", field="", origin=None, distance="", units="mi", order=None, limit=0, offset=0, id_only=False):
    validate_true(isinstance(origin, (tuple, list)), "Origin is not a list or tuple.")
    validate_true(len(origin) == 2, "Origin is not of the right size: (2)")

    if not self.use_es:
        raise BadRequest("Can not make queries without ElasticSearch, enable in res/config/pyon.yml")
    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
    source = self.clients.resource_registry.read(source_id)

    iterate = self._multi(self.query_geo_distance, source=source, field=field, origin=origin, distance=distance)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, "%s does not refer to a valid index." % index)

    sorts = ep.ElasticSort()
    if order is not None and isinstance(order, dict):
        sort_field = order.keys()[0]
        value = order[sort_field]
        sorts.sort(sort_field, value)
        es.sorted(sorts)

    if limit:
        es.size(limit)
    if offset:
        es.from_offset(offset)
    if field == "*":
        field = "_all"

    sorts.geo_distance(field, origin, units)
    es.sorted(sorts)

    filter = ep.ElasticFilter.geo_distance(field, origin, "%s%s" % (distance, units))
    es.filtered(filter)

    query = ep.ElasticQuery.match_all()

    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)
    return self._results_from_response(response, id_only)
def find_associations_mult(self, subjects, id_only=False):
    """
    Returns a list of associations for a given list of subjects
    """
    ds, datastore_name = self._get_datastore()
    validate_is_instance(subjects, list, 'subjects is not a list of resource_ids')
    view_args = dict(keys=subjects, include_docs=True)
    results = self.query_view(self._get_viewname("association", "by_bulk"), view_args)
    ids = [i['value'] for i in results]
    assocs = [i['doc'] for i in results]
    self._count(find_assocs_mult_call=1, find_assocs_mult_obj=len(ids))
    if id_only:
        return ids, assocs
    else:
        return self.read_mult(ids), assocs
def test_validations(self):
    import pyon.util.arg_check as arg_check

    with self.assertRaises(BadRequest):
        arg_check.validate_true(False, 'test')

    with self.assertRaises(BadRequest):
        arg_check.validate_equal(3, 4, 'test')

    with self.assertRaises(BadRequest):
        arg_check.validate_not_equal(4, 4, 'test')

    with self.assertRaises(BadRequest):
        arg_check.validate_false(True, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        two = list()
        arg_check.validate_is(one, two, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        two = one
        arg_check.validate_is_not(one, two, 'test')

    with self.assertRaises(BadRequest):
        c = None
        arg_check.validate_is_not_none(c, 'test')

    with self.assertRaises(BadRequest):
        one = list([1, 3])
        two = 2
        arg_check.validate_in(two, one, 'test')

    with self.assertRaises(BadRequest):
        one = list([1, 2, 3])
        two = 2
        arg_check.validate_not_in(two, one, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        arg_check.validate_is_instance(one, dict, 'test')

    with self.assertRaises(BadRequest):
        one = list()
        arg_check.validate_not_is_instance(one, list, 'test')
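
# A hypothetical companion test (sketch, assuming pyon.util.arg_check is
# importable): the same validators return silently when their condition holds.
def test_validations_pass(self):
    import pyon.util.arg_check as arg_check
    arg_check.validate_true(True, 'test')
    arg_check.validate_equal(4, 4, 'test')
    arg_check.validate_in(2, [1, 2, 3], 'test')
    arg_check.validate_is_instance(list(), list, 'test')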
def query_range(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

    validate_true(from_value is not None, 'from_value not specified')
    validate_true(isinstance(from_value, int) or isinstance(from_value, float), 'from_value is not a valid number')
    validate_true(to_value is not None, 'to_value not specified')
    validate_true(isinstance(to_value, int) or isinstance(to_value, float), 'to_value is not a valid number')
    validate_true(source_id, 'source_id not specified')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
    source = self.clients.resource_registry.read(source_id)

    #---------------------------------------------------------------------------------------------------
    # If source is a view, catalog or collection, go through it and recursively call query_range on all
    # the results in the indexes
    #---------------------------------------------------------------------------------------------------
    iterate = self._multi(self.query_range, source, field=field, from_value=from_value, to_value=to_value,
                          order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)
    if limit:
        es.size(limit)
    if field == '*':
        field = '_all'

    query = ep.ElasticQuery().range(field=field, from_value=from_value, to_value=to_value)
    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)
    return self._results_from_response(response, id_only)
def on_start(self):
    super(DemoStreamAlertTransform, self).on_start()

    #-------------------------------------------------------------------------------------
    # Values that are passed in when the transform is launched
    #-------------------------------------------------------------------------------------
    self.instrument_variable_name = self.CFG.get_safe('process.variable_name', 'input_voltage')
    self.time_field_name = self.CFG.get_safe('process.time_field_name', 'preferred_timestamp')
    self.valid_values = self.CFG.get_safe('process.valid_values', [-200, 200])
    self.timer_origin = self.CFG.get_safe('process.timer_origin', 'Interval Timer')
    self.timer_interval = self.CFG.get_safe('process.timer_interval', 6)

    # Check that valid_values is a list
    validate_is_instance(self.valid_values, list)

    # Start the timer
    self.ssclient = SchedulerServiceProcessClient(node=self.container.node, process=self)
    timer_id = self._create_interval_timer_with_end_time(timer_interval=self.timer_interval, end_time=-1)
def persist_file(self, file_data='', digest='', metadata=None):
    ds = self.container.datastore_manager.get_datastore(self.datastore_name, DS.DS_PROFILE.FILESYSTEM)
    validate_is_instance(file_data, basestring, "File or binary data must be a string.")
    validate_is_instance(metadata, File)

    if self.list_files(metadata.name + metadata.extension):
        raise BadRequest('%s%s already exists.' % (metadata.name, metadata.extension))

    digest_ = sha224(file_data).hexdigest()
    if digest:
        validate_equal(digest, digest_, "The provided digest does not match the file's digest. Ensure you are using sha224.")
    else:
        digest = digest_

    extension = metadata.extension
    if '.' in metadata.name:
        t = metadata.name.split('.')
        metadata.name, metadata.extension = ('.'.join(t[:-1]), '.' + t[-1])
    url = FileSystem.get_hierarchical_url(FS.CACHE, digest, extension)
    try:
        with open(url, 'w+b') as f:
            f.write(file_data)
    except Exception:
        log.exception('Failed to write %s', url)
        raise BadRequest('Could not successfully write file data')
    if metadata.name[0] != '/':
        metadata.name = '/' + metadata.name
    metadata.url = url
    metadata.digest = digest
    metadata.created_date = IonTime().to_string()
    metadata.modified_date = IonTime().to_string()
    metadata.size = len(file_data)
    doc_id, rev_id = ds.create(metadata)
    return doc_id
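
# Sketch of the digest handshake persist_file expects: callers may precompute
# a sha224 hex digest of the raw bytes and pass it along (hashlib only; the
# data and the commented call are illustrative).
from hashlib import sha224
file_data = 'example file contents'
digest = sha224(file_data).hexdigest()
# persist_file(file_data=file_data, digest=digest, metadata=...)  # hypothetical call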
def __init__(self, process=None, stream_id='', stream_route=None, exchange_point='', routing_key=''):
    '''
    Creates a StreamPublisher which publishes to the specified stream by default and is attached to the
    specified process.
    @param process        The process to which the publisher is attached.
    @param stream_id      Stream identifier for the publishing stream.
    @param stream_route   A StreamRoute corresponding to the stream_id
    @param exchange_point The name of the exchange point, to be used in lieu of stream_route or stream_id
    @param routing_key    The routing key to be used in lieu of stream_route or stream_id
    '''
    super(StreamPublisher, self).__init__()
    validate_is_instance(process, BaseService, 'No valid process provided.')
    #--------------------------------------------------------------------------------
    # The important part of publishing is the stream_route, and there are three ways
    # to get the stream route:
    # - The route is obtained from Pubsub Management with a stream id.
    # - The route is obtained by combining exchange_point and the routing_key,
    #   but all other information is lost (credentials, etc.)
    # - The route is obtained by being provided directly to __init__
    #--------------------------------------------------------------------------------
    self.stream_id = stream_id
    if stream_id:
        # Regardless of what's passed in for stream_route, look it up; prevents mismatching
        pubsub_cli = PubsubManagementServiceProcessClient(process=process, node=process.container.node)
        self.stream_route = pubsub_cli.read_stream_route(stream_id)
    elif not stream_route:
        self.stream_route = None
        if exchange_point and routing_key:
            self.stream_route = StreamRoute(exchange_point=exchange_point, routing_key=routing_key)
        else:
            pubsub_cli = PubsubManagementServiceProcessClient(process=process, node=process.container.node)
            stream_id, stream_route = pubsub_cli.create_stream(process.id, exchange_point=exchange_point or 'void')
            self.stream_id = stream_id
            self.stream_route = stream_route
    else:
        self.stream_route = stream_route
    validate_is_instance(self.stream_route, StreamRoute, 'No valid stream route provided to publisher.')

    self.container = process.container
    self.xp = self.container.ex_manager.create_xp(self.stream_route.exchange_point)
    self.xp_route = self.xp.create_route(self.stream_route.routing_key)
def query_time(self, source_id='', field='', from_value=None, to_value=None, order=None, limit=0, offset=0, id_only=False):
    if not self.use_es:
        raise BadRequest('Can not make queries without ElasticSearch, enable in res/config/pyon.yml')

    if from_value is not None:
        validate_is_instance(from_value, basestring, '"From" is not a valid string (%s)' % from_value)
    if to_value is not None:
        validate_is_instance(to_value, basestring, '"To" is not a valid string')

    es = ep.ElasticSearch(host=self.elasticsearch_host, port=self.elasticsearch_port)
    source = self.clients.resource_registry.read(source_id)

    #---------------------------------------------------------------------------------------------------
    # If source is a view, catalog or collection, go through it and recursively call query_time on all
    # the results in the indexes
    #---------------------------------------------------------------------------------------------------
    iterate = self._multi(self.query_time, source, field=field, from_value=from_value, to_value=to_value,
                          order=order, limit=limit, offset=offset, id_only=id_only)
    if iterate is not None:
        return iterate

    index = source
    validate_is_instance(index, ElasticSearchIndex, '%s does not refer to a valid index.' % source_id)
    if order:
        validate_is_instance(order, dict, 'Order is incorrect.')
        es.sort(**order)
    if limit:
        es.size(limit)
    if field == '*':
        field = '_all'

    if from_value is not None:
        from_value = calendar.timegm(dateutil.parser.parse(from_value).timetuple()) * 1000
    if to_value is not None:
        to_value = calendar.timegm(dateutil.parser.parse(to_value).timetuple()) * 1000

    query = ep.ElasticQuery.range(field=field, from_value=from_value, to_value=to_value)
    response = IndexManagementService._es_call(es.search_index_advanced, index.index_name, query)
    IndexManagementService._check_response(response)
    return self._results_from_response(response, id_only)
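
# Sketch of the timestamp conversion query_time applies before building the
# range query: ISO strings become epoch milliseconds (UTC), using the same
# calendar and dateutil imports the method relies on.
import calendar
import dateutil.parser
print(calendar.timegm(dateutil.parser.parse('2012-05-01T00:00:00Z').timetuple()) * 1000)  # -> 1335830400000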
def create_ingestion_configuration(self, name='', exchange_point_id='', queues=None):
    validate_is_instance(queues, list, 'The queues parameter is not a proper list.')
    validate_true(len(queues) > 0, 'Ingestion needs at least one queue to ingest from')
    for queue in queues:
        validate_is_instance(queue, IngestionQueue)

    ingestion_config = IngestionConfiguration()

    ingestion_config.name = name
    ingestion_config.exchange_point = exchange_point_id
    ingestion_config.queues = queues

    config_id, rev = self.clients.resource_registry.create(ingestion_config)
    return config_id