def on_start(self): #pragma no cover #-------------------------------------------------------------------------------- # Explicit on_start #-------------------------------------------------------------------------------- # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created # We want explicit management of the thread and subscriber object for ingestion TransformStreamProcess.on_start(self) self.queue_name = self.CFG.get_safe('process.queue_name',self.id) self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback) self.thread_lock = RLock() #-------------------------------------------------------------------------------- # Normal on_start after this point #-------------------------------------------------------------------------------- BaseIngestionWorker.on_start(self) self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self) self.add_endpoint(self._rpc_server) self.event_publisher = EventPublisher(OT.DatasetModified) self.stored_value_manager = StoredValueManager(self.container) self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[]) self.input_product = self.CFG.get_safe('process.input_product','') self.new_lookups = Queue() self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True) self.add_endpoint(self.lookup_monitor) self.connection_id = '' self.connection_index = None self.start_listener()
def process_execution(self, temp_vector, qc_params, bad_times): interval_key = uuid4().hex data_product_id = self.make_large_dataset(temp_vector) async_queue = Queue() def cb(event, *args, **kwargs): if '_'.join(event.qc_parameter.split('_')[1:]) not in qc_params: return # not a QC parameter we care about times = event.temporal_values self.assertEquals(len(times), bad_times) async_queue.put(1) es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=data_product_id, callback=cb, auto_delete=True) es.start() self.addCleanup(es.stop) config = DotDict() config.process.interval_key = interval_key config.process.qc_params = qc_params self.sync_launch(config) # So now the process is started, time to throw an event at it ep = EventPublisher(event_type='TimerEvent') ep.publish_event(origin=interval_key) try: async_queue.get(timeout=120) except Empty: raise AssertionError('QC was not flagged in time')
def start(self): self.container.stats_mgr = ContainerStatsManager(self.container) self.container.stats_mgr.start() ## create queue listener and publisher self.sender = EventPublisher(event_type="ContainerManagementResult") self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event) with self.lock: self.running = True self.receiver.start() log.debug('Container ready for container management requests')
def start(self): if self.events_enabled: # init change event publisher self.event_pub = EventPublisher() # Register to receive directory changes # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent", # origin="Directory", # callback=self.receive_directory_change_event) # Create directory root entry (for current org) if not existing self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True)
def process(self, dataset_id, start_time=0, end_time=0): if not dataset_id: raise BadRequest('No dataset id specified.') now = time.time() start_time = start_time or (now - (3600 * (self.run_interval + 1)) ) # Every N hours with 1 of overlap end_time = end_time or now qc_params = [i for i in self.qc_params if i in self.qc_suffixes ] or self.qc_suffixes self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent) log.debug('Iterating over the data blocks') for st, et in self.chop(int(start_time), int(end_time)): log.debug('Chopping %s:%s', st, et) log.debug( "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et) try: granule = self.data_retriever.retrieve(dataset_id, query={ 'start_time': st, 'end_time': et }) except BadRequest: data_products, _ = self.container.resource_registry.find_subjects( object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct) for data_product in data_products: log.exception('Failed to perform QC Post Processing on %s', data_product.name) log.error('Calculated Start Time: %s', st) log.error('Calculated End Time: %s', et) raise log.debug('Retrieved Data') rdt = RecordDictionaryTool.load_from_granule(granule) qc_fields = [ i for i in rdt.fields if any([i.endswith(j) for j in qc_params]) ] log.debug('QC Fields: %s', qc_fields) for field in qc_fields: val = rdt[field] if val is None: continue if not np.all(val): log.debug('Found QC Alerts') indexes = np.where(val == 0) timestamps = rdt[rdt.temporal_parameter][indexes[0]] self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})
def test_base_subscriber_as_catchall(self): ar = event.AsyncResult() gq = queue.Queue() self.count = 0 def cb(*args, **kwargs): self.count += 1 gq.put(args[0]) if self.count == 2: ar.set() sub = EventSubscriber(callback=cb) pub1 = EventPublisher(event_type="ResourceEvent") pub2 = EventPublisher(event_type="ContainerLifecycleEvent") self._listen(sub) pub1.publish_event(origin="some", description="1") pub2.publish_event(origin="other", description="2") ar.get(timeout=5) res = [] for x in xrange(self.count): res.append(gq.get(timeout=5)) self.assertEquals(len(res), 2) self.assertEquals(res[0].description, "1") self.assertEquals(res[1].description, "2")
def process(self, dataset_id, start_time=0, end_time=0): if not dataset_id: raise BadRequest('No dataset id specified.') now = time.time() start_time = start_time or (now - (3600 * (self.run_interval + 1)) ) # Every N hours with 1 of overlap end_time = end_time or now qc_params = [i for i in self.qc_params if i in self.qc_suffixes ] or self.qc_suffixes self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent) log.debug('Iterating over the data blocks') for st, et in self.chop(int(start_time), int(end_time)): log.debug('Chopping %s:%s', st, et) log.debug( "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s')", dataset_id, st, et) granule = self.data_retriever.retrieve(dataset_id, query={ 'start_time': st, 'end_time': et }) log.debug('Retrieved Data') rdt = RecordDictionaryTool.load_from_granule(granule) qc_fields = [ i for i in rdt.fields if any([i.endswith(j) for j in qc_params]) ] log.debug('QC Fields: %s', qc_fields) for field in qc_fields: val = rdt[field] if val is None: continue if not np.all(val): log.debug('Found QC Alerts') indexes = np.where(val == 0) timestamps = rdt[rdt.temporal_parameter][indexes[0]] self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})
def test_pub_on_different_origins(self): ar = event.AsyncResult() gq = queue.Queue() self.count = 0 def cb(*args, **kwargs): self.count += 1 gq.put(args[0]) if self.count == 3: ar.set() sub = EventSubscriber(event_type="ResourceEvent", callback=cb) pub = EventPublisher(event_type="ResourceEvent") self._listen(sub) pub.publish_event(origin="one", description="1") pub.publish_event(origin="two", description="2") pub.publish_event(origin="three", description="3") ar.get(timeout=5) res = [] for x in xrange(self.count): res.append(gq.get(timeout=5)) self.assertEquals(len(res), 3) self.assertEquals(res[0].description, "1") self.assertEquals(res[1].description, "2") self.assertEquals(res[2].description, "3")
def fail(x): ''' The goal behind this function is to publish an event so that threads can synchronize with it to verify that it was run, regardless of context ''' event_publisher = EventPublisher(OT.GranuleIngestionErrorEvent) try: event_publisher.publish_event(error_msg='failure') raise StandardError('Something you tried to do failed') finally: event_publisher.close()
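# A minimal sketch (hypothetical helper; names are illustrative) of how another thread can
# synchronize with fail() above by listening for the GranuleIngestionErrorEvent it publishes,
# mirroring the EventSubscriber patterns used in the tests in this file.
from gevent.event import AsyncResult

def wait_for_failure(timeout=10):
    ar = AsyncResult()
    sub = EventSubscriber(event_type=OT.GranuleIngestionErrorEvent,
                          callback=lambda event, *args, **kwargs: ar.set(event))
    sub.start()
    try:
        fail('anything')              # publishes the error event, then raises
    except StandardError:
        pass
    evt = ar.get(timeout=timeout)     # blocks until the event arrives
    sub.stop()
    return evt.error_msg              # 'failure'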
def test_subscriber_listening_for_specific_origin(self): ar = event.AsyncResult() self.count = 0 def cb(*args, **kwargs): self.count += 1 ar.set(args[0]) sub = EventSubscriber(event_type="ResourceEvent", origin="specific", callback=cb) pub = EventPublisher(event_type="ResourceEvent") self._listen(sub) pub.publish_event(origin="notspecific", description="1") pub.publish_event(origin="notspecific", description="2") pub.publish_event(origin="specific", description="3") pub.publish_event(origin="notspecific", description="4") evmsg = ar.get(timeout=5) self.assertEquals(self.count, 1) self.assertEquals(evmsg.description, "3")
def start(self): self.container.event_pub = EventPublisher()
class QCPostProcessing(SimpleProcess): ''' QC Post Processing Process This process provides the capability to ION clients and operators to evaluate the automated quality control flags on various data products. This process should be run periodically with overlapping spans of data to ensure complete dataset QC verification. The parameters that this process accepts as configuration are: - dataset_id: The dataset identifier, required. - start_time: Unix timestamp, defaults to 24 hours in the past - end_time: Unix timestamp, defaults to current time - qc_params: a list of qc functions to evaluate, currently supported functions are: ['glblrng_qc', 'spketst_qc', 'stuckvl_qc'], defaults to all ''' qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc'] def on_start(self): SimpleProcess.on_start(self) self.data_retriever = DataRetrieverServiceProcessClient(process=self) self.interval_key = self.CFG.get_safe('process.interval_key', None) self.qc_params = self.CFG.get_safe('process.qc_params', []) validate_is_not_none( self.interval_key, 'An interval key is necessary to launch this process') self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent, origin=self.interval_key, callback=self._event_callback, auto_delete=True) self.add_endpoint(self.event_subscriber) self.resource_registry = self.container.resource_registry self.run_interval = self.CFG.get_safe( 'service.qc_processing.run_interval', 24) def _event_callback(self, *args, **kwargs): log.info('QC Post Processing Triggered') dataset_ids, _ = self.resource_registry.find_resources( restype=RT.Dataset, id_only=True) for dataset_id in dataset_ids: log.info('QC Post Processing for dataset %s', dataset_id) try: self.process(dataset_id) except BadRequest as e: if 'Problems reading from the coverage' in e.message: log.error('Failed to read from dataset') def process(self, dataset_id, start_time=0, end_time=0): if not dataset_id: raise BadRequest('No dataset id specified.') now = time.time() start_time = start_time or (now - (3600 * (self.run_interval + 1)) ) # Every N hours with 1 of overlap end_time = end_time or now qc_params = [i for i in self.qc_params if i in self.qc_suffixes ] or self.qc_suffixes self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent) log.debug('Iterating over the data blocks') for st, et in self.chop(int(start_time), int(end_time)): log.debug('Chopping %s:%s', st, et) log.debug( "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})", dataset_id, st, et) granule = self.data_retriever.retrieve(dataset_id, query={ 'start_time': st, 'end_time': et }) log.debug('Retrieved Data') rdt = RecordDictionaryTool.load_from_granule(granule) qc_fields = [ i for i in rdt.fields if any([i.endswith(j) for j in qc_params]) ] log.debug('QC Fields: %s', qc_fields) for field in qc_fields: val = rdt[field] if val is None: continue if not np.all(val): log.debug('Found QC Alerts') indexes = np.where(val == 0) timestamps = rdt[rdt.temporal_parameter][indexes[0]] self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {}) def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration): log.info('Flagging QC for %s', parameter) data_product_ids, _ = self.resource_registry.find_subjects( object=dataset_id, subject_type=RT.DataProduct, predicate=PRED.hasDataset, id_only=True) for data_product_id in data_product_ids: self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration) @classmethod def chop(cls, start_time, end_time): while start_time < end_time: yield (start_time, min(start_time + 3600, end_time)) start_time = min(start_time + 3600, end_time) return
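# Worked example (illustrative): chop() yields consecutive one-hour windows covering
# [start_time, end_time), with a shorter final window when the span is not a whole number
# of hours; process() above iterates these windows when retrieving data blocks.
windows = list(QCPostProcessing.chop(0, 7500))
assert windows == [(0, 3600), (3600, 7200), (7200, 7500)]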
class ContainerManager(object): def __init__(self, container, handlers=DEFAULT_HANDLERS): self.container = container self.running = False # make sure start() completes before an event is handled, # and any event is either handled before stop() begins, # or the handler begins after stop() completes and the event is dropped self.lock = Lock() self.handlers = handlers[:] def start(self): self.container.stats_mgr = ContainerStatsManager(self.container) self.container.stats_mgr.start() ## create queue listener and publisher self.sender = EventPublisher(event_type="ContainerManagementResult") self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event) with self.lock: self.running = True self.receiver.start() log.debug('Container ready for container management requests') def stop(self): log.debug('container management stopping') with self.lock: self.receiver.stop() self.sender.close() self.running = False log.debug('container management stopped') self.container.stats_mgr.stop() def add_handler(self, handler): self.handlers.append(handler) def _get_handlers(self, action): out = [] for handler in self.handlers: if handler.can_handle_request(action): out.append(handler) return out def _receive_event(self, event, headers): with self.lock: if not isinstance(event, ContainerManagementRequest): log.trace('ignoring wrong type event: %r', event) return if not self.running: log.warn('ignoring admin message received after shutdown: %s', event.action) return predicate = ContainerSelector.from_object(event.predicate) if predicate.should_handle(self.container): log.trace('handling admin message: %s', event.action) self._perform_action(event.action) else: log.trace('ignoring admin action: %s', event.action) if SEND_RESULT_IF_NOT_SELECTED: self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected') log.debug('received action: %s, outcome: not selected', event.action) def _perform_action(self, action): handlers = self._get_handlers(action) if not handlers: log.info('action accepted but no handlers found: %s', action) result = 'unhandled' self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result)) log.debug('received action: %s, outcome: %s', action, result) else: for handler in handlers: try: result = handler.handle_request(action) or "completed" except Exception,e: log.error("handler %r failed to perform action: %s", handler, action, exc_info=True) result = e self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result)) log.debug('performed action: %s, outcome: %s', action, result)
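# A minimal sketch of the handler interface ContainerManager expects. Assumption: only the
# two methods invoked above, can_handle_request() and handle_request(), are required; the
# class name and the 'set_log_level' action attributes are purely illustrative.
class LogLevelHandler(object):
    def can_handle_request(self, action):
        # _get_handlers() calls this to decide whether handle_request() gets invoked
        return getattr(action, 'name', None) == 'set_log_level'

    def handle_request(self, action):
        # The return value (or "completed" if falsy) becomes the outcome that
        # _perform_action() publishes in the ContainerManagementResult event.
        log.info('setting log level to %s', action.value)
        return 'completed'

# manager.add_handler(LogLevelHandler())   # register with a ContainerManager instance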
class ScienceGranuleIngestionWorker(TransformStreamListener, BaseIngestionWorker): CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5) def __init__(self, *args, **kwargs): TransformStreamListener.__init__(self, *args, **kwargs) BaseIngestionWorker.__init__(self, *args, **kwargs) #-------------------------------------------------------------------------------- # Ingestion Cache # - Datasets # - Coverage instances #-------------------------------------------------------------------------------- self._datasets = collections.OrderedDict() self._coverages = collections.OrderedDict() self._bad_coverages = {} self.time_stats = Accumulator(format='%3f') # unique ID to identify this worker in log msgs self._id = uuid.uuid1() def on_start(self): #pragma no cover #-------------------------------------------------------------------------------- # Explicit on_start #-------------------------------------------------------------------------------- # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created # We want explicit management of the thread and subscriber object for ingestion TransformStreamProcess.on_start(self) self.queue_name = self.CFG.get_safe('process.queue_name', self.id) self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback) self.thread_lock = RLock() #-------------------------------------------------------------------------------- # Normal on_start after this point #-------------------------------------------------------------------------------- BaseIngestionWorker.on_start(self) self._rpc_server = self.container.proc_manager._create_listening_endpoint( from_name=self.id, process=self) self.add_endpoint(self._rpc_server) self.event_publisher = EventPublisher(OT.DatasetModified) self.stored_value_manager = StoredValueManager(self.container) self.lookup_docs = self.CFG.get_safe('process.lookup_docs', []) self.input_product = self.CFG.get_safe('process.input_product', '') self.new_lookups = Queue() self.lookup_monitor = EventSubscriber( event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True) self.add_endpoint(self.lookup_monitor) self.connection_id = '' self.connection_index = None self.start_listener() def on_quit(self): #pragma no cover self.event_publisher.close() if self.subscriber_thread: self.stop_listener() for stream, coverage in self._coverages.iteritems(): try: coverage.close(timeout=5) except: log.exception('Problems closing the coverage') self._coverages.clear() TransformStreamListener.on_quit(self) BaseIngestionWorker.on_quit(self) def start_listener(self): # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering with self.thread_lock: self.subscriber_thread = self._process.thread_manager.spawn( self.subscriber.listen, thread_name='%s-subscriber' % self.id) def stop_listener(self): # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one) with self.thread_lock: self.subscriber.close() self.subscriber_thread.join(timeout=10) for stream, coverage in self._coverages.iteritems(): try: coverage.close(timeout=5) except: log.exception('Problems closing the coverage') self._coverages.clear() self.subscriber_thread = None def pause(self): if self.subscriber_thread is not None: self.stop_listener() def resume(self): if self.subscriber_thread is None: self.start_listener() def _add_lookups(self, event, *args, **kwargs): if event.origin == 
self.input_product: if isinstance(event.reference_keys, list): self.new_lookups.put(event.reference_keys) def _new_dataset(self, stream_id): ''' Adds a new dataset to the internal cache of the ingestion worker ''' rr_client = self.container.resource_registry datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset, predicate=PRED.hasStream, object=stream_id, id_only=True) if datasets: return datasets[0] return None def _get_data_products(self, dataset_id): rr_client = self.container.resource_registry data_products, _ = rr_client.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct, id_only=False) return data_products #-------------------------------------------------------------------------------- # Metadata Handlers #-------------------------------------------------------------------------------- def initialize_metadata(self, dataset_id, rdt): ''' Initializes a metadata document in the object store. The document contains information about the bounds and extents of the dataset as well as other metadata to improve performance. ''' object_store = self.container.object_store key = dataset_id bounds = {} extents = {} last_values = {} rough_size = 0 for k, v in rdt.iteritems(): v = v[:].flatten() if v.dtype.char not in ('S', 'O', 'U', 'V'): bounds[k] = (np.min(v), np.max(v)) last_values[k] = v[-1] extents[k] = len(rdt) rough_size += len(rdt) * 4 doc = { 'bounds': bounds, 'extents': extents, 'last_values': last_values, 'size': rough_size } doc = numpy_walk(doc) object_store.create_doc(doc, object_id=key) return def update_metadata(self, dataset_id, rdt): ''' Updates the metadata document with the latest information available ''' self.update_data_product_metadata(dataset_id, rdt) # Grab the document object_store = self.container.object_store key = dataset_id try: doc = object_store.read_doc(key) except NotFound: return self.initialize_metadata(dataset_id, rdt) # These are the fields we're interested in bounds = doc['bounds'] extents = doc['extents'] last_values = doc['last_values'] rough_size = doc['size'] for k, v in rdt.iteritems(): if k not in bounds: continue v = v[:].flatten() # Get the numpy representation (dense array). if v.dtype.char not in ('S', 'O', 'U', 'V'): l_min = np.min(v) l_max = np.max(v) o_min, o_max = bounds[k] bounds[k] = (min(l_min, o_min), max(l_max, o_max)) last_values[k] = v[-1] # Update the bounds # Increase the extents extents[k] = extents[k] + len(rdt) # How about the last value? 
rough_size += len(rdt) * 4 doc['size'] = rough_size # Sanitize it doc = numpy_walk(doc) object_store.update_doc(doc) def update_data_product_metadata(self, dataset_id, rdt): data_products = self._get_data_products(dataset_id) for data_product in data_products: self.update_time(data_product, rdt[rdt.temporal_parameter][:]) self.update_geo(data_product, rdt) try: self.container.resource_registry.update(data_product) except: # TODO: figure out WHICH Exception gets raised here when the bounds are off log.error("Problem updating the data product metadata", exc_info=True) # Carry on :( def update_time(self, data_product, t): ''' Sets the nominal_datetime for a data product correctly Accounts for things like NTP and out of order data ''' t0, t1 = self.get_datetime_bounds(data_product) #TODO: Account for non NTP-based timestamps min_t = np.min(t) - 2208988800 max_t = np.max(t) - 2208988800 if t0: t0 = min(t0, min_t) else: t0 = min_t if t1: t1 = max(t1, max_t) else: t1 = max_t if t0 > t1: log.error("This should never happen but t0 > t1") data_product.nominal_datetime.start_datetime = float(t0) data_product.nominal_datetime.end_datetime = float(t1) def get_datetime(self, nominal_datetime): ''' Returns a floating point value for the datetime or None if it's an empty string ''' t = None # So normally this is a string if isinstance(nominal_datetime, (float, int)): t = nominal_datetime # simple enough elif isinstance(nominal_datetime, basestring): if nominal_datetime: # not an empty string # Try to convert it to a float try: t = float(nominal_datetime) except ValueError: pass return t def get_datetime_bounds(self, data_product): '''Returns the min and max for the bounds in the nominal_datetime attr ''' t0 = self.get_datetime(data_product.nominal_datetime.start_datetime) t1 = self.get_datetime(data_product.nominal_datetime.end_datetime) return (t0, t1) def update_geo(self, data_product, rdt): ''' Finds the maximum bounding box ''' lat = None lon = None for p in rdt: if rdt._rd[p] is None: continue # TODO: Not an all encompassing list of acceptable names for lat and lon if p.lower() in ('lat', 'latitude', 'y_axis'): lat = np.asscalar(rdt[p][-1]) elif p.lower() in ('lon', 'longitude', 'x_axis'): lon = np.asscalar(rdt[p][-1]) if lat and lon: break if lat and lon: data_product.geospatial_bounds.geospatial_latitude_limit_north = lat data_product.geospatial_bounds.geospatial_latitude_limit_south = lat data_product.geospatial_bounds.geospatial_longitude_limit_east = lon data_product.geospatial_bounds.geospatial_longitude_limit_west = lon #-------------------------------------------------------------------------------- # Cache managemnt #-------------------------------------------------------------------------------- def get_dataset(self, stream_id): ''' Memoization (LRU) of _new_dataset ''' try: result = self._datasets.pop(stream_id) except KeyError: result = self._new_dataset(stream_id) if result is None: return None if len(self._datasets) >= self.CACHE_LIMIT: self._datasets.popitem(0) self._datasets[stream_id] = result return result def get_coverage(self, stream_id): ''' Memoization (LRU) of _get_coverage ''' try: result = self._coverages.pop(stream_id) except KeyError: dataset_id = self.get_dataset(stream_id) if dataset_id is None: return None result = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a') if result is None: return None if len(self._coverages) >= self.CACHE_LIMIT: k, coverage = self._coverages.popitem(0) coverage.close(timeout=5) self._coverages[stream_id] = result return 
result #-------------------------------------------------------------------------------- # Granule Parsing and Handling #-------------------------------------------------------------------------------- @handle_stream_exception() def recv_packet(self, msg, stream_route, stream_id): ''' The consumer callback to parse and manage the granule. The message is ACK'd once the function returns ''' log.trace('received granule for stream %s', stream_id) if msg == {}: log.error('Received empty message from stream: %s', stream_id) return # Message validation if not isinstance(msg, Granule): log.error('Ingestion received a message that is not a granule: %s', msg) return rdt = RecordDictionaryTool.load_from_granule(msg) if rdt is None: log.error('Invalid granule (no RDT) for stream %s', stream_id) return if not len(rdt): log.debug('Empty granule for stream %s', stream_id) return self.persist_or_timeout(stream_id, rdt) def persist_or_timeout(self, stream_id, rdt): ''' A loop that tries to parse and store a granule for up to five minutes, and waits an increasing amount of time each iteration. ''' done = False timeout = 2 start = time.time() while not done: if self.parse_granule(stream_id, rdt, start, done): return # We're all done, everything worked if (time.time() - start) > MAX_RETRY_TIME: # After a while, give up dataset_id = self.get_dataset(stream_id) log.error( "We're giving up, the coverage needs to be inspected %s", DatasetManagementService._get_coverage_path(dataset_id)) raise if stream_id in self._coverages: log.info('Popping coverage for stream %s', stream_id) self._coverages.pop(stream_id) gevent.sleep(timeout) timeout = min(60 * 5, timeout * 2) def parse_granule(self, stream_id, rdt, start, done): try: self.add_granule(stream_id, rdt) return True except Exception as e: log.exception('An issue with coverage, retrying after a bit') return False return True # never reaches here, Added for clarity def dataset_changed(self, dataset_id, window): self.event_publisher.publish_event(origin=dataset_id, author=self.id, window=window) def build_data_dict(self, rdt): np_dict = {} time_array = rdt[rdt.temporal_parameter] if time_array is None: raise ValueError("A granule needs a time array") for k, v in rdt.iteritems(): # Sparse values are different and aren't constructed using NumpyParameterData if isinstance(rdt.param_type(k), SparseConstantType): value = v[0] if hasattr(value, 'dtype'): value = np.asscalar(value) time_start = np.asscalar(time_array[0]) np_dict[k] = ConstantOverTime(k, value, time_start=time_start, time_end=None) # From now on continue elif isinstance(rdt.param_type(k), CategoryType): log.warning("Category types temporarily unsupported") continue elif isinstance(rdt.param_type(k), RecordType): value = v else: value = v try: if k == 'temp_sample': print repr(value) np_dict[k] = NumpyParameterData(k, value, time_array) except: raise return np_dict def insert_values(self, coverage, rdt, stream_id): np_dict = self.build_data_dict(rdt) if 'ingestion_timestamp' in coverage.list_parameters(): timestamps = np.array([(time.time() + 2208988800) for i in rdt[rdt.temporal_parameter]]) np_dict['ingestion_timestamp'] = NumpyParameterData( 'ingestion_timestamp', timestamps, rdt[rdt.temporal_parameter]) # If it's sparse only if self.sparse_only(rdt): del np_dict[rdt.temporal_parameter] try: coverage.set_parameter_values(np_dict) except IOError as e: log.error("Couldn't insert values for coverage: %s", coverage.persistence_dir, exc_info=True) try: coverage.close() finally: self._bad_coverages[stream_id] = 
1 raise CorruptionError(e.message) except KeyError as e: if 'has not been initialized' in e.message: coverage.refresh() raise except Exception as e: print repr(rdt) raise def add_granule(self, stream_id, rdt): ''' Appends the granule's data to the coverage and persists it. ''' if stream_id in self._bad_coverages: log.info( 'Message attempting to be inserted into bad coverage: %s', DatasetManagementService._get_coverage_path( self.get_dataset(stream_id))) #-------------------------------------------------------------------------------- # Coverage determination and appending #-------------------------------------------------------------------------------- dataset_id = self.get_dataset(stream_id) if not dataset_id: log.error('No dataset could be determined on this stream: %s', stream_id) return try: coverage = self.get_coverage(stream_id) except IOError as e: log.error( "Couldn't open coverage: %s", DatasetManagementService._get_coverage_path( self.get_dataset(stream_id))) raise CorruptionError(e.message) if not coverage: log.error( 'Could not persist coverage from granule, coverage is None') return #-------------------------------------------------------------------------------- # Actual persistence #-------------------------------------------------------------------------------- if rdt[rdt.temporal_parameter] is None: log.warning("Empty granule received") return # Parse the RDT and set the values in the coverage self.insert_values(coverage, rdt, stream_id) # Force the data to be flushed DatasetManagementService._save_coverage(coverage) self.update_metadata(dataset_id, rdt) try: window = rdt[rdt.temporal_parameter][[0, -1]] window = window.tolist() except (ValueError, IndexError): window = None self.dataset_changed(dataset_id, window) def sparse_only(self, rdt): ''' A sparse only rdt will have only a time array AND sparse values, no other data ''' if rdt[rdt.temporal_parameter] is None: return False # No time, so it's just empty at_least_one = False for key in rdt.iterkeys(): # Skip time, that needs to be there if key == rdt.temporal_parameter: continue if not isinstance(rdt.param_type(key), SparseConstantType): return False else: at_least_one = True return at_least_one
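# A standalone sketch of the OrderedDict-based LRU memoization that get_dataset() and
# get_coverage() above implement: a hit is popped and re-inserted so it becomes the most
# recent entry, and the oldest entry is evicted once the limit is reached. Names are
# illustrative; the real get_coverage() also closes the coverage it evicts.
import collections

class LRUCache(object):
    def __init__(self, limit, loader):
        self.limit = limit
        self.loader = loader                      # called to build a value on a cache miss
        self._items = collections.OrderedDict()

    def get(self, key):
        try:
            value = self._items.pop(key)          # hit: remove, then re-insert as most recent
        except KeyError:
            value = self.loader(key)              # miss: build the value
            if value is None:
                return None
            if len(self._items) >= self.limit:
                self._items.popitem(last=False)   # evict the least recently used entry
        self._items[key] = value
        return value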
def test_pub_on_different_subtypes(self): ar = event.AsyncResult() gq = queue.Queue() self.count = 0 def cb(event, *args, **kwargs): self.count += 1 gq.put(event) if event.description == "end": ar.set() sub = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb) sub.start() pub1 = EventPublisher(event_type="ResourceModifiedEvent") pub2 = EventPublisher(event_type="ContainerLifecycleEvent") pub1.publish_event(origin="two", sub_type="st2", description="2") pub2.publish_event(origin="three", sub_type="st1", description="3") pub1.publish_event(origin="one", sub_type="st1", description="1") pub1.publish_event(origin="four", sub_type="st1", description="end") ar.get(timeout=5) sub.stop() res = [] for x in xrange(self.count): res.append(gq.get(timeout=5)) self.assertEquals(len(res), 2) self.assertEquals(res[0].description, "1")
class ScienceGranuleIngestionWorker(TransformStreamListener, BaseIngestionWorker): CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5) def __init__(self, *args, **kwargs): TransformStreamListener.__init__(self, *args, **kwargs) BaseIngestionWorker.__init__(self, *args, **kwargs) #-------------------------------------------------------------------------------- # Ingestion Cache # - Datasets # - Coverage instances #-------------------------------------------------------------------------------- self._datasets = collections.OrderedDict() self._coverages = collections.OrderedDict() self._bad_coverages = {} self.time_stats = Accumulator(format='%3f') # unique ID to identify this worker in log msgs self._id = uuid.uuid1() def on_start(self): #pragma no cover #-------------------------------------------------------------------------------- # Explicit on_start #-------------------------------------------------------------------------------- # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created # We want explicit management of the thread and subscriber object for ingestion TransformStreamProcess.on_start(self) self.queue_name = self.CFG.get_safe('process.queue_name', self.id) self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback) self.thread_lock = RLock() #-------------------------------------------------------------------------------- # Normal on_start after this point #-------------------------------------------------------------------------------- BaseIngestionWorker.on_start(self) self._rpc_server = self.container.proc_manager._create_listening_endpoint( from_name=self.id, process=self) self.add_endpoint(self._rpc_server) self.event_publisher = EventPublisher(OT.DatasetModified) self.stored_value_manager = StoredValueManager(self.container) self.lookup_docs = self.CFG.get_safe('process.lookup_docs', []) self.input_product = self.CFG.get_safe('process.input_product', '') self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True) self.ignore_gaps = self.CFG.get_safe('service.ingestion.ignore_gaps', False) self.new_lookups = Queue() self.lookup_monitor = EventSubscriber( event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True) self.add_endpoint(self.lookup_monitor) self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent) self.connection_id = '' self.connection_index = None self.start_listener() def on_quit(self): #pragma no cover self.event_publisher.close() self.qc_publisher.close() if self.subscriber_thread: self.stop_listener() for stream, coverage in self._coverages.iteritems(): try: coverage.close(timeout=5) except: log.exception('Problems closing the coverage') self._coverages.clear() TransformStreamListener.on_quit(self) BaseIngestionWorker.on_quit(self) def start_listener(self): # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering with self.thread_lock: self.subscriber_thread = self._process.thread_manager.spawn( self.subscriber.listen, thread_name='%s-subscriber' % self.id) def stop_listener(self): # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one) with self.thread_lock: self.subscriber.close() self.subscriber_thread.join(timeout=10) for stream, coverage in self._coverages.iteritems(): try: coverage.close(timeout=5) except: log.exception('Problems closing the coverage') self._coverages.clear() self.subscriber_thread = None 
def pause(self): if self.subscriber_thread is not None: self.stop_listener() def resume(self): if self.subscriber_thread is None: self.start_listener() def _add_lookups(self, event, *args, **kwargs): if event.origin == self.input_product: if isinstance(event.reference_keys, list): self.new_lookups.put(event.reference_keys) def _new_dataset(self, stream_id): ''' Adds a new dataset to the internal cache of the ingestion worker ''' rr_client = ResourceRegistryServiceClient() datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset, predicate=PRED.hasStream, object=stream_id, id_only=True) if datasets: return datasets[0] return None def get_dataset(self, stream_id): ''' Memoization (LRU) of _new_dataset ''' try: result = self._datasets.pop(stream_id) except KeyError: result = self._new_dataset(stream_id) if result is None: return None if len(self._datasets) >= self.CACHE_LIMIT: self._datasets.popitem(0) self._datasets[stream_id] = result return result def get_coverage(self, stream_id): ''' Memoization (LRU) of _get_coverage ''' try: result = self._coverages.pop(stream_id) except KeyError: dataset_id = self.get_dataset(stream_id) if dataset_id is None: return None result = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a') if result is None: return None if len(self._coverages) >= self.CACHE_LIMIT: k, coverage = self._coverages.popitem(0) coverage.close(timeout=5) self._coverages[stream_id] = result return result def gap_coverage(self, stream_id): try: old_cov = self._coverages.pop(stream_id) dataset_id = self.get_dataset(stream_id) sdom, tdom = time_series_domain() new_cov = DatasetManagementService._create_simplex_coverage( dataset_id, old_cov.parameter_dictionary, sdom, tdom, old_cov._persistence_layer.inline_data_writes) old_cov.close() result = new_cov except KeyError: result = self.get_coverage(stream_id) self._coverages[stream_id] = result return result def dataset_changed(self, dataset_id, extents, window): self.event_publisher.publish_event(origin=dataset_id, author=self.id, extents=extents, window=window) def evaluate_qc(self, rdt, dataset_id): if self.qc_enabled: for field in rdt.fields: if not (field.endswith('glblrng_qc') or field.endswith('loclrng_qc')): continue try: values = rdt[field] if values is not None: if not all(values): topology = np.where(values == 0) timestamps = rdt[rdt.temporal_parameter][ topology[0]] self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {}) except: continue def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration): data_product_ids, _ = self.container.resource_registry.find_subjects( object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct, id_only=True) for data_product_id in data_product_ids: description = 'Automated Quality Control Alerted on %s' % parameter self.qc_publisher.publish_event(origin=data_product_id, qc_parameter=parameter, temporal_values=temporal_values, configuration=configuration, description=description) def update_connection_index(self, connection_id, connection_index): self.connection_id = connection_id try: connection_index = int(connection_index) self.connection_index = connection_index except ValueError: pass def has_gap(self, connection_id, connection_index): if connection_id: if not self.connection_id: self.update_connection_index(connection_id, connection_index) return False else: if connection_id != self.connection_id: return True if connection_index: if self.connection_index is None: self.update_connection_index(connection_id, connection_index) 
return False try: connection_index = int(connection_index) if connection_index != self.connection_index + 1: return True except ValueError: pass return False def splice_coverage(self, dataset_id, coverage): log.info('Splicing new coverage') DatasetManagementService._splice_coverage(dataset_id, coverage) @handle_stream_exception() def recv_packet(self, msg, stream_route, stream_id): ''' receive packet for ingestion ''' log.trace('received granule for stream %s', stream_id) if msg == {}: log.error('Received empty message from stream: %s', stream_id) return # Message validation if not isinstance(msg, Granule): log.error('Ingestion received a message that is not a granule: %s', msg) return rdt = RecordDictionaryTool.load_from_granule(msg) if rdt is None: log.error('Invalid granule (no RDT) for stream %s', stream_id) return if not len(rdt): log.debug('Empty granule for stream %s', stream_id) return self.persist_or_timeout(stream_id, rdt) def persist_or_timeout(self, stream_id, rdt): """ retry writing coverage multiple times and eventually time out """ done = False timeout = 2 start = time.time() while not done: try: self.add_granule(stream_id, rdt) done = True except: log.exception('An issue with coverage, retrying after a bit') if (time.time() - start) > MAX_RETRY_TIME: # After an hour just give up dataset_id = self.get_dataset(stream_id) log.error( "We're giving up, the coverage needs to be inspected %s", DatasetManagementService._get_coverage_path( dataset_id)) raise if stream_id in self._coverages: log.info('Popping coverage for stream %s', stream_id) self._coverages.pop(stream_id) gevent.sleep(timeout) if timeout > (60 * 5): timeout = 60 * 5 else: timeout *= 2 def expand_coverage(self, coverage, elements, stream_id): try: coverage.insert_timesteps(elements, oob=False) except IOError as e: log.error("Couldn't insert time steps for coverage: %s", coverage.persistence_dir, exc_info=True) try: coverage.close() finally: self._bad_coverages[stream_id] = 1 raise CorruptionError(e.message) def get_stored_values(self, lookup_value): if not self.new_lookups.empty(): new_values = self.new_lookups.get() self.lookup_docs = new_values + self.lookup_docs lookup_value_document_keys = self.lookup_docs for key in lookup_value_document_keys: try: document = self.stored_value_manager.read_value(key) if lookup_value in document: return document[lookup_value] except NotFound: log.warning('Specified lookup document does not exist') return None def fill_lookup_values(self, rdt): rdt.fetch_lookup_values() for field in rdt.lookup_values(): value = self.get_stored_values(rdt.context(field).lookup_value) if value: rdt[field] = value def insert_sparse_values(self, coverage, rdt, stream_id): self.fill_lookup_values(rdt) for field in rdt.fields: if rdt[field] is None: continue if not isinstance( rdt.context(field).param_type, SparseConstantType): # We only set sparse values before insert continue value = rdt[field] try: coverage.set_parameter_values(param_name=field, value=value) except ValueError as e: if "'lower_bound' cannot be >= 'upper_bound'" in e.message: continue else: raise except IOError as e: log.error("Couldn't insert values for coverage: %s", coverage.persistence_dir, exc_info=True) try: coverage.close() finally: self._bad_coverages[stream_id] = 1 raise CorruptionError(e.message) def insert_values(self, coverage, rdt, stream_id): elements = len(rdt) start_index = coverage.num_timesteps - elements for k, v in rdt.iteritems(): if isinstance(v, SparseConstantValue): continue slice_ = slice(start_index, None) 
try: coverage.set_parameter_values(param_name=k, tdoa=slice_, value=v) except IOError as e: log.error("Couldn't insert values for coverage: %s", coverage.persistence_dir, exc_info=True) try: coverage.close() finally: self._bad_coverages[stream_id] = 1 raise CorruptionError(e.message) if 'ingestion_timestamp' in coverage.list_parameters(): t_now = time.time() ntp_time = TimeUtils.ts_to_units( coverage.get_parameter_context('ingestion_timestamp').uom, t_now) coverage.set_parameter_values(param_name='ingestion_timestamp', tdoa=slice_, value=ntp_time) def add_granule(self, stream_id, rdt): ''' Appends the granule's data to the coverage and persists it. ''' debugging = log.isEnabledFor(DEBUG) timer = Timer() if debugging else None if stream_id in self._bad_coverages: log.info( 'Message attempting to be inserted into bad coverage: %s', DatasetManagementService._get_coverage_path( self.get_dataset(stream_id))) #-------------------------------------------------------------------------------- # Gap Analysis #-------------------------------------------------------------------------------- if not self.ignore_gaps: gap_found = self.has_gap(rdt.connection_id, rdt.connection_index) if gap_found: log.error( 'Gap Found! New connection: (%s,%s)\tOld Connection: (%s,%s)', rdt.connection_id, rdt.connection_index, self.connection_id, self.connection_index) self.gap_coverage(stream_id) #-------------------------------------------------------------------------------- # Coverage determiniation and appending #-------------------------------------------------------------------------------- dataset_id = self.get_dataset(stream_id) if not dataset_id: log.error('No dataset could be determined on this stream: %s', stream_id) return try: coverage = self.get_coverage(stream_id) except IOError as e: log.error( "Couldn't open coverage: %s", DatasetManagementService._get_coverage_path( self.get_dataset(stream_id))) raise CorruptionError(e.message) if debugging: path = DatasetManagementService._get_coverage_path(dataset_id) log.debug( '%s: add_granule stream %s dataset %s coverage %r file %s', self._id, stream_id, dataset_id, coverage, path) if not coverage: log.error( 'Could not persist coverage from granule, coverage is None') return #-------------------------------------------------------------------------------- # Actual persistence #-------------------------------------------------------------------------------- elements = len(rdt) if rdt[rdt.temporal_parameter] is None: elements = 0 self.insert_sparse_values(coverage, rdt, stream_id) if debugging: timer.complete_step('checks') # lightweight ops, should be zero self.expand_coverage(coverage, elements, stream_id) if debugging: timer.complete_step('insert') self.insert_values(coverage, rdt, stream_id) if debugging: timer.complete_step('keys') DatasetManagementService._save_coverage(coverage) if debugging: timer.complete_step('save') start_index = coverage.num_timesteps - elements self.dataset_changed(dataset_id, coverage.num_timesteps, (start_index, start_index + elements)) if not self.ignore_gaps and gap_found: self.splice_coverage(dataset_id, coverage) self.evaluate_qc(rdt, dataset_id) if debugging: timer.complete_step('notify') self._add_timing_stats(timer) self.update_connection_index(rdt.connection_id, rdt.connection_index) def _add_timing_stats(self, timer): """ add stats from latest coverage operation to Accumulator and periodically log results """ self.time_stats.add(timer) if self.time_stats.get_count() % REPORT_FREQUENCY > 0: return if log.isEnabledFor(TRACE): 
# report per step for step in 'checks', 'insert', 'keys', 'save', 'notify': log.debug('%s step %s times: %s', self._id, step, self.time_stats.to_string(step)) # report totals log.debug('%s total times: %s', self._id, self.time_stats)
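# A simplified sketch of the retry strategy persist_or_timeout() implements above: retry the
# operation with an exponentially growing sleep capped at five minutes per attempt, and give
# up once MAX_RETRY_TIME has elapsed. Function and argument names are illustrative.
import time
import gevent

def retry_with_backoff(operation, max_retry_time, initial_timeout=2, max_timeout=300):
    start = time.time()
    timeout = initial_timeout
    while True:
        try:
            return operation()                    # success ends the loop
        except Exception:
            log.exception('An issue with coverage, retrying after a bit')
            if (time.time() - start) > max_retry_time:
                raise                             # give up; re-raise the last failure
            gevent.sleep(timeout)
            timeout = min(max_timeout, timeout * 2)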
class Directory(object): """ Frontend to a directory functionality backed by a datastore. A directory is a system wide datastore backend tree of entries with attributes and child entries. Entries can be identified by a path. The root is '/'. Every Org can have its own directory. The default directory is for the root Org (ION). """ def __init__(self, orgname=None, datastore_manager=None, container=None): self.container = container or bootstrap.container_instance # Get an instance of datastore configured as directory. datastore_manager = datastore_manager or self.container.datastore_manager self.dir_store = datastore_manager.get_datastore(DataStore.DS_DIRECTORY, DataStore.DS_PROFILE.DIRECTORY) self.orgname = orgname or CFG.system.root_org self.is_root = (self.orgname == CFG.system.root_org) self.events_enabled = CFG.get_safe("service.directory.publish_events") is True # Publish change events? self.event_pub = None self.event_sub = None def start(self): if self.events_enabled: # init change event publisher self.event_pub = EventPublisher() # Register to receive directory changes # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent", # origin="Directory", # callback=self.receive_directory_change_event) # Create directory root entry (for current org) if not existing self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True) def stop(self): self.close() def close(self): """ Close directory and all resources including datastore and event listener. """ if self.event_sub: self.event_sub.deactivate() self.dir_store.close() # ------------------------------------------------------------------------- # Directory register, lookup and find def lookup(self, parent, key=None, return_entry=False): """ Read directory entry by key and parent node. @param return_entry If True, returns DirEntry object if found, otherwise DirEntry attributes dict @retval Either current DirEntry attributes dict or DirEntry object or None if not found. """ path = self._get_path(parent, key) if key else parent direntry = self._read_by_path(path) if return_entry: return direntry else: return direntry.attributes if direntry else None def lookup_mult(self, parent, keys=None, return_entry=False): """ Read several directory entries by keys from the same parent node. @param return_entry If True, returns DirEntry object if found, otherwise DirEntry attributes dict @retval Either list of current DirEntry attributes dict or DirEntry object or None if not found. """ direntry_list = self._read_by_path(parent, mult_keys=keys) if return_entry: return direntry_list else: return [direntry.attributes if direntry else None for direntry in direntry_list] def register(self, parent, key, create_only=False, return_entry=False, ensure_parents=True, **kwargs): """ Add/replace an entry within directory, below a parent node or "/" root. Note: Replaces (not merges) the attribute values of the entry if existing. register will fail when a concurrent write was detected, meaning that the other writer wins. 
@param create_only If True, does not change an already existing entry @param return_entry If True, returns DirEntry object of prior entry, otherwise DirEntry attributes dict @param ensure_parents If True, make sure that parent nodes exist @retval DirEntry if previously existing """ if not (parent and key): raise BadRequest("Illegal arguments") if not type(parent) is str or not parent.startswith("/"): raise BadRequest("Illegal arguments: parent") dn = self._get_path(parent, key) log.debug("Directory.register(%s): %s", dn, kwargs) entry_old = None cur_time = get_ion_ts() # Must read existing entry by path to make sure to not create path twice direntry = self._read_by_path(dn) if direntry and create_only: # We only wanted to make sure entry exists. Do not change # NOTE: It is ambiguous to the caller whether we ran into this situation. Seems OK. return direntry if return_entry else direntry.attributes elif direntry: old_rev, old_ts, old_attr = direntry._rev, direntry.ts_updated, direntry.attributes direntry.attributes = kwargs direntry.ts_updated = cur_time try: self.dir_store.update(direntry) if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER): self.event_pub.publish_event(event_type="DirectoryModifiedEvent", origin=self.orgname + ".DIR", origin_type="DIR", key=key, parent=parent, org=self.orgname, sub_type="REGISTER." + parent[1:].replace("/", "."), mod_type=DirectoryModificationType.UPDATE) except Conflict: # Concurrent update - we accept that we finished the race second and give up log.warn("Concurrent update to %s detected. We lost: %s", dn, kwargs) if return_entry: # Reset object back to prior state direntry.attributes = old_attr direntry.ts_updated = old_ts direntry._rev = old_rev entry_old = direntry else: entry_old = old_attr else: direntry = self._create_dir_entry(parent, key, attributes=kwargs, ts=cur_time) if ensure_parents: self._ensure_parents_exist([direntry]) try: self.dir_store.create(direntry, create_unique_directory_id()) if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER): self.event_pub.publish_event(event_type="DirectoryModifiedEvent", origin=self.orgname + ".DIR", origin_type="DIR", key=key, parent=parent, org=self.orgname, sub_type="REGISTER." + parent[1:].replace("/", "."), mod_type=DirectoryModificationType.CREATE) except BadRequest as ex: if not ex.message.startswith("DirEntry already exists"): raise # Concurrent create - we accept that we finished the race second and give up log.warn("Concurrent create of %s detected. We lost: %s", dn, kwargs) return entry_old def register_safe(self, parent, key, **kwargs): """ Use this method to protect caller from any form of directory register error """ try: return self.register(parent, key, **kwargs) except Exception as ex: log.exception("Error registering path=%s/%s, args=%s", parent, key, kwargs) def register_mult(self, entries): """ Registers multiple directory entries efficiently in one datastore access. Note: this fails if entries are already existing, so works for create only. 
""" if type(entries) not in (list, tuple): raise BadRequest("Bad entries type") de_list = [] cur_time = get_ion_ts() for parent, key, attrs in entries: direntry = self._create_dir_entry(parent, key, attributes=attrs, ts=cur_time) de_list.append(direntry) pe_list = self._ensure_parents_exist(de_list, create=False) de_list.extend(pe_list) deid_list = [create_unique_directory_id() for i in xrange(len(de_list))] self.dir_store.create_mult(de_list, deid_list) if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER): for de in de_list: self.event_pub.publish_event(event_type="DirectoryModifiedEvent", origin=self.orgname + ".DIR", origin_type="DIR", key=de.key, parent=de.parent, org=self.orgname, sub_type="REGISTER." + de.parent[1:].replace("/", "."), mod_type=DirectoryModificationType.CREATE) def unregister(self, parent, key=None, return_entry=False): """ Remove entry from directory. Returns attributes of deleted DirEntry """ path = self._get_path(parent, key) if key else parent log.debug("Removing content at path %s" % path) direntry = self._read_by_path(path) if direntry: self.dir_store.delete(direntry) if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER): self.event_pub.publish_event(event_type="DirectoryModifiedEvent", origin=self.orgname + ".DIR", origin_type="DIR", key=key, parent=parent, org=self.orgname, sub_type="UNREGISTER." + parent[1:].replace("/", "."), mod_type=DirectoryModificationType.DELETE) if direntry and not return_entry: return direntry.attributes else: return direntry def unregister_safe(self, parent, key): try: return self.unregister(parent, key) except Exception as ex: log.exception("Error unregistering path=%s/%s", parent, key) def find_child_entries(self, parent='/', direct_only=True, **kwargs): """ Return all child entries (ordered by path) for the given parent path. Does not return the parent itself. Optionally returns child of child entries. Additional kwargs are applied to constrain the search results (limit, descending, skip). @param parent Path to parent (must start with "/") @param direct_only If False, includes child of child entries @retval A list of DirEntry objects for the matches """ if not type(parent) is str or not parent.startswith("/"): raise BadRequest("Illegal argument parent: %s" % parent) if direct_only: start_key = [self.orgname, parent, 0] end_key = [self.orgname, parent] res = self.dir_store.find_by_view('directory', 'by_parent', start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs) else: path = parent[1:].split("/") start_key = [self.orgname, path, 0] end_key = [self.orgname, list(path) + ["ZZZZZZ"]] res = self.dir_store.find_by_view('directory', 'by_path', start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs) match = [value for docid, indexkey, value in res] return match def find_by_key(self, key=None, parent='/', **kwargs): """ Returns a list of DirEntry for each directory entry that matches the given key name. If a parent is provided, only checks in this parent and all subtree. These entries are in the same org's directory but have different parents. 
""" if key is None: raise BadRequest("Illegal arguments") if parent is None: raise BadRequest("Illegal arguments") start_key = [self.orgname, key, parent] end_key = [self.orgname, key, parent + "ZZZZZZ"] res = self.dir_store.find_by_view('directory', 'by_key', start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs) match = [value for docid, indexkey, value in res] return match def find_by_value(self, subtree='/', attribute=None, value=None, **kwargs): """ Returns a list of DirEntry with entries that have an attribute with the given value. """ if attribute is None: raise BadRequest("Illegal arguments") if subtree is None: raise BadRequest("Illegal arguments") start_key = [self.orgname, attribute, value, subtree] end_key = [self.orgname, attribute, value, subtree + "ZZZZZZ"] res = self.dir_store.find_by_view('directory', 'by_attribute', start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs) match = [value for docid, indexkey, value in res] return match def remove_child_entries(self, parent, delete_parent=False): pass # ------------------------------------------------------------------------- # Concurrency Control def acquire_lock(self, key, timeout=LOCK_EXPIRES_DEFAULT, lock_holder=None, lock_info=None): """ Attempts to atomically acquire a lock with the given key and namespace. If holder is given and holder already has the lock, renew. Checks for expired locks. @param timeout Secs until lock expiration or 0 for no expiration @param lock_holder Str value identifying lock holder for subsequent exclusive access @param lock_info Dict value for additional attributes describing lock @retval bool - could lock be acquired? """ if not key: raise BadRequest("Missing argument: key") if "/" in key: raise BadRequest("Invalid argument value: key") lock_attrs = {LOCK_EXPIRES_ATTR: get_ion_ts_millis() + int(1000*timeout) if timeout else 0, LOCK_HOLDER_ATTR: lock_holder or ""} if lock_info: lock_attrs.update(lock_info) expires = int(lock_attrs[LOCK_EXPIRES_ATTR]) # Check type just to be sure if expires and get_ion_ts_millis() > expires: raise BadRequest("Invalid lock expiration value: %s", expires) direntry = self._create_dir_entry(LOCK_DIR_PATH, key, attributes=lock_attrs) lock_result = False try: # This is an atomic operation. 
It relies on the unique key constraint of the directory service self.dir_store.create(direntry, create_unique_directory_id()) lock_result = True except BadRequest as ex: if ex.message.startswith("DirEntry already exists"): de_old = self.lookup(LOCK_DIR_PATH, key, return_entry=True) if de_old: if self._is_lock_expired(de_old): # Lock is expired: remove, try to relock # Note: even as holder, it's safer to reacquire in this case than renew log.warn("Removing expired lock: %s/%s", de_old.parent, de_old.key) try: # This is safe, because of lock was deleted + recreated in the meantime, it has different id self._delete_lock(de_old) # Try recreate - may fail again due to concurrency self.dir_store.create(direntry, create_unique_directory_id()) lock_result = True except BadRequest as ex: if not ex.message.startswith("DirEntry already exists"): log.exception("Error releasing/reacquiring expired lock %s", de_old.key) except Exception: log.exception("Error releasing/reacquiring expired lock %s", de_old.key) elif lock_holder and de_old.attributes[LOCK_HOLDER_ATTR] == lock_holder: # Holder currently holds the lock: renew log.debug("Renewing lock %s/%s for holder %s", de_old.parent, de_old.key, lock_holder) de_old.attributes = lock_attrs try: self.dir_store.update(de_old) lock_result = True except Exception: log.exception("Error renewing expired lock %s", de_old.key) # We do nothing if we could not find the lock now... else: raise log.debug("Directory.acquire_lock(%s): %s -> %s", key, lock_attrs, lock_result) return lock_result def is_locked(self, key): if not key: raise BadRequest("Missing argument: key") if "/" in key: raise BadRequest("Invalid argument value: key") lock_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True) return lock_entry and not self._is_lock_expired(lock_entry) def release_lock(self, key, lock_holder=None): """ Releases lock identified by key. Raises NotFound if lock does not exist. """ if not key: raise BadRequest("Missing argument: key") if "/" in key: raise BadRequest("Invalid argument value: key") log.debug("Directory.release_lock(%s)", key) dir_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True) if dir_entry: if lock_holder and dir_entry.attributes[LOCK_HOLDER_ATTR] != lock_holder: raise BadRequest("Cannot release lock - not currently lock holder") self._delete_lock(dir_entry) else: raise NotFound("Lock %s not found" % key) def release_expired_locks(self): """Removes all expired locks """ de_list = self.find_child_entries(LOCK_DIR_PATH, direct_only=True) for de in de_list: if self._is_lock_expired(de): log.warn("Removing expired lock %s/%s", de.parent, de.key) try: # This is safe, because if lock was deleted + recreated in the meantime, it has different id self._delete_lock(de) except Exception: log.exception("Error releasing expired lock %s", de.key) def _is_lock_expired(self, lock_entry): if not lock_entry: raise BadRequest("No lock entry provided") return 0 < lock_entry.attributes[LOCK_EXPIRES_ATTR] <= get_ion_ts_millis() def _delete_lock(self, lock_entry): lock_entry_id = lock_entry._id self.dir_store.delete(lock_entry_id) # ------------------------------------------------------------------------- # Internal functions def receive_directory_change_event(self, event_msg, headers): # @TODO add support to fold updated config into container config pass def _get_path(self, parent, key): """ Returns the qualified directory path for a directory entry. 
""" if parent == "/": return parent + key elif parent.startswith("/"): return parent + "/" + key else: raise BadRequest("Illegal parent: %s" % parent) def _get_key(self, path): """ Returns the key from a qualified directory path """ parent, key = path.rsplit("/", 1) return key def _create_dir_entry(self, parent, key, orgname=None, ts=None, attributes=None): """ Standard way to create a DirEntry object. """ orgname = orgname or self.orgname ts = ts or get_ion_ts() attributes = attributes if attributes is not None else {} parent = parent or "/" de = DirEntry(org=orgname, parent=parent, key=key, attributes=attributes, ts_created=ts, ts_updated=ts) return de def _read_by_path(self, path, orgname=None, mult_keys=None): """ Given a qualified path, find entry in directory and return DirEntry object or None if not found. """ if path is None: raise BadRequest("Illegal arguments") orgname = orgname or self.orgname if mult_keys: parent = path or "/" key = mult_keys else: parent, key = path.rsplit("/", 1) parent = parent or "/" find_key = [orgname, key, parent] view_res = self.dir_store.find_by_view('directory', 'by_key', key=find_key, id_only=True, convert_doc=True) match = [doc for docid, index, doc in view_res] if mult_keys: entries_by_key = {doc.key: doc for doc in match} entries = [entries_by_key.get(key, None) for key in mult_keys] return entries else: if len(match) > 1: log.error("More than one directory entry found for key %s" % path) return match[0] elif match: return match[0] return None def _get_unique_parents(self, entry_list): """Returns a sorted, unique list of parents of DirEntries (excluding the root /)""" if entry_list and type(entry_list) not in (list, tuple): entry_list = [entry_list] parents = set() for entry in entry_list: parents.add(entry.parent) if "/" in parents: parents.remove("/") return sorted(parents) def _ensure_parents_exist(self, entry_list, create=True): parents_list = self._get_unique_parents(entry_list) pe_list = [] try: for parent in parents_list: pe = self.lookup(parent) if pe is None: pp, pk = parent.rsplit("/", 1) direntry = self._create_dir_entry(parent=pp, key=pk) pe_list.append(direntry) if create: try: self.dir_store.create(direntry, create_unique_directory_id()) except BadRequest as ex: if not ex.message.startswith("DirEntry already exists"): raise # Else: Concurrent create except Exception as ex: log.warn("_ensure_parents_exist(): Error creating directory parents", exc_info=True) return pe_list def _cleanup_outdated_entries(self, dir_entries, common="key"): """ This function takes all DirEntry from the list and removes all but the most recent one by ts_updated timestamp. It returns the most recent DirEntry and removes the others by direct datastore operations. If there are multiple entries with most recent timestamp, the first encountered is kept and the others non-deterministically removed. Note: This operation can be called for DirEntries without common keys, e.g. for all entries registering an agent for a device. 
""" if not dir_entries: return newest_entry = dir_entries[0] try: for de in dir_entries: if int(de.ts_updated) > int(newest_entry.ts_updated): newest_entry = de remove_list = [de for de in dir_entries if de is not newest_entry] log.info("Attempting to cleanup these directory entries: %s" % remove_list) for de in remove_list: try: self.dir_store.delete(de) except Exception as ex: log.warn("Removal of outdated %s directory entry failed: %s" % (common, de)) log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common)) except Exception as ex: log.warn("Cleanup of multiple directory entries for %s failed: %s" % ( common, str(ex))) return newest_entry
def test_lookup_values(self): ph = ParameterHelper(self.dataset_management, self.addCleanup) pdict_id = ph.create_lookups() stream_def_id = self.pubsubcli.create_stream_definition( 'lookup', parameter_dictionary_id=pdict_id) self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id) data_product = DataProduct(name='lookup data product') tdom, sdom = time_series_domain() data_product.temporal_domain = tdom.dump() data_product.spatial_domain = sdom.dump() data_product_id = self.dpsc_cli.create_data_product( data_product, stream_definition_id=stream_def_id) self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id) data_producer = DataProducer(name='producer') data_producer.producer_context = DataProcessProducerContext() data_producer.producer_context.configuration['qc_keys'] = [ 'offset_document' ] data_producer_id, _ = self.rrclient.create(data_producer) self.addCleanup(self.rrclient.delete, data_producer_id) assoc, _ = self.rrclient.create_association( subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer) self.addCleanup(self.rrclient.delete_association, assoc) document_keys = self.damsclient.list_qc_references(data_product_id) self.assertEquals(document_keys, ['offset_document']) svm = StoredValueManager(self.container) svm.stored_value_cas('offset_document', {'offset_a': 2.0}) self.dpsc_cli.activate_data_product_persistence(data_product_id) dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True) dataset_id = dataset_ids[0] dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = [0] rdt['temp'] = [20.] granule = rdt.to_granule() stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True) stream_id = stream_ids[0] route = self.pubsubcli.read_stream_route(stream_id=stream_id) publisher = StandaloneStreamPublisher(stream_id, route) publisher.publish(granule) self.assertTrue(dataset_monitor.event.wait(10)) granule = self.data_retriever.retrieve(dataset_id) rdt2 = RecordDictionaryTool.load_from_granule(granule) np.testing.assert_array_equal(rdt['temp'], rdt2['temp']) np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0])) svm.stored_value_cas('updated_document', {'offset_a': 3.0}) dataset_monitor = DatasetMonitor(dataset_id) self.addCleanup(dataset_monitor.stop) ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent) ep.publish_event(origin=data_product_id, reference_keys=['updated_document']) rdt = RecordDictionaryTool(stream_definition_id=stream_def_id) rdt['time'] = [1] rdt['temp'] = [20.] granule = rdt.to_granule() gevent.sleep(2) # Yield so that the event goes through publisher.publish(granule) self.assertTrue(dataset_monitor.event.wait(10)) granule = self.data_retriever.retrieve(dataset_id) rdt2 = RecordDictionaryTool.load_from_granule(granule) np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.])) np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0, 23.0]))
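The second half of this test, updating a stored document and announcing it with an ExternalReferencesUpdatedEvent, is the same pattern an operator-facing tool would use to roll out new calibration values. A minimal sketch, assuming a running container; the document key and offset value are made-up placeholders:

def push_new_calibration(container, data_product_id):
    """Sketch only: store a new lookup document and tell listeners to reload it."""
    svm = StoredValueManager(container)
    # 'offset_document_v2' and its contents are hypothetical
    svm.stored_value_cas('offset_document_v2', {'offset_a': 3.5})

    ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
    ep.publish_event(origin=data_product_id, reference_keys=['offset_document_v2'])

As the gevent.sleep(2) in the test suggests, delivery is asynchronous, so data published immediately after the event may still be processed with the old lookup values.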
def test_pub_and_sub(self): ar = event.AsyncResult() gq = queue.Queue() self.count = 0 def cb(*args, **kwargs): self.count += 1 gq.put(args[0]) if self.count == 2: ar.set() sub = EventSubscriber(event_type="ResourceEvent", callback=cb, origin="specific") pub = EventPublisher(event_type="ResourceEvent") self._listen(sub) pub.publish_event(origin="specific", description="hello") event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing') self.assertEqual(event_obj, pub.publish_event_object(event_obj)) with self.assertRaises(BadRequest) as cm: event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing', ts_created='2423') pub.publish_event_object(event_obj) self.assertIn('The ts_created value is not a valid timestamp', cm.exception.message) with self.assertRaises(BadRequest) as cm: event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing', ts_created='1000494978462') pub.publish_event_object(event_obj) self.assertIn('This ts_created value is too old', cm.exception.message) with self.assertRaises(BadRequest) as cm: event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing') event_obj._id = '343434' pub.publish_event_object(event_obj) self.assertIn('The event object cannot contain a _id field', cm.exception.message) ar.get(timeout=5) res = [] for x in xrange(self.count): res.append(gq.get(timeout=5)) self.assertEquals(len(res), self.count) self.assertEquals(res[0].description, "hello") self.assertAlmostEquals(int(res[0].ts_created), int(get_ion_ts()), delta=5000) self.assertEquals(res[1].description, "more testing") self.assertAlmostEquals(int(res[1].ts_created), int(get_ion_ts()), delta=5000)
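The three BadRequest cases above encode the publisher's validation rules for pre-built event objects: ts_created must be a valid, reasonably recent millisecond timestamp string, and the object must not already carry a _id. A sketch of a publish that satisfies them, reusing the pub object and helpers from this test (the description text is arbitrary, and supplying ts_created explicitly is just one way to pass the checks):

event_obj = bootstrap.IonObject('ResourceEvent', origin='specific',
                                description='routine publish',
                                ts_created=get_ion_ts())   # current ms timestamp string
pub.publish_event_object(event_obj)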
def test_pub_on_different_subsubtypes(self): res_list = [ DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0) for i in xrange(4) ] def cb_gen(num): def cb(event, *args, **kwargs): res_list[num].count += 1 res_list[num].gq.put(event) if event.description == "end": res_list[num].ar.set() return cb sub0 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.*", callback=cb_gen(0)) sub0.start() sub1 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.a", callback=cb_gen(1)) sub1.start() sub2 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="*.a", callback=cb_gen(2)) sub2.start() sub3 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb_gen(3)) sub3.start() pub1 = EventPublisher(event_type="ResourceModifiedEvent") pub1.publish_event(origin="one", sub_type="st1.a", description="1") pub1.publish_event(origin="two", sub_type="st1", description="2") pub1.publish_event(origin="three", sub_type="st1.b", description="3") pub1.publish_event(origin="four", sub_type="st2.a", description="4") pub1.publish_event(origin="five", sub_type="st2", description="5") pub1.publish_event(origin="six", sub_type="a", description="6") pub1.publish_event(origin="seven", sub_type="", description="7") pub1.publish_event(origin="end", sub_type="st1.a", description="end") pub1.publish_event(origin="end", sub_type="st1", description="end") [res_list[i].ar.get(timeout=5) for i in xrange(3)] sub0.stop() sub1.stop() sub2.stop() sub3.stop() for i in xrange(4): res_list[i].res = [] for x in xrange(res_list[i].count): res_list[i].res.append(res_list[i].gq.get(timeout=5)) self.assertEquals(len(res_list[0].res), 3) self.assertEquals(res_list[0].res[0].description, "1") self.assertEquals(len(res_list[1].res), 2) self.assertEquals(res_list[1].res[0].description, "1") self.assertEquals(len(res_list[2].res), 3) self.assertEquals(res_list[2].res[0].description, "1") self.assertEquals(len(res_list[3].res), 2) self.assertEquals(res_list[3].res[0].description, "2")
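The four subscribers differ only in their sub_type binding, and the final assertions pin down exactly which publishes each one sees: "st1.a" (published twice) reaches "st1.*", "st1.a", and "*.a" but not "st1"; "st1" (published twice) reaches only "st1"; "st1.b" reaches only "st1.*"; "st2.a" reaches only "*.a"; and "st2", "a", and "" reach none, giving the asserted counts 3, 2, 3, 2. Those counts imply that "*" matches exactly one dot-separated token. The helper below is a rough stand-in for that matching behavior, for illustration only; it is not the broker's actual routing code.

import re

def sub_type_matches(pattern, sub_type):
    """Approximate dotted-wildcard matching: '*' matches exactly one token."""
    regex = '^' + '\\.'.join(
        '[^.]+' if tok == '*' else re.escape(tok)
        for tok in pattern.split('.')) + '$'
    return re.match(regex, sub_type) is not None

assert sub_type_matches('st1.*', 'st1.a') and not sub_type_matches('st1.*', 'st1')
assert sub_type_matches('*.a', 'st2.a') and not sub_type_matches('*.a', 'a')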