def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None, number_of_workers=0):
    """Set up ingestion workers to ingest all the data from a single exchange point.

    @param exchange_point_id resource id for the exchange point to ingest from
    @param couch_storage specification of the couch database to use
    @param hdf_storage specification of the filesystem to use for hdf data files
    @param number_of_workers number of ingestion workers to create
    @retval id of the newly created ingestion configuration resource
    """
    # Register the worker process definition lazily, the first time we are called.
    if self.process_definition_id is None:
        proc_def = ProcessDefinition(
            name='ingestion_worker_process',
            description='Worker transform process for ingestion of datasets')
        proc_def.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
        proc_def.executable['class'] = 'IngestionWorker'
        self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
            process_definition=proc_def)

    # Each ingestion configuration receives data on its own queue.
    exchange_name = 'ingestion_queue'

    # Declare our intent to subscribe to all messages on the exchange point.
    subscription_id = self.clients.pubsub_management.create_subscription(
        query=ExchangeQuery(),
        exchange_name=exchange_name,
        name='Ingestion subscription',
        description='Subscription for ingestion workers')

    # Build the ingestion_configuration instance and persist it in the registry.
    # @todo: right now sending in the exchange_point_id as the name...
    config = IngestionConfiguration(name=self.XP)
    config.description = '%s exchange point ingestion configuration' % self.XP
    config.number_of_workers = number_of_workers
    if hdf_storage is not None:
        config.hdf_storage.update(hdf_storage)
    if couch_storage is not None:
        config.couch_storage.update(couch_storage)
    ingestion_configuration_id, _ = self.clients.resource_registry.create(config)

    self._launch_transforms(
        config.number_of_workers,
        subscription_id,
        ingestion_configuration_id,
        config,
        self.process_definition_id)

    return ingestion_configuration_id
def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None, number_of_workers=0):
    """Set up ingestion workers to ingest all the data from a single exchange point.

    @param exchange_point_id resource id for the exchange point to ingest from
    @param couch_storage specification of the couch database to use
    @param hdf_storage specification of the filesystem to use for hdf data files
    @param number_of_workers number of ingestion workers to create
    @retval id of the newly created ingestion configuration resource
    @throws BadRequest if no ingestion worker process definition is registered
    """
    # Resolve the pre-registered worker process definition on first use.
    if self.process_definition_id is None:
        found, _ = self.clients.resource_registry.find_resources(
            restype=RT.ProcessDefinition,
            name='ingestion_worker_process',
            id_only=True)
        if not found:
            raise BadRequest('No ingestion work process definition found')
        self.process_definition_id = found[0]

    # Each ingestion configuration receives data on its own queue.
    #----------------------------- TODO ---------------------------------------------
    # Add support right here for user specified queue based on this set of ingestion
    # workers
    #----------------------------- TODO ---------------------------------------------
    exchange_name = 'ingestion_queue'

    # Declare our intent to subscribe to all messages on the exchange point.
    subscription_id = self.clients.pubsub_management.create_subscription(
        query=ExchangeQuery(),
        exchange_name=exchange_name,
        name='Ingestion subscription',
        description='Subscription for ingestion workers')

    # Build the ingestion_configuration instance and persist it in the registry.
    # @todo: right now sending in the exchange_point_id as the name...
    config = IngestionConfiguration(name=self.XP)
    config.description = '%s exchange point ingestion configuration' % self.XP
    config.number_of_workers = number_of_workers
    if hdf_storage is not None:
        config.hdf_storage.update(hdf_storage)
    if couch_storage is not None:
        config.couch_storage.update(couch_storage)
    ingestion_configuration_id, _ = self.clients.resource_registry.create(config)

    self._launch_transforms(
        config.number_of_workers,
        subscription_id,
        ingestion_configuration_id,
        config,
        self.process_definition_id)

    return ingestion_configuration_id
def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None, number_of_workers=0):
    """Set up ingestion workers to ingest all the data from a single exchange point.

    @param exchange_point_id resource id for the exchange point to ingest from
    @param couch_storage specification of the couch database to use
    @param hdf_storage specification of the filesystem to use for hdf data files
    @param number_of_workers number of ingestion workers to create
    @retval id of the newly created ingestion configuration resource
    """
    # One-time registration of the ingestion worker process definition.
    if self.process_definition_id is None:
        definition = ProcessDefinition(
            name='ingestion_worker_process',
            description='Worker transform process for ingestion of datasets')
        definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
        definition.executable['class'] = 'IngestionWorker'
        self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
            process_definition=definition)

    # Each ingestion configuration gets its own queue to receive data on.
    exchange_name = 'ingestion_queue'

    # Subscribe to every message published on the exchange point.
    query = ExchangeQuery()
    subscription_id = self.clients.pubsub_management.create_subscription(
        query=query,
        exchange_name=exchange_name,
        name='Ingestion subscription',
        description='Subscription for ingestion workers')

    # Create the ingestion_configuration resource and record it in the registry.
    # @todo: right now sending in the exchange_point_id as the name...
    ingestion_configuration = IngestionConfiguration(name=self.XP)
    ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
    ingestion_configuration.number_of_workers = number_of_workers

    for storage_attr, storage_spec in (('hdf_storage', hdf_storage),
                                       ('couch_storage', couch_storage)):
        if storage_spec is not None:
            getattr(ingestion_configuration, storage_attr).update(storage_spec)

    ingestion_configuration_id, _ = self.clients.resource_registry.create(ingestion_configuration)

    self._launch_transforms(
        ingestion_configuration.number_of_workers,
        subscription_id,
        ingestion_configuration_id,
        ingestion_configuration,
        self.process_definition_id)

    return ingestion_configuration_id
def test_setup_queues(self):
    """setup_queues returns True and checks for the pre-existing dataset."""
    config = IngestionConfiguration()
    config.queues = [IngestionQueue()]
    # Give the config a resource id as the service would see after creation.
    config._id = 'config_id'

    self.pubsub_create_sub.return_value = 'subscription_id'
    self.ingestion_management._existing_dataset = Mock()

    result = self.ingestion_management.setup_queues(config, 'stream_id', 'dataset_id')

    self.assertTrue(result)
    self.ingestion_management._existing_dataset.assert_called_once_with('stream_id', 'dataset_id')
def create_ingestion_configuration(self, name='', exchange_point_id='', queues=None):
    """Create and persist an ingestion configuration resource.

    @param name               human-readable name for the configuration
    @param exchange_point_id  exchange point the configuration ingests from
    @param queues             non-empty list of IngestionQueue instances
    @retval id of the created IngestionConfiguration resource
    """
    # Reject anything that is not a non-empty list of IngestionQueue objects.
    validate_is_instance(queues, list, 'The queues parameter is not a proper list.')
    validate_true(len(queues) > 0, 'Ingestion needs at least one queue to ingest from')
    for ingestion_queue in queues:
        validate_is_instance(ingestion_queue, IngestionQueue)

    config = IngestionConfiguration()
    config.name = name
    config.exchange_point = exchange_point_id
    config.queues = queues

    config_id, _ = self.clients.resource_registry.create(config)
    return config_id
def test_setup_queues(self):
    """Verify setup_queues succeeds and consults _existing_dataset once."""
    mgmt = self.ingestion_management

    ingestion_config = IngestionConfiguration()
    ingestion_config.queues = [IngestionQueue()]
    setattr(ingestion_config, '_id', 'config_id')

    self.pubsub_create_sub.return_value = 'subscription_id'
    mgmt._existing_dataset = Mock()

    self.assertTrue(mgmt.setup_queues(ingestion_config, 'stream_id', 'dataset_id'))
    mgmt._existing_dataset.assert_called_once_with('stream_id', 'dataset_id')
def create_ingestion_configuration(self, name='', exchange_point_id='', queues=None):
    """Create and persist an ingestion configuration resource.

    @param name               human-readable name for the configuration
    @param exchange_point_id  exchange point the configuration ingests from
    @param queues             non-empty list of IngestionQueue instances
    @retval id of the created IngestionConfiguration resource
    """
    # Validate the queue list before touching the registry.
    validate_is_instance(queues, list, 'The queues parameter is not a proper list.')
    validate_true(len(queues) > 0, 'Ingestion needs at least one queue to ingest from')
    for entry in queues:
        validate_is_instance(entry, IngestionQueue)

    ingestion_config = IngestionConfiguration()
    ingestion_config.name = name
    ingestion_config.exchange_point = exchange_point_id
    ingestion_config.queues = queues

    # Only the resource id is needed by callers; the revision is discarded.
    resource_id, _ = self.clients.resource_registry.create(ingestion_config)
    return resource_id
def test_persist_data_stream(self):
    """persist_data_stream returns the dataset id for a not-yet-persisted stream."""
    mgmt = self.ingestion_management
    config = IngestionConfiguration()

    # Stub out the collaborators so only persist_data_stream's wiring is tested.
    mgmt.read_ingestion_configuration = Mock(return_value=config)
    mgmt.is_persisted = Mock(return_value=False)
    mgmt.setup_queues = Mock(return_value=True)
    self.pubsub_read.return_value = DotDict(persisted=False)

    result = mgmt.persist_data_stream('stream_id', 'config_id', 'dataset_id')
    self.assertEquals(result, 'dataset_id')