def create_ingestion_configuration(self,
                                       exchange_point_id='',
                                       couch_storage=None,
                                       hdf_storage=None,
                                       number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        """

        if self.process_definition_id is None:
            process_definition = ProcessDefinition(
                name='ingestion_worker_process',
                description='Worker transform process for ingestion of datasets'
            )
            process_definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
            process_definition.executable['class'] = 'IngestionWorker'
            self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
                process_definition=process_definition)

        # Give each ingestion configuration its own queue name to receive data on
        exchange_name = 'ingestion_queue'

        ##------------------------------------------------------------------------------------
        ## declare our intent to subscribe to all messages on the exchange point
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(
            query=query,
            exchange_name=exchange_name,
            name='Ingestion subscription',
            description='Subscription for ingestion workers')

        ##------------------------------------------------------------------------------------------

        # create an ingestion_configuration instance and update the registry
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration(name=self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)

        ingestion_configuration_id, _ = self.clients.resource_registry.create(
            ingestion_configuration)

        self._launch_transforms(ingestion_configuration.number_of_workers,
                                subscription_id, ingestion_configuration_id,
                                ingestion_configuration,
                                self.process_definition_id)
        return ingestion_configuration_id
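
A minimal caller-side sketch for the variant above (hedged: the ingestion_client handle and the CouchStorage/HdfStorage resource objects are assumptions about the surrounding setup, which is not shown here):

# Hypothetical usage sketch -- the client handle and the storage resource
# objects (CouchStorage, HdfStorage) are assumptions, not part of the example above.
couch_storage = CouchStorage(datastore_name='dm_datastore')
hdf_storage = HdfStorage(relative_path='ingest')

ingestion_configuration_id = ingestion_client.create_ingestion_configuration(
    exchange_point_id='science_data',
    couch_storage=couch_storage,
    hdf_storage=hdf_storage,
    number_of_workers=2)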
    def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None, number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        """

        if self.process_definition_id is None:
            res, _ = self.clients.resource_registry.find_resources(restype=RT.ProcessDefinition, name='ingestion_worker_process', id_only=True)
            if not res:
                raise BadRequest('No ingestion worker process definition found')
            self.process_definition_id = res[0]

        # Give each ingestion configuration its own queue name to receive data on
        #----------------------------- TODO ---------------------------------------------
        # Add support right here for user specified queue based on this set of ingestion 
        # workers
        #----------------------------- TODO ---------------------------------------------
        exchange_name = 'ingestion_queue'

        ##------------------------------------------------------------------------------------
        ## declare our intent to subscribe to all messages on the exchange point
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(query=query,
            exchange_name=exchange_name, name='Ingestion subscription', description='Subscription for ingestion workers')

        ##------------------------------------------------------------------------------------------

        # create an ingestion_configuration instance and update the registry
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration(name=self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)

        ingestion_configuration_id, _ = self.clients.resource_registry.create(ingestion_configuration)

        self._launch_transforms(
            ingestion_configuration.number_of_workers,
            subscription_id,
            ingestion_configuration_id,
            ingestion_configuration,
            self.process_definition_id
        )
        return ingestion_configuration_id
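
This variant assumes the 'ingestion_worker_process' definition was registered ahead of time, e.g. by a bootstrap step. A sketch of that registration, mirroring the inline registration in the first variant (the process_dispatcher client handle is an assumption):

# Hypothetical bootstrap sketch, mirroring the registration done inline by the
# first variant; the process_dispatcher client handle is an assumption.
process_definition = ProcessDefinition(
    name='ingestion_worker_process',
    description='Worker transform process for ingestion of datasets')
process_definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
process_definition.executable['class'] = 'IngestionWorker'
process_dispatcher.create_process_definition(process_definition=process_definition)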
    def test_setup_queues(self):
        ingestion_config = IngestionConfiguration()
        ingestion_config.queues = [IngestionQueue()]
        setattr(ingestion_config, '_id', 'config_id')

        self.pubsub_create_sub.return_value = 'subscription_id'

        self.ingestion_management._existing_dataset = Mock()

        retval = self.ingestion_management.setup_queues(ingestion_config, 'stream_id', 'dataset_id')

        self.assertTrue(retval)

        self.ingestion_management._existing_dataset.assert_called_once_with('stream_id', 'dataset_id')
    def create_ingestion_configuration(self, name='', exchange_point_id='', queues=None):
        validate_is_instance(queues, list, 'The queues parameter is not a proper list.')
        validate_true(len(queues) > 0, 'Ingestion needs at least one queue to ingest from')
        for queue in queues:
            validate_is_instance(queue, IngestionQueue)

        ingestion_config = IngestionConfiguration()

        ingestion_config.name = name
        ingestion_config.exchange_point = exchange_point_id
        ingestion_config.queues = queues

        config_id, _ = self.clients.resource_registry.create(ingestion_config)

        return config_id
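
A caller-side sketch for this queue-based variant (hedged: the IngestionQueue constructor arguments and the ingestion_client handle are assumptions about the interface definitions):

# Hypothetical usage sketch; the field names on IngestionQueue and the
# client handle are assumptions.
queue = IngestionQueue(name='science_granule_ingestion', type='SCIDATA')
config_id = ingestion_client.create_ingestion_configuration(
    name='standard ingestion',
    exchange_point_id='science_data',
    queues=[queue])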
    def test_persist_data_stream(self):
        config = IngestionConfiguration()

        self.ingestion_management.read_ingestion_configuration = Mock()
        self.ingestion_management.read_ingestion_configuration.return_value = config

        self.ingestion_management.is_persisted = Mock()
        self.ingestion_management.is_persisted.return_value = False

        self.ingestion_management.setup_queues = Mock()
        self.ingestion_management.setup_queues.return_value = True

        self.pubsub_read.return_value = DotDict(persisted=False)

        retval = self.ingestion_management.persist_data_stream(
            'stream_id', 'config_id', 'dataset_id')

        self.assertEqual(retval, 'dataset_id')
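
Read together, the mocks in this test imply a control flow for persist_data_stream roughly like the sketch below. This is inferred from the test alone, not taken from the service's actual implementation:

# Sketch of persist_data_stream as implied by the mocks above -- inferred
# from the test, not from the actual service code.
def persist_data_stream(self, stream_id='', ingestion_configuration_id='', dataset_id=''):
    ingestion_config = self.read_ingestion_configuration(ingestion_configuration_id)
    if self.is_persisted(stream_id):
        raise BadRequest('Stream is already persisted')  # error type is an assumption
    # The test also stubs a pubsub read returning persisted=False, suggesting the
    # stream's persisted flag is checked (and presumably set) via pubsub as well.
    self.setup_queues(ingestion_config, stream_id, dataset_id)
    return dataset_id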