# NOTE(review): this module-level `on_start` appears to be an accidental
# duplicate of IngestionManagementService.on_start (the identical method is
# defined inside the class below). At module scope it takes `self` that no
# caller supplies and references IngestionManagementService before the class
# statement executes, so it cannot work as written — confirm and remove.
def on_start(self):
        super(IngestionManagementService,self).on_start()
        self.event_publisher = DatasetIngestionConfigurationEventPublisher(node = self.container.node)


        #########################################################################################################
        #   The code for process_definition may not really belong here, but we do not have a different way so
        #   far to preload the process definitions. This will later probably be part of a set of predefinitions
        #   for processes.
        #########################################################################################################
        process_definition = ProcessDefinition()
        process_definition.executable['module']='ion.processes.data.ingestion.ingestion_worker'
        process_definition.executable['class'] = 'IngestionWorker'
        self.process_definition_id = self.clients.process_dispatcher.create_process_definition(process_definition=process_definition)
class IngestionManagementService(BaseIngestionManagementService):
    """
    id_p = cc.spawn_process('ingestion_worker', 'ion.services.dm.ingestion.ingestion_management_service', 'IngestionManagementService')
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """

    base_exchange_name = 'ingestion_queue'

    def __init__(self):
        """
        Read the science-data exchange space / exchange point names from CFG
        and prepare the object serializer used when launching workers.
        """
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            # Expect the "<exchange_space>.<exchange_point>" form.
            exchange_space, xp_base = xs_dot_xp.split('.')
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

        self.XS = exchange_space
        # Exchange point names are prefixed with the deployed system name.
        self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])

        self.serializer = IonObjectSerializer()


    def on_start(self):
        """
        Service start hook: create the event publisher used to notify workers
        of configuration changes, and register the ingestion worker process
        definition with the process dispatcher.
        """
        super(IngestionManagementService, self).on_start()
        self.event_publisher = DatasetIngestionConfigurationEventPublisher(node=self.container.node)

        # NOTE: preloading the process definition here may not be the right
        # home for this code, but there is currently no other mechanism to
        # preload process definitions; expected to move into a shared set of
        # process predefinitions eventually.
        worker_definition = ProcessDefinition()
        worker_definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
        worker_definition.executable['class'] = 'IngestionWorker'
        self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
            process_definition=worker_definition)

    def on_quit(self):
        """Service shutdown hook; the worker process definition is left registered."""
        #self.clients.process_dispatcher.delete_process_definition(process_definition_id=self.process_definition_id)
        super(IngestionManagementService, self).on_quit()

    def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None,number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
               (currently unused -- the XP from CFG is used; see @todo below)
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        @retval ingestion_configuration_id resource id of the new ingestion configuration
        """

        # Queue name the ingestion workers receive data on.
        # Use the class constant rather than re-hardcoding the same literal
        # (previously 'ingestion_queue' was duplicated inline here).
        exchange_name = self.base_exchange_name

        # Declare our intent to subscribe to all messages on the exchange point.
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(query=query,
            exchange_name=exchange_name, name='Ingestion subscription', description='Subscription for ingestion workers')

        # Create an ingestion_configuration instance and update the registry.
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration(name=self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        # Storage specs are optional; merge into the resource defaults only
        # when supplied so the defaults survive a None argument.
        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)

        ingestion_configuration_id, _ = self.clients.resource_registry.create(ingestion_configuration)

        # Spawn (but do not activate) one worker transform per requested worker.
        self._launch_transforms(
            ingestion_configuration.number_of_workers,
            subscription_id,
            ingestion_configuration_id,
            ingestion_configuration,
            self.process_definition_id
        )
        return ingestion_configuration_id

    def _launch_transforms(self, number_of_workers, subscription_id, ingestion_configuration_id, ingestion_configuration, process_definition_id):
        """
        Spawn the ingestion worker transform processes without activating them.
        Note: activating the transforms is what performs the queue binding.

        @param number_of_workers number of worker transforms to launch
        @param subscription_id shared subscription all workers consume from
        @param ingestion_configuration_id registry id of the parent configuration
        @param ingestion_configuration the parent configuration resource object
        @param process_definition_id process definition to spawn workers from
        @throws IngestionManagementServiceException if a transform fails to launch
        """

        description = 'Ingestion worker'

        # Serialize the configuration resource to a plain dict so it can be
        # shipped as spawn configuration: drop the ion type marker and add the
        # registry id so a worker can locate its parent configuration.
        configuration = self.serializer.serialize(ingestion_configuration)
        configuration.pop('type_')
        configuration['configuration_id'] = ingestion_configuration_id

        # launch the transforms
        for i in xrange(number_of_workers):
            name = '(%s)_Ingestion_Worker_%s' % (ingestion_configuration_id, i+1)
            transform_id = self.clients.transform_management.create_transform(
                name = name,
                description = description,
                in_subscription_id= subscription_id,
                out_streams = {},
                process_definition_id=process_definition_id,
                # BUG FIX: the serialized dict prepared above was previously
                # never used -- the raw resource object was passed instead,
                # so workers never received the 'configuration_id' entry.
                configuration=configuration)

            # create association between ingestion configuration and the transforms that act as Ingestion Workers
            if not transform_id:
                raise IngestionManagementServiceException('Transform could not be launched by ingestion.')
            self.clients.resource_registry.create_association(ingestion_configuration_id, PRED.hasTransform, transform_id)


    def update_ingestion_configuration(self, ingestion_configuration=None):
        """Change the number of workers or the default policy for ingesting data on each stream

        @param ingestion_configuration    IngestionConfiguration
        """
        log.debug("Updating ingestion configuration")
        # Persist the modified resource; the returned id/rev are not needed
        # here (also avoids shadowing the builtin `id`).
        updated_id, updated_rev = self.clients.resource_registry.update(ingestion_configuration)

    def read_ingestion_configuration(self, ingestion_configuration_id=''):
        """Get an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @retval ingestion_configuration    IngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading ingestion configuration object id: %s", ingestion_configuration_id)
        configuration = self.clients.resource_registry.read(ingestion_configuration_id)
        if configuration is not None:
            return configuration
        # Defensive: raise explicitly in case the registry returned None
        # instead of raising on a missing id.
        raise NotFound("Ingestion configuration %s does not exist" % ingestion_configuration_id)

    def delete_ingestion_configuration(self, ingestion_configuration_id=''):
        """Delete an existing ingestion configuration object, its worker
        transforms, and the associations between them.

        @param ingestion_configuration_id    str
        @throws NotFound    if no transforms are associated with the configuration
        """
        log.debug("Deleting ingestion configuration: %s", ingestion_configuration_id)

        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # Delete the transforms associated with the ingestion_configuration_id.
        # BUG FIX: find_objects returns an (ids, associations) tuple; it must
        # be unpacked -- as in activate/deactivate below -- otherwise the loop
        # iterates over the tuple itself and hands lists to delete_transform.
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)

        if len(transform_ids) < 1:
            raise NotFound('No transforms associated with this ingestion configuration!')

        log.debug('len(transform_ids): %s' % len(transform_ids))

        for transform_id in transform_ids:
            # Actually stop and remove each of the worker transforms.
            self.clients.transform_management.delete_transform(transform_id)

        # Delete the hasTransform associations too.
        associations = self.clients.resource_registry.find_associations(ingestion_configuration_id,PRED.hasTransform)
        log.info('associations: %s' % associations)
        for association in associations:
            self.clients.resource_registry.delete_association(association)
            #@todo How should we deal with failure?

        self.clients.resource_registry.delete(ingestion_configuration_id)


    def activate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Activate an ingestion configuration and the transform processes that execute it

        @param ingestion_configuration_id    str
        @retval True on success
        @throws NotFound    The ingestion configuration id did not exist
        """
        log.debug("Activating ingestion configuration")

        #@todo Should we check to see if the ingestion configuration exists?

        # Look up the worker transforms attached to this configuration.
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if not transform_ids:
            raise NotFound('The ingestion configuration %s does not exist' % str(ingestion_configuration_id))

        # All ingestion worker transforms share the same subscription, so
        # activating a single one is sufficient.
        self.clients.transform_management.activate_transform(transform_ids[0])

        return True


    def deactivate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Deactivate one of the transform processes that uses an ingestion configuration

        @param ingestion_configuration_id    str
        @retval True on success
        @throws NotFound    The ingestion configuration id did not exist
        """
        log.debug("Deactivating ingestion configuration")

        #@todo Should we check to see if the ingestion configuration exists?

        # Look up the worker transforms attached to this configuration.
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if not transform_ids:
            raise NotFound('The ingestion configuration %s does not exist' % str(ingestion_configuration_id))

        # All ingestion worker transforms share the same subscription, so
        # deactivating a single one is sufficient.
        self.clients.transform_management.deactivate_transform(transform_ids[0])

        return True

    def create_dataset_configuration(self, dataset_id='', archive_data=True, archive_metadata=True, ingestion_configuration_id=''):
        """Create a configuration for ingestion of a particular dataset and associate it to a ingestion configuration.

        Also copies the dataset stream's definition into the ingestion couch
        database and publishes an event so running workers pick up the new
        per-dataset policy.

        @param dataset_id    str
        @param archive_data    bool
        @param archive_metadata    bool
        @param ingestion_configuration_id    str
        @retval dataset_ingestion_configuration_id    str
        @throws IngestionManagementServiceException    if dataset_id is empty or
                the stream is not associated with exactly one stream definition
        """

        if not dataset_id:
            raise IngestionManagementServiceException('Must pass a dataset id to create_dataset_configuration')

        log.debug("Creating dataset configuration")

        dataset = self.clients.dataset_management.read_dataset(dataset_id=dataset_id)

        # The dataset's primary view key is the stream to be ingested.
        stream_id = dataset.primary_view_key

        # Get the associated stream definition!
        stream_defs, _ = self.clients.resource_registry.find_objects(stream_id, PRED.hasStreamDefinition)

        # NOTE(review): this also fires when there are ZERO stream definitions,
        # in which case the message text is misleading (kept for compatibility).
        if len(stream_defs)!=1:
            raise IngestionManagementServiceException('The stream is associated with more than one stream definition!')

        stream_def_resource = stream_defs[0]
        # Get the container object out of the stream def resource and set the stream id field in the local instance
        stream_def_container = stream_def_resource.container
        stream_def_container.stream_resource_id = stream_id

        # Get the ingestion configuration to find which couch datastore to use.
        ingestion_configuration = self.clients.resource_registry.read(ingestion_configuration_id)
        couch_storage = ingestion_configuration.couch_storage

        log.info('Adding stream definition for stream "%s" to ingestion database "%s"' % (stream_id, couch_storage.datastore_name))
        db = self.container.datastore_manager.get_datastore(couch_storage.datastore_name, self.CFG)

        # Put the stream definition in couch. FIX: always release the datastore
        # handle -- previously db.close() was skipped when db.create() raised.
        try:
            db.create(stream_def_container)
        finally:
            db.close()

        #@todo Add business logic to create the right kind of dataset ingestion configuration
        config = DatasetIngestionByStream(
            archive_data=archive_data,
            archive_metadata=archive_metadata,
            stream_id=stream_id)

        dset_ingest_config = DatasetIngestionConfiguration(
            name = 'Dataset config %s' % dataset_id,
            description = 'configuration for dataset %s' % dataset_id,
            configuration = config,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM
            )

        dset_ingest_config_id , _ = self.clients.resource_registry.create(dset_ingest_config)

        self.clients.resource_registry.create_association(dset_ingest_config_id, PRED.hasIngestionConfiguration, ingestion_configuration_id)

        # Notify running ingestion workers about the new dataset configuration.
        self.event_publisher.create_and_publish_event(
            origin=ingestion_configuration_id, # Use the ingestion configuration ID as the origin!
            description = dset_ingest_config.description,
            configuration = config,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id = dset_ingest_config_id
            )

        return dset_ingest_config_id

    def update_dataset_config(self, dataset_ingestion_configuration=None):
        """Update the ingestion configuration for a dataset

        @param dataset_ingestion_configuration    DatasetIngestionConfiguration
        """

        #@todo - make it an exception to change the dataset_id or the stream_id in the dataset config!

        log.info('dataset configuration to update: %s' % dataset_ingestion_configuration)

        log.debug("Updating dataset config")
        dset_ingest_config_id, rev = self.clients.resource_registry.update(dataset_ingestion_configuration)

        # Locate the parent ingestion configuration; exactly one is expected.
        ingest_config_ids, _ = self.clients.resource_registry.find_objects(dset_ingest_config_id, PRED.hasIngestionConfiguration, id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException('The dataset ingestion configuration is associated with more than one ingestion configuration!')

        #@todo - what is it okay to update?
        # Broadcast the change so ingestion workers pick up the new policy.
        self.event_publisher.create_and_publish_event(
            origin=ingest_config_ids[0],
            description = dataset_ingestion_configuration.description,
            configuration = dataset_ingestion_configuration.configuration,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id = dset_ingest_config_id
        )


    def read_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Get an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @retval dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading dataset configuration")
        # Existence checking is delegated to the resource registry read.
        return self.clients.resource_registry.read(dataset_ingestion_configuration_id)

    def delete_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Delete an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        """

        # Read first: the configuration payload is still needed for the
        # deletion event published below.
        dataset_ingestion_configuration = self.clients.resource_registry.read(dataset_ingestion_configuration_id)

        log.debug("Deleting dataset configuration")
        self.clients.resource_registry.delete(dataset_ingestion_configuration_id)

        # Locate the parent ingestion configuration; exactly one is expected.
        ingest_config_ids, association_ids = self.clients.resource_registry.find_objects(dataset_ingestion_configuration_id, PRED.hasIngestionConfiguration, id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException('The dataset ingestion configuration is associated with more than one ingestion configuration!')

        # Remove the association to the parent configuration as well.
        self.clients.resource_registry.delete_association(association=association_ids[0])

        # Tell running ingestion workers this dataset configuration is gone.
        self.event_publisher.create_and_publish_event(
            origin=ingest_config_ids[0],
            configuration = dataset_ingestion_configuration.configuration,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id = dataset_ingestion_configuration_id,
            deleted = True
        )