Example #1
    def _build_stream_config(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        dataset_management = DatasetManagementServiceClient()

        encoder = IonObjectSerializer()

        # Create streams and subscriptions for each stream named in driver.
        self._stream_config = {}

        stream_name = 'ctdpf_parsed'
        param_dict_name = 'ctdpf_parsed'
        pd_id = dataset_management.read_parameter_dictionary_by_name(param_dict_name, id_only=True)
        stream_def_id = pubsub_client.create_stream_definition(name=stream_name, parameter_dictionary_id=pd_id)
        stream_def = pubsub_client.read_stream_definition(stream_def_id)
        stream_def_dict = encoder.serialize(stream_def)
        pd = stream_def.parameter_dictionary
        stream_id, stream_route = pubsub_client.create_stream(
            name=stream_name,
            exchange_point='science_data',
            stream_definition_id=stream_def_id)
        stream_config = dict(routing_key=stream_route.routing_key,
                             exchange_point=stream_route.exchange_point,
                             stream_id=stream_id,
                             parameter_dictionary=pd,
                             stream_def_dict=stream_def_dict)
        self._stream_config[stream_name] = stream_config
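
The stream_def_dict stored above is the plain-dict form of the stream definition produced by IonObjectSerializer. A minimal consumer-side sketch for turning it back into an IonObject, assuming the deserializer import paths mirror those used in Examples #8 and #22:

    from pyon.core.object import IonObjectDeserializer  # path assumed, as in Example #8
    from pyon.core.bootstrap import get_obj_registry    # path is an assumption

    decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
    # Rebuild the full StreamDefinition IonObject from the serialized dict.
    stream_def = decoder.deserialize(stream_config['stream_def_dict'])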
Example #2
    def test_complex_version(self):

        io_serializer = IonObjectSerializer()
        obj = IonObject('SampleComplexEvent', {'num': 9, 'other_field': 'test value'})
        obj_dict = io_serializer.serialize(obj, True)
        self.assertEquals(obj_dict['persisted_version'], 1)
        # simulate a previous version data of SampleComplexEvent_V2
        obj_dict['type_'] = 'SampleComplexEvent_V2'

        # verify that the simulated previous version data has the resource field
        self.assertEquals('resource' in obj_dict, True)
        # verify that the simulated previous version data does not have new_resource
        self.assertEquals('new_resource' in obj_dict, False)
        # simulate reading the previous version, which does not have new_resource
        io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
        obj = io_deserializer.deserialize(obj_dict)
        # verify that new_resource is added and its new_attribute initialized with default values
        self.assertEquals(obj.new_resource.new_attribute['key'], 'value')
        # verify that old attributes are still there
        self.assertEquals(obj.num, 9)
        # verify that old attributes are still there
        self.assertEquals(obj.other_field, 'test value')
        # verify that on read version is not yet updated
        self.assertEquals(obj_dict['persisted_version'], 1)

        # simulate create/update
        obj_dict = io_serializer.serialize(obj, True)
        # verify that version is updated
        self.assertEquals(obj_dict['persisted_version'], 2)
Example #3
def _process_gateway_request(resource_id, operation, json_request, requester):

    if requester is not None:
        json_request["agentRequest"]["requester"] = requester

    decoder = IonObjectSerializer()
    decoded_msg = decoder.serialize(json_request)
    payload = simplejson.dumps(decoded_msg)

    response = _agent_gateway_request(resource_id + '/' + operation, payload)

    if GATEWAY_ERROR in response['data']:
        log.error(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        #raise BadRequest(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        ex_cls = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_EXCEPTION]
        ex_msg = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE]
        if hasattr(pyex, ex_cls):
            raise getattr(pyex, ex_cls)(ex_msg)
        else:
            raise Exception(ex_msg)

    try:
        if "type_" in response['data'][GATEWAY_RESPONSE]:
            del response['data'][GATEWAY_RESPONSE]["type_"]
    except Exception:
        pass
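
For reference, a hedged sketch of the json_request this function expects; the agentRequest envelope follows the caller shown in Example #33, and the concrete values here are illustrative only:

    json_request = {
        "agentRequest": {
            "agentId": "device_123",      # illustrative resource_id
            "agentOp": "execute_agent",   # operation name, passed as 'operation'
            "expiry": 0,
            "params": {}
        }
    }
    response = _process_gateway_request("device_123", "execute_agent", json_request, None)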
Example #4
    def test_persisted_version(self):

        # create an initial version of SampleResource
        io_serializer = IonObjectSerializer()
        obj = IonObject('SampleResource', {'num': 9, 'other_field': 'test value'})
        obj_dict = io_serializer.serialize(obj, True)
        self.assertEquals(obj_dict['persisted_version'], 1)
        # verify that the simulated previous version does not have new_attribute
        self.assertEquals('new_attribute' in obj_dict, False)

        # simulate version increment to SampleResource that adds new_attribute
        obj_dict['type_'] = 'SampleResource_V2'
        # simulate reading the previous version after version increment
        io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
        obj = io_deserializer.deserialize(obj_dict)
        # verify that new_attribute is added and initialized with default value
        self.assertEquals(obj.new_attribute['key'], 'value')
        # verify that old attributes are still there and retain values
        self.assertEquals(obj.num, 9)
        # verify that old attributes are still there and retain values
        self.assertEquals(obj.other_field, 'test value')
        # verify that persisted_version is not updated at read
        self.assertEquals(obj_dict['persisted_version'], 1)

        # simulate update
        obj_dict = io_serializer.serialize(obj, True)
        # verify that version is updated
        self.assertEquals(obj_dict['persisted_version'], 2)
Example #5
def _process_gateway_request(resource_id, operation, json_request, requester):

    if requester is not None:
        json_request["agentRequest"]["requester"] = requester

    
    decoder = IonObjectSerializer()
    decoded_msg = decoder.serialize(json_request)
    payload = simplejson.dumps(decoded_msg)

    response = _agent_gateway_request(resource_id + '/' + operation,   payload)

    if GATEWAY_ERROR in response['data']:
        log.error(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        #raise BadRequest(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        ex_cls = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_EXCEPTION]
        ex_msg = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE]
        if hasattr(pyex, ex_cls):
            raise getattr(pyex, ex_cls)(ex_msg)
        else:
            raise Exception(ex_msg)

    try:
        if "type_" in response['data'][GATEWAY_RESPONSE]:
            del response['data'][GATEWAY_RESPONSE]["type_"]
    except Exception:
        pass
Example #6
    def __init__(self, datastore_name='prototype'):
        self.datastore_name = datastore_name
        log.debug('Creating in-memory dict of dicts that will simulate data stores')
        self.root = {}

        # serializers
        self._io_serializer     = IonObjectSerializer()
        self._io_deserializer   = IonObjectDeserializer(obj_registry=get_obj_registry())
Example #7
    def __init__(self, container, datastore_name=""):
        self.container = container
        self.datastore_name = datastore_name

        # Object serialization/deserialization
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())
Example #8
def obj_to_tree(definition):
    from pyon.core.object import IonObjectSerializer
    if not isinstance(definition, StreamDefinitionContainer):
        return
    serializer = IonObjectSerializer()
    definition = serializer.serialize(definition)
    tree = DefinitionTree.traverse(definition, definition['data_stream_id'])
    return tree
Example #9
    def _generate_stream_config(self):
        log.debug("_generate_stream_config for %s", self.agent_instance_obj.name)
        dsm = self.clients.dataset_management
        psm = self.clients.pubsub_management

        agent_obj  = self._get_agent()
        device_obj = self._get_device()

        streams_dict = {}
        for stream_cfg in agent_obj.stream_configurations:
            #create a stream def for each param dict to match against the existing data products
            streams_dict[stream_cfg.stream_name] = {'param_dict_name': stream_cfg.parameter_dictionary_name,
                                                    #'stream_def_id': stream_def_id,
                                                    'records_per_granule': stream_cfg.records_per_granule,
                                                    'granule_publish_rate': stream_cfg.granule_publish_rate,
                                                    }

        #retrieve the output products
        device_id = device_obj._id
        data_product_objs = self.RR2.find_data_products_of_instrument_device_using_has_output_product(device_id)

        stream_config = {}
        for d in data_product_objs:
            stream_def_id = self.RR2.find_stream_definition_id_of_data_product_using_has_stream_definition(d._id)
            for model_stream_name, stream_info_dict in streams_dict.items():
                # read objects from cache to be compared
                pdict = self.RR2.find_resource_by_name(RT.ParameterDictionary, stream_info_dict.get('param_dict_name'))
                stream_def_id = self._meet_in_the_middle(d._id, pdict._id)

                if stream_def_id:
                    #model_param_dict = self.RR2.find_resources_by_name(RT.ParameterDictionary,
                    #                                         stream_info_dict.get('param_dict_name'))[0]
                    #model_param_dict = self._get_param_dict_by_name(stream_info_dict.get('param_dict_name'))
                    #stream_route = self.RR2.read(product_stream_id).stream_route
                    product_stream_id = self.RR2.find_stream_id_of_data_product_using_has_stream(d._id)
                    stream_def = psm.read_stream_definition(stream_def_id)
                    stream_route = psm.read_stream_route(stream_id=product_stream_id)
                    
                    from pyon.core.object import IonObjectSerializer
                    stream_def_dict = IonObjectSerializer().serialize(stream_def)
                    sdtype = stream_def_dict.pop('type_')

                    if model_stream_name in stream_config:
                        log.warn("Overwiting stream_config[%s]", model_stream_name)

                    stream_config[model_stream_name] = {'routing_key'           : stream_route.routing_key,
                                                        'stream_id'             : product_stream_id,
                                                        'stream_definition_ref' : stream_def_id,
                                                        'stream_def_dict'       : stream_def_dict,
                                                        'exchange_point'        : stream_route.exchange_point,
                                                        'parameter_dictionary'  : stream_def.parameter_dictionary,
                                                        'records_per_granule'   : stream_info_dict.get('records_per_granule'),
                                                        'granule_publish_rate'  : stream_info_dict.get('granule_publish_rate'),
                    }

        log.debug("Stream config generated")
        log.trace("generate_stream_config: %s", str(stream_config) )
        return stream_config
Example #10
    def _generate_stream_config(self):
        log.debug("_generate_stream_config for %s", self.agent_instance_obj.name)
        dsm = self.clients.dataset_management
        psm = self.clients.pubsub_management

        agent_obj  = self._get_agent()
        device_obj = self._get_device()

        streams_dict = {}
        for stream_cfg in agent_obj.stream_configurations:
            #create a stream def for each param dict to match against the existing data products
            streams_dict[stream_cfg.stream_name] = {'param_dict_name':stream_cfg.parameter_dictionary_name}

        #retrieve the output products
        # TODO: What about platforms? other things?
        device_id = device_obj._id
        data_product_objs = self.RR2.find_data_products_of_instrument_device_using_has_output_product(device_id)

        stream_config = {}
        for dp in data_product_objs:
            stream_def_id = self.RR2.find_stream_definition_id_of_data_product_using_has_stream_definition(dp._id)
            for stream_name, stream_info_dict in streams_dict.items():
                # read objects from cache to be compared
                pdict = self.RR2.find_resource_by_name(RT.ParameterDictionary, stream_info_dict.get('param_dict_name'))
                stream_def_id = self._find_streamdef_for_dp_and_pdict(dp._id, pdict._id)

                if stream_def_id:
                    #model_param_dict = self.RR2.find_resources_by_name(RT.ParameterDictionary,
                    #                                         stream_info_dict.get('param_dict_name'))[0]
                    #model_param_dict = self._get_param_dict_by_name(stream_info_dict.get('param_dict_name'))
                    #stream_route = self.RR2.read(product_stream_id).stream_route
                    product_stream_id = self.RR2.find_stream_id_of_data_product_using_has_stream(dp._id)
                    stream_def = psm.read_stream_definition(stream_def_id)
                    stream_route = psm.read_stream_route(stream_id=product_stream_id)

                    from pyon.core.object import IonObjectSerializer
                    stream_def_dict = IonObjectSerializer().serialize(stream_def)
                    stream_def_dict.pop('type_')

                    if stream_name in stream_config:
                        log.warn("Overwriting stream_config[%s]", stream_name)

                    stream_config[stream_name] = {  'routing_key'           : stream_route.routing_key,  # TODO: Serialize stream_route together
                                                    'stream_id'             : product_stream_id,
                                                    'stream_definition_ref' : stream_def_id,
                                                    'stream_def_dict'       : stream_def_dict,  # This is very large
                                                    'exchange_point'        : stream_route.exchange_point,
                                                    # This is redundant and very large - the param dict is in the stream_def_dict
                                                    #'parameter_dictionary'  : stream_def.parameter_dictionary,

                    }
        if len(stream_config) < len(streams_dict):
            log.warn("Found only %s matching streams by stream definition (%s) than %s defined in the agent (%s).",
                     len(stream_config), stream_config.keys(), len(streams_dict), streams_dict.keys())

        log.debug("Stream config generated")
        log.trace("generate_stream_config: %s", stream_config)
        return stream_config
Example #11
    def _serialize_port_assigments(self, port_assignments=None):
        serializer = IonObjectSerializer()
        serialized_port_assignments = {}
        if isinstance(port_assignments, dict):
            for device_id, platform_port in port_assignments.iteritems():
                flatpp = serializer.serialize(platform_port)
                serialized_port_assignments[device_id] = flatpp

        return serialized_port_assignments
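
A hypothetical call site for the helper above; 'PlatformPort' and its field are illustrative only, following the IonObject(type, dict) pattern used in the version tests:

    # Hypothetical usage; the PlatformPort field shown is made up for illustration.
    port_assignments = {'device_1': IonObject('PlatformPort', {'reference_designator': 'RD01'})}
    serialized = self._serialize_port_assigments(port_assignments)
    # serialized['device_1'] is now a plain dict that can be embedded in an agent config.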
Example #12
def obj_to_tree(definition):
    from pyon.core.object import IonObjectSerializer
    if not isinstance(definition, StreamDefinitionContainer):
        return
    serializer = IonObjectSerializer()
    definition = serializer.serialize(definition)
    tree = DefinitionTree.traverse(definition,
                                   definition['data_stream_id'])
    return tree
Example #13
    def _serialize_port_assigments(self, port_assignments=None):
        serializer = IonObjectSerializer()
        serialized_port_assignments = {}
        if isinstance(port_assignments, dict):
            for device_id, platform_port in port_assignments.iteritems():
                flatpp = serializer.serialize(platform_port)
                serialized_port_assignments[device_id] = flatpp

        return serialized_port_assignments
Example #14
    def size(self):
        '''
        Truly poor way to calculate the size of a granule...
        returns the size in bytes.
        '''
        granule = self.to_granule()
        serializer = IonObjectSerializer()
        flat = serializer.serialize(granule)
        byte_stream = msgpack.packb(flat, default=encode_ion)
        return len(byte_stream)
Example #15
    def size(self):
        '''
        Truly poor way to calculate the size of a granule...
        returns the size in bytes.
        '''
        granule = self.to_granule()
        serializer = IonObjectSerializer()
        flat = serializer.serialize(granule)
        byte_stream = msgpack.packb(flat, default=encode_ion)
        return len(byte_stream)
Example #16
def _process_gateway_request(service_name, operation, json_request, requester):

    if requester is not None:
        json_request["serviceRequest"]["requester"] = requester

    decoder = IonObjectSerializer()
    decoded_msg = decoder.serialize(json_request)
    payload = simplejson.dumps(decoded_msg)

    response = _service_gateway_request(service_name + '/' + operation,
                                        payload)

    return response
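
A hedged sketch of a caller; the serviceRequest envelope is the service-gateway analogue of the agentRequest built in Example #33, and the field names inside it are assumptions:

    json_request = {
        "serviceRequest": {
            "serviceName": "resource_registry",   # assumed field name
            "serviceOp": "read",                  # assumed field name
            "params": {"object_id": "abc123"}     # illustrative
        }
    }
    response = _process_gateway_request("resource_registry", "read", json_request, None)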
Example #17
def _process_gateway_request(service_name, operation, json_request, requester):

    if requester is not None:
        json_request["serviceRequest"]["requester"] = requester


    decoder = IonObjectSerializer()
    decoded_msg = decoder.serialize(json_request)
    payload = simplejson.dumps(decoded_msg)

    response = _service_gateway_request(service_name + '/' + operation,   payload)

    return response
Example #18
    def test_complex_version_del_attrib(self):

        io_serializer = IonObjectSerializer()
        # verify that extraneous fields given while creating an IonObject raise an error
        with self.assertRaises(AttributeError):
            IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value', 'more_new_resource': {'key': 'value'}})

        obj = IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value',
                                                  'new_resource': {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}}})
        # create simulated saved data
        obj_dict = io_serializer.serialize(obj, True)
        self.assertEquals(obj_dict['persisted_version'], 2)
        # simulate a next version data of SampleComplexEvent_V2
        obj_dict['type_'] = 'SampleComplexEvent_V3'

        # verify that the simulated previous version data does have new_resource
        self.assertEquals('new_resource' in obj_dict, True)
        # note the schema version of new_resource
        self.assertEquals(obj_dict['new_resource']['persisted_version'], 2)

        # simulate reading the next version that has a new type of new_resource
        io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
        obj = io_deserializer.deserialize(obj_dict)

        # verify that new_resource exists
        self.assertTrue('new_resource' in obj)
        # however, verify that new_resource does not have new_attribute since type of new_resource has changed
        self.assertFalse('new_attribute' in obj.new_resource)
        # verify that the new type of new_resource has another_new_attribute that is initialized to default data
        self.assertEquals(obj.new_resource.another_new_attribute['key'], 'new_value')
        # verify on read that the schema version of new_resource replaces the old persisted_version
        self.assertEquals(obj.new_resource.persisted_version, 3)

        # verify that old attributes values of new_resource have been thrown away
        self.assertNotEquals(obj.new_resource.num, 9)
        # verify that attributes values of new_resource have been initialized to default values
        self.assertEquals(obj.new_resource.num, 0)

        # However, verify that old attributes of the resource (SampleComplexEvent) are still there
        self.assertEquals(obj.num, 9)
        # verify that old attributes are still there
        self.assertEquals(obj.other_field, 'test value')
        # verify that on read, version is not yet updated
        self.assertEquals(obj.persisted_version, 2)


        # simulate create/update
        obj_dict = io_serializer.serialize(obj, True)
        # verify that version is updated
        self.assertEquals(obj_dict['persisted_version'], 3)
        # verify that version is updated for the subsumed object
        self.assertEquals(obj_dict['new_resource']['persisted_version'], 3)
Example #19
    def _build_stream_config(self):
        """
        """
        # Create a pubsub client to create streams.
        pubsub_client = PubsubManagementServiceClient(node=self.container.node)
        dataset_management = DatasetManagementServiceClient()

        encoder = IonObjectSerializer()

        # Create streams and subscriptions for each stream named in driver.
        self._stream_config = {}

        stream_name = 'parsed'
        param_dict_name = 'ctd_parsed_param_dict'
        pd_id = dataset_management.read_parameter_dictionary_by_name(
            param_dict_name, id_only=True)
        stream_def_id = pubsub_client.create_stream_definition(
            name=stream_name, parameter_dictionary_id=pd_id)
        stream_def = pubsub_client.read_stream_definition(stream_def_id)
        stream_def_dict = encoder.serialize(stream_def)
        pd = stream_def.parameter_dictionary
        stream_id, stream_route = pubsub_client.create_stream(
            name=stream_name,
            exchange_point='science_data',
            stream_definition_id=stream_def_id)
        stream_config = dict(routing_key=stream_route.routing_key,
                             exchange_point=stream_route.exchange_point,
                             stream_id=stream_id,
                             parameter_dictionary=pd,
                             stream_def_dict=stream_def_dict)
        self._stream_config[stream_name] = stream_config

        stream_name = 'raw'
        param_dict_name = 'ctd_raw_param_dict'
        pd_id = dataset_management.read_parameter_dictionary_by_name(
            param_dict_name, id_only=True)
        stream_def_id = pubsub_client.create_stream_definition(
            name=stream_name, parameter_dictionary_id=pd_id)
        stream_def = pubsub_client.read_stream_definition(stream_def_id)
        stream_def_dict = encoder.serialize(stream_def)
        pd = stream_def.parameter_dictionary
        stream_id, stream_route = pubsub_client.create_stream(
            name=stream_name,
            exchange_point='science_data',
            stream_definition_id=stream_def_id)
        stream_config = dict(routing_key=stream_route.routing_key,
                             exchange_point=stream_route.exchange_point,
                             stream_id=stream_id,
                             parameter_dictionary=pd,
                             stream_def_dict=stream_def_dict)
        self._stream_config[stream_name] = stream_config
Example #20
    def test_attribute_version(self):

        io_serializer = IonObjectSerializer()

        # verify that extraneous fields given while creating an IonObject raise an error
        with self.assertRaises(AttributeError):
            IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value',
                                                'more_new_resource': {'key': 'value'}})

        obj = IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value',
                                                  'new_resource': {'num': 9, 'other_field': 'test value',
                                                                   'new_attribute': {'key': 'value'}}})
        obj_dict = io_serializer.serialize(obj, True)
        self.assertEquals(obj_dict['persisted_version'], 2)

        # verify that the simulated previous version data does have new_resource
        self.assertEquals('new_resource' in obj_dict, True)
        # verify that new_resource has type SampleResource_V2
        self.assertEquals(obj_dict['new_resource']['type_'], 'SampleResource_V2')

        # set type to SampleComplexEvent_V3
        obj_dict['type_'] = 'SampleComplexEvent_V3'
        obj_dict['persisted_version'] = 3
        # set new_resource's type to SampleResource_V3,
        # so we pretend that the version, not the type, of the attribute has changed
        obj_dict['new_resource']['type_'] = 'SampleResource_V3'

        # simulate reading SampleComplexEvent_V3 after a new version of new_resource has been introduced
        io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
        obj = io_deserializer.deserialize(obj_dict)

        # verify that new resource is not deleted
        self.assertTrue('new_resource' in obj)
        # verify that new resource does not have new_attribute
        self.assertFalse('new_attribute' in obj.new_resource)
        # verify that the next version of new_resource has default data in the another_new_attribute
        self.assertEquals(obj.new_resource.another_new_attribute['key'], 'new_value')
        # verify that old attributes values of new_resource have not been thrown away
        self.assertEquals(obj.new_resource.num, 9)
        # verify that values from old attributes of SampleComplexEvent_V2 are still there
        self.assertEquals(obj.num, 9)
        self.assertEquals(obj.other_field, 'test value')

        # verify that on read version is not yet updated for the subsumed object
        self.assertEquals(obj.new_resource.persisted_version, 2)

        # simulate create/update
        obj_dict = io_serializer.serialize(obj, True)
        # verify that versions are unchanged
        self.assertEquals(obj_dict['persisted_version'], 3)
        # verify that versions are updated in the subsumed object
        self.assertEquals(obj_dict['new_resource']['persisted_version'], 3)
Example #21
    def __init__(self):
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError(
                'Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure'
                % xs_dot_xp)

        self.serializer = IonObjectSerializer()
        self.process_definition_id = None
Example #22
    def __init__(self, container, datastore_name=""):
        self.container = container
        self.datastore_name = datastore_name

        # Object serialization/deserialization
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
Example #23
    def __init__(self, host=None, port=None, datastore_name='prototype', options="", profile=DataStore.DS_PROFILE.BASIC):
        log.debug('__init__(host=%s, port=%s, datastore_name=%s, options=%s)', host, port, datastore_name, options)
        self.host = host or CFG.server.couchdb.host
        self.port = port or CFG.server.couchdb.port
        # The scoped name of the datastore
        self.datastore_name = datastore_name
        self.auth_str = ""
        try:
            if CFG.server.couchdb.username and CFG.server.couchdb.password:
                self.auth_str = "%s:%s@" % (CFG.server.couchdb.username, CFG.server.couchdb.password)
                log.debug("Using username:password authentication to connect to datastore")
        except AttributeError:
            log.error("CouchDB username:password not configured correctly. Trying anonymous...")

        connection_str = "http://%s%s:%s" % (self.auth_str, self.host, self.port)
        #connection_str = "http://%s:%s" % (self.host, self.port)
        # TODO: Security risk to emit password into log. Remove later.
        log.info('Connecting to CouchDB server: %s' % connection_str)
        self.server = couchdb.Server(connection_str)

        # Datastore specialization (views)
        self.profile = profile

        # serializers
        self._io_serializer     = IonObjectSerializer()
        # TODO: Not nice to have this class depend on ION objects
        self._io_deserializer   = IonObjectDeserializer(obj_registry=get_obj_registry())
        self._datastore_cache = {}
Example #24
class CodecInterceptor(Interceptor):
    """
    Transforms IonObject <-> dict
    """
    def __init__(self):
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    def outgoing(self, invocation):
        #log.debug("CodecInterceptor.outgoing: %s", invocation)

        #log.debug("Payload, pre-transform: %s", invocation.message)
        invocation.message = self._io_serializer.serialize(invocation.message)
        #log.debug("Payload, post-transform: %s", invocation.message)

        return invocation

    def incoming(self, invocation):
        #log.debug("CodecInterceptor.incoming: %s", invocation)

        payload = invocation.message
        #log.debug("Payload, pre-transform: %s", payload)

        invocation.message = self._io_deserializer.deserialize(payload)
        #log.debug("Payload, post-transform: %s", invocation.message)

        return invocation
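
The transform this interceptor applies can be exercised standalone; a minimal sketch reusing names that appear elsewhere in these examples ('SampleResource' is from the version tests):

    io_ser = IonObjectSerializer()
    io_des = IonObjectDeserializer(obj_registry=get_obj_registry())

    msg = IonObject('SampleResource', {'num': 9})
    wire_dict = io_ser.serialize(msg)         # IonObject -> plain dict (outgoing)
    restored = io_des.deserialize(wire_dict)  # plain dict -> IonObject (incoming)
    assert restored.num == 9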
Example #25
class CodecInterceptor(Interceptor):
    """
    Transforms IonObject <-> dict
    """
    def __init__(self):
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())

    def outgoing(self, invocation):
        log.debug("CodecInterceptor.outgoing: %s", invocation)

        log.debug("Payload, pre-transform: %s", invocation.message)
        invocation.message = self._io_serializer.serialize(invocation.message)
        log.debug("Payload, post-transform: %s", invocation.message)

        return invocation

    def incoming(self, invocation):
        log.debug("CodecInterceptor.incoming: %s", invocation)

        payload = invocation.message
        log.debug("Payload, pre-transform: %s", payload)

        invocation.message = self._io_deserializer.deserialize(payload)
        log.debug("Payload, post-transform: %s", invocation.message)

        return invocation
Example #26
    def __init__(self):
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

        self.serializer = IonObjectSerializer()
Example #27
class IonSerializerDictionaryRepresentation(Representation):
    def __init__(self, id_factory):
        self.encoder = IonObjectSerializer()
        self.decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
        self.id_factory = id_factory

    def encode(self, obj, add_id=False):
        out = self.encoder.serialize(obj)
        if add_id and '_id' not in out:
            out['_id'] = self.id_factory.create_id()
        return out

    def decode(self, data):
        return self.decoder.deserialize(data)
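
A hedged usage sketch; id_factory is any object exposing a create_id() method, as implied by the call above:

    representation = IonSerializerDictionaryRepresentation(id_factory=id_factory)
    doc = representation.encode(IonObject('SampleResource', {'num': 9}), add_id=True)
    # doc now carries a generated '_id'; decode() reverses the transform.
    obj = representation.decode(doc)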
Example #28
    def test_version_del_attrib(self):

        io_serializer = IonObjectSerializer()

        # verify that extraneous fields given while creating an IonObject raise an error
        with self.assertRaises(AttributeError):
            IonObject('SampleResource_V2', {'num': 9, 'other_field': 'test value', 'more_new_attribute': {'key': 'value'}})
        # simulate creating a version 2 of SampleResource that has "new_attribute"
        obj = IonObject('SampleResource_V2', {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}})
        obj_dict = io_serializer.serialize(obj, True)
        # verify that version is 2
        self.assertEquals(obj_dict['persisted_version'], 2)
        # verify that the simulated version 2 data does have new_attribute
        self.assertEquals('new_attribute' in obj_dict, True)

        # simulate incrementing to version 3 that does not have "new_attribute"
        obj_dict['type_'] = 'SampleResource_V3'

        # simulate reading after version increment to 3
        io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
        obj = io_deserializer.deserialize(obj_dict)

        # verify that new attribute is deleted
        self.assertFalse('new_attribute' in obj)
        # verify that the next version data does have another_new_attribute
        self.assertEquals(obj.another_new_attribute['key'], 'new_value')

        # verify that old attributes are still there and retain their data
        self.assertEquals(obj.num, 9)
        # verify that old attributes are still there and retain their data
        self.assertEquals(obj.other_field, 'test value')
        # verify that persisted_version is not yet updated i.e. it is still 2
        self.assertEquals(obj_dict['persisted_version'], 2)

        # simulate update
        obj_dict = io_serializer.serialize(obj, True)
        # verify that version is updated
        self.assertEquals(obj_dict['persisted_version'], 3)
Example #29
class IonSerializerDictionaryRepresentation(Representation):
    def __init__(self, id_factory):
        self.encoder = IonObjectSerializer()
        self.decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
        self.id_factory = id_factory

    def encode(self, obj, add_id=False):
        out = self.encoder.serialize(obj)
        if add_id and '_id' not in out:
            out['_id'] = self.id_factory.create_id()
        return out

    def decode(self, data):
        return self.decoder.deserialize(data)
Example #30
    def __init__(self,
                 datastore_name=None,
                 config=None,
                 scope=None,
                 profile=None):
        """
        @param datastore_name  Name of datastore within server. May be scoped to sysname
        @param config  A server config dict with connection params
        @param scope  Prefix for the datastore name (e.g. sysname) to separate multiple systems
        """

        PostgresDataStore.__init__(self,
                                   datastore_name=datastore_name,
                                   config=config or CFG.get_safe("server.postgresql"),
                                   profile=profile or DataStore.DS_PROFILE.BASIC,
                                   scope=scope)

        # IonObject Serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())
Example #31
    def __init__(self, datastore_name=None, config=None, scope=None, profile=None):
        """
        @param datastore_name  Name of datastore within server. May be scoped to sysname
        @param config  A server config dict with connection params
        @param scope  Prefix for the datastore name (e.g. sysname) to separate multiple systems
        """

        PostgresDataStore.__init__(self, datastore_name=datastore_name,
                                     config=config or CFG.get_safe("server.postgresql"),
                                     profile=profile or DataStore.DS_PROFILE.BASIC,
                                     scope=scope)

        # IonObject Serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
Example #32
    def test_event_version_del_attrib(self):

        io_serializer = IonObjectSerializer()

        # verify that extraneous fields given while creating an IonObject raise an error
        with self.assertRaises(AttributeError):
            IonObject('SampleEvent_V2', {'num': 9, 'other_field': 'test value', 'more_new_attribute': {'key': 'value'}})

        obj = IonObject('SampleEvent_V2', {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}})
        obj_dict = io_serializer.serialize(obj, True)
        self.assertEquals(obj_dict['persisted_version'], 2)
        # simulate a next version data of SampleEvent_V2
        obj_dict['type_'] = 'SampleEvent_V3'

        # verify that the simulated previous version data does have new_attribute
        self.assertEquals('new_attribute' in obj_dict, True)
        # simulate reading the next version that does not have new_attribute
        io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
        obj = io_deserializer.deserialize(obj_dict)

        # verify that new attribute is deleted
        self.assertFalse('new_attribute' in obj)
        # verify that the simulated next version data does have more_new_attribute
        self.assertEquals(obj.another_new_attribute['key'], 'new_value')

        # verify that old attributes are still there
        self.assertEquals(obj.num, 9)
        # verify that old attributes are still there
        self.assertEquals(obj.other_field, 'test value')
        # verify that on read version is not yet updated
        self.assertEquals(obj_dict['persisted_version'], 2)

        # simulate create/update
        obj_dict = io_serializer.serialize(obj, True)
        # verify that version is updated
        self.assertEquals(obj_dict['persisted_version'], 3)
Example #33
def gw_agent_execute_agent(resource_id, cmd, requester=None):

    agent_cmd_params = IonObjectSerializer().serialize(cmd)

    agent_execute_request = {
        "agentRequest": {
            "agentId": resource_id,
            "agentOp": "execute_agent",
            "expiry": 0,
            "params": {
                "command": agent_cmd_params
            }
        }
    }

    ret_values = process_gateway_request(resource_id, "execute_agent",
                                         agent_execute_request, requester)

    ret_obj = IonObject('AgentCommandResult', ret_values)
    return ret_obj
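
A hypothetical call site; 'AgentCommand' and its 'command' field follow the IonObject pattern used throughout these examples, and the command name is illustrative:

    cmd = IonObject('AgentCommand', {'command': 'go_active'})   # illustrative command
    result = gw_agent_execute_agent('device_123', cmd)
    # result is an AgentCommandResult IonObject built from the gateway response.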
Example #34
    def recv_packet(self, msg, stream_route, stream_id):
        validate_is_instance(msg, Granule,
                             'Incoming packet must be of type granule')

        cov = self.get_coverage(stream_id)
        if cov:
            cov.insert_timesteps(1)

            if 'raw' in cov.list_parameters():
                gran = IonObjectSerializer().serialize(msg)
                cov.set_parameter_values(param_name='raw', value=[gran])

            if 'ingestion_timestamp' in cov.list_parameters():
                t_now = time.time()
                ntp_time = TimeUtils.ts_to_units(
                    cov.get_parameter_context('ingestion_timestamp').uom,
                    t_now)
                cov.set_parameter_values(param_name='ingestion_timestamp',
                                         value=ntp_time)

            self.dataset_changed(self.get_dataset(stream_id),
                                 cov.num_timesteps)
Example #35
class CodecInterceptor(Interceptor):
    """
    Transforms IonObject <-> dict
    """
    def __init__(self):
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=obj_registry)

    def outgoing(self, invocation):
        log.debug("CodecInterceptor.outgoing: %s", invocation)

        log.debug("Payload, pre-transform: %s", invocation.message)
        invocation.message = self._io_serializer.serialize(invocation.message)
        log.debug("Payload, post-transform: %s", invocation.message)

        return invocation

    def incoming(self, invocation):
        log.debug("CodecInterceptor.incoming: %s", invocation)

        payload = invocation.message
        log.debug("Payload, pre-transform: %s", payload)

        # Horrible, hacky workaround for msgpack issue
        # See http://jira.msgpack.org/browse/MSGPACK-15
        #@todo replace this with use_list in msgpack.unpackb !!!
        def convert_tuples_to_lists(obj):
            if isinstance(obj, tuple):
                res = list(obj)
                return res
            return obj

        payload = walk(payload, convert_tuples_to_lists)

        invocation.message = self._io_deserializer.deserialize(payload)
        log.debug("Payload, post-transform: %s", invocation.message)

        return invocation
Example #36
    def _gw_execute(self, op, resource_id, cmd, requester=None, timeout=300):

        agent_cmd_params = IonObjectSerializer().serialize(cmd)

        agent_execute_request = {
            "agentRequest": {
                "agentId": resource_id,
                "agentOp": op,
                #            "expiry": 0,
                "timeout": 300,
                "params": {
                    "timeout": timeout,
                    "command": agent_cmd_params
                }
            }
        }

        ret_values = _process_gateway_request(resource_id, op,
                                              agent_execute_request, requester)

        ret_obj = IonObject('AgentCommandResult', ret_values)
        return ret_obj
Example #37
    def __init__(self):
        BaseTransformManagementService.__init__(self)

        self.serializer = IonObjectSerializer()
Example #38
class MockDB_DataStore(DataStore):
    """
    Data store implementation utilizing in-memory dict of dicts
    to persist documents.
    """
    def __init__(self, datastore_name='prototype'):
        self.datastore_name = datastore_name
        log.debug(
            'Creating in-memory dict of dicts that will simulate data stores')
        self.root = {}

        # serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=obj_registry)

    def create_datastore(self, datastore_name="", create_indexes=True):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Creating data store %s' % datastore_name)
        if self.datastore_exists(datastore_name):
            raise BadRequest("Data store with name %s already exists" %
                             datastore_name)
        if datastore_name not in self.root:
            self.root[datastore_name] = {}

    def delete_datastore(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Deleting data store %s' % datastore_name)
        if datastore_name in self.root:
            del self.root[datastore_name]
        else:
            log.info('Data store %s does not exist' % datastore_name)

    def list_datastores(self):
        log.debug('Listing all data stores')
        dsList = self.root.keys()
        log.debug('Data stores: %s' % str(dsList))
        return dsList

    def info_datastore(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing information about data store %s' % datastore_name)
        if datastore_name in self.root:
            info = 'Data store exists'
        else:
            raise BadRequest("Data store with name %s does not exist" %
                             datastore_name)
        log.debug('Data store info: %s' % str(info))
        return info

    def datastore_exists(self, datastore_name=""):
        return datastore_name in self.root

    def list_objects(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all objects in data store %s' % datastore_name)
        objs = []
        for key, value in self.root[datastore_name].items():
            if key.find('_version_counter') == -1 and key.find(
                    '_version_') == -1:
                objs.append(key)
        log.debug('Objects: %s' % str(objs))
        return objs

    def list_object_revisions(self, object_id, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all versions of object %s/%s' %
                  (datastore_name, str(object_id)))
        res = []
        for key, value in self.root[datastore_name].items():
            if (key.find('_version_counter') == -1
                    and (key.find(object_id + '_version_') == 0)):
                res.append(key)
        log.debug('Versions: %s' % str(res))
        return res

    def create(self, obj, object_id=None, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id,
                               datastore_name=datastore_name)

    def create_doc(self, doc, object_id=None, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")
        if '_rev' in doc:
            raise BadRequest("Doc must not have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

        if object_id:
            if object_id in datastore_dict:
                raise BadRequest("Object with id %s already exist" % object_id)

        # Assign an id to doc
        doc["_id"] = object_id or uuid4().hex
        object_id = doc["_id"]

        log.debug('Creating new object %s/%s' % (datastore_name, object_id))

        # Create key for version counter entry.  Will be used
        # on update to increment version number easily.
        version_counter_key = '__' + object_id + '_version_counter'
        version_counter = 1

        # Assign initial version to doc
        doc["_rev"] = str(version_counter)

        # Write HEAD, version and version counter dicts
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc

        # Return list that identifies the id of the new doc and its version
        res = [object_id, str(version_counter)]
        log.debug('Create result: %s' % str(res))
        return res

    def create_mult(self, objects, object_ids=None):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc_mult(
            [self._ion_object_to_persistence_dict(obj) for obj in objects],
            object_ids)

    def create_doc_mult(self, docs, object_ids=None):
        if any(["_id" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_id'")
        if any(["_rev" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_rev'")
        if object_ids and len(object_ids) != len(docs):
            raise BadRequest("Invalid object_ids")

        # Assign an id to doc (recommended in CouchDB documentation)
        object_ids = object_ids or [uuid4().hex for i in xrange(len(docs))]

        res = []
        for doc, oid in zip(docs, object_ids):
            oid, rev = self.create_doc(doc, oid)
            res.append((True, oid, rev))
        return res

    def read(self, object_id, rev_id="", datastore_name=""):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")
        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s' % str(obj))
        return obj

    def read_doc(self, object_id, rev_id="", datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

        try:
            key = object_id
            if rev_id:
                log.debug('Reading version %s of object %s/%s' %
                          (str(rev_id), datastore_name, str(object_id)))
                key += '_version_' + str(rev_id)
            else:
                log.debug('Reading head version of object %s/%s' %
                          (datastore_name, str(object_id)))
            doc = datastore_dict[key]
        except KeyError:
            raise NotFound('Object with id %s does not exist.' %
                           str(object_id))
        log.debug('Read result: %s' % str(doc))
        return doc

    def read_mult(self, object_ids, datastore_name=""):
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object id param is not string")
        docs = self.read_doc_mult(object_ids, datastore_name)
        # Convert docs into Ion objects
        obj_list = [self._persistence_dict_to_ion_object(doc) for doc in docs]
        return obj_list

    def read_doc_mult(self, object_ids, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

        doc_list = []
        try:
            for object_id in object_ids:
                log.debug('Reading head version of object %s/%s' %
                          (datastore_name, str(object_id)))
                doc = datastore_dict[object_id]

                doc_list.append(doc.copy())
        except KeyError:
            raise NotFound('Object with id %s does not exist.' %
                           str(object_id))
        return doc_list

    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_doc(self, doc, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")
        if '_rev' not in doc:
            raise BadRequest("Doc must have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

        try:
            object_id = doc["_id"]

            # Find the next doc version
            version_counter_key = '__' + object_id + '_version_counter'
            baseVersion = doc["_rev"]
            version_counter = datastore_dict[version_counter_key] + 1
            if baseVersion != str(version_counter - 1):
                raise Conflict('Object not based on most current version')
        except KeyError:
            raise BadRequest("Object missing required _id and/or _rev values")

        log.debug('Saving new version of object %s/%s' %
                  (datastore_name, doc["_id"]))
        doc["_rev"] = str(version_counter)

        # Overwrite HEAD and version counter dicts, add new version dict
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc
        res = [object_id, str(version_counter)]
        log.debug('Update result: %s' % str(res))
        return res

    def delete(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest(
                "Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            return self.delete_doc(obj, datastore_name=datastore_name)
        return self.delete_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def delete_doc(self, doc, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

        if type(doc) is str:
            object_id = doc
        else:
            object_id = doc["_id"]

        log.info('Deleting object %s/%s' % (datastore_name, object_id))
        if object_id in datastore_dict.keys():

            if self._is_in_association(object_id, datastore_name):
                obj = self.read(object_id, "", datastore_name)
                log.warn("XXXXXXX Attempt to delete object %s that still has associations"
                         % str(obj))
                # raise BadRequest("Object cannot be deleted until associations are broken")

            # Find all version dicts and delete them
            for key in datastore_dict.keys():
                if key.find(object_id + '_version_') == 0:
                    del datastore_dict[key]
            # Delete the HEAD dict
            del datastore_dict[object_id]
            # Delete the version counter dict
            del datastore_dict['__' + object_id + '_version_counter']
        else:
            raise NotFound('Object with id ' + object_id + ' does not exist.')
        log.info('Delete result: True')

    def _is_in_association(self, obj_id, datastore_name=""):
        log.debug("_is_in_association(%s)" % obj_id)
        if not obj_id:
            raise BadRequest("Must provide object id")

        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and obj['type_'] == "Association":
                association = obj
                if association["s"] == obj_id or association["o"] == obj_id:
                    log.debug("association found(%s)" % association)
                    return True
        return False

    def find_objects(self,
                     subject,
                     predicate=None,
                     object_type=None,
                     id_only=False):
        log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s)"
                  % (subject, predicate, object_type, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name +
                             ' does not exist.')

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['s'] == subject_id:
                    if predicate and obj['p'] == predicate:
                        if (object_type and obj['ot'] == object_type) or not object_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['o'])
                            target_list.append(self.read(obj['o']))
                    elif not predicate:
                        assoc_list.append(obj)
                        target_id_list.append(obj['o'])
                        target_list.append(self.read(obj['o']))

        log.debug("find_objects() found %s objects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)
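
    # Return convention shared by the find_* methods of this in-memory
    # datastore: a 2-tuple of (matches, associations), where matches holds
    # ids when id_only=True and deserialized IonObjects otherwise, and the
    # association list runs parallel to the matches.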

    def find_subjects(self,
                      subject_type=None,
                      predicate=None,
                      obj=None,
                      id_only=False):
        log.debug(
            "find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s"
            % (subject_type, predicate, obj, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name +
                             ' does not exist.')

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['o'] == object_id:
                    if predicate and obj['p'] == predicate:
                        if (subject_type and obj['st']
                                == subject_type) or not subject_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['s'])
                            target_list.append(self.read(obj['s']))
                    elif not predicate:
                        assoc_list.append(obj)
                        target_id_list.append(obj['s'])
                        target_list.append(self.read(obj['s']))

        log.debug("find_subjects() found %s subjects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_associations(self,
                          subject=None,
                          predicate=None,
                          obj=None,
                          assoc_type=None,
                          id_only=True):
        log.debug(
            "find_associations(subject=%s, predicate=%s, object=%s, assoc_type=%s)"
            % (subject, predicate, obj, assoc_type))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not ((subject and obj) or predicate):
            raise BadRequest("Illegal parameters")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name +
                             ' does not exist.')

        if subject and obj:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available in subject")
                else:
                    subject_id = subject._id
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id
            target_list = []
            for objname, obj in datastore_dict.iteritems():
                if (objname.find('_version_') > 0) or (not type(obj) is dict):
                    continue
                if 'type_' in obj and obj['type_'] == "Association":
                    if obj['s'] == subject_id and obj['o'] == object_id:
                        if assoc_type:
                            if obj['at'] == assoc_type:
                                target_list.append(obj)
                        else:
                            target_list.append(obj)
        else:
            target_list = []
            for objname, obj in datastore_dict.iteritems():
                if (objname.find('_version_') > 0) or (not type(obj) is dict):
                    continue
                if 'type_' in obj and obj['type_'] == "Association":
                    if obj['p'] == predicate:
                        target_list.append(obj)

        if id_only:
            assocs = [row['_id'] for row in target_list]
        else:
            assocs = [
                self._persistence_dict_to_ion_object(row)
                for row in target_list
            ]
        log.debug("find_associations() found %s associations" % (len(assocs)))
        return assocs

    def find_res_by_type(self, restype, lcstate=None, id_only=False):
        log.debug("find_res_by_type(restype=%s, lcstate=%s)" %
                  (restype, lcstate))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name +
                             ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and (obj['type_'] == restype or
                                   (not restype
                                    and obj['type_'] != "Association")):
                if (lcstate and 'lcstate' in obj and obj['lcstate']
                        == lcstate) or not lcstate or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(
                        self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_type() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False):
        log.debug("find_res_by_type(lcstate=%s, restype=%s)" %
                  (lcstate, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name +
                             ' does not exist.')

        if lcstate in CommonResourceLifeCycleSM.STATE_ALIASES:
            lcstate_match = CommonResourceLifeCycleSM.STATE_ALIASES[lcstate]
        else:
            lcstate_match = [lcstate]
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'lcstate' in obj and obj['lcstate'] in lcstate_match:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(
                        self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_lcstate() found %s resources" %
                  (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def _pass(self):
        pass

    def find_res_by_name(self, name, restype=None, id_only=False):
        log.debug("find_res_by_name(name=%s, restype=%s)" % (name, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name +
                             ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'name' in obj and obj['name'] == name:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(
                        self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_name() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_dir_entries(self, qname):
        raise NotImplementedError()

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None: return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None: return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
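
A brief usage sketch for the in-memory datastore methods above. The variable
mock_ds stands in for an instance of the enclosing datastore class (whose
name and constructor fall outside this excerpt), and the ids and predicate
strings are hypothetical.

    stream_ids, assocs = mock_ds.find_objects('transform_1', predicate='hasOutStream', id_only=True)
    subjects, assocs = mock_ds.find_subjects(predicate='hasOutStream', obj='stream_1', id_only=False)
    transforms, _ = mock_ds.find_res_by_type('Transform', id_only=False)
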
class TransformManagementService(BaseTransformManagementService):
    """Provides the main orchestration for stream processing
    subscription, data process definition and computation
    request (scheduling). The transformation service handles content format
    transformation, mediation, qualification, verification and validation
    """
    def __init__(self):
        BaseTransformManagementService.__init__(self)

        self.serializer = IonObjectSerializer()

    def on_start(self):
        super(TransformManagementService,self).on_start()
        restart_flag = self.CFG.get_safe('service.transform_management.restart', False)
        if restart_flag:
            transform_ids, meta = self.clients.resource_registry.find_resources(restype=RT.Transform, id_only=True)
            for transform_id in transform_ids:
                self._restart_transform(transform_id)

    def _restart_transform(self, transform_id):
        transform = self.clients.resource_registry.read(transform_id)
        configuration = transform.configuration
        proc_def_ids,other = self.clients.resource_registry.find_objects(subject=transform_id,predicate=PRED.hasProcessDefinition,id_only=True)

        if len(proc_def_ids) < 1:
            log.warning('Transform %s has no associated process definition.' % transform_id)
            return

        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=proc_def_ids[0],
            configuration=configuration
        )

        transform.process_id = pid
        self.clients.resource_registry.update(transform)




    def _strip_types(self, obj):
        if not isinstance(obj, dict):
            return
        for k,v in obj.iteritems():
            if isinstance(v,dict):
                self._strip_types(v)
        if "type_" in obj:
            del obj['type_']
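
    # Sketch of the effect of _strip_types on a hypothetical serialized
    # configuration (it mutates in place):
    #   {'type_': 'Cfg', 'inner': {'type_': 'Inner', 'x': 1}} -> {'inner': {'x': 1}}
    # Only dict values are recursed into, so 'type_' keys nested inside lists
    # are left untouched.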


    def create_transform(self,
                         name='',
                         description='',
                         in_subscription_id='',
                         out_streams=None,
                         process_definition_id='',
                         configuration=None):

        """Creates the transform and registers it with the resource registry
        @param process_definition_id The process definition contains the module and class of the process to be spawned
        @param in_subscription_id The subscription id corresponding to the input subscription
        @param out_streams Dict mapping output stream names to stream ids
        @param configuration {}

        @return The transform_id of the new transform
        """

        # ------------------------------------------------------------------------------------
        # Resources and Initial Configs
        # ------------------------------------------------------------------------------------
        # Determine Transform Name

        if isinstance(configuration, IonObjectBase):
            configuration = self.serializer.serialize(configuration)
            # strip the type
            self._strip_types(configuration)


        elif not configuration:
            configuration = {}

        # Handle the name uniqueness factor
        res, _ = self.clients.resource_registry.find_resources(name=name, id_only=True)
        if len(res) > 0:
            raise BadRequest('A transform resource named %s already exists.' % name)

        transform_name = name

        if not process_definition_id:
            raise NotFound('No process definition was provided')


        # Transform Resource for association management and pid
        transform_res = Transform(name=name, description=description)

        transform_id, _ = self.clients.resource_registry.create(transform_res)

        transform_res = self.clients.resource_registry.read(transform_id)


        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------

        subscription = self.clients.pubsub_management.read_subscription(subscription_id = in_subscription_id)
        listen_name = subscription.exchange_name


        configuration['process'] = dict({
            'name':transform_name,
            'type':'stream_process',
            'listen_name':listen_name,
            'transform_id':transform_id
        })
        if out_streams:
            configuration['process']['publish_streams'] = out_streams
            stream_ids = list(v for k,v in out_streams.iteritems())
        else:
            stream_ids = []
        transform_res.configuration = configuration


        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        # Spawn the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            configuration=configuration
        )
        transform_res.process_id = pid

        # ------------------------------------------------------------------------------------
        # Handle Resources
        # ------------------------------------------------------------------------------------

        self.clients.resource_registry.update(transform_res)

        self.clients.resource_registry.create_association(transform_id,PRED.hasProcessDefinition,process_definition_id)
        self.clients.resource_registry.create_association(transform_id,PRED.hasSubscription,in_subscription_id)


        for stream_id in stream_ids:
            self.clients.resource_registry.create_association(transform_id,PRED.hasOutStream,stream_id)

        return transform_id
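
    # Note: the spawned process reads configuration['process'], shaped as
    # {'name': ..., 'type': 'stream_process', 'listen_name': <exchange name
    # of the input subscription>, 'transform_id': ...}, plus 'publish_streams'
    # when out_streams is given; that is how the transform knows where to
    # listen and what to publish.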



    def update_transform(self, configuration=None):
        """Not currently possible to update a transform
        @throws NotImplementedError
        """
        raise NotImplementedError


    def read_transform(self, transform_id=''):
        """Reads a transform from the resource registry
        @param transform_id The unique transform identifier
        @return Transform resource
        @throws NotFound when transform doesn't exist
        """

        log.debug('(%s): Reading Transform: %s' % (self.name,transform_id))
        transform = self.clients.resource_registry.read(object_id=transform_id,rev_id='')
        return transform
        

    def delete_transform(self, transform_id=''):
        """Deletes and stops an existing transform process
        @param transform_id The unique transform identifier
        @throws NotFound when a transform doesn't exist
        """

        # get the transform resource (also verifies its existence before continuing)
        transform_res = self.read_transform(transform_id=transform_id)
        pid = transform_res.process_id

        # get the resources
        process_definition_ids, _ = self.clients.resource_registry.find_objects(transform_id,
                                PRED.hasProcessDefinition, RT.ProcessDefinition, True)
        in_subscription_ids, _ = self.clients.resource_registry.find_objects(transform_id,
                                PRED.hasSubscription, RT.Subscription, True)
        out_stream_ids, _ = self.clients.resource_registry.find_objects(transform_id,
                                PRED.hasOutStream, RT.Stream, True)

        # build a list of all the ids above
        id_list = process_definition_ids + in_subscription_ids + out_stream_ids

        # stop the transform process

        #@note: terminate_process does not raise or confirm whether termination was successful

        self.clients.process_dispatcher.cancel_process(pid)

        log.debug('(%s): Terminated Process (%s)' % (self.name,pid))


        # delete the associations
        for predicate in [PRED.hasProcessDefinition, PRED.hasSubscription, PRED.hasOutStream]:
            associations = self.clients.resource_registry.find_associations(transform_id,predicate)
            for association in associations:
                self.clients.resource_registry.delete_association(association)


        #@todo: should I delete the resources, or should dpms?

        # iterate through the list and delete each
        #for res_id in id_list:
        #    self.clients.resource_registry.delete(res_id)

        self.clients.resource_registry.delete(transform_id)
        return True



# ---------------------------------------------------------------------------

    def execute_transform(self, process_definition_id='', data=None, configuration=None):
        process_definition = self.clients.process_dispatcher.read_process_definition(process_definition_id)
        module = process_definition.executable.get('module')
        cls = process_definition.executable.get('class')



        module = __import__(module, fromlist=[cls])
        cls = getattr(module,cls)
        instance = cls()

        result = gevent.event.AsyncResult()
        def execute(data):
            result.set(instance.execute(data))

        g = gevent.greenlet.Greenlet(execute, data)
        g.start()

        retval = result.get(timeout=10)


        return retval
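
    # The AsyncResult/Greenlet pair above is the usual gevent pattern for
    # putting a time bound on a synchronous call: execute() runs in its own
    # greenlet and delivers its value via result.set(), while
    # result.get(timeout=10) blocks the caller and raises gevent.Timeout if
    # no value arrives within 10 seconds.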


    def activate_transform(self, transform_id=''):
        """Activate the subscription to bind (start) the transform
        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist.
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(transform_id,
                                                            PRED.hasSubscription, RT.Subscription, True)
        if len(subscription_ids) < 1:
            raise NotFound('Transform %s has no associated subscription' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.activate_subscription(subscription_id)


        return True

    def deactivate_transform(self, transform_id=''):
        """Decativates the subscriptions for the specified transform
        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(transform_id,
                                                            PRED.hasSubscription, RT.Subscription, True)
        if len(subscription_ids) < 1:
            raise NotFound('Transform %s has no associated subscription' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.deactivate_subscription(subscription_id)

        return True



    def schedule_transform(self, transform_id=''):
        """Not currently implemented
        @throws NotImplementedError
        """
        raise NotImplementedError
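
A minimal lifecycle sketch for the service above; the client wiring and the
ids are hypothetical stand-ins, and in a real container the service would be
started by the process dispatcher rather than instantiated directly.

    tms = TransformManagementService()
    transform_id = tms.create_transform(name='example_transform',
                                        in_subscription_id='sub_1',
                                        out_streams={'output': 'stream_1'},
                                        process_definition_id='procdef_1')
    tms.activate_transform(transform_id)    # binds the input subscription
    tms.deactivate_transform(transform_id)  # unbinds it again
    tms.delete_transform(transform_id)      # cancels the process, drops associations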
Example #40
0
 def __init__(self, id_factory):
     self.encoder = IonObjectSerializer()
     self.decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
     self.id_factory = id_factory
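
A round-trip sketch for the codec fragment above: the encoder turns an
IonObject into a plain dict and the decoder rebuilds it. The enclosing class
name (Codec), the id_factory, and the 'SampleResource' type are hypothetical.

    codec = Codec(id_factory=lambda: 'id_1')
    obj = IonObject('SampleResource', {'name': 'demo'})
    obj_dict = codec.encoder.serialize(obj)         # plain dict for persistence
    restored = codec.decoder.deserialize(obj_dict)  # IonObject again
    assert restored.name == 'demo'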
Example #41
0
class PostgresPyonDataStore(PostgresDataStore):
    """
    Pyon (IonObject) datastore on top of the Postgres datastore.
    """

    def __init__(self, datastore_name=None, config=None, scope=None, profile=None):
        """
        @param datastore_name  Name of datastore within server. May be scoped to sysname
        @param config  A server config dict with connection params
        @param scope  Prefix for the datastore name (e.g. sysname) to separate multiple systems
        """

        PostgresDataStore.__init__(self, datastore_name=datastore_name,
                                     config=config or CFG.get_safe("server.postgresql"),
                                     profile=profile or DataStore.DS_PROFILE.BASIC,
                                     scope=scope)

        # IonObject Serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    # -------------------------------------------------------------------------
    # Postgres document operations

    def create(self, obj, object_id=None, attachments=None, datastore_name=""):
        """
        Converts ion objects to python dictionary before persisting them using the optional
        suggested identifier and creates attachments to the object.
        Returns an identifier and revision number of the object
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                                   object_id=object_id, datastore_name=datastore_name,
                                   attachments=attachments)

    def create_mult(self, objects, object_ids=None, allow_ids=None):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids)


    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_mult(self, objects):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects])


    def read(self, object_id, rev_id="", datastore_name="", object_type=None):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")

        doc = self.read_doc(object_id, rev_id, datastore_name=datastore_name, object_type=object_type)
        obj = self._persistence_dict_to_ion_object(doc)

        return obj

    def read_mult(self, object_ids, datastore_name="", strict=True):
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object ids are not string: %s" % str(object_ids))

        docs = self.read_doc_mult(object_ids, datastore_name, strict=strict)
        obj_list = [self._persistence_dict_to_ion_object(doc) if doc is not None else None for doc in docs]

        return obj_list

    def delete(self, obj, datastore_name="", object_type=None):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest("Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            self.delete_doc(obj, datastore_name=datastore_name, object_type=object_type)
        else:
            if '_id' not in obj:
                raise BadRequest("Doc must have '_id'")
            if '_rev' not in obj:
                raise BadRequest("Doc must have '_rev'")
            self.delete_doc(self._ion_object_to_persistence_dict(obj),
                            datastore_name=datastore_name, object_type=object_type)

    def delete_mult(self, object_ids, datastore_name=None):
        return self.delete_doc_mult(object_ids, datastore_name)

    # -------------------------------------------------------------------------
    # View operations

    def find_objects_mult(self, subjects, id_only=False):
        """
        Returns a list of associations for a given list of subjects
        """
        #ds, datastore_name = self._get_datastore()
        #validate_is_instance(subjects, list, 'subjects is not a list of resource_ids')
        #view_args = dict(keys=subjects, include_docs=True)
        #results = self.query_view(self._get_view_name("association", "by_bulk"), view_args)
        #ids = [i['value'] for i in results]
        #assocs = [i['doc'] for i in results]
        #self._count(find_assocs_mult_call=1, find_assocs_mult_obj=len(ids))
        #if id_only:
        #    return ids, assocs
        #else:
        #    return self.read_mult(ids), assocs

        # TODO: Port this implementation to Postgres single query
        res_list = [[], []]
        if not subjects:
            return res_list
        for sub in subjects:
            res_ids, res_assocs = self.find_objects(subject=sub, id_only=id_only)
            res_list[0].extend(res_ids)
            res_list[1].extend(res_assocs)
        return res_list

    def find_subjects_mult(self, objects, id_only=False):
        """
        Returns a list of associations for a given list of objects
        """
        #ds, datastore_name = self._get_datastore()
        #validate_is_instance(objects, list, 'objects is not a list of resource_ids')
        #view_args = dict(keys=objects, include_docs=True)
        #results = self.query_view(self._get_view_name("association", "by_subject_bulk"), view_args)
        #ids = [i['value'] for i in results]
        #assocs = [i['doc'] for i in results]
        #self._count(find_assocs_mult_call=1, find_assocs_mult_obj=len(ids))
        #if id_only:
        #    return ids, assocs
        #else:
        #    return self.read_mult(ids), assocs

        # TODO: Port this implementation to Postgres single query
        res_list = [[], []]
        if not objects:
            return res_list
        for obj in objects:
            res_ids, res_assocs = self.find_subjects(obj=obj, id_only=id_only)
            res_list[0].extend(res_ids)
            res_list[1].extend(res_assocs)
        return res_list

    def find_objects(self, subject, predicate=None, object_type=None, id_only=False, **kwargs):
        #log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s", subject, predicate, object_type, id_only)

        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        if object_type and not predicate:
            raise BadRequest("Cannot provide object type without a predicate")

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id

        qual_ds_name = self._get_datastore_name()
        view_args = self._get_view_args(kwargs)

        if id_only:
            query = "SELECT o, doc FROM %(dsa)s WHERE retired<>true " % dict(dsa=qual_ds_name+"_assoc")
        else:
            query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % dict(ds=qual_ds_name, dsa=qual_ds_name+"_assoc")
        query_args = dict(s=subject_id, ot=object_type, p=predicate)

        query_clause = "AND s=%(s)s"
        if predicate:
            query_clause += " AND p=%(p)s"
            if object_type:
                query_clause += " AND ot=%(ot)s"

        extra_clause = view_args.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
        #log.debug("find_objects() found %s objects", len(obj_assocs))
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, obj_assocs
        else:
            res_objs = [self._persistence_dict_to_ion_object(row[0]) for row in rows]
            return res_objs, obj_assocs
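
    # Sketch of the SQL composed above for
    # find_objects('s1', predicate='hasStream', id_only=True), assuming the
    # qualified datastore name is "mydatastore":
    #   SELECT o, doc FROM mydatastore_assoc
    #   WHERE retired<>true AND s=%(s)s AND p=%(p)s
    # The %(...)s placeholders are psycopg-style named parameters filled from
    # query_args, so ids and predicates are bound rather than interpolated.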

    def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False, **kwargs):
        #log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s", subject_type, predicate, obj, id_only)

        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        if subject_type and not predicate:
            raise BadRequest("Cannot provide subject type without a predicate")

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id

        qual_ds_name = self._get_datastore_name()
        view_args = self._get_view_args(kwargs)

        if id_only:
            query = "SELECT s, doc FROM %(dsa)s WHERE retired<>true " % dict(dsa=qual_ds_name+"_assoc")
        else:
            query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % dict(ds=qual_ds_name, dsa=qual_ds_name+"_assoc")
        query_args = dict(o=object_id, st=subject_type, p=predicate)

        query_clause = "AND o=%(o)s"
        if predicate:
            query_clause += " AND p=%(p)s"
            if subject_type:
                query_clause += " AND st=%(st)s"

        extra_clause = view_args.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
        #log.debug("find_subjects() found %s subjects", len(obj_assocs))
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, obj_assocs
        else:
            res_objs = [self._persistence_dict_to_ion_object(row[0]) for row in rows]
            return res_objs, obj_assocs

    def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True, anyside=None, **kwargs):
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not (subject or obj or predicate or anyside):
            raise BadRequest("Illegal parameters: No S/P/O or anyside")
        #if assoc_type:
        #    raise BadRequest("Illegal parameters: assoc_type deprecated")
        if anyside and (subject or obj):
            raise BadRequest("Illegal parameters: anyside cannot be combined with S/O")
        if anyside and predicate and type(anyside) in (list, tuple):
            raise BadRequest("Illegal parameters: anyside list cannot be combined with P")

        subject_id, object_id, anyside_ids = None, None, None
        if subject:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available in subject")
                else:
                    subject_id = subject._id
        if obj:
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id
        if anyside:
            if type(anyside) is str:
                anyside_ids = [anyside]
            elif type(anyside) in (list, tuple):
                if not all([type(o) in (str, list, tuple) for o in anyside]):
                    raise BadRequest("List of object ids or (object id, predicate) expected")
                anyside_ids = anyside
            else:
                if "_id" not in anyside:
                    raise BadRequest("Object id not available in anyside")
                else:
                    anyside_ids = [anyside._id]

        #log.debug("find_associations(subject=%s, predicate=%s, object=%s, anyside=%s)", subject_id, predicate, object_id, anyside_ids)

        qual_ds_name = self._get_datastore_name()
        table = qual_ds_name + "_assoc"
        view_args = self._get_view_args(kwargs)

        if id_only:
            query = "SELECT id FROM " + table
        else:
            query = "SELECT id, doc, s, st, p, o, ot FROM " + table
        query_clause = " WHERE retired<>true AND "
        query_args = dict(s=subject_id, o=object_id, p=predicate)

        if subject and obj:
            query_clause += "s=%(s)s AND o=%(o)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif subject:
            query_clause += "s=%(s)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif obj:
            query_clause += "o=%(o)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif anyside:
            if predicate:
                query_clause += "p=%(p)s AND (s=%(any)s OR o=%(any)s)"
                query_args["any"] = anyside
            elif type(anyside_ids[0]) is str:
                # keys are IDs of resources
                for i, key in enumerate(anyside_ids):
                    if i > 0:
                        query_clause += " OR "
                    argname = "id%s" % i
                    query_args[argname] = key
                    query_clause += "(s=%("+argname+")s OR o=%("+argname+")s)"
            else:
                # keys are tuples of (id, pred)
                for i, (key, pred) in enumerate(anyside_ids):
                    if i > 0:
                        query_clause += " OR "
                    argname_id = "id%s" % i
                    argname_p = "p%s" % i
                    query_args[argname_id] = key
                    query_args[argname_p] = pred
                    query_clause += "(p=%("+argname_p+")s AND (s=%("+argname_id+")s OR o=%("+argname_id+")s))"

        elif predicate:
            if predicate == "*":
                query_clause += "p is not null"
            else:
                query_clause += "p=%(p)s"
        else:
            raise BadRequest("Illegal arguments")

        extra_clause = view_args.get("extra_clause", "")
        sql = query + query_clause + extra_clause
        #print "find_associations(): SQL=", sql, query_args
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(sql, query_args)
            rows = cur.fetchall()

        if id_only:
            assocs = [self._prep_id(row[0]) for row in rows]
        else:
            assocs = [self._persistence_dict_to_ion_object(row[1]) for row in rows]
        #log.debug("find_associations() found %s associations", len(assocs))

        return assocs
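
    # Accepted anyside forms (ids and predicate hypothetical):
    #   find_associations(anyside='res_1')                   # s=res_1 OR o=res_1
    #   find_associations(anyside=['res_1', 'res_2'])        # OR-chain, one clause per id
    #   find_associations(anyside=[('res_1', 'hasStream')])  # (p AND (s OR o)) per pair
    # The guards at the top reject anyside combined with subject/object, and
    # the list forms combined with a predicate.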

    def _prepare_find_return(self, rows, res_assocs=None, id_only=True, **kwargs):
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, res_assocs
        else:
            res_docs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
            return res_docs, res_assocs

    def find_resources(self, restype="", lcstate="", name="", id_only=True):
        return self.find_resources_ext(restype=restype, lcstate=lcstate, name=name, id_only=id_only)

    def find_resources_ext(self, restype="", lcstate="", name="",
                           keyword=None, nested_type=None,
                           attr_name=None, attr_value=None, alt_id=None, alt_id_ns=None,
                           limit=None, skip=None, descending=None, id_only=True):
        filter_kwargs = self._get_view_args(dict(limit=limit, skip=skip, descending=descending))
        if name:
            if lcstate:
                raise BadRequest("find by name does not support lcstate")
            return self.find_res_by_name(name, restype, id_only, filter=filter_kwargs)
        elif keyword:
            return self.find_res_by_keyword(keyword, restype, id_only, filter=filter_kwargs)
        elif alt_id or alt_id_ns:
            return self.find_res_by_alternative_id(alt_id, alt_id_ns, id_only, filter=filter_kwargs)
        elif nested_type:
            return self.find_res_by_nested_type(nested_type, restype, id_only, filter=filter_kwargs)
        elif restype and attr_name:
            return self.find_res_by_attribute(restype, attr_name, attr_value, id_only=id_only, filter=filter_kwargs)
        elif restype and lcstate:
            return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs)
        elif restype:
            return self.find_res_by_type(restype, lcstate, id_only, filter=filter_kwargs)
        elif lcstate:
            return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs)
        elif not restype and not lcstate and not name:
            return self.find_res_by_type(None, None, id_only, filter=filter_kwargs)
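
    # Dispatch precedence above: name, then keyword, then alt_id/alt_id_ns,
    # then nested_type, then restype+attr_name, then restype+lcstate, then
    # restype alone, then lcstate alone; with no criteria at all, every
    # non-retired resource is returned via find_res_by_type(None, ...).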

    def find_res_by_type(self, restype, lcstate=None, id_only=False, filter=None):
        log.debug("find_res_by_type(restype=%s, lcstate=%s)", restype, lcstate)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if lcstate:
            raise BadRequest('lcstate not supported anymore in find_res_by_type')

        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_, lcstate FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, lcstate, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'RETIRED' "
        query_args = dict(type_=restype, lcstate=lcstate)

        if restype:
            query_clause += "AND type_=%(type_)s"
        else:
            # Returns ALL documents, only limited by filter
            query_clause = ""

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows]
        log.debug("find_res_by_type() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate, restype)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if '_' in lcstate:
            log.warn("Search for compound lcstate restricted to maturity: %s", lcstate)
            lcstate,_ = lcstate.split("_", 1)
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_, lcstate, availability FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, lcstate, availability, doc FROM " + qual_ds_name
        query_clause = " WHERE "
        query_args = dict(type_=restype, lcstate=lcstate)

        is_maturity = lcstate not in CommonResourceLifeCycleSM.AVAILABILITY
        if is_maturity:
            query_clause += "lcstate=%(lcstate)s"
        else:
            query_clause += "availability=%(lcstate)s"

        if restype:
            query_clause += " AND type_=%(type_)s"

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2], lcstate=row[3] if is_maturity else row[4]) for row in rows]
        log.debug("find_res_by_lcstate() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_name(self, name, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_name(name=%s, restype=%s)", name, restype)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'RETIRED' "
        query_args = dict(name=name, type_=restype)

        query_clause += "AND name=%(name)s"
        if restype:
            query_clause += " AND type_=%(type_)s"

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows]
        log.debug("find_res_by_name() found %s objects", len(res_assocs))

        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_keyword(self, keyword, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_keyword(keyword=%s, restype=%s)", keyword, restype)
        if not keyword or type(keyword) is not str:
            raise BadRequest('Argument keyword illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'RETIRED' "
        query_args = dict(type_=restype, kw=[keyword])

        query_clause += "AND %(kw)s <@ json_keywords(doc)"
        if restype:
            query_clause += " AND type_=%(type_)s"

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], keyword=keyword) for row in rows]
        log.debug("find_res_by_keyword() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_nested_type(self, nested_type, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_nested_type(nested_type=%s, restype=%s)", nested_type, restype)
        if not nested_type or type(nested_type) is not str:
            raise BadRequest('Argument nested_type illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'RETIRED' "
        query_args = dict(type_=restype, nest=[nested_type])

        query_clause += "AND %(nest)s <@ json_nested(doc)"
        if restype:
            query_clause += " AND type_=%(type_)s"

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], nested_type=nested_type) for row in rows]
        log.debug("find_res_by_nested_type() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_attribute(self, restype, attr_name, attr_value=None, id_only=False, filter=None):
        log.debug("find_res_by_attribute(restype=%s, attr_name=%s, attr_value=%s)", restype, attr_name, attr_value)
        if not attr_name or type(attr_name) is not str:
            raise BadRequest('Argument attr_name illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_, json_specialattr(doc) FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, json_specialattr(doc), doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'RETIRED' "
        query_args = dict(type_=restype, att=attr_name, val=attr_value)

        if attr_value is not None:
            query_clause += "AND json_specialattr(doc)=%(spc)s"
            query_args['spc'] = "%s=%s" % (attr_name, attr_value)
        else:
            query_clause += "AND json_specialattr(doc) LIKE %(spc)s"
            query_args['spc'] = "%s=%%" % (attr_name, )
        if restype:
            query_clause += " AND type_=%(type_)s"

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], attr_name=attr_name, attr_value=row[2].split("=",1)[-1]) for row in rows]
        log.debug("find_res_by_attribute() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_alternative_id(self, alt_id=None, alt_id_ns=None, id_only=False, filter=None):
        log.debug("find_res_by_alternative_id(restype=%s, alt_id_ns=%s)", alt_id, alt_id_ns)
        if alt_id and type(alt_id) is not str:
            raise BadRequest('Argument alt_id illegal')
        if alt_id_ns and type(alt_id_ns) is not str:
            raise BadRequest('Argument alt_id_ns illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        #if id_only:
        #    query = "SELECT id, x[1], x[2] FROM (SELECT json_altids(doc) as x, * FROM " + qual_ds_name + ") AS A"
        #else:
        #    query = "SELECT id, x[1], x[2], doc FROM (SELECT json_altids(doc) as x, * FROM " + qual_ds_name + ") AS A"
        #query_args = dict(aid=alt_id, ans=alt_id_ns)
        #query_clause = " WHERE lcstate<>'RETIRED' "
        #
        #if not alt_id and not alt_id_ns:
        #    query_clause += " "
        #elif alt_id and not alt_id_ns:
        #    query_clause += " AND x[2]=%(aid)s"
        #elif alt_id_ns and not alt_id:
        #    query_clause += " AND x[1]=%(ans)s"
        #else:
        #    query_clause += " AND x[1]=%(ans)s AND x[2]=%(aid)s"

        query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_args = dict(aid=[alt_id], ans=[alt_id_ns])
        query_clause = " WHERE lcstate<>'RETIRED' "

        if not alt_id and not alt_id_ns:
            query_clause += "AND json_altids_ns(doc) is not null"
        elif alt_id and not alt_id_ns:
            query_clause += "AND %(aid)s <@ json_altids_id(doc)"
        elif alt_id_ns and not alt_id:
            query_clause += "AND %(ans)s <@ json_altids_ns(doc)"
        else:
            query_clause += "AND %(aid)s <@ json_altids_id(doc) AND %(ans)s <@ json_altids_ns(doc)"

        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        # Need to fake the return format of the Couch view for alt_ids. One record per alt_id, not one per resource.
        res_assocs = []
        res_rows = []
        for row in rows:
            doc_id = self._prep_id(row[0])
            doc = row[-1]
            for aid in doc.get("alt_ids", []):
                aid_parts = aid.split(":", 1)
                aid_ns = aid_parts[0] if len(aid_parts)>1 else "_"
                aid_id = aid_parts[-1]
                if alt_id_ns and alt_id:
                    if alt_id_ns == aid_ns and alt_id == aid_id:
                        res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id))
                        res_rows.append((doc_id, doc))
                elif (not alt_id_ns and not alt_id) or (alt_id_ns and alt_id_ns == aid_ns) or (alt_id and alt_id == aid_id):
                    res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id))
                    res_rows.append((doc_id, doc))

        log.debug("find_res_by_alternative_id() found %s objects", len(res_assocs))
        return self._prepare_find_return(res_rows, res_assocs, id_only=id_only)
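
    # alt_ids entries follow a "namespace:identifier" convention, e.g.
    # "PRE:OBS-1" parses to (alt_id_ns='PRE', alt_id='OBS-1'), while an entry
    # without a colon such as "OBS-1" falls into the default namespace "_".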

    def find_by_view(self, design_name, view_name, key=None, keys=None, start_key=None, end_key=None,
                     id_only=True, convert_doc=True, **kwargs):
        """
        Generic find function using a defined index
        @param design_name  design document
        @param view_name  view name
        @param key  specific key to find
        @param keys  list of keys to find
        @param start_key  find range start value
        @param end_key  find range end value
        @param id_only  if False, the third element of each triple is the full document
        @param convert_doc  if True, make IonObject out of doc
        @retval Returns a list of 3-tuples: (document id, index key, index value or document)
        """
        res_rows = self.find_docs_by_view(design_name=design_name, view_name=view_name, key=key, keys=keys,
                                          start_key=start_key, end_key=end_key, id_only=id_only, **kwargs)

        res_rows = [(rid, key,
                     self._persistence_dict_to_ion_object(doc) if convert_doc and isinstance(doc, dict) else doc)
                    for rid, key, doc in res_rows]

        log.debug("find_by_view() found %s objects" % (len(res_rows)))
        return res_rows

    def find_resources_mult(self, query):
        """
        Find resources given a datastore query expression dict.
        @param query  a dict representation of a datastore query
        @retval  list of resource ids or resource objects matching query (dependent on id_only value)
        """
        qual_ds_name = self._get_datastore_name()

        pqb = PostgresQueryBuilder(query, qual_ds_name)
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(pqb.get_query(), pqb.get_values())
            rows = cur.fetchall()
            log.info("find_resources_mult() QUERY: %s (%s rows)", cur.query, cur.rowcount)

        id_only = query["query_args"].get("id_only", True)
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids
        else:
            res_docs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
            return res_docs

    # -------------------------------------------------------------------------
    # Internal operations

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
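
A CRUD usage sketch for the datastore above; it assumes a reachable Postgres
server configured under server.postgresql, and the resource type and field
values are hypothetical.

    ds = PostgresPyonDataStore(datastore_name='resources', scope='mysys')
    obj = IonObject('InstrumentDevice', {'name': 'ctd_1'})
    obj_id, rev = ds.create(obj)
    fetched = ds.read(obj_id)
    fetched.name = 'ctd_1_renamed'
    ds.update(fetched)
    ds.delete(obj_id)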
Example #42
0
class TransformManagementService(BaseTransformManagementService):
    """Provides the main orchestration for stream processing
    subscription, data process definition and computation
    request (scheduling). The transformation service handles content format
    transformation, mediation, qualification, verification and validation
    """
    def __init__(self):
        BaseTransformManagementService.__init__(self)

        self.serializer = IonObjectSerializer()

    def on_start(self):
        super(TransformManagementService, self).on_start()

    def _strip_types(self, obj):
        if not isinstance(obj, dict):
            return
        for k, v in obj.iteritems():
            if isinstance(v, dict):
                self._strip_types(v)
        if "type_" in obj:
            del obj['type_']

    def create_transform(self,
                         name='',
                         description='',
                         in_subscription_id='',
                         out_streams=None,
                         process_definition_id='',
                         configuration=None):
        """Creates the transform and registers it with the resource registry
        @param process_definition_id The process definition contains the module and class of the process to be spawned
        @param in_subscription_id The subscription id corresponding to the input subscription
        @param out_streams Dict mapping output stream names to stream ids
        @param configuration {}

        @return The transform_id of the new transform
        """

        # ------------------------------------------------------------------------------------
        # Resources and Initial Configs
        # ------------------------------------------------------------------------------------
        # Determine Transform Name

        if isinstance(configuration, IonObjectBase):
            configuration = self.serializer.serialize(configuration)
            # strip the type
            self._strip_types(configuration)

        elif not configuration:
            configuration = {}

        # Handle the name uniqueness factor
        res, _ = self.clients.resource_registry.find_resources(name=name,
                                                               id_only=True)
        if len(res) > 0:
            raise BadRequest(
                'A transform resource named %s already exists.' % name)

        transform_name = name

        if not process_definition_id:
            raise NotFound('No process definition was provided')

        # Transform Resource for association management and pid
        transform_res = Transform(name=name, description=description)

        transform_id, _ = self.clients.resource_registry.create(transform_res)

        transform_res = self.clients.resource_registry.read(transform_id)

        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------

        subscription = self.clients.pubsub_management.read_subscription(
            subscription_id=in_subscription_id)
        listen_name = subscription.exchange_name

        configuration['process'] = dict({
            'name': transform_name,
            'type': 'stream_process',
            'listen_name': listen_name,
            'transform_id': transform_id
        })
        if out_streams:
            configuration['process']['publish_streams'] = out_streams
            stream_ids = out_streams.values()
        else:
            stream_ids = []
        transform_res.configuration = configuration

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        # Spawn the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            configuration=configuration)
        transform_res.process_id = pid

        # ------------------------------------------------------------------------------------
        # Handle Resources
        # ------------------------------------------------------------------------------------

        self.clients.resource_registry.update(transform_res)

        self.clients.resource_registry.create_association(
            transform_id, PRED.hasProcessDefinition, process_definition_id)
        self.clients.resource_registry.create_association(
            transform_id, PRED.hasSubscription, in_subscription_id)

        for stream_id in stream_ids:
            self.clients.resource_registry.create_association(
                transform_id, PRED.hasOutStream, stream_id)

        return transform_id

    def update_transform(self, configuration=None):
        """Not currently possible to update a transform
        @throws NotImplementedError
        """
        raise NotImplementedError

    def read_transform(self, transform_id=''):
        """Reads a transform from the resource registry
        @param transform_id The unique transform identifier
        @return Transform resource
        @throws NotFound when transform doesn't exist
        """

        log.debug('(%s): Reading Transform: %s' % (self.name, transform_id))
        transform = self.clients.resource_registry.read(object_id=transform_id,
                                                        rev_id='')
        return transform

    def delete_transform(self, transform_id=''):
        """Deletes and stops an existing transform process
        @param transform_id The unique transform identifier
        @throws NotFound when a transform doesn't exist
        """

        # get the transform resource (also verifies its existence before continuing)
        transform_res = self.read_transform(transform_id=transform_id)
        pid = transform_res.process_id

        # get the resources
        process_definition_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasProcessDefinition, RT.ProcessDefinition,
            True)
        in_subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        out_stream_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasOutStream, RT.Stream, True)

        # build a list of all the ids above
        id_list = process_definition_ids + in_subscription_ids + out_stream_ids

        # stop the transform process

        #@note: cancel_process does not raise or confirm whether the termination was successful

        self.clients.process_dispatcher.cancel_process(pid)

        log.debug('(%s): Terminated Process (%s)' % (self.name, pid))

        # delete the associations
        for predicate in [
                PRED.hasProcessDefinition, PRED.hasSubscription,
                PRED.hasOutStream
        ]:
            associations = self.clients.resource_registry.find_associations(
                transform_id, predicate)
            for association in associations:
                self.clients.resource_registry.delete_association(association)

        #@todo: should I delete the resources, or should dpms?

        # iterate through the list and delete each
        #for res_id in id_list:
        #    self.clients.resource_registry.delete(res_id)

        self.clients.resource_registry.delete(transform_id)
        return True

    # ---------------------------------------------------------------------------

    def execute_transform(self,
                          process_definition_id='',
                          data=None,
                          configuration=None):
        process_definition = self.clients.process_dispatcher.read_process_definition(
            process_definition_id)
        module = process_definition.executable.get('module')
        cls = process_definition.executable.get('class')

        module = __import__(module, fromlist=[cls])
        cls = getattr(module, cls)
        instance = cls()

        result = gevent.event.AsyncResult()

        def execute(data):
            result.set(instance.execute(data))

        g = gevent.greenlet.Greenlet(execute, data)
        g.start()

        retval = result.get(timeout=10)

        return retval

    def activate_transform(self, transform_id=''):
        """Activate the subscription to bind (start) the transform
        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist.
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        if len(subscription_ids) < 1:
            raise NotFound('No subscription associated with transform %s' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.activate_subscription(
                subscription_id)

        return True

    def deactivate_transform(self, transform_id=''):
        """Decativates the subscriptions for the specified transform
        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        if len(subscription_ids) < 1:
            raise NotFound('No subscription associated with transform %s' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.deactivate_subscription(
                subscription_id)

        return True

    def schedule_transform(self, transform_id=''):
        """Not currently implemented
        @throws NotImplementedError
        """
        raise NotImplementedError
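
A minimal usage sketch for the service above (a sketch only; the client handle,
ids and names are hypothetical, and the subscription and process definition are
assumed to exist already):

    transform_id = tms_client.create_transform(
        name='example_transform',                    # must be unique
        description='demo transform',
        in_subscription_id=subscription_id,          # existing subscription
        out_streams={'output': out_stream_id},       # optional output streams
        process_definition_id=process_definition_id)

    tms_client.activate_transform(transform_id)      # binds the subscription
    # ... the spawned process consumes input and publishes output ...
    tms_client.deactivate_transform(transform_id)
    tms_client.delete_transform(transform_id)        # also cancels the process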
Example #43
0
class IngestionManagementService(BaseIngestionManagementService):
    """
    id_p = cc.spawn_process('ingestion_worker', 'ion.services.dm.ingestion.ingestion_management_service', 'IngestionManagementService')
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """

    base_exchange_name = 'ingestion_queue'

    def __init__(self):
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

        self.serializer = IonObjectSerializer()
        self.process_definition_id = None


    def on_start(self):
        super(IngestionManagementService,self).on_start()
        self.event_publisher = EventPublisher(event_type="DatasetIngestionConfigurationEvent")

        res_list , _ = self.clients.resource_registry.find_resources(
            restype=RT.ProcessDefinition,
            name='ingestion_worker_process',
            id_only=True)
        if len(res_list):
            self.process_definition_id = res_list[0]


    def on_quit(self):
        #self.clients.process_dispatcher.delete_process_definition(process_definition_id=self.process_definition_id)
        super(IngestionManagementService,self).on_quit()

    def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None,number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        """

        if self.process_definition_id is None:
            process_definition = ProcessDefinition(name='ingestion_worker_process', description='Worker transform process for ingestion of datasets')
            process_definition.executable['module']='ion.processes.data.ingestion.ingestion_worker'
            process_definition.executable['class'] = 'IngestionWorker'
            self.process_definition_id = self.clients.process_dispatcher.create_process_definition(process_definition=process_definition)
 

        # Queue name for the ingestion workers to receive data on
        exchange_name = 'ingestion_queue'

        ##------------------------------------------------------------------------------------
        ## declare our intent to subscribe to all messages on the exchange point
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(query=query,\
            exchange_name=exchange_name, name='Ingestion subscription', description='Subscription for ingestion workers')

        ##------------------------------------------------------------------------------------------

        # create an ingestion_configuration instance and update the registry
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration( name = self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)


        ingestion_configuration_id, _ = self.clients.resource_registry.create(ingestion_configuration)

        self._launch_transforms(
            ingestion_configuration.number_of_workers,
            subscription_id,
            ingestion_configuration_id,
            ingestion_configuration,
            self.process_definition_id
        )
        return ingestion_configuration_id

    def _launch_transforms(self, number_of_workers, subscription_id, ingestion_configuration_id, ingestion_configuration, process_definition_id):
        """
        This method spawns the two transform processes without activating them...Note: activating the transforms does the binding
        """

        description = 'Ingestion worker'

        configuration = self.serializer.serialize(ingestion_configuration)
        configuration.pop('type_')
        configuration['configuration_id'] = ingestion_configuration_id


        # launch the transforms
        for i in xrange(number_of_workers):
            name = '(%s)_Ingestion_Worker_%s' % (ingestion_configuration_id, i+1)
            transform_id = self.clients.transform_management.create_transform(
                name = name,
                description = description,
                in_subscription_id= subscription_id,
                out_streams = {},
                process_definition_id=process_definition_id,
                configuration=configuration)

            if not transform_id:
                raise IngestionManagementServiceException('Transform could not be launched by ingestion.')
            # create association between the ingestion configuration and the transforms that act as Ingestion Workers
            self.clients.resource_registry.create_association(ingestion_configuration_id, PRED.hasTransform, transform_id)


    def update_ingestion_configuration(self, ingestion_configuration=None):
        """Change the number of workers or the default policy for ingesting data on each stream

        @param ingestion_configuration    IngestionConfiguration
        """
        log.debug("Updating ingestion configuration")
        id, rev = self.clients.resource_registry.update(ingestion_configuration)

    def read_ingestion_configuration(self, ingestion_configuration_id=''):
        """Get an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @retval ingestion_configuration    IngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading ingestion configuration object id: %s", ingestion_configuration_id)
        ingestion_configuration = self.clients.resource_registry.read(ingestion_configuration_id)
        if ingestion_configuration is None:
            raise NotFound("Ingestion configuration %s does not exist" % ingestion_configuration_id)
        return ingestion_configuration

    def delete_ingestion_configuration(self, ingestion_configuration_id=''):
        """Delete an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Deleting ingestion configuration: %s", ingestion_configuration_id)


        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        #delete the transforms associated with the ingestion_configuration_id
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)

        if len(transform_ids) < 1:
            raise NotFound('No transforms associated with this ingestion configuration!')

        log.debug('len(transform_ids): %s' % len(transform_ids))

        for transform_id in transform_ids:
            # To Delete - we need to actually remove each of the transforms
            self.clients.transform_management.delete_transform(transform_id)


        # delete the associations too...
        associations = self.clients.resource_registry.find_associations(ingestion_configuration_id,PRED.hasTransform)
        log.info('associations: %s' % associations)
        for association in associations:
            self.clients.resource_registry.delete_association(association)
            #@todo How should we deal with failure?


        self.clients.resource_registry.delete(ingestion_configuration_id)


    def activate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Activate an ingestion configuration and the transform processes that execute it

        @param ingestion_configuration_id    str
        @throws NotFound    The ingestion configuration id did not exist
        """

        log.debug("Activating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # read the transforms
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' % str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only activate one
        self.clients.transform_management.activate_transform(transform_ids[0])

        return True


    def deactivate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Deactivate one of the transform processes that uses an ingestion configuration

        @param ingestion_configuration_id    str
        @throws NotFound    The ingestion configuration id did not exist
        """
        log.debug("Deactivating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?


        # use the deactivate method in transformation management service
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' % str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only deactivate one
        self.clients.transform_management.deactivate_transform(transform_ids[0])

        return True

    def create_dataset_configuration(self, dataset_id='', archive_data=True, archive_metadata=True, ingestion_configuration_id=''):
        """Create a configuration for ingestion of a particular dataset and associate it to a ingestion configuration.

        @param dataset_id    str
        @param archive_data    bool
        @param archive_metadata    bool
        @param ingestion_configuration_id    str
        @retval dataset_ingestion_configuration_id    str
        """

        if not dataset_id:
            raise IngestionManagementServiceException('Must pass a dataset id to create_dataset_configuration')

        log.debug("Creating dataset configuration")


        dataset = self.clients.dataset_management.read_dataset(dataset_id=dataset_id)

        stream_id = dataset.primary_view_key

        # Read the stream to get the stream definition
        #stream = self.clients.pubsub_management.read_stream(stream_id=stream_id)

        # Get the associated stream definition!
        stream_defs, _ = self.clients.resource_registry.find_objects(stream_id, PRED.hasStreamDefinition)

        if len(stream_defs) != 1:
            raise IngestionManagementServiceException('The stream must be associated with exactly one stream definition!')

        stream_def_resource = stream_defs[0]
        # Get the container object out of the stream def resource and set the stream id field in the local instance
        stream_def_container = stream_def_resource.container
        stream_def_container.stream_resource_id = stream_id

        # Get the ingestion configuration
        ingestion_configuration = self.clients.resource_registry.read(ingestion_configuration_id)
        couch_storage = ingestion_configuration.couch_storage

        log.info('Adding stream definition for stream "%s" to ingestion database "%s"' % (stream_id, couch_storage.datastore_name))
        db = self.container.datastore_manager.get_datastore(ds_name = couch_storage.datastore_name, config = self.CFG)

        # put it in couch db!
        db.create(stream_def_container)
        db.close()


        #@todo Add business logic to create the right kind of dataset ingestion configuration
        config = DatasetIngestionByStream(
            archive_data=archive_data,
            archive_metadata=archive_metadata,
            stream_id=stream_id,
            dataset_id=dataset_id)

        dset_ingest_config = DatasetIngestionConfiguration(
            name = 'Dataset config %s' % dataset_id,
            description = 'configuration for dataset %s' % dataset_id,
            configuration = config,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM
            )

        dset_ingest_config_id , _ = self.clients.resource_registry.create(dset_ingest_config)

        self.clients.resource_registry.create_association(dset_ingest_config_id, PRED.hasIngestionConfiguration, ingestion_configuration_id)

        self.clients.resource_registry.create_association(dataset_id, PRED.hasIngestionConfiguration, ingestion_configuration_id)

        self.event_publisher.publish_event(
            origin=ingestion_configuration_id, # Use the ingestion configuration ID as the origin!
            description = dset_ingest_config.description,
            configuration = config,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id = dset_ingest_config_id
            )


        return dset_ingest_config_id

    def update_dataset_config(self, dataset_ingestion_configuration=None):
        """Update the ingestion configuration for a dataset

        @param dataset_ingestion_configuration    DatasetIngestionConfiguration
        """

        #@todo - make it an exception to change the dataset_id or the stream_id in the dataset config!

        log.info('dataset configuration to update: %s' % dataset_ingestion_configuration)

        log.debug("Updating dataset config")
        dset_ingest_config_id, rev = self.clients.resource_registry.update(dataset_ingestion_configuration)

        ingest_config_ids, _ = self.clients.resource_registry.find_objects(dset_ingest_config_id, PRED.hasIngestionConfiguration, id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException('The dataset ingestion configuration must be associated with exactly one ingestion configuration!')

        ingest_config_id = ingest_config_ids[0]

        #@todo - what is it okay to update?
        self.event_publisher.publish_event(
            origin=ingest_config_id,
            description = dataset_ingestion_configuration.description,
            configuration = dataset_ingestion_configuration.configuration,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id = dset_ingest_config_id
        )


    def read_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Get an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @retval dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """

        log.debug("Reading dataset configuration")
        dataset_ingestion_configuration = self.clients.resource_registry.read(dataset_ingestion_configuration_id)

        return dataset_ingestion_configuration

    def delete_dataset_config(self,dataset_ingestion_configuration_id=''):
        """Delete an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        """

        dataset_ingestion_configuration = self.clients.resource_registry.read(dataset_ingestion_configuration_id)

        log.debug("Deleting dataset configuration")
        self.clients.resource_registry.delete(dataset_ingestion_configuration_id)

        ingest_config_ids, association_ids = self.clients.resource_registry.find_objects(dataset_ingestion_configuration_id, PRED.hasIngestionConfiguration, id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException('The dataset ingestion configuration must be associated with exactly one ingestion configuration!')

        ingest_config_id = ingest_config_ids[0]

        self.clients.resource_registry.delete_association(association=association_ids[0])

        self.event_publisher.publish_event(
            origin=ingest_config_id,
            configuration = dataset_ingestion_configuration.configuration,
            type = DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id = dataset_ingestion_configuration_id,
            deleted = True
        )
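
A minimal usage sketch for the service above (a sketch only; the client handle
and the storage specifications are hypothetical):

    ingestion_configuration_id = ims_client.create_ingestion_configuration(
        exchange_point_id='science_data',
        couch_storage=couch_storage_spec,   # couch database specification
        hdf_storage=hdf_storage_spec,       # hdf filesystem specification
        number_of_workers=2)

    ims_client.activate_ingestion_configuration(ingestion_configuration_id)

    # per-dataset ingestion policy:
    dset_config_id = ims_client.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)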
Example #44
0
    def __init__(self):
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())
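
Example #44 shows only the constructor of a codec-style interceptor. A plausible
completion (a sketch only, assuming the Interceptor base class exposes
outgoing/incoming hooks and that the message travels on invocation.message)
would serialize IonObjects on the way out and deserialize dicts carrying a
'type_' marker on the way in:

    class CodecInterceptor(Interceptor):
        def __init__(self):
            Interceptor.__init__(self)
            self._io_serializer = IonObjectSerializer()
            self._io_deserializer = IonObjectDeserializer(
                obj_registry=get_obj_registry())

        def outgoing(self, invocation):
            # IonObject -> plain dict before the message hits the wire
            payload = invocation.message
            if isinstance(payload, IonObjectBase):
                invocation.message = self._io_serializer.serialize(payload)
            return invocation

        def incoming(self, invocation):
            # plain dict with a 'type_' marker -> IonObject
            payload = invocation.message
            if isinstance(payload, dict) and 'type_' in payload:
                invocation.message = self._io_deserializer.deserialize(payload)
            return invocation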
Example #45
0
class PostgresPyonDataStore(PostgresDataStore):
    """
    Base class common to both CouchDB and Couchbase datastores.
    """

    def __init__(self, datastore_name=None, config=None, scope=None, profile=None):
        """
        @param datastore_name  Name of datastore within server. May be scoped to sysname
        @param config  A server config dict with connection params
        @param scope  Prefix for the datastore name (e.g. sysname) to separate multiple systems
        """

        PostgresDataStore.__init__(self, datastore_name=datastore_name,
                                     config=config or CFG.get_safe("server.postgresql"),
                                     profile=profile or DataStore.DS_PROFILE.BASIC,
                                     scope=scope)

        # IonObject Serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    # -------------------------------------------------------------------------
    # Document operations (IonObject <-> persistence dict)

    def create(self, obj, object_id=None, attachments=None, datastore_name=""):
        """
        Converts ion objects to python dictionary before persisting them using the optional
        suggested identifier and creates attachments to the object.
        Returns an identifier and revision number of the object
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                                   object_id=object_id, datastore_name=datastore_name,
                                   attachments=attachments)

    def create_mult(self, objects, object_ids=None, allow_ids=None):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids)


    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_mult(self, objects):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects])


    def read(self, object_id, rev_id="", datastore_name="", object_type=None):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")

        doc = self.read_doc(object_id, rev_id, datastore_name=datastore_name, object_type=object_type)
        obj = self._persistence_dict_to_ion_object(doc)

        return obj

    def read_mult(self, object_ids, datastore_name="", strict=True):
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object ids are not string: %s" % str(object_ids))

        docs = self.read_doc_mult(object_ids, datastore_name, strict=strict)
        obj_list = [self._persistence_dict_to_ion_object(doc) if doc is not None else None for doc in docs]

        return obj_list

    def delete(self, obj, datastore_name="", object_type=None):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest("Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            self.delete_doc(obj, datastore_name=datastore_name, object_type=object_type)
        else:
            if '_id' not in obj:
                raise BadRequest("Doc must have '_id'")
            if '_rev' not in obj:
                raise BadRequest("Doc must have '_rev'")
            self.delete_doc(self._ion_object_to_persistence_dict(obj),
                            datastore_name=datastore_name, object_type=object_type)

    def delete_mult(self, object_ids, datastore_name=None):
        return self.delete_doc_mult(object_ids, datastore_name)
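
    # Illustrative round trip (hypothetical store instance and resource object):
    #   ds = PostgresPyonDataStore(datastore_name='resources')
    #   obj_id, rev = ds.create(resource_obj)   # IonObject -> dict -> row
    #   obj = ds.read(obj_id)                   # row -> dict -> IonObject
    #   obj.name = 'renamed'
    #   ds.update(obj)
    #   ds.delete(obj_id)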

    # -------------------------------------------------------------------------
    # View operations

    def find_objects_mult(self, subjects, id_only=False, predicate=None, access_args=None):
        """
        Returns a list of associations for a given list of subjects
        """
        # TODO: Port this implementation to Postgres single query
        res_list = [[], []]
        if not subjects:
            return res_list
        for sub in subjects:
            res_ids, res_assocs = self.find_objects(subject=sub, id_only=id_only, predicate=predicate, access_args=access_args)
            res_list[0].extend(res_ids)
            res_list[1].extend(res_assocs)
        return res_list

    def find_subjects_mult(self, objects, id_only=False, predicate=None, access_args=None):
        """
        Returns a list of associations for a given list of objects
        """
        # TODO: Port this implementation to Postgres single query
        res_list = [[], []]
        if not objects:
            return res_list
        for obj in objects:
            res_ids, res_assocs = self.find_subjects(obj=obj, id_only=id_only, predicate=predicate, access_args=access_args)
            res_list[0].extend(res_ids)
            res_list[1].extend(res_assocs)
        return res_list

    def find_objects(self, subject, predicate=None, object_type=None, id_only=False, access_args=None, **kwargs):
        #log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s", subject, predicate, object_type, id_only)

        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        if object_type and not predicate:
            raise BadRequest("Cannot provide object type without a predicate")

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id

        qual_ds_name = self._get_datastore_name()
        assoc_table_name = qual_ds_name+"_assoc"
        table_names = dict(ds=qual_ds_name, dsa=assoc_table_name)
        view_args = self._get_view_args(kwargs, access_args)

        if id_only:
            #query = "SELECT o, doc FROM %(dsa)s WHERE retired<>true " % table_names
            query = "SELECT %(dsa)s.o, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names
        else:
            query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names
        query_args = dict(s=subject_id, ot=object_type, p=predicate)

        query_clause = "AND s=%(s)s"
        if predicate:
            query_clause += " AND p=%(p)s"
            if object_type:
                query_clause += " AND ot=%(ot)s"

        query_clause = self._add_access_filter(access_args, qual_ds_name, query_clause, query_args)
        extra_clause = view_args.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
        #log.debug("find_objects() found %s objects", len(obj_assocs))
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, obj_assocs
        else:
            res_objs = [self._persistence_dict_to_ion_object(row[0]) for row in rows]
            return res_objs, obj_assocs

    def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False, access_args=None, **kwargs):
        #log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s", subject_type, predicate, obj, id_only)

        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        if subject_type and not predicate:
            raise BadRequest("Cannot provide subject type without a predicate")

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id

        qual_ds_name = self._get_datastore_name()
        assoc_table_name = qual_ds_name+"_assoc"
        table_names = dict(ds=qual_ds_name, dsa=assoc_table_name)
        view_args = self._get_view_args(kwargs, access_args)

        if id_only:
            #query = "SELECT s, doc FROM %(dsa)s WHERE retired<>true " % table_names
            query = "SELECT %(dsa)s.s, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % table_names
        else:
            query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % table_names
        query_args = dict(o=object_id, st=subject_type, p=predicate)

        query_clause = "AND o=%(o)s"
        if predicate:
            query_clause += " AND p=%(p)s"
            if subject_type:
                query_clause += " AND st=%(st)s"

        query_clause = self._add_access_filter(access_args, qual_ds_name, query_clause, query_args)
        extra_clause = view_args.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
        #log.debug("find_subjects() found %s subjects", len(obj_assocs))
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, obj_assocs
        else:
            res_objs = [self._persistence_dict_to_ion_object(row[0]) for row in rows]
            return res_objs, obj_assocs

    def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True,
                          anyside=None, query=None, **kwargs):
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not (subject or obj or predicate or anyside or query):
            raise BadRequest("Illegal parameters: No S/P/O or anyside")
        if anyside and (subject or obj):
            raise BadRequest("Illegal parameters: anyside cannot be combined with S/O")
        if anyside and predicate and type(anyside) in (list, tuple):
            raise BadRequest("Illegal parameters: anyside list cannot be combined with P")

        if query:
            query["query_args"]["id_only"] = id_only
            query["query_args"]["ds_sub"] = "assoc"
            # TODO: filter out retired
            return self.find_by_query(query)

        subject_id, object_id, anyside_ids = None, None, None
        if subject:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available in subject")
                else:
                    subject_id = subject._id
        if obj:
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id
        if anyside:
            if type(anyside) is str:
                anyside_ids = [anyside]
            elif type(anyside) in (list, tuple):
                if not all([type(o) in (str, list, tuple) for o in anyside]):
                    raise BadRequest("List of object ids or (object id, predicate) expected")
                anyside_ids = anyside
            else:
                if "_id" not in anyside:
                    raise BadRequest("Object id not available in anyside")
                else:
                    anyside_ids = [anyside._id]

        #log.debug("find_associations(subject=%s, predicate=%s, object=%s, anyside=%s)", subject_id, predicate, object_id, anyside_ids)

        qual_ds_name = self._get_datastore_name()
        table = qual_ds_name + "_assoc"
        view_args = self._get_view_args(kwargs)

        if id_only:
            query = "SELECT id FROM " + table
        else:
            query = "SELECT id, doc, s, st, p, o, ot FROM " + table
        query_clause = " WHERE retired<>true AND "
        query_args = dict(s=subject_id, o=object_id, p=predicate)

        if subject and obj:
            query_clause += "s=%(s)s AND o=%(o)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif subject:
            query_clause += "s=%(s)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif obj:
            query_clause += "o=%(o)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif anyside:
            if predicate:
                query_clause += "p=%(p)s AND (s=%(any)s OR o=%(any)s)"
                query_args["any"] = anyside
            elif type(anyside_ids[0]) is str:
                # keys are IDs of resources
                for i, key in enumerate(anyside_ids):
                    if i > 0:
                        query_clause += " OR "
                    argname = "id%s" % i
                    query_args[argname] = key
                    query_clause += "(s=%("+argname+")s OR o=%("+argname+")s)"
            else:
                # keys are tuples of (id, pred)
                for i, (key, pred) in enumerate(anyside_ids):
                    if i > 0:
                        query_clause += " OR "
                    argname_id = "id%s" % i
                    argname_p = "p%s" % i
                    query_args[argname_id] = key
                    query_args[argname_p] = pred
                    query_clause += "(p=%("+argname_p+")s AND (s=%("+argname_id+")s OR o=%("+argname_id+")s))"

        elif predicate:
            if predicate == "*":
                query_clause += "p is not null"
            else:
                query_clause += "p=%(p)s"
        else:
            raise BadRequest("Illegal arguments")

        extra_clause = view_args.get("extra_clause", "")
        sql = query + query_clause + extra_clause
        #print "find_associations(): SQL=", sql, query_args
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(sql, query_args)
            rows = cur.fetchall()

        if id_only:
            assocs = [self._prep_id(row[0]) for row in rows]
        else:
            assocs = [self._persistence_dict_to_ion_object(row[1]) for row in rows]
        #log.debug("find_associations() found %s associations", len(assocs))

        return assocs
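
    # Illustrative calls (hypothetical ids):
    #   assoc_ids = ds.find_associations(subject=res_id, predicate='hasOwner')
    #   assocs = ds.find_associations(anyside=[res_id1, res_id2], id_only=False)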

    def _prepare_find_return(self, rows, res_assocs=None, id_only=True, **kwargs):
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, res_assocs
        else:
            res_docs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
            return res_docs, res_assocs

    def _add_access_filter(self, view_args, tablename, query_clause, query_args, add_where=True, tablealias=None):
        """Returns a Postgres SQL filter clause and referenced values for resource queries filtered
        by resource visibility and current actor role/facility membership/superuser status"""
        view_args = view_args if view_args is not None else {}
        current_actor_id = view_args.get("current_actor_id", None)
        superuser_actor_ids = view_args.get("superuser_actor_ids", None) or []
        tablealias = tablealias or tablename

        access_filter = ""
        access_args = {}
        access_args["current_actor_id"] = current_actor_id
        assoc_tablename = tablename + "_assoc"
        if current_actor_id in superuser_actor_ids:
            # Current user is a superuser - no additional filter
            pass
        elif current_actor_id and current_actor_id != "anonymous":
            # Registered actor
            # - Return all PUBLIC, REGISTERED
            access_filter += tablealias + ".visibility NOT IN (3,4)"  # 1, 2, null and other values
            # - Return all owned by user independent of visibility
            access_filter += " OR (" + tablealias + ".id IN (SELECT s FROM " + assoc_tablename + \
                             " WHERE p='hasOwner' AND o=%(current_actor_id)s))"
            # - Return all FACILITY if user is in same facility
            access_filter += " OR (" + tablealias + ".visibility=3 AND " + tablealias + ".id IN (SELECT o FROM " + assoc_tablename + \
                             " WHERE p='hasResource' AND st='Org' AND s IN (SELECT s FROM " + assoc_tablename + \
                             " WHERE p='hasMember' AND st='Org' AND o=%(current_actor_id)s)))"
        else:
            # Anonymous access
            # All public resources
            access_filter += tablealias + ".visibility NOT IN (2,3,4)"

        if query_clause and access_filter:
            query_clause += " AND (" + access_filter + ")"
        elif not query_clause and access_filter:
            if add_where:
                query_clause = " WHERE " + access_filter
            else:
                query_clause = access_filter

        query_args.update(access_args)
        return query_clause
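
    # Illustrative result: for an anonymous actor the generated filter reduces to
    #   ... AND (<tablealias>.visibility NOT IN (2,3,4))
    # so only resources whose visibility falls outside 2/3/4 (e.g. PUBLIC) pass.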

    def _add_deleted_filter(self, tablename, ds_sub, query_clause, query_args, show_all=False):
        if show_all:
            return query_clause
        deleted_filter = ""
        if not ds_sub:
            deleted_filter = tablename + ".lcstate<>'DELETED'"
        elif ds_sub == "assoc":
            deleted_filter = tablename + ".retired<>true"
        if query_clause and deleted_filter:
            query_clause += " AND " + deleted_filter
        elif not query_clause and deleted_filter:
            query_clause = deleted_filter
        return query_clause

    def find_resources(self, restype="", lcstate="", name="", id_only=True, access_args=None):
        return self.find_resources_ext(restype=restype, lcstate=lcstate, name=name, id_only=id_only, access_args=access_args)

    def find_resources_ext(self, restype="", lcstate="", name="",
                           keyword=None, nested_type=None,
                           attr_name=None, attr_value=None, alt_id=None, alt_id_ns=None,
                           limit=None, skip=None, descending=None, id_only=True, query=None, access_args=None):
        filter_kwargs = self._get_view_args(dict(limit=limit, skip=skip, descending=descending), access_args)
        if query:
            qargs = query["query_args"]
            if id_only is not None:
                qargs["id_only"] = id_only
            if limit is not None and limit != 0:
                qargs["limit"] = limit
            if skip is not None and skip != 0:
                qargs["skip"] = skip
            return self.find_by_query(query, access_args=access_args)
        elif name:
            if lcstate:
                raise BadRequest("find by name does not support lcstate")
            return self.find_res_by_name(name, restype, id_only, filter=filter_kwargs)
        elif keyword:
            return self.find_res_by_keyword(keyword, restype, id_only, filter=filter_kwargs)
        elif alt_id or alt_id_ns:
            return self.find_res_by_alternative_id(alt_id, alt_id_ns, id_only, filter=filter_kwargs)
        elif nested_type:
            return self.find_res_by_nested_type(nested_type, restype, id_only, filter=filter_kwargs)
        elif restype and attr_name:
            return self.find_res_by_attribute(restype, attr_name, attr_value, id_only=id_only, filter=filter_kwargs)
        elif restype and lcstate:
            return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs)
        elif restype:
            return self.find_res_by_type(restype, lcstate, id_only, filter=filter_kwargs)
        elif lcstate:
            return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs)
        elif not restype and not lcstate and not name:
            return self.find_res_by_type(None, None, id_only, filter=filter_kwargs)

    def find_res_by_type(self, restype, lcstate=None, id_only=False, filter=None):
        log.debug("find_res_by_type(restype=%s, lcstate=%s)", restype, lcstate)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if lcstate:
            raise BadRequest('lcstate not supported anymore in find_res_by_type')

        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_, lcstate FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, lcstate, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, lcstate=lcstate)

        if restype:
            query_clause += "AND type_=%(type_)s"
        else:
            # Returns ALL documents, only limited by filter
            query_clause = ""

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows]
        log.debug("find_res_by_type() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate, restype)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if '_' in lcstate:
            log.warn("Search for compound lcstate restricted to maturity: %s", lcstate)
            lcstate,_ = lcstate.split("_", 1)
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_, lcstate, availability FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, lcstate, availability, doc FROM " + qual_ds_name
        query_clause = " WHERE "
        query_args = dict(type_=restype, lcstate=lcstate)

        is_maturity = lcstate not in AvailabilityStates
        if is_maturity:
            query_clause += "lcstate=%(lcstate)s"
        else:
            query_clause += "availability=%(lcstate)s"

        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2], lcstate=row[3] if is_maturity else row[4]) for row in rows]
        log.debug("find_res_by_lcstate() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_name(self, name, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_name(name=%s, restype=%s)", name, restype)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(name=name, type_=restype)

        query_clause += "AND name=%(name)s"
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows]
        log.debug("find_res_by_name() found %s objects", len(res_assocs))

        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_keyword(self, keyword, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_keyword(keyword=%s, restype=%s)", keyword, restype)
        if not keyword or type(keyword) is not str:
            raise BadRequest('Argument keyword illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, kw=[keyword])

        query_clause += "AND %(kw)s <@ json_keywords(doc)"
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], keyword=keyword) for row in rows]
        log.debug("find_res_by_keyword() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_nested_type(self, nested_type, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_nested_type(nested_type=%s, restype=%s)", nested_type, restype)
        if not nested_type or type(nested_type) is not str:
            raise BadRequest('Argument nested_type illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, nest=[nested_type])

        query_clause += "AND %(nest)s <@ json_nested(doc)"
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], nested_type=nested_type) for row in rows]
        log.debug("find_res_by_nested_type() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_attribute(self, restype, attr_name, attr_value=None, id_only=False, filter=None):
        log.debug("find_res_by_attribute(restype=%s, attr_name=%s, attr_value=%s)", restype, attr_name, attr_value)
        if not attr_name or type(attr_name) is not str:
            raise BadRequest('Argument attr_name illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_, json_specialattr(doc) FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, json_specialattr(doc), doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, att=attr_name, val=attr_value)

        if attr_value:  # Note: cannot test for None here (which would allow empty string) because default service args are ""
            query_clause += "AND json_specialattr(doc)=%(spc)s"
            query_args['spc'] = "%s=%s" % (attr_name, attr_value)
        else:
            query_clause += "AND json_specialattr(doc) LIKE %(spc)s"
            query_args['spc'] = "%s=%%" % (attr_name, )
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], attr_name=attr_name, attr_value=row[2].split("=",1)[-1]) for row in rows]
        log.debug("find_res_by_attribute() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_alternative_id(self, alt_id=None, alt_id_ns=None, id_only=False, filter=None):
        log.debug("find_res_by_alternative_id(restype=%s, alt_id_ns=%s)", alt_id, alt_id_ns)
        if alt_id and type(alt_id) is not str:
            raise BadRequest('Argument alt_id illegal')
        if alt_id_ns and type(alt_id_ns) is not str:
            raise BadRequest('Argument alt_id_ns illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()

        query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_args = dict(aid=[alt_id], ans=[alt_id_ns])
        query_clause = " WHERE lcstate<>'DELETED' "

        if not alt_id and not alt_id_ns:
            query_clause += "AND json_altids_ns(doc) is not null"
        elif alt_id and not alt_id_ns:
            query_clause += "AND %(aid)s <@ json_altids_id(doc)"
        elif alt_id_ns and not alt_id:
            query_clause += "AND %(ans)s <@ json_altids_ns(doc)"
        else:
            query_clause += "AND %(aid)s <@ json_altids_id(doc) AND %(ans)s <@ json_altids_ns(doc)"

        query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        # Need to fake the return format of the Couch view for alt_ids. One record per alt_id, not one per resource.
        res_assocs = []
        res_rows = []
        for row in rows:
            doc_id = self._prep_id(row[0])
            doc = row[-1]
            for aid in doc.get("alt_ids", []):
                aid_parts = aid.split(":", 1)
                aid_ns = aid_parts[0] if len(aid_parts)>1 else "_"
                aid_id = aid_parts[-1]
                if alt_id_ns and alt_id:
                    if alt_id_ns == aid_ns and alt_id == aid_id:
                        res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id))
                        res_rows.append((doc_id, doc))
                elif (not alt_id_ns and not alt_id) or (alt_id_ns and alt_id_ns == aid_ns) or (alt_id and alt_id == aid_id):
                    res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id))
                    res_rows.append((doc_id, doc))

        log.debug("find_res_by_alternative_id() found %s objects", len(res_assocs))
        return self._prepare_find_return(res_rows, res_assocs, id_only=id_only)
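
    # Illustrative parsing of the alt_ids entries (hypothetical values):
    #   'PRE:OBS-123' -> alt_id_ns='PRE', alt_id='OBS-123'
    #   'OBS-123'     -> alt_id_ns='_',   alt_id='OBS-123'   (no namespace)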

    def find_by_view(self, design_name, view_name, key=None, keys=None, start_key=None, end_key=None,
                     id_only=True, convert_doc=True, **kwargs):
        """
        Generic find function using a defined index
        @param design_name  design document
        @param view_name  view name
        @param key  specific key to find
        @param keys  list of keys to find
        @param start_key  find range start value
        @param end_key  find range end value
        @param id_only  if False, the 3rd element of each triple is the full document (else the index value)
        @param convert_doc  if True, make IonObject out of doc
        @retval Returns a list of 3-tuples: (document id, index key, index value or document)
        """
        res_rows = self.find_docs_by_view(design_name=design_name, view_name=view_name, key=key, keys=keys,
                                          start_key=start_key, end_key=end_key, id_only=id_only, **kwargs)

        res_rows = [(rid, key,
                     self._persistence_dict_to_ion_object(doc) if convert_doc and isinstance(doc, dict) else doc)
                    for rid, key, doc in res_rows]

        log.debug("find_by_view() found %s objects" % (len(res_rows)))
        return res_rows

    def find_by_query(self, query, access_args=None):
        """
        Find resources given a datastore query expression dict.
        @param query  a dict representation of a datastore query
        @retval  list of resource ids or resource objects matching query (dependent on id_only value)
        """
        qual_ds_name = self._get_datastore_name()
        query_ds_sub = query["query_args"].get("ds_sub", None)
        query_format = query["query_args"].get("format", "")

        pqb = PostgresQueryBuilder(query, qual_ds_name)
        if self.profile == DataStore.DS_PROFILE.RESOURCES and not query_ds_sub:
            table_alias = qual_ds_name if query_format != "complex" else "base"
            pqb.where = self._add_access_filter(access_args, qual_ds_name, pqb.where, pqb.values,
                                                add_where=False, tablealias=table_alias)

        if self.profile == DataStore.DS_PROFILE.RESOURCES:
            pqb.where = self._add_deleted_filter(pqb.table_aliases[0], query_ds_sub,
                                                 pqb.where, pqb.values,
                                                 show_all=query["query_args"].get("show_all", False))

        with self.pool.cursor(**self.cursor_args) as cur:
            exec_query = pqb.get_query()
            cur.execute(exec_query, pqb.get_values())
            rows = cur.fetchall()
            log.info("find_by_query() QUERY: %s (%s rows)", cur.query, cur.rowcount)
            query_res = {}
            query["_result"] = query_res
            query_res["statement_gen"] = exec_query
            query_res["statement_sql"] = cur.query
            query_res["rowcount"] = cur.rowcount

        id_only = query["query_args"].get("id_only", True)
        if query_format == "complex" and pqb.has_basic_cols:
            # Return format is list of lists
            if id_only:
                res_vals = [[self._prep_id(row[0])] + list(row[1:]) for row in rows]
            else:
                res_vals = [[self._persistence_dict_to_ion_object(row[1])] + list(row[2:]) for row in rows]

        elif query_format == "complex":
            res_vals = [list(row) for row in rows]

        else:
            if id_only:
                res_vals = [self._prep_id(row[0]) for row in rows]
            else:
                res_vals = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]

        return res_vals

    # -------------------------------------------------------------------------
    # Internal operations

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object, update_version=True)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
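
# A minimal sketch (not part of the example above) of the round trip that the
# _ion_object_to_persistence_dict/_persistence_dict_to_ion_object helpers wrap,
# assuming the usual pyon import locations and the registered base type 'Resource':
from pyon.core.bootstrap import IonObject, get_obj_registry
from pyon.core.object import IonObjectSerializer, IonObjectDeserializer

serializer = IonObjectSerializer()
deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

obj = IonObject('Resource', name='example')
doc = serializer.serialize(obj, update_version=True)  # plain dict with type_ and persisted_version
roundtrip = deserializer.deserialize(doc)             # back to a typed IonObject
assert roundtrip.name == 'example'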
Example #46
0
class PostgresPyonDataStore(PostgresDataStore):
    """
    Postgres-backed datastore that adds IonObject (de)serialization on top of PostgresDataStore.
    """
    def __init__(self,
                 datastore_name=None,
                 config=None,
                 scope=None,
                 profile=None):
        """
        @param datastore_name  Name of datastore within server. May be scoped to sysname
        @param config  A server config dict with connection params
        @param scope  Prefix for the datastore name (e.g. sysname) to separate multiple systems
        """

        PostgresDataStore.__init__(self,
                                   datastore_name=datastore_name,
                                   config=config
                                   or CFG.get_safe("server.postgresql"),
                                   profile=profile
                                   or DataStore.DS_PROFILE.BASIC,
                                   scope=scope)

        # IonObject Serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())

    # -------------------------------------------------------------------------
    # Document operations (Couch-style API)

    def create(self, obj, object_id=None, attachments=None, datastore_name=""):
        """
        Converts ion objects to python dictionary before persisting them using the optional
        suggested identifier and creates attachments to the object.
        Returns an identifier and revision number of the object
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id,
                               datastore_name=datastore_name,
                               attachments=attachments)

    def create_mult(self, objects, object_ids=None, allow_ids=None):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc_mult(
            [self._ion_object_to_persistence_dict(obj) for obj in objects],
            object_ids)

    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_mult(self, objects):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc_mult(
            [self._ion_object_to_persistence_dict(obj) for obj in objects])

    def read(self, object_id, rev_id="", datastore_name="", object_type=None):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")

        doc = self.read_doc(object_id,
                            rev_id,
                            datastore_name=datastore_name,
                            object_type=object_type)
        obj = self._persistence_dict_to_ion_object(doc)

        return obj

    def read_mult(self, object_ids, datastore_name="", strict=True):
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object ids are not string: %s" % str(object_ids))

        docs = self.read_doc_mult(object_ids, datastore_name, strict=strict)
        obj_list = [
            self._persistence_dict_to_ion_object(doc)
            if doc is not None else None for doc in docs
        ]

        return obj_list

    def delete(self, obj, datastore_name="", object_type=None):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest(
                "Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            self.delete_doc(obj,
                            datastore_name=datastore_name,
                            object_type=object_type)
        else:
            if '_id' not in obj:
                raise BadRequest("Doc must have '_id'")
            if '_rev' not in obj:
                raise BadRequest("Doc must have '_rev'")
            self.delete_doc(self._ion_object_to_persistence_dict(obj),
                            datastore_name=datastore_name,
                            object_type=object_type)

    def delete_mult(self, object_ids, datastore_name=None):
        return self.delete_doc_mult(object_ids, datastore_name)

    # -------------------------------------------------------------------------
    # View operations

    def find_objects_mult(self,
                          subjects,
                          id_only=False,
                          predicate=None,
                          access_args=None):
        """
        Returns a list of associations for a given list of subjects
        """
        # TODO: Port this implementation to Postgres single query
        res_list = [[], []]
        if not subjects:
            return res_list
        for sub in subjects:
            res_ids, res_assocs = self.find_objects(subject=sub,
                                                    id_only=id_only,
                                                    predicate=predicate,
                                                    access_args=access_args)
            res_list[0].extend(res_ids)
            res_list[1].extend(res_assocs)
        return res_list

    def find_subjects_mult(self,
                           objects,
                           id_only=False,
                           predicate=None,
                           access_args=None):
        """
        Returns a list of associations for a given list of objects
        """
        # TODO: Port this implementation to Postgres single query
        res_list = [[], []]
        if not objects:
            return res_list
        for obj in objects:
            res_ids, res_assocs = self.find_subjects(obj=obj,
                                                     id_only=id_only,
                                                     predicate=predicate,
                                                     access_args=access_args)
            res_list[0].extend(res_ids)
            res_list[1].extend(res_assocs)
        return res_list

    def find_objects(self,
                     subject,
                     predicate=None,
                     object_type=None,
                     id_only=False,
                     access_args=None,
                     **kwargs):
        #log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s", subject, predicate, object_type, id_only)

        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        if object_type and not predicate:
            raise BadRequest("Cannot provide object type without a predicate")

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id

        qual_ds_name = self._get_datastore_name()
        assoc_table_name = qual_ds_name + "_assoc"
        table_names = dict(ds=qual_ds_name, dsa=assoc_table_name)
        view_args = self._get_view_args(kwargs, access_args)

        if id_only:
            #query = "SELECT o, doc FROM %(dsa)s WHERE retired<>true " % table_names
            query = "SELECT %(dsa)s.o, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names
        else:
            query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names
        query_args = dict(s=subject_id, ot=object_type, p=predicate)

        query_clause = "AND s=%(s)s"
        if predicate:
            query_clause += " AND p=%(p)s"
            if object_type:
                query_clause += " AND ot=%(ot)s"

        query_clause = self._add_access_filter(access_args, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = view_args.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        obj_assocs = [
            self._persistence_dict_to_ion_object(row[-1]) for row in rows
        ]
        #log.debug("find_objects() found %s objects", len(obj_assocs))
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, obj_assocs
        else:
            res_objs = [
                self._persistence_dict_to_ion_object(row[0]) for row in rows
            ]
            return res_objs, obj_assocs

    def find_subjects(self,
                      subject_type=None,
                      predicate=None,
                      obj=None,
                      id_only=False,
                      access_args=None,
                      **kwargs):
        #log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s", subject_type, predicate, obj, id_only)

        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        if subject_type and not predicate:
            raise BadRequest("Cannot provide subject type without a predicate")

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id

        qual_ds_name = self._get_datastore_name()
        assoc_table_name = qual_ds_name + "_assoc"
        table_names = dict(ds=qual_ds_name, dsa=assoc_table_name)
        view_args = self._get_view_args(kwargs, access_args)

        if id_only:
            #query = "SELECT s, doc FROM %(dsa)s WHERE retired<>true " % table_names
            query = "SELECT %(dsa)s.s, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % table_names
        else:
            query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % table_names
        query_args = dict(o=object_id, st=subject_type, p=predicate)

        query_clause = "AND o=%(o)s"
        if predicate:
            query_clause += " AND p=%(p)s"
            if subject_type:
                query_clause += " AND st=%(st)s"

        query_clause = self._add_access_filter(access_args, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = view_args.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        obj_assocs = [
            self._persistence_dict_to_ion_object(row[-1]) for row in rows
        ]
        #log.debug("find_subjects() found %s subjects", len(obj_assocs))
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, obj_assocs
        else:
            res_objs = [
                self._persistence_dict_to_ion_object(row[0]) for row in rows
            ]
            return res_objs, obj_assocs

    def find_associations(self,
                          subject=None,
                          predicate=None,
                          obj=None,
                          assoc_type=None,
                          id_only=True,
                          anyside=None,
                          query=None,
                          **kwargs):
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not (subject or obj or predicate or anyside or query):
            raise BadRequest("Illegal parameters: No S/P/O or anyside")
        if anyside and (subject or obj):
            raise BadRequest(
                "Illegal parameters: anyside cannot be combined with S/O")
        if anyside and predicate and type(anyside) in (list, tuple):
            raise BadRequest(
                "Illegal parameters: anyside list cannot be combined with P")

        if query:
            query["query_args"]["id_only"] = id_only
            query["query_args"]["ds_sub"] = "assoc"
            # TODO: filter out retired
            return self.find_by_query(query)

        subject_id, object_id, anyside_ids = None, None, None
        if subject:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available in subject")
                else:
                    subject_id = subject._id
        if obj:
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id
        if anyside:
            if type(anyside) is str:
                anyside_ids = [anyside]
            elif type(anyside) in (list, tuple):
                if not all([type(o) in (str, list, tuple) for o in anyside]):
                    raise BadRequest(
                        "List of object ids or (object id, predicate) expected"
                    )
                anyside_ids = anyside
            else:
                if "_id" not in anyside:
                    raise BadRequest("Object id not available in anyside")
                else:
                    anyside_ids = [anyside._id]
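        # Accepted anyside forms, per the normalization above (illustrative values):
        #   anyside="res1"                    -> s or o equals "res1"
        #   anyside=["res1", "res2"]          -> s or o equals any listed id
        #   anyside=[("res1", "hasOwner")]    -> s or o equals the id AND p equals the predicate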

        #log.debug("find_associations(subject=%s, predicate=%s, object=%s, anyside=%s)", subject_id, predicate, object_id, anyside_ids)

        qual_ds_name = self._get_datastore_name()
        table = qual_ds_name + "_assoc"
        view_args = self._get_view_args(kwargs)

        if id_only:
            query = "SELECT id FROM " + table
        else:
            query = "SELECT id, doc, s, st, p, o, ot FROM " + table
        query_clause = " WHERE retired<>true AND "
        query_args = dict(s=subject_id, o=object_id, p=predicate)

        if subject and obj:
            query_clause += "s=%(s)s AND o=%(o)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif subject:
            query_clause += "s=%(s)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif obj:
            query_clause += "o=%(o)s"
            if predicate:
                query_clause += " AND p=%(p)s"
        elif anyside:
            if predicate:
                query_clause += "p=%(p)s AND (s=%(any)s OR o=%(any)s)"
                query_args["any"] = anyside
            elif type(anyside_ids[0]) is str:
                # keys are IDs of resources
                for i, key in enumerate(anyside_ids):
                    if i > 0:
                        query_clause += " OR "
                    argname = "id%s" % i
                    query_args[argname] = key
                    query_clause += "(s=%(" + argname + ")s OR o=%(" + argname + ")s)"
            else:
                # keys are tuples of (id, pred)
                for i, (key, pred) in enumerate(anyside_ids):
                    if i > 0:
                        query_clause += " OR "
                    argname_id = "id%s" % i
                    argname_p = "p%s" % i
                    query_args[argname_id] = key
                    query_args[argname_p] = pred
                    query_clause += "(p=%(" + argname_p + ")s AND (s=%(" + argname_id + ")s OR o=%(" + argname_id + ")s))"

        elif predicate:
            if predicate == "*":
                query_clause += "p is not null"
            else:
                query_clause += "p=%(p)s"
        else:
            raise BadRequest("Illegal arguments")

        extra_clause = view_args.get("extra_clause", "")
        sql = query + query_clause + extra_clause
        #print "find_associations(): SQL=", sql, query_args
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(sql, query_args)
            rows = cur.fetchall()

        if id_only:
            assocs = [self._prep_id(row[0]) for row in rows]
        else:
            assocs = [
                self._persistence_dict_to_ion_object(row[1]) for row in rows
            ]
        #log.debug("find_associations() found %s associations", len(assocs))

        return assocs

    def _prepare_find_return(self,
                             rows,
                             res_assocs=None,
                             id_only=True,
                             **kwargs):
        if id_only:
            res_ids = [self._prep_id(row[0]) for row in rows]
            return res_ids, res_assocs
        else:
            res_docs = [
                self._persistence_dict_to_ion_object(row[-1]) for row in rows
            ]
            return res_docs, res_assocs

    def _add_access_filter(self,
                           view_args,
                           tablename,
                           query_clause,
                           query_args,
                           add_where=True,
                           tablealias=None):
        """Returns a Postgres SQL filter clause and referenced values for resource queries filtered
        by resource visibility and current actor role/facility membership/superuser status"""
        view_args = view_args if view_args is not None else {}
        current_actor_id = view_args.get("current_actor_id", None)
        superuser_actor_ids = view_args.get("superuser_actor_ids", None) or []
        tablealias = tablealias or tablename
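        # Visibility codes used in the filters below (assumed mapping, inferred
        # from the NOT IN clauses): 1=PUBLIC, 2=REGISTERED, 3=FACILITY, 4=OWNER;
        # NULL and other values are treated as visible.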

        access_filter = ""
        access_args = {}
        access_args["current_actor_id"] = current_actor_id
        assoc_tablename = tablename + "_assoc"
        if current_actor_id in superuser_actor_ids:
            # Current user is a superuser - no additional filter
            pass
        elif current_actor_id and current_actor_id != "anonymous":
            # Registered actor
            # - Return all PUBLIC, REGISTERED
            access_filter += tablealias + ".visibility NOT IN (3,4)"  # 1, 2, null and other values
            # - Return all owned by user independent of visibility
            access_filter += " OR (" + tablealias + ".id IN (SELECT s FROM " + assoc_tablename + \
                             " WHERE p='hasOwner' AND o=%(current_actor_id)s))"
            # - Return all FACILITY if user is in same facility
            access_filter += " OR (" + tablealias + ".visibility=3 AND " + tablealias + ".id IN (SELECT o FROM " + assoc_tablename + \
                             " WHERE p='hasResource' AND st='Org' AND s IN (SELECT s FROM " + assoc_tablename + \
                             " WHERE p='hasMember' AND st='Org' AND o=%(current_actor_id)s)))"
        else:
            # Anonymous access
            # All public resources
            access_filter += tablealias + ".visibility NOT IN (2,3,4)"

        if query_clause and access_filter:
            query_clause += " AND (" + access_filter + ")"
        elif not query_clause and access_filter:
            if add_where:
                query_clause = " WHERE " + access_filter
            else:
                query_clause = access_filter

        query_args.update(access_args)
        return query_clause

    def _add_deleted_filter(self,
                            tablename,
                            ds_sub,
                            query_clause,
                            query_args,
                            with_deleted=False):
        if with_deleted:
            return query_clause
        deleted_filter = ""
        if not ds_sub:
            deleted_filter = tablename + ".lcstate<>'DELETED'"
        elif ds_sub == "assoc":
            deleted_filter = tablename + ".retired<>true"
        if query_clause and deleted_filter:
            query_clause += " AND " + deleted_filter
        elif not query_clause and deleted_filter:
            query_clause = deleted_filter
        return query_clause

    def find_resources(self,
                       restype="",
                       lcstate="",
                       name="",
                       id_only=True,
                       access_args=None):
        return self.find_resources_ext(restype=restype,
                                       lcstate=lcstate,
                                       name=name,
                                       id_only=id_only,
                                       access_args=access_args)

    def find_resources_ext(self,
                           restype="",
                           lcstate="",
                           name="",
                           keyword=None,
                           nested_type=None,
                           attr_name=None,
                           attr_value=None,
                           alt_id=None,
                           alt_id_ns=None,
                           limit=None,
                           skip=None,
                           descending=None,
                           id_only=True,
                           query=None,
                           access_args=None):
        filter_kwargs = self._get_view_args(
            dict(limit=limit, skip=skip, descending=descending), access_args)
        if query:
            qargs = query["query_args"]
            if id_only is not None:
                qargs["id_only"] = id_only
            if limit is not None and limit != 0:
                qargs["limit"] = limit
            if skip is not None and skip != 0:
                qargs["skip"] = skip
            return self.find_by_query(query, access_args=access_args)
        elif name:
            if lcstate:
                raise BadRequest("find by name does not support lcstate")
            return self.find_res_by_name(name,
                                         restype,
                                         id_only,
                                         filter=filter_kwargs)
        elif keyword:
            return self.find_res_by_keyword(keyword,
                                            restype,
                                            id_only,
                                            filter=filter_kwargs)
        elif alt_id or alt_id_ns:
            return self.find_res_by_alternative_id(alt_id,
                                                   alt_id_ns,
                                                   id_only,
                                                   filter=filter_kwargs)
        elif nested_type:
            return self.find_res_by_nested_type(nested_type,
                                                restype,
                                                id_only,
                                                filter=filter_kwargs)
        elif restype and attr_name:
            return self.find_res_by_attribute(restype,
                                              attr_name,
                                              attr_value,
                                              id_only=id_only,
                                              filter=filter_kwargs)
        elif restype and lcstate:
            return self.find_res_by_lcstate(lcstate,
                                            restype,
                                            id_only,
                                            filter=filter_kwargs)
        elif restype:
            return self.find_res_by_type(restype,
                                         lcstate,
                                         id_only,
                                         filter=filter_kwargs)
        elif lcstate:
            return self.find_res_by_lcstate(lcstate,
                                            restype,
                                            id_only,
                                            filter=filter_kwargs)
        elif not restype and not lcstate and not name:
            return self.find_res_by_type(None,
                                         None,
                                         id_only,
                                         filter=filter_kwargs)

    def find_res_by_type(self,
                         restype,
                         lcstate=None,
                         id_only=False,
                         filter=None):
        log.debug("find_res_by_type(restype=%s, lcstate=%s)", restype, lcstate)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if lcstate:
            raise BadRequest(
                'lcstate not supported anymore in find_res_by_type')

        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_, lcstate FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, lcstate, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, lcstate=lcstate)

        if restype:
            query_clause += "AND type_=%(type_)s"
        else:
            # Returns ALL documents, only limited by filter
            query_clause = ""

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [
            dict(id=self._prep_id(row[0]), name=row[1], type=row[2])
            for row in rows
        ]
        log.debug("find_res_by_type() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_lcstate(self,
                            lcstate,
                            restype=None,
                            id_only=False,
                            filter=None):
        log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate,
                  restype)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if '_' in lcstate:
            log.warn("Search for compound lcstate restricted to maturity: %s",
                     lcstate)
            lcstate, _ = lcstate.split("_", 1)
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_, lcstate, availability FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, lcstate, availability, doc FROM " + qual_ds_name
        query_clause = " WHERE "
        query_args = dict(type_=restype, lcstate=lcstate)

        is_maturity = lcstate not in AvailabilityStates
        if is_maturity:
            query_clause += "lcstate=%(lcstate)s"
        else:
            query_clause += "availability=%(lcstate)s"

        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [
            dict(id=self._prep_id(row[0]),
                 name=row[1],
                 type=row[2],
                 lcstate=row[3] if is_maturity else row[4]) for row in rows
        ]
        log.debug("find_res_by_lcstate() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_name(self, name, restype=None, id_only=False, filter=None):
        log.debug("find_res_by_name(name=%s, restype=%s)", name, restype)
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, name, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, name, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(name=name, type_=restype)

        query_clause += "AND name=%(name)s"
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [
            dict(id=self._prep_id(row[0]), name=row[1], type=row[2])
            for row in rows
        ]
        log.debug("find_res_by_name() found %s objects", len(res_assocs))

        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_keyword(self,
                            keyword,
                            restype=None,
                            id_only=False,
                            filter=None):
        log.debug("find_res_by_keyword(keyword=%s, restype=%s)", keyword,
                  restype)
        if not keyword or type(keyword) is not str:
            raise BadRequest('Argument keyword illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, kw=[keyword])

        query_clause += "AND %(kw)s <@ json_keywords(doc)"
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [
            dict(id=self._prep_id(row[0]), type=row[1], keyword=keyword)
            for row in rows
        ]
        log.debug("find_res_by_keyword() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_nested_type(self,
                                nested_type,
                                restype=None,
                                id_only=False,
                                filter=None):
        log.debug("find_res_by_nested_type(nested_type=%s, restype=%s)",
                  nested_type, restype)
        if not nested_type or type(nested_type) is not str:
            raise BadRequest('Argument nested_type illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_ FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, nest=[nested_type])

        query_clause += "AND %(nest)s <@ json_nested(doc)"
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [
            dict(id=self._prep_id(row[0]),
                 type=row[1],
                 nested_type=nested_type) for row in rows
        ]
        log.debug("find_res_by_nested_type() found %s objects",
                  len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_attribute(self,
                              restype,
                              attr_name,
                              attr_value=None,
                              id_only=False,
                              filter=None):
        log.debug(
            "find_res_by_attribute(restype=%s, attr_name=%s, attr_value=%s)",
            restype, attr_name, attr_value)
        if not attr_name or type(attr_name) is not str:
            raise BadRequest('Argument attr_name illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()
        if id_only:
            query = "SELECT id, type_, json_specialattr(doc) FROM " + qual_ds_name
        else:
            query = "SELECT id, type_, json_specialattr(doc), doc FROM " + qual_ds_name
        query_clause = " WHERE lcstate<>'DELETED' "
        query_args = dict(type_=restype, att=attr_name, val=attr_value)

        if attr_value:  # Note: cannot make None test here (and allow empty string because of default service args "")
            query_clause += "AND json_specialattr(doc)=%(spc)s"
            query_args['spc'] = "%s=%s" % (attr_name, attr_value)
        else:
            query_clause += "AND json_specialattr(doc) LIKE %(spc)s"
            query_args['spc'] = "%s=%%" % (attr_name, )
        if restype:
            query_clause += " AND type_=%(type_)s"

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        res_assocs = [
            dict(id=self._prep_id(row[0]),
                 type=row[1],
                 attr_name=attr_name,
                 attr_value=row[2].split("=", 1)[-1]) for row in rows
        ]
        log.debug("find_res_by_attribute() found %s objects", len(res_assocs))
        return self._prepare_find_return(rows, res_assocs, id_only=id_only)

    def find_res_by_alternative_id(self,
                                   alt_id=None,
                                   alt_id_ns=None,
                                   id_only=False,
                                   filter=None):
        log.debug("find_res_by_alternative_id(restype=%s, alt_id_ns=%s)",
                  alt_id, alt_id_ns)
        if alt_id and type(alt_id) is not str:
            raise BadRequest('Argument alt_id illegal')
        if alt_id_ns and type(alt_id_ns) is not str:
            raise BadRequest('Argument alt_id_ns illegal')
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        filter = filter if filter is not None else {}
        qual_ds_name = self._get_datastore_name()

        query = "SELECT id, type_, doc FROM " + qual_ds_name
        query_args = dict(aid=[alt_id], ans=[alt_id_ns])
        query_clause = " WHERE lcstate<>'DELETED' "

        if not alt_id and not alt_id_ns:
            query_clause += "AND json_altids_ns(doc) is not null"
        elif alt_id and not alt_id_ns:
            query_clause += "AND %(aid)s <@ json_altids_id(doc)"
        elif alt_id_ns and not alt_id:
            query_clause += "AND %(ans)s <@ json_altids_ns(doc)"
        else:
            query_clause += "AND %(aid)s <@ json_altids_id(doc) AND %(ans)s <@ json_altids_ns(doc)"

        query_clause = self._add_access_filter(filter, qual_ds_name,
                                               query_clause, query_args)
        extra_clause = filter.get("extra_clause", "")
        with self.pool.cursor(**self.cursor_args) as cur:
            cur.execute(query + query_clause + extra_clause, query_args)
            rows = cur.fetchall()

        # Need to fake the return format of the Couch view for alt_ids. One record per alt_id, not one per resource.
        res_assocs = []
        res_rows = []
        for row in rows:
            doc_id = self._prep_id(row[0])
            doc = row[-1]
            for aid in doc.get("alt_ids", []):
                aid_parts = aid.split(":", 1)
                aid_ns = aid_parts[0] if len(aid_parts) > 1 else "_"
                aid_id = aid_parts[-1]
                if alt_id_ns and alt_id:
                    if alt_id_ns == aid_ns and alt_id == aid_id:
                        res_assocs.append(
                            dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id))
                        res_rows.append((doc_id, doc))
                elif ((not alt_id_ns and not alt_id)
                      or (alt_id_ns and alt_id_ns == aid_ns)
                      or (alt_id and alt_id == aid_id)):
                    res_assocs.append(
                        dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id))
                    res_rows.append((doc_id, doc))

        log.debug("find_res_by_alternative_id() found %s objects",
                  len(res_assocs))
        return self._prepare_find_return(res_rows, res_assocs, id_only=id_only)

    def find_by_view(self,
                     design_name,
                     view_name,
                     key=None,
                     keys=None,
                     start_key=None,
                     end_key=None,
                     id_only=True,
                     convert_doc=True,
                     **kwargs):
        """
        Generic find function using a defined index
        @param design_name  design document
        @param view_name  view name
        @param key  specific key to find
        @param keys  list of keys to find
        @param start_key  find range start value
        @param end_key  find range end value
        @param id_only  if False, the 3rd element of each triple is the full document (else the index value)
        @param convert_doc  if True, make IonObject out of doc
        @retval Returns a list of 3-tuples: (document id, index key, index value or document)
        """
        res_rows = self.find_docs_by_view(design_name=design_name,
                                          view_name=view_name,
                                          key=key,
                                          keys=keys,
                                          start_key=start_key,
                                          end_key=end_key,
                                          id_only=id_only,
                                          **kwargs)

        res_rows = [(rid, key, self._persistence_dict_to_ion_object(doc)
                     if convert_doc and isinstance(doc, dict) else doc)
                    for rid, key, doc in res_rows]

        log.debug("find_by_view() found %s objects" % (len(res_rows)))
        return res_rows

    def find_by_query(self, query, access_args=None):
        """
        Find resources given a datastore query expression dict.
        @param query  a dict representation of a datastore query
        @retval  list of resource ids or resource objects matching query (dependent on id_only value)
        """
        qual_ds_name = self._get_datastore_name()
        query_ds_sub = query["query_args"].get("ds_sub", None)
        query_format = query["query_args"].get("format", "")

        pqb = PostgresQueryBuilder(query, qual_ds_name)
        if self.profile == DataStore.DS_PROFILE.RESOURCES and not query_ds_sub:
            table_alias = qual_ds_name if query_format != "complex" else "base"
            pqb.where = self._add_access_filter(access_args,
                                                qual_ds_name,
                                                pqb.where,
                                                pqb.values,
                                                add_where=False,
                                                tablealias=table_alias)

        if self.profile == DataStore.DS_PROFILE.RESOURCES:
            pqb.where = self._add_deleted_filter(
                pqb.table_aliases[0],
                query_ds_sub,
                pqb.where,
                pqb.values,
                with_deleted=query["query_args"].get("with_deleted",
                                                     False) is True)

        with self.pool.cursor(**self.cursor_args) as cur:
            exec_query = pqb.get_query()
            cur.execute(exec_query, pqb.get_values())
            rows = cur.fetchall()
            log.info("find_by_query() QUERY: %s (%s rows)", cur.query,
                     cur.rowcount)
            query_res = {}
            query["_result"] = query_res
            query_res["statement_gen"] = exec_query
            query_res["statement_sql"] = cur.query
            query_res["rowcount"] = cur.rowcount

        id_only = query["query_args"].get("id_only", True)
        if query_format == "complex" and pqb.has_basic_cols:
            # Return format is list of lists
            if id_only:
                res_vals = [[self._prep_id(row[0])] + list(row[1:])
                            for row in rows]
            else:
                res_vals = [[self._persistence_dict_to_ion_object(row[1])] +
                            list(row[2:]) for row in rows]

        elif query_format == "complex":
            res_vals = [list(row) for row in rows]

        else:
            if id_only:
                res_vals = [self._prep_id(row[0]) for row in rows]
            else:
                res_vals = [
                    self._persistence_dict_to_ion_object(row[-1])
                    for row in rows
                ]

        return res_vals

    # -------------------------------------------------------------------------
    # Internal operations

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object,
                                                 update_version=True)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
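
# A standalone sketch of the "namespace:id" convention that
# find_res_by_alternative_id() above applies to each entry of a document's
# alt_ids list (namespace defaults to "_" when no colon is present); the
# example values are illustrative only:
def parse_alt_id(aid):
    """Split an alt_id string into (namespace, id), defaulting namespace to '_'."""
    parts = aid.split(":", 1)
    if len(parts) > 1:
        return parts[0], parts[1]
    return "_", parts[0]

assert parse_alt_id("PRE:OBS-1") == ("PRE", "OBS-1")
assert parse_alt_id("OBS-1") == ("_", "OBS-1")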
Example #47
0
class MockDB_DataStore(DataStore):
    """
    Data store implementation utilizing in-memory dict of dicts
    to persist documents.
    """

    def __init__(self, datastore_name='prototype'):
        self.datastore_name = datastore_name
        log.debug('Creating in-memory dict of dicts that will simulate data stores')
        self.root = {}

        # serializers
        self._io_serializer     = IonObjectSerializer()
        self._io_deserializer   = IonObjectDeserializer(obj_registry=get_obj_registry())

    def create_datastore(self, datastore_name="", create_indexes=True):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Creating data store %s' % datastore_name)
        if self.datastore_exists(datastore_name):
            raise BadRequest("Data store with name %s already exists" % datastore_name)
        if datastore_name not in self.root:
            self.root[datastore_name] = {}

    def delete_datastore(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Deleting data store %s' % datastore_name)
        if datastore_name in self.root:
            del self.root[datastore_name]
        else:
            log.info('Data store %s does not exist' % datastore_name)

    def list_datastores(self):
        log.debug('Listing all data stores')
        dsList = self.root.keys()
        log.debug('Data stores: %s' % str(dsList))
        return dsList

    def info_datastore(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing information about data store %s' % datastore_name)
        if datastore_name in self.root:
            info = 'Data store exists'
        else:
            raise BadRequest("Data store with name %s does not exist" % datastore_name)
        log.debug('Data store info: %s' % str(info))
        return info

    def datastore_exists(self, datastore_name=""):
        return datastore_name in self.root

    def list_objects(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all objects in data store %s' % datastore_name)
        objs = []
        for key, value in self.root[datastore_name].items():
            if key.find('_version_counter') == -1 and key.find('_version_') == -1:
                objs.append(key)
        log.debug('Objects: %s' % str(objs))
        return objs

    def list_object_revisions(self, object_id, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all versions of object %s/%s' % (datastore_name, str(object_id)))
        res = []
        for key, value in self.root[datastore_name].items():
            if (key.find('_version_counter') == -1
                and (key.find(object_id + '_version_') == 0)):
                res.append(key)
        log.debug('Versions: %s' % str(res))
        return res

    def create(self, obj, object_id=None, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id, datastore_name=datastore_name)

    def create_doc(self, doc, object_id=None, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")
        if '_rev' in doc:
            raise BadRequest("Doc must not have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        if object_id:
            if object_id in datastore_dict:
                raise BadRequest("Object with id %s already exist" % object_id)

        # Assign an id to doc
        doc["_id"] = object_id or uuid4().hex
        object_id = doc["_id"]

        log.debug('Creating new object %s/%s' % (datastore_name, object_id))

        # Create key for version counter entry.  Will be used
        # on update to increment version number easily.
        version_counter_key = '__' + object_id + '_version_counter'
        version_counter = 1

        # Assign initial version to doc
        doc["_rev"] = str(version_counter)

        # Write HEAD, version and version counter dicts
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc
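        # Resulting in-memory layout for object id X at version 1 (illustrative):
        #   datastore_dict["X"] = doc                       (HEAD)
        #   datastore_dict["__X_version_counter"] = 1
        #   datastore_dict["X_version_1"] = doc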

        # Return list that identifies the id of the new doc and its version
        res = [object_id, str(version_counter)]
        log.debug('Create result: %s' % str(res))
        return res

    def create_mult(self, objects, object_ids=None):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects],
                                    object_ids)

    def create_doc_mult(self, docs, object_ids=None):
        if any(["_id" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_id'")
        if any(["_rev" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_rev'")
        if object_ids and len(object_ids) != len(docs):
            raise BadRequest("Invalid object_ids")

        # Assign an id to doc (recommended in CouchDB documentation)
        object_ids = object_ids or [uuid4().hex for i in xrange(len(docs))]

        res = []
        for doc, oid in zip(docs, object_ids):
            oid,rev = self.create_doc(doc, oid)
            res.append((True,oid,rev))
        return res

    def read(self, object_id, rev_id="", datastore_name=""):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")
        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s' % str(obj))
        return obj

    def read_doc(self, object_id, rev_id="", datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        try:
            key = object_id
            if rev_id:
                log.debug('Reading version %s of object %s/%s' % (str(rev_id), datastore_name, str(object_id)))
                key += '_version_' + str(rev_id)
            else:
                log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id)))
            doc = datastore_dict[key]
        except KeyError:
            raise NotFound('Object with id %s does not exist.' % str(object_id))
        log.debug('Read result: %s' % str(doc))
        return doc

    def read_mult(self, object_ids, datastore_name=""):
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object id param is not string")
        docs = self.read_doc_mult(object_ids, datastore_name)
        # Convert docs into Ion objects
        obj_list = [self._persistence_dict_to_ion_object(doc) for doc in docs]
        return obj_list

    def read_doc_mult(self, object_ids, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        doc_list = []
        try:
            for object_id in object_ids:
                log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id)))
                doc = datastore_dict[object_id]

                doc_list.append(doc.copy())
        except KeyError:
            raise NotFound('Object with id %s does not exist.' % str(object_id))
        return doc_list

    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_doc(self, doc, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")
        if '_rev' not in doc:
            raise BadRequest("Doc must have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        try:
            object_id = doc["_id"]

            # Find the next doc version
            version_counter_key = '__' + object_id + '_version_counter'
            base_version = doc["_rev"]
            version_counter = datastore_dict[version_counter_key] + 1
            if base_version != str(version_counter - 1):
                raise Conflict('Object not based on most current version')
        except KeyError:
            raise BadRequest("Object missing required _id and/or _rev values")

        log.debug('Saving new version of object %s/%s' % (datastore_name, doc["_id"]))
        doc["_rev"] = str(version_counter)

        # Overwrite HEAD and version counter dicts, add new version dict
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc
        res = [object_id, str(version_counter)]
        log.debug('Update result: %s' % str(res))
        return res

    def delete(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest("Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            return self.delete_doc(obj, datastore_name=datastore_name)
        return self.delete_doc(self._ion_object_to_persistence_dict(obj), datastore_name=datastore_name)

    def delete_doc(self, doc, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        if type(doc) is str:
            object_id = doc
        else:
            object_id = doc["_id"]
        
        log.info('Deleting object %s/%s' % (datastore_name, object_id))
        if object_id in datastore_dict:

            if self._is_in_association(object_id, datastore_name):
                obj = self.read(object_id, "", datastore_name)
                log.warn("Attempt to delete object %s that still has associations" % str(obj))
#                raise BadRequest("Object cannot be deleted until associations are broken")

            # Find all version dicts and delete them
            for key in datastore_dict.keys():
                if key.find(object_id + '_version_') == 0:
                    del datastore_dict[key]
            # Delete the HEAD dict
            del datastore_dict[object_id]
            # Delete the version counter dict
            del datastore_dict['__' + object_id + '_version_counter']
        else:
            raise NotFound('Object with id ' + object_id + ' does not exist.')
        log.info('Delete result: True')

    def _is_in_association(self, obj_id, datastore_name=""):
        log.debug("_is_in_association(%s)" % obj_id)
        if not obj_id:
            raise BadRequest("Must provide object id")

        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and obj['type_'] == "Association":
                association = obj
                if association["s"] == obj_id or association["o"] == obj_id:
                    log.debug("association found(%s)" % association)
                    return True
        return False

    def find_objects(self, subject, predicate=None, object_type=None, id_only=False):
        log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s" % (subject, predicate, object_type, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['s'] == subject_id:
                    if predicate and obj['p'] == predicate:
                        if (object_type and obj['ot'] == object_type) or not object_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['o'])
                            target_list.append(self.read(obj['o']))
                    elif not predicate:
                        assoc_list.append(obj)
                        target_id_list.append(obj['o'])
                        target_list.append(self.read(obj['o']))

        log.debug("find_objects() found %s objects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False):
        log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s" % (subject_type, predicate, obj, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['o'] == object_id:
                    if predicate and obj['p'] == predicate:
                        if (subject_type and obj['st'] == subject_type) or not subject_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['s'])
                            target_list.append(self.read(obj['s']))
                    elif not predicate:
                        assoc_list.append(obj)
                        target_id_list.append(obj['s'])
                        target_list.append(self.read(obj['s']))

        log.debug("find_subjects() found %s subjects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True):
        log.debug("find_associations(subject=%s, predicate=%s, object=%s, assoc_type=%s)" % (subject, predicate, obj, assoc_type))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not ((subject and obj) or predicate):
            raise BadRequest("Illegal parameters: provide both subject and object, or a predicate")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if subject and obj:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available in subject")
                else:
                    subject_id = subject._id
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id
            target_list = []
            for objname,obj in datastore_dict.iteritems():
                if (objname.find('_version_')>0) or (not type(obj) is dict): continue
                if 'type_' in obj and obj['type_'] == "Association":
                    if obj['s'] == subject_id and obj['o'] == object_id:
                        if assoc_type:
                            if obj['at'] == assoc_type:
                                target_list.append(obj)
                        else:
                            target_list.append(obj)
        else:
            target_list = []
            for objname,obj in datastore_dict.iteritems():
                if (objname.find('_version_')>0) or (not type(obj) is dict): continue
                if 'type_' in obj and obj['type_'] == "Association":
                    if obj['p'] == predicate:
                        target_list.append(obj)

        if id_only:
            assocs = [row['_id'] for row in target_list]
        else:
            assocs = [self._persistence_dict_to_ion_object(row) for row in target_list]
        log.debug("find_associations() found %s associations" % (len(assocs)))
        return assocs
        
    def find_res_by_type(self, restype, lcstate=None, id_only=False):
        log.debug("find_res_by_type(restype=%s, lcstate=%s)" % (restype, lcstate))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and (obj['type_'] == restype or (not restype and obj['type_'] != "Association")):
                if (lcstate and 'lcstate' in obj and obj['lcstate'] == lcstate) or not lcstate or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_type() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False):
        log.debug("find_res_by_type(lcstate=%s, restype=%s)" % (lcstate, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if lcstate in CommonResourceLifeCycleSM.STATE_ALIASES:
            lcstate_match = CommonResourceLifeCycleSM.STATE_ALIASES[lcstate]
        else:
            lcstate_match = [lcstate]
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'lcstate' in obj and obj['lcstate'] in lcstate_match:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_lcstate() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def _pass(self):
        pass

    def find_res_by_name(self, name, restype=None, id_only=False):
        log.debug("find_res_by_name(name=%s, restype=%s)" % (name, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'name' in obj and obj['name'] == name:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_name() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_dir_entries(self, qname):
        raise NotImplementedError()

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None: return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None: return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
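A minimal, hypothetical sketch of how the association queries above are
typically exercised; 'store' is assumed to be an instance of this datastore,
already holding two resources plus an 'Association' document whose 's', 'p'
and 'o' fields link them, and 'hasDevice' is an assumed predicate name:

    # Hypothetical usage; subject_id/object_id come from earlier store.create() calls.
    obj_ids, assocs = store.find_objects(subject_id, predicate='hasDevice', id_only=True)
    subj_ids, assocs = store.find_subjects(predicate='hasDevice', obj=object_id, id_only=True)
    assoc_docs = store.find_associations(subject=subject_id, obj=object_id, id_only=False)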
Example #48
0
    def __init__(self):
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
Example #49
0
    def __init__(self):
        BaseTransformManagementService.__init__(self)

        self.serializer = IonObjectSerializer()
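The constructors above wire the same serializer machinery into their services.
A minimal round-trip sketch (import paths assumed from the pyon layout;
'Resource' is assumed to be a registered object type):

    from pyon.core.object import IonObjectSerializer, IonObjectDeserializer
    from pyon.core.bootstrap import IonObject, get_obj_registry

    serializer = IonObjectSerializer()
    deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    obj = IonObject('Resource', {'name': 'example'})
    obj_dict = serializer.serialize(obj)           # plain dict, safe to persist
    restored = deserializer.deserialize(obj_dict)  # back to an Ion object
    assert restored.name == obj.name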
Example #50
0
class FileDataStore(object):
    def __init__(self, container, datastore_name=""):
        self.container = container
        self.datastore_name = datastore_name

        # Object serialization/deserialization
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())

    def start(self):
        if self.container.has_capability(self.container.CCAP.FILE_SYSTEM):
            self.datastore_dir = FileSystem.get_url(FS.FILESTORE,
                                                    self.datastore_name)
        else:
            self.datastore_dir = "./tmp/%s" % self.datastore_name

    def stop(self):
        pass

    def _get_filename(self, object_id):
        return "%s/%s" % (self.datastore_dir, object_id)

    def create(self, obj, object_id=None, attachments=None, datastore_name=""):
        """
        Converts the ion object to a Python dictionary, persists it under the
        optionally suggested identifier, and creates any attachments on the object.
        Returns the identifier and revision number of the object.
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id,
                               datastore_name=datastore_name,
                               attachments=attachments)

    def create_doc(self,
                   doc,
                   object_id=None,
                   attachments=None,
                   datastore_name=""):
        """
        Persists the document using the optionally suggested doc_id, and creates attachments to it.
        Returns the identifier and version number of the document
        """
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")

        # Assign an id to doc (recommended in CouchDB documentation)
        doc["_id"] = object_id or uuid4().hex
        log.debug('Creating new object %s/%s' % (datastore_name, doc["_id"]))
        log.debug('create doc contents: %s', doc)

        filename = self._get_filename(doc["_id"])
        doc_json = json.dumps(doc)
        with open(filename, "w") as f:
            f.write(doc_json)

        return doc["_id"], 1

    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_doc(self, doc, datastore_name=""):
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")

        log.debug('update doc contents: %s', doc)
        filename = self._get_filename(doc["_id"])
        doc_json = json.dumps(doc)
        with open(filename, "w") as f:
            f.write(doc_json)

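        # Note: this file-backed store does not track revisions; a fixed
        # revision of 2 is reported to satisfy callers expecting (id, rev).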
        return doc["_id"], 2

    def read(self, object_id, rev_id="", datastore_name=""):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")
        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s', str(obj))
        return obj

    def read_doc(self, doc_id, rev_id="", datastore_name=""):
        log.debug('Reading head version of object %s/%s', datastore_name,
                  doc_id)
        filename = self._get_filename(doc_id)
        try:
            with open(filename, "r") as f:
                doc = json.loads(f.read())
        except IOError:
            raise NotFound('Object with id %s does not exist.' % str(doc_id))
        log.debug('read doc contents: %s', doc)
        return doc

    def delete(self, obj, datastore_name="", del_associations=False):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest(
                "Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            self.delete_doc(obj,
                            datastore_name=datastore_name,
                            del_associations=del_associations)
        else:
            if '_id' not in obj:
                raise BadRequest("Doc must have '_id'")
            self.delete_doc(self._ion_object_to_persistence_dict(obj),
                            datastore_name=datastore_name,
                            del_associations=del_associations)

    def delete_doc(self, doc, datastore_name="", del_associations=False):
        doc_id = doc if type(doc) is str else doc["_id"]
        log.debug('Deleting object %s/%s', datastore_name, doc_id)
        filename = self._get_filename(doc_id)

        try:
            os.remove(filename)
        except OSError:
            raise NotFound('Object with id %s does not exist.' % doc_id)

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None: return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None: return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
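A hypothetical end-to-end use of the FileDataStore above; 'container' is
assumed to be a pyon container instance, and the resolved datastore directory
is assumed to already exist:

    store = FileDataStore(container, datastore_name="examples")
    store.start()                      # resolves store.datastore_dir
    oid, rev = store.create(IonObject('Resource', {'name': 'sample'}))
    same = store.read(oid)             # loads and deserializes the JSON file
    store.delete(oid)                  # removes the file for this object id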
Example #51
0
class MockDB_DataStore(DataStore):
    """
    Data store implementation utilizing in-memory dict of dicts
    to persist documents.
    """

    def __init__(self, datastore_name='prototype'):
        self.datastore_name = datastore_name
        log.debug('Creating in-memory dict of dicts that will simulate data stores')
        self.root = {}

        # serializers
        self._io_serializer     = IonObjectSerializer()
        self._io_deserializer   = IonObjectDeserializer(obj_registry=obj_registry)

    def create_datastore(self, datastore_name="", create_indexes=True):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Creating data store %s' % datastore_name)
        if self.datastore_exists(datastore_name):
            raise BadRequest("Data store with name %s already exists" % datastore_name)
        if datastore_name not in self.root:
            self.root[datastore_name] = {}

    def delete_datastore(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Deleting data store %s' % datastore_name)
        if datastore_name in self.root:
            del self.root[datastore_name]
        else:
            log.info('Data store %s does not exist' % datastore_name)

    def list_datastores(self):
        log.debug('Listing all data stores')
        dsList = self.root.keys()
        log.debug('Data stores: %s' % str(dsList))
        return dsList

    def info_datastore(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing information about data store %s' % datastore_name)
        if datastore_name in self.root:
            info = 'Data store exists'
        else:
            raise BadRequest("Data store with name %s does not exist" % datastore_name)
        log.debug('Data store info: %s' % str(info))
        return info

    def datastore_exists(self, datastore_name=""):
        return datastore_name in self.root

    def list_objects(self, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all objects in data store %s' % datastore_name)
        objs = []
        for key, value in self.root[datastore_name].items():
            if key.find('_version_counter') == -1 and key.find('_version_') == -1:
                objs.append(key)
        log.debug('Objects: %s' % str(objs))
        return objs

    def list_object_revisions(self, object_id, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all versions of object %s/%s' % (datastore_name, str(object_id)))
        res = []
        for key, value in self.root[datastore_name].items():
            if (key.find('_version_counter') == -1
                and (key.find(object_id + '_version_') == 0)):
                res.append(key)
        log.debug('Versions: %s' % str(res))
        return res

    def create(self, obj, object_id=None, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id, datastore_name=datastore_name)

    def create_doc(self, doc, object_id=None, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")
        if '_rev' in doc:
            raise BadRequest("Doc must not have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        if object_id:
            if object_id in datastore_dict:
                raise BadRequest("Object with id %s already exist" % object_id)

        # Assign an id to doc
        doc["_id"] = object_id or uuid4().hex
        object_id = doc["_id"]

        log.debug('Creating new object %s/%s' % (datastore_name, object_id))

        # Create key for version counter entry.  Will be used
        # on update to increment version number easily.
        versionCounterKey = '__' + object_id + '_version_counter'
        versionCounter = 1

        # Assign initial version to doc
        doc["_rev"] = str(versionCounter)

        # Write HEAD, version and version counter dicts
        datastore_dict[object_id] = doc
        datastore_dict[versionCounterKey] = versionCounter
        datastore_dict[object_id + '_version_' + str(versionCounter)] = doc

        # Return list that identifies the id of the new doc and its version
        res = [object_id, str(versionCounter)]
        log.debug('Create result: %s' % str(res))
        return res

    def create_mult(self, objects, object_ids=None):
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects],
                                    object_ids)

    def create_doc_mult(self, docs, object_ids=None):
        if any(["_id" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_id'")
        if any(["_rev" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_rev'")
        if object_ids and len(object_ids) != len(docs):
            raise BadRequest("Invalid object_ids")

        # Assign an id to doc (recommended in CouchDB documentation)
        object_ids = object_ids or [uuid4().hex for i in xrange(len(docs))]

        res = []
        for doc, oid in zip(docs, object_ids):
            oid,rev = self.create_doc(doc, oid)
            res.append((True,oid,rev))
        return res

    def read(self, object_id, rev_id="", datastore_name=""):
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")
        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s' % str(obj))
        return obj

    def read_doc(self, object_id, rev_id="", datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        try:
            key = object_id
            if rev_id == "":
                log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id)))
            else:
                log.debug('Reading version %s of object %s/%s' % (str(rev_id), datastore_name, str(object_id)))
                key += '_version_' + str(rev_id)
            doc = datastore_dict[key]
        except KeyError:
            raise NotFound('Object with id %s does not exist.' % str(object_id))
        log.debug('Read result: %s' % str(doc))
        return doc

    def read_mult(self, object_ids, datastore_name=""):
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object id param is not string")
        docs = self.read_doc_mult(object_ids, datastore_name)
        # Convert docs into Ion objects
        obj_list = [self._persistence_dict_to_ion_object(doc) for doc in docs]
        return obj_list

    def read_doc_mult(self, object_ids, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        doc_list = []
        try:
            for object_id in object_ids:
                log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id)))
                doc = datastore_dict[object_id]

                doc_list.append(doc.copy())
        except KeyError:
            raise NotFound('Object with id %s does not exist.' % str(object_id))
        return doc_list

    def update(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_doc(self, doc, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")
        if '_rev' not in doc:
            raise BadRequest("Doc must have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        try:
            object_id = doc["_id"]

            # Find the next doc version
            versionCounterKey = '__' + object_id + '_version_counter'
            baseVersion = doc["_rev"]
            versionCounter = datastore_dict[versionCounterKey] + 1
            if baseVersion != str(versionCounter - 1):
                raise Conflict('Object not based on most current version')
        except KeyError:
            raise BadRequest("Object missing required _id and/or _rev values")

        log.debug('Saving new version of object %s/%s' % (datastore_name, doc["_id"]))
        doc["_rev"] = str(versionCounter)

        # Overwrite HEAD and version counter dicts, add new version dict
        datastore_dict[object_id] = doc
        datastore_dict[versionCounterKey] = versionCounter
        datastore_dict[object_id + '_version_' + str(versionCounter)] = doc
        res = [object_id, str(versionCounter)]
        log.debug('Update result: %s' % str(res))
        return res

    def delete(self, obj, datastore_name=""):
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest("Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            return self.delete_doc(obj, datastore_name=datastore_name)
        return self.delete_doc(self._ion_object_to_persistence_dict(obj), datastore_name=datastore_name)

    def delete_doc(self, doc, datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        if type(doc) is str:
            object_id = doc
        else:
            object_id = doc["_id"]
        
        log.info('Deleting object %s/%s' % (datastore_name, object_id))
        if object_id in datastore_dict:

            if self._is_in_association(object_id, datastore_name):
                obj = self.read(object_id, "", datastore_name)
                log.warn("Attempt to delete object %s that still has associations" % str(obj))
#                raise BadRequest("Object cannot be deleted until associations are broken")

            # Find all version dicts and delete them
            for key in datastore_dict.keys():
                if key.find(object_id + '_version_') == 0:
                    del datastore_dict[key]
            # Delete the HEAD dict
            del datastore_dict[object_id]
            # Delete the version counter dict
            del datastore_dict['__' + object_id + '_version_counter']
        else:
            raise NotFound('Object ' + object_id + ' does not exist.')
        log.info('Delete result: True')

    def find(self, criteria=[], datastore_name=""):
        docList = self.find_doc(criteria, datastore_name)

        results = []
        # Convert each returned doc to its associated Ion object
        for doc in docList:
            obj = self._persistence_dict_to_ion_object(doc)
            log.debug('Ion object: %s' % str(obj))
            results.append(obj)

        return results

    def find_doc(self, criteria=[], datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        results = []
        log_string = "Searching for objects matching criteria list: " + str(criteria)
        log.debug(log_string)
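        # Criteria format (as implied by the checks below): a flat list mixing
        # [<field>, <operator>, <value>] triples with DataStore.AND connectors,
        # e.g. [["type_", DataStore.EQUAL, "InstrumentDevice"]].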

        # Traverse entire data store, checking each HEAD version for equality
        # with specified criterion
        for obj_id in self.list_objects(datastore_name):
            try:
                doc = self.read_doc(obj_id, rev_id="", datastore_name=datastore_name)
                log.debug("Doc: %s" % str(doc))
                if len(criteria) == 0:
                    results.append(doc)
                else:
                    criteria_satisfied = False
                    for criterion in criteria:
                        if isinstance(criterion, list):
                            if len(criterion) != 3:
                                raise BadRequest("Insufficient criterion values specified.  Much match [<field>, <logical constant>, <value>]")
                            for item in criterion:
                                if not isinstance(item, str):
                                    raise BadRequest("All criterion values must be strings")
                            key = criterion[0]
                            logical_operation = criterion[1]
                            value = criterion[2]
                            if key in doc:
                                if logical_operation == DataStore.EQUAL:
                                    criteria_satisfied = doc[key] == value
                                elif logical_operation == DataStore.NOT_EQUAL:
                                    criteria_satisfied = doc[key] != value
                                elif logical_operation == DataStore.GREATER_THAN:
                                    criteria_satisfied = doc[key] > value
                                elif logical_operation == DataStore.GREATER_THAN_OR_EQUAL:
                                    criteria_satisfied = doc[key] >= value
                                elif logical_operation == DataStore.LESS_THAN:
                                    criteria_satisfied = doc[key] < value
                                elif logical_operation == DataStore.LESS_THAN_OR_EQUAL:
                                    criteria_satisfied = doc[key] <= value
                        else:
                            if criterion == DataStore.AND:
                                # Can shortcut the query at this point if the
                                # previous criterion failed
                                if not criteria_satisfied:
                                    break

                    if criteria_satisfied:
                        results.append(doc)
                
            except KeyError:
                pass

        log.debug('Find results: %s' % str(results))

        if len(results) == 0:
            raise NotFound('No objects matched criteria %s' % criteria)

        return results

    def find_by_idref(self, criteria=[], association="", datastore_name=""):
        doc_list = self.find_by_idref_doc(criteria, association, datastore_name)

        results = []
        # Convert each returned doc to its associated Ion object
        for doc in doc_list:
            obj = self._persistence_dict_to_ion_object(doc)
            log.debug('Ion object: %s' % str(obj))
            results.append(obj)

        return results

    def find_by_idref_doc(self, criteria=[], association="", datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        ids = []
        log_string = "Searching for objects matching criteria list: " + str(criteria)
        log.debug(log_string)

        # Traverse entire data store, checking each HEAD version for equality
        # with specified criterion
        for obj_id in self.list_objects(datastore_name):
            try:
                doc = self.read_doc(obj_id, rev_id="", datastore_name=datastore_name)
                log.debug("Doc: %s" % str(doc))
                if len(criteria) == 0:
                    if association in doc:
                        for id in doc[association]:
                            ids.append(id)
                else:
                    criteria_satisfied = False
                    for criterion in criteria:
                        if isinstance(criterion, list):
                            key = criterion[0]
                            logical_operation = criterion[1]
                            value = criterion[2]
                            if key in doc:
                                if logical_operation == DataStore.EQUAL:
                                    criteria_satisfied = doc[key] == value
                                elif logical_operation == DataStore.NOT_EQUAL:
                                    criteria_satisfied = doc[key] != value
                                elif logical_operation == DataStore.GREATER_THAN:
                                    criteria_satisfied = doc[key] > value
                                elif logical_operation == DataStore.GREATER_THAN_OR_EQUAL:
                                    criteria_satisfied = doc[key] >= value
                                elif logical_operation == DataStore.LESS_THAN:
                                    criteria_satisfied = doc[key] < value
                                elif logical_operation == DataStore.LESS_THAN_OR_EQUAL:
                                    criteria_satisfied = doc[key] <= value
                        else:
                            if criterion == DataStore.AND:
                                # Can shortcut the query at this point if the
                                # previous criterion failed
                                if not criteria_satisfied:
                                    break

                    if criteria_satisfied:
                        if association in doc:
                            for id in doc[association]:
                                ids.append(id)

            except KeyError:
                pass

        results = []
        for id in ids:
            doc = self.read_doc(id, "", datastore_name)
            results.append(doc)

        log.debug('Find results: %s' % str(results))

        if len(results) == 0:
            raise NotFound('No objects matched criteria %s' % criteria)

        return results

    def resolve_idref(self, subject="", predicate="", obj="", datastore_name=""):
        res_list = self.resolve_idref_doc(subject, predicate, obj, datastore_name)

        results = []
        # Convert each returned doc to its associated Ion object
        for item in res_list:
            subject_dict = item[0]
            object_dict = item[2]
            subject = self._persistence_dict_to_ion_object(subject_dict)
            log.debug('Subject Ion object: %s' % str(subject))
            obj = self._persistence_dict_to_ion_object(object_dict)
            log.debug('Object Ion object: %s' % str(obj))
            results.append([subject, item[1], obj])

        return results

    def resolve_idref_doc(self, subject="", predicate="", obj="", datastore_name=""):
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

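        # Case analysis over (subject, predicate, object): an empty string acts
        # as a wildcard; at least one of the three must be supplied.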
        if subject == "":
            if predicate == "":
                if obj == "":
                    # throw exception
                    raise BadRequest("Data store query does not specify subject, predicate or object")
                else:
                    # Find all subjects with any association to object
                    object_doc = self.read_doc(obj, "", datastore_name)
                    res = []
                    all_doc_ids = self.list_objects(datastore_name)
                    for subject_doc_id in all_doc_ids:
                        if subject_doc_id == obj:
                            continue
                        subject_doc = self.read_doc(subject_doc_id, "", datastore_name)
                        for key in subject_doc:
                            if isinstance(subject_doc[key], list):
                                if obj in subject_doc[key]:
                                    res.append([subject_doc, key, object_doc])
                            else:
                                if obj == subject_doc[key]:
                                    res.append([subject_doc, key, object_doc])

                    if len(res) == 0:
                        raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj))
                    else:
                        return res
            else:
                # Find all subjects with association to object
                object_doc = self.read_doc(obj, "", datastore_name)
                res = []
                all_doc_ids = self.list_objects(datastore_name)
                for subject_doc_id in all_doc_ids:
                    if subject_doc_id == obj:
                        continue
                    subject_doc = self.read_doc(subject_doc_id, "", datastore_name)
                    if predicate in subject_doc:
                        if obj in subject_doc[predicate]:
                            res.append([subject_doc, predicate, object_doc])

                if len(res) == 0:
                    raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj))
                else:
                    return res
        else:
            if predicate == "":
                if obj == "":
                    # Find all objects with any association to subject
                    # TODO would need some way to indicate a key is an association predicate
                    pass
                else:
                    # Find all associations between subject and object
                    subject_doc = self.read_doc(subject, "", datastore_name)
                    object_doc = self.read_doc(obj, "", datastore_name)
                    res = []
                    for key in subject_doc:
                        if isinstance(subject_doc[key], list):
                            if obj in subject_doc[key]:
                                res.append([subject_doc, key, object_doc])
                        else:
                            if obj == subject_doc[key]:
                                res.append([subject_doc, key, object_doc])

                    if len(res) == 0:
                        raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj))
                    else:
                        return res
            else:
                if obj == "":
                    # Find all associated objects
                    subject_doc = self.read_doc(subject, "", datastore_name)
                    res = []
                    if predicate in subject_doc:
                        for id in subject_doc[predicate]:
                            object_doc = self.read_doc(id, "", datastore_name)
                            res.append([subject_doc, predicate, object_doc])
                        return res
                    raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj))
                else:
                    # Determine if association exists
                    subject_doc = self.read_doc(subject, "", datastore_name)
                    object_doc = self.read_doc(obj, "", datastore_name)
                    if predicate in subject_doc:
                        if obj in subject_doc[predicate]:
                            return [[subject_doc, predicate, object_doc]]
                    raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj))

    def _is_in_association(self, obj_id, datastore_name=""):
        log.debug("_is_in_association(%s)" % obj_id)
        if not obj_id:
            raise BadRequest("Must provide object id")

        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and obj['type_'] == "Association":
                association = obj
                if association["s"] == obj_id or association["o"] == obj_id:
                    log.debug("association found(%s)" % association)
                    return True
        return False

    def find_objects(self, subject, predicate=None, object_type=None, id_only=False):
        log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s" % (subject, predicate, object_type, id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['s'] == subject_id:
                    if predicate and obj['p'] == predicate:
                        if (object_type and obj['ot'] == object_type) or not object_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['o'])
                            target_list.append(self.read(obj['o']))
                    elif not predicate:
                        assoc_list.append(obj)
                        target_id_list.append(obj['o'])
                        target_list.append(self.read(obj['o']))

        log.debug("find_objects() found %s objects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False):
        log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s" % (subject_type, predicate, obj, id_only))
        if not obj:
            raise BadRequest("Must provide object")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['o'] == object_id:
                    if predicate and obj['p'] == predicate:
                        if (subject_type and obj['st'] == subject_type) or not subject_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['s'])
                            target_list.append(self.read(obj['s']))
                    elif not predicate:
                        assoc_list.append(obj)
                        target_id_list.append(obj['s'])
                        target_list.append(self.read(obj['s']))

        log.debug("find_subjects() found %s subjects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_associations(self, subject=None, predicate=None, obj=None, id_only=True):
        log.debug("find_associations(subject=%s, predicate=%s, object=%s)" % (subject, predicate, obj))
        if not ((subject and obj) or predicate):
            raise BadRequest("Illegal parameters: provide both subject and object, or a predicate")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if subject and obj:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available")
                else:
                    subject_id = subject._id
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id
            target_list = []
            for objname,obj in datastore_dict.iteritems():
                if (objname.find('_version_')>0) or (not type(obj) is dict): continue
                if 'type_' in obj and obj['type_'] == "Association":
                    if obj['s'] == subject_id and obj['o'] == object_id:
                        target_list.append(obj)
        else:
            target_list = []
            for objname,obj in datastore_dict.iteritems():
                if (objname.find('_version_')>0) or (not type(obj) is dict): continue
                if 'type_' in obj and obj['type_'] == "Association":
                    if obj['p'] == predicate:
                        target_list.append(obj)

        if id_only:
            assocs = [row['_id'] for row in target_list]
        else:
            assocs = [self._persistence_dict_to_ion_object(row) for row in target_list]
        log.debug("find_associations() found %s associations" % (len(assocs)))
        return assocs
        
    def find_res_by_type(self, restype, lcstate=None, id_only=False):
        log.debug("find_res_by_type(restype=%s, lcstate=%s)" % (restype, lcstate))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'type_' in obj and (obj['type_'] == restype or (not restype and obj['type_'] != "Association")):
                if (lcstate and 'lcstate' in obj and obj['lcstate'] == lcstate) or not lcstate or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_type() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False):
        log.debug("find_res_by_type(lcstate=%s, restype=%s)" % (lcstate, restype))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if lcstate in ResourceLifeCycleSM.STATE_ALIASES:
            lcstate_match = ResourceLifeCycleSM.STATE_ALIASES[lcstate]
        else:
            lcstate_match = [lcstate]
        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'lcstate' in obj and obj['lcstate'] in lcstate_match:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_lcstate() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def _pass(self):
        pass

    def find_res_by_name(self, name, restype=None, id_only=False):
        log.debug("find_res_by_name(name=%s, restype=%s)" % (name, restype))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname,obj in datastore_dict.iteritems():
            if (objname.find('_version_')>0) or (not type(obj) is dict): continue
            if 'name' in obj and obj['name'] == name:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_name() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_dir_entries(self, qname):
        raise NotImplementedError()

    def _ion_object_to_persistence_dict(self, ion_object):
        if ion_object is None: return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        if obj_dict is None: return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
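
A minimal usage sketch for the find_res_* helpers above. The MockDB_DataStore name and its constructor are assumptions for illustration; only the find_* signatures come from the code above:

def _example_find_usage():
    store = MockDB_DataStore(datastore_name='prototype')  # hypothetical setup
    # ids only, filtered by resource type
    ids, _ = store.find_res_by_type('InstrumentDevice', id_only=True)
    # full objects, filtered by lifecycle state
    objs, _ = store.find_res_by_lcstate('DEPLOYED')
    # full objects, filtered by name and (optionally) type
    named, _ = store.find_res_by_name('CTD01', restype='InstrumentDevice')
    return ids, objs, named
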
class IngestionManagementService(BaseIngestionManagementService):
    """
    id_p = cc.spawn_process('ingestion_worker', 'ion.services.dm.ingestion.ingestion_management_service', 'IngestionManagementService')
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """

    base_exchange_name = 'ingestion_queue'

    def __init__(self):
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError(
                'Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure'
                % xs_dot_xp)

        self.serializer = IonObjectSerializer()
        self.process_definition_id = None

    def on_start(self):
        super(IngestionManagementService, self).on_start()
        self.event_publisher = EventPublisher(
            event_type="DatasetIngestionConfigurationEvent")

        res_list, _ = self.clients.resource_registry.find_resources(
            restype=RT.ProcessDefinition,
            name='ingestion_worker_process',
            id_only=True)
        if len(res_list):
            self.process_definition_id = res_list[0]

    def on_quit(self):
        #self.clients.process_dispatcher.delete_process_definition(process_definition_id=self.process_definition_id)
        super(IngestionManagementService, self).on_quit()

    def create_ingestion_configuration(self,
                                       exchange_point_id='',
                                       couch_storage=None,
                                       hdf_storage=None,
                                       number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        """

        if self.process_definition_id is None:
            process_definition = ProcessDefinition(
                name='ingestion_worker_process',
                description='Worker transform process for ingestion of datasets'
            )
            process_definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
            process_definition.executable['class'] = 'IngestionWorker'
            self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
                process_definition=process_definition)

        # Give each ingestion configuration its own queue name to receive data on
        exchange_name = 'ingestion_queue'

        ##------------------------------------------------------------------------------------
        ## declare our intent to subscribe to all messages on the exchange point
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(
            query=query,
            exchange_name=exchange_name,
            name='Ingestion subscription',
            description='Subscription for ingestion workers')

        ##------------------------------------------------------------------------------------------

        # create an ingestion_configuration instance and update the registry
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration(name=self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)

        ingestion_configuration_id, _ = self.clients.resource_registry.create(
            ingestion_configuration)

        self._launch_transforms(ingestion_configuration.number_of_workers,
                                subscription_id, ingestion_configuration_id,
                                ingestion_configuration,
                                self.process_definition_id)
        return ingestion_configuration_id
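
    # A hedged usage sketch for create_ingestion_configuration; the storage
    # dicts and worker count below are illustrative values, not defaults from
    # this module:
    #
    #   config_id = ingestion_management.create_ingestion_configuration(
    #       exchange_point_id='science_data',
    #       couch_storage={'datastore_name': 'dm_datastore'},
    #       hdf_storage={'relative_path': 'ingest'},
    #       number_of_workers=2)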

    def _launch_transforms(self, number_of_workers, subscription_id,
                           ingestion_configuration_id, ingestion_configuration,
                           process_definition_id):
        """
        This method spawns the two transform processes without activating them...Note: activating the transforms does the binding
        """

        description = 'Ingestion worker'

        configuration = self.serializer.serialize(ingestion_configuration)
        configuration.pop('type_')
        configuration['configuration_id'] = ingestion_configuration_id

        # launch the transforms
        for i in xrange(number_of_workers):
            name = '(%s)_Ingestion_Worker_%s' % (ingestion_configuration_id,
                                                 i + 1)
            transform_id = self.clients.transform_management.create_transform(
                name=name,
                description=description,
                in_subscription_id=subscription_id,
                out_streams={},
                process_definition_id=process_definition_id,
                configuration=configuration)  # pass the serialized dict built above

            # create association between ingestion configuration and the transforms that act as Ingestion Workers
            if not transform_id:
                raise IngestionManagementServiceException(
                    'Transform could not be launched by ingestion.')
            self.clients.resource_registry.create_association(
                ingestion_configuration_id, PRED.hasTransform, transform_id)

    def update_ingestion_configuration(self, ingestion_configuration=None):
        """Change the number of workers or the default policy for ingesting data on each stream

        @param ingestion_configuration    IngestionConfiguration
        """
        log.debug("Updating ingestion configuration")
        id, rev = self.clients.resource_registry.update(
            ingestion_configuration)

    def read_ingestion_configuration(self, ingestion_configuration_id=''):
        """Get an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @retval ingestion_configuration    IngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading ingestion configuration object id: %s",
                  ingestion_configuration_id)
        ingestion_configuration = self.clients.resource_registry.read(
            ingestion_configuration_id)
        if ingestion_configuration is None:
            raise NotFound("Ingestion configuration %s does not exist" %
                           ingestion_configuration_id)
        return ingestion_configuration

    def delete_ingestion_configuration(self, ingestion_configuration_id=''):
        """Delete an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Deleting ingestion configuration: %s",
                  ingestion_configuration_id)

        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        #delete the transforms associated with the ingestion_configuration_id
        transform_ids, _ = self.clients.resource_registry.find_objects(
            ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)

        if len(transform_ids) < 1:
            raise NotFound(
                'No transforms associated with this ingestion configuration!')

        log.debug('len(transform_ids): %s' % len(transform_ids))

        for transform_id in transform_ids:
            # To Delete - we need to actually remove each of the transforms
            self.clients.transform_management.delete_transform(transform_id)

        # delete the associations too...
        associations = self.clients.resource_registry.find_associations(
            ingestion_configuration_id, PRED.hasTransform)
        log.info('associations: %s' % associations)
        for association in associations:
            self.clients.resource_registry.delete_association(association)
            #@todo How should we deal with failure?

        self.clients.resource_registry.delete(ingestion_configuration_id)

    def activate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Activate an ingestion configuration and the transform processes that execute it

        @param ingestion_configuration_id    str
        @throws NotFound    The ingestion configuration id did not exist
        """

        log.debug("Activating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # read the transforms
        transform_ids, _ = self.clients.resource_registry.find_objects(
            ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' %
                           str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only activate one
        self.clients.transform_management.activate_transform(transform_ids[0])

        return True

    def deactivate_ingestion_configuration(self,
                                           ingestion_configuration_id=''):
        """Deactivate one of the transform processes that uses an ingestion configuration

        @param ingestion_configuration_id    str
        @throws NotFound    The ingestion configuration id did not exist
        """
        log.debug("Deactivating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # use the deactivate method in transformation management service
        transform_ids, _ = self.clients.resource_registry.find_objects(
            ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' %
                           str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only deactivate one
        self.clients.transform_management.deactivate_transform(
            transform_ids[0])

        return True
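
    # Hedged usage note: activate/deactivate operate on the shared worker
    # subscription, so a single call covers all workers (config_id is
    # illustrative):
    #
    #   ingestion_management.activate_ingestion_configuration(config_id)
    #   ingestion_management.deactivate_ingestion_configuration(config_id)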

    def create_dataset_configuration(self,
                                     dataset_id='',
                                     archive_data=True,
                                     archive_metadata=True,
                                     ingestion_configuration_id=''):
        """Create a configuration for ingestion of a particular dataset and associate it to a ingestion configuration.

        @param dataset_id    str
        @param archive_data    bool
        @param archive_metadata    bool
        @param ingestion_configuration_id    str
        @retval dataset_ingestion_configuration_id    str
        """

        if not dataset_id:
            raise IngestionManagementServiceException(
                'Must pass a dataset id to create_dataset_configuration')

        log.debug("Creating dataset configuration")

        dataset = self.clients.dataset_management.read_dataset(
            dataset_id=dataset_id)

        stream_id = dataset.primary_view_key

        # Read the stream to get the stream definition
        #stream = self.clients.pubsub_management.read_stream(stream_id=stream_id)

        # Get the associated stream definition!
        stream_defs, _ = self.clients.resource_registry.find_objects(
            stream_id, PRED.hasStreamDefinition)

        if len(stream_defs) != 1:
            raise IngestionManagementServiceException(
                'The stream is associated with more than one stream definition!'
            )

        stream_def_resource = stream_defs[0]
        # Get the container object out of the stream def resource and set the stream id field in the local instance
        stream_def_container = stream_def_resource.container
        stream_def_container.stream_resource_id = stream_id

        # Get the ingestion configuration
        ingestion_configuration = self.clients.resource_registry.read(
            ingestion_configuration_id)
        couch_storage = ingestion_configuration.couch_storage

        log.info(
            'Adding stream definition for stream "%s" to ingestion database "%s"'
            % (stream_id, couch_storage.datastore_name))
        db = self.container.datastore_manager.get_datastore(
            ds_name=couch_storage.datastore_name, config=self.CFG)

        # put it in couch db!
        db.create(stream_def_container)
        db.close()

        #@todo Add business logic to create the right kind of dataset ingestion configuration
        config = DatasetIngestionByStream(archive_data=archive_data,
                                          archive_metadata=archive_metadata,
                                          stream_id=stream_id,
                                          dataset_id=dataset_id)

        dset_ingest_config = DatasetIngestionConfiguration(
            name='Dataset config %s' % dataset_id,
            description='configuration for dataset %s' % dataset_id,
            configuration=config,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM)

        dset_ingest_config_id, _ = self.clients.resource_registry.create(
            dset_ingest_config)

        self.clients.resource_registry.create_association(
            dset_ingest_config_id, PRED.hasIngestionConfiguration,
            ingestion_configuration_id)

        self.clients.resource_registry.create_association(
            dataset_id, PRED.hasIngestionConfiguration,
            ingestion_configuration_id)

        self.event_publisher.publish_event(
            origin=ingestion_configuration_id,  # Use the ingestion configuration ID as the origin!
            description=dset_ingest_config.description,
            configuration=config,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dset_ingest_config_id)

        return dset_ingest_config_id

    def update_dataset_config(self, dataset_ingestion_configuration=None):
        """Update the ingestion configuration for a dataset

        @param dataset_ingestion_configuration    DatasetIngestionConfiguration
        """

        #@todo - make it an exception to change the dataset_id or the stream_id in the dataset config!

        log.info('dataset configuration to update: %s' %
                 dataset_ingestion_configuration)

        log.debug("Updating dataset config")
        dset_ingest_config_id, rev = self.clients.resource_registry.update(
            dataset_ingestion_configuration)

        ingest_config_ids, _ = self.clients.resource_registry.find_objects(
            dset_ingest_config_id,
            PRED.hasIngestionConfiguration,
            id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException(
                'The dataset ingestion configuration is associated with more than one ingestion configuration!'
            )

        ingest_config_id = ingest_config_ids[0]

        #@todo - what is it okay to update?
        self.event_publisher.publish_event(
            origin=ingest_config_id,
            description=dataset_ingestion_configuration.description,
            configuration=dataset_ingestion_configuration.configuration,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dset_ingest_config_id)

    def read_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Get an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @retval dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """

        log.debug("Reading dataset configuration")
        dataset_ingestion_configuration = self.clients.resource_registry.read(
            dataset_ingestion_configuration_id)

        return dataset_ingestion_configuration

    def delete_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Delete an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        """

        dataset_ingestion_configuration = self.clients.resource_registry.read(
            dataset_ingestion_configuration_id)

        log.debug("Deleting dataset configuration")
        self.clients.resource_registry.delete(
            dataset_ingestion_configuration_id)

        ingest_config_ids, association_ids = self.clients.resource_registry.find_objects(
            dataset_ingestion_configuration_id,
            PRED.hasIngestionConfiguration,
            id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException(
                'The dataset ingestion configuration is associated with more than one ingestion configuration!'
            )

        ingest_config_id = ingest_config_ids[0]

        self.clients.resource_registry.delete_association(
            association=association_ids[0])

        self.event_publisher.publish_event(
            origin=ingest_config_id,
            configuration=dataset_ingestion_configuration.configuration,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dataset_ingestion_configuration_id,
            deleted=True)
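
A hedged end-to-end sketch of the dataset-configuration lifecycle defined above. The ims_client name and the ids passed in are illustrative; only the method signatures come from the service code:

def _example_dataset_config_lifecycle(ims_client, dataset_id, ingestion_configuration_id):
    # create a per-dataset ingestion configuration and associate it
    dset_cfg_id = ims_client.create_dataset_configuration(
        dataset_id=dataset_id,
        archive_data=True,
        archive_metadata=True,
        ingestion_configuration_id=ingestion_configuration_id)
    # read it back to confirm it was persisted
    dset_cfg = ims_client.read_dataset_config(dset_cfg_id)
    # clean up: delete the dataset configuration and its association
    ims_client.delete_dataset_config(dset_cfg_id)
    return dset_cfg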
Example #53
0
    def test_perf(self):
        _io_serializer = IonObjectSerializer()
        _io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())

        def time_serialize(test_obj, name="?", has_ion=False):
            with time_it(name + ", serialize"):
                os = _io_serializer.serialize(test_obj)

            with time_it(name + ", deserialize"):
                os2 = _io_deserializer.deserialize(os)

            count_objs(os)

            if has_ion:
                test_obj = os

            with time_it(name + ", json.dumps"):
                oj = json.dumps(test_obj)

            with time_it(name + ", json.loads"):
                o2 = json.loads(oj)
            log.info("  len(json): %s", len(oj))

            with time_it(name + ", simplejson.dumps"):
                oj = simplejson.dumps(test_obj)

            with time_it(name + ", simplejson.loads"):
                o2 = simplejson.loads(oj)
            log.info("  len(simplejson): %s", len(oj))

            with time_it(name + ", msgpack.packb"):
                o1 = msgpack.packb(test_obj)

            with time_it(name + ", msgpack.unpackb"):
                o2 = msgpack.unpackb(o1, use_list=1)
            log.info("  len(msgpack): %s", len(o1))

            # with time_it(name + ", pickle.dumps"):
            #     op = pickle.dumps(test_obj)
            #
            # with time_it(name + ", pickle.loads"):
            #     o2 = pickle.loads(op)
            # log.info("  len(pickle): %s", len(op))
            #
            # with time_it(name + ", cPickle.dumps"):
            #     op = cPickle.dumps(test_obj)
            #
            # with time_it(name + ", cPickle.loads"):
            #     o2 = cPickle.loads(op)
            # log.info("  len(cPickle): %s", len(op))

            log.info("----------------")

        # Large nested
        with time_it("highly nested dict/list, create"):
            test_obj = create_test_object(4,
                                          4,
                                          do_list=False,
                                          uvals=True,
                                          ukeys=True)

        time_serialize(test_obj, "highly nested dict/list")

        # Nested
        with time_it("nested dict/list, create"):
            test_obj = create_test_object(3,
                                          40,
                                          do_list=True,
                                          uvals=False,
                                          ukeys=False)

        time_serialize(test_obj, "nested dict/list")

        # Large string
        #value = ''.join(random.choice(allowed_chars) for x in xrange(1460000))
        value = ''.join(random.choice(allowed_chars) for x in xrange(500000))

        time_serialize(value, "long string")

        # ION
        with time_it("create ion"):
            test_obj1 = create_test_object(2,
                                           200,
                                           do_ion=True,
                                           do_list=False,
                                           do_dict=True,
                                           obj_validate=False)

        count_objs(test_obj1)
        time_serialize(test_obj1, "dict of ion nested", has_ion=True)

        from pyon.core.interceptor.interceptor import Invocation
        from pyon.core.interceptor.encode import EncodeInterceptor
        encode = EncodeInterceptor()
        invocation = Invocation()
        invocation.message = test_obj1

        with time_it("ion object, encode"):
            encode.outgoing(invocation)

        with time_it("ion object, decode"):
            encode.incoming(invocation)

        count_objs(invocation.message)

        # ION
        with time_it("create ion unicode"):
            test_obj1 = create_test_object(2,
                                           200,
                                           do_ion=True,
                                           do_list=False,
                                           do_dict=True,
                                           obj_validate=False,
                                           uvals=True,
                                           ukeys=True)

        count_objs(test_obj1)
        time_serialize(test_obj1, "dict of ion nested unicode", has_ion=True)

        # Create objects with validation on
        with time_it("create ion calidated"):
            test_obj1 = create_test_object(2,
                                           200,
                                           do_ion=True,
                                           do_list=False,
                                           do_dict=True,
                                           obj_validate=True)

        count_objs(test_obj1)
        time_serialize(test_obj1, "dict of ion nested validated", has_ion=True)
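
time_it and count_objs are helpers assumed by the perf test above. A minimal sketch of a compatible time_it context manager, assuming the module-level log used elsewhere in these examples (an assumption, not the project's implementation):

from contextlib import contextmanager
import time

@contextmanager
def time_it(name="?"):
    # Times the enclosed block and logs elapsed wall-clock seconds (assumed helper).
    start = time.time()
    try:
        yield
    finally:
        log.info("%s: %.6f s", name, time.time() - start)
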
    def _generate_stream_config(self):
        log.debug("_generate_stream_config for %s",
                  self.agent_instance_obj.name)
        dsm = self.clients.dataset_management
        psm = self.clients.pubsub_management

        agent_obj = self._get_agent()
        device_obj = self._get_device()

        streams_dict = {}
        for stream_cfg in agent_obj.stream_configurations:
            #create a stream def for each param dict to match against the existing data products
            streams_dict[stream_cfg.stream_name] = {
                'param_dict_name': stream_cfg.parameter_dictionary_name
            }

        #retrieve the output products
        # TODO: What about platforms? other things?
        device_id = device_obj._id
        data_product_objs = self.RR2.find_data_products_of_instrument_device_using_has_output_product(
            device_id)

        stream_config = {}
        for d in data_product_objs:
            stream_def_id = self.RR2.find_stream_definition_id_of_data_product_using_has_stream_definition(
                d._id)
            for stream_name, stream_info_dict in streams_dict.items():
                # read objects from cache to be compared
                pdict = self.RR2.find_resource_by_name(
                    RT.ParameterDictionary,
                    stream_info_dict.get('param_dict_name'))
                stream_def_id = self._find_streamdef_for_dp_and_pdict(
                    d._id, pdict._id)

                if stream_def_id:
                    #model_param_dict = self.RR2.find_resources_by_name(RT.ParameterDictionary,
                    #                                         stream_info_dict.get('param_dict_name'))[0]
                    #model_param_dict = self._get_param_dict_by_name(stream_info_dict.get('param_dict_name'))
                    #stream_route = self.RR2.read(product_stream_id).stream_route
                    product_stream_id = self.RR2.find_stream_id_of_data_product_using_has_stream(
                        d._id)
                    stream_def = psm.read_stream_definition(stream_def_id)
                    stream_route = psm.read_stream_route(
                        stream_id=product_stream_id)

                    from pyon.core.object import IonObjectSerializer
                    stream_def_dict = IonObjectSerializer().serialize(
                        stream_def)
                    stream_def_dict.pop('type_')

                    if stream_name in stream_config:
                        log.warn("Overwriting stream_config[%s]", stream_name)

                    stream_config[stream_name] = {
                        'routing_key': stream_route.routing_key,  # TODO: Serialize stream_route together
                        'stream_id': product_stream_id,
                        'stream_definition_ref': stream_def_id,
                        'stream_def_dict': stream_def_dict,
                        'exchange_point': stream_route.exchange_point,
                        # TODO: This is redundant and very large - the param dict is in the stream_def_dict ???
                        'parameter_dictionary': stream_def.parameter_dictionary,
                    }

        log.debug("Stream config generated")
        log.trace("generate_stream_config: %s", stream_config)
        return stream_config
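
A hedged sketch of consuming the per-stream entries _generate_stream_config returns; the helper name is illustrative, the keys come from the code above:

def _example_consume_stream_config(stream_config):
    # iterate the generated config and pull out the routing information
    for stream_name, cfg in stream_config.items():
        log.info("stream %s routes via %s on exchange point %s",
                 stream_name, cfg['routing_key'], cfg['exchange_point'])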