def test_complex_version(self):
    """
    Check reading data saved as SampleComplexEvent back as SampleComplexEvent_V2.

    The serialized dict is re-labelled with the V2 type name, deserialized, and
    then serialized again. The test asserts that the added attribute gets its
    default value, that existing attribute values survive, and that
    persisted_version only changes on the second serialize call.
    """
    io_serializer = IonObjectSerializer()
    obj = IonObject('SampleComplexEvent', {'num': 9, 'other_field': 'test value'})
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible here; presumably it enables persisted_version handling - confirm
    # against IonObjectSerializer.serialize.
    obj_dict = io_serializer.serialize(obj, True)
    self.assertEqual(obj_dict['persisted_version'], 1)

    # simulate a previous version data of SampleComplexEvent_V2
    obj_dict['type_'] = 'SampleComplexEvent_V2'

    # verify that the simulated previous version data has resource
    self.assertIn('resource', obj_dict)
    # verify that the simulated previous version data does not have new_resource
    self.assertNotIn('new_resource', obj_dict)

    # simulate reading the previous version that does not have new_resource
    io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
    obj = io_deserializer.deserialize(obj_dict)

    # verify that new attribute is added and initialized with default value
    self.assertEqual(obj.new_resource.new_attribute['key'], 'value')
    # verify that old attributes are still there
    self.assertEqual(obj.num, 9)
    self.assertEqual(obj.other_field, 'test value')
    # verify that on read version is not yet updated
    self.assertEqual(obj_dict['persisted_version'], 1)

    # simulate create/update
    obj_dict = io_serializer.serialize(obj, True)
    # verify that version is updated
    self.assertEqual(obj_dict['persisted_version'], 2)
def test_persisted_version(self):
    """
    Check reading data saved as SampleResource back as SampleResource_V2.

    The serialized dict is re-labelled with the V2 type name, deserialized, and
    serialized again. The test asserts that new_attribute appears with its
    default value, that num and other_field keep their values, and that
    persisted_version moves from 1 to 2 only on the second serialize call.
    """
    # create an initial version of SampleResource
    io_serializer = IonObjectSerializer()
    obj = IonObject('SampleResource', {'num': 9, 'other_field': 'test value'})
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible here; presumably it enables persisted_version handling - confirm
    # against IonObjectSerializer.serialize.
    obj_dict = io_serializer.serialize(obj, True)
    self.assertEqual(obj_dict['persisted_version'], 1)
    # verify that the simulated previous version does not have new_attribute
    self.assertNotIn('new_attribute', obj_dict)

    # simulate version increment to SampleResource that adds new_attribute
    obj_dict['type_'] = 'SampleResource_V2'

    # simulate reading the previous version after version increment
    io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
    obj = io_deserializer.deserialize(obj_dict)

    # verify that "new_attribute" is added and initialized with default value
    self.assertEqual(obj.new_attribute['key'], 'value')
    # verify that old attributes are still there and retain values
    self.assertEqual(obj.num, 9)
    self.assertEqual(obj.other_field, 'test value')
    # verify that persisted_version is not updated at read
    self.assertEqual(obj_dict['persisted_version'], 1)

    # simulate update
    obj_dict = io_serializer.serialize(obj, True)
    # verify that version is updated
    self.assertEqual(obj_dict['persisted_version'], 2)
def test_complex_version_del_attrib(self):
    """
    Check reading SampleComplexEvent_V2 data back as SampleComplexEvent_V3.

    After the type name is switched to V3, the test asserts that the nested
    new_resource is rebuilt with default values (its old values and
    new_attribute are gone), that the outer object's own attributes keep
    their values, and that the outer persisted_version only changes on the
    next serialize call.
    """
    io_serializer = IonObjectSerializer()

    # verify that extraneous fields given while creating an IonObject raises an error.
    with self.assertRaises(AttributeError):
        IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value', 'more_new_resource': {'key': 'value'}})

    obj = IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value', 'new_resource': {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}}})

    # create simulated saved data
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible here; presumably it enables persisted_version handling - confirm
    # against IonObjectSerializer.serialize.
    obj_dict = io_serializer.serialize(obj, True)
    self.assertEqual(obj_dict['persisted_version'], 2)

    # simulate a next version data of SampleComplexEvent_V2
    obj_dict['type_'] = 'SampleComplexEvent_V3'

    # verify that the simulated previous version data does have new_resource
    self.assertIn('new_resource', obj_dict)
    # note the schema version of new_resource
    self.assertEqual(obj_dict['new_resource']['persisted_version'], 2)

    # simulate reading the next version that has a new type of new_resource
    io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
    obj = io_deserializer.deserialize(obj_dict)

    # verify that new_resource exists
    self.assertIn('new_resource', obj)
    # however, verify that new_resource does not have new_attribute since type of new_resource has changed
    self.assertNotIn('new_attribute', obj.new_resource)
    # verify that the new type of new_resource has another_new_attribute that is initialized to default data
    self.assertEqual(obj.new_resource.another_new_attribute['key'], 'new_value')
    # verify on read that the schema version of new_resource replaces the old persisted_version
    self.assertEqual(obj.new_resource.persisted_version, 3)
    # verify that old attributes values of new_resource have been thrown away
    self.assertNotEqual(obj.new_resource.num, 9)
    # verify that attributes values of new_resource have been initialized to default values
    self.assertEqual(obj.new_resource.num, 0)

    # However, verify that old attributes of the resource (SampleComplexEvent) are still there
    self.assertEqual(obj.num, 9)
    self.assertEqual(obj.other_field, 'test value')
    # verify that on read, version is not yet updated
    self.assertEqual(obj.persisted_version, 2)

    # simulate create/update
    obj_dict = io_serializer.serialize(obj, True)
    # verify that version is updated
    self.assertEqual(obj_dict['persisted_version'], 3)
    # verify that version is updated for the subsumed object
    self.assertEqual(obj_dict['new_resource']['persisted_version'], 3)
def test_attribute_version(self):
    """
    Check reading a V3 event whose nested new_resource moved from V2 to V3.

    Both the outer type and the nested new_resource type are re-labelled to
    their V3 names before deserializing. The test asserts that the nested
    object keeps its old values, drops new_attribute, and gains
    another_new_attribute with its default. The nested persisted_version is
    still 2 after the read and becomes 3 on the next serialize call.
    """
    io_serializer = IonObjectSerializer()

    # verify that extraneous fields given while creating an IonObject raises an error.
    with self.assertRaises(AttributeError):
        IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value', 'more_new_resource': {'key': 'value'}})

    obj = IonObject('SampleComplexEvent_V2', {'num': 9, 'other_field': 'test value', 'new_resource': {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}}})
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible here; presumably it enables persisted_version handling - confirm
    # against IonObjectSerializer.serialize.
    obj_dict = io_serializer.serialize(obj, True)
    self.assertEqual(obj_dict['persisted_version'], 2)

    # verify that the simulated previous version data does have new_resource
    self.assertIn('new_resource', obj_dict)
    # verify that the new_resource has type SampleResource_V2
    self.assertEqual(obj_dict['new_resource']['type_'], "SampleResource_V2")

    # set type to SampleComplexEvent_V3
    obj_dict['type_'] = "SampleComplexEvent_V3"
    obj_dict['persisted_version'] = 3
    # set new_resource's type to SampleResource_V3
    # so we pretend that version, not the type, of the attribute has been changed
    obj_dict['new_resource']['type_'] = "SampleResource_V3"

    # simulate reading SampleComplexEvent_V3 after a new version of new_resource has been introduced
    io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
    obj = io_deserializer.deserialize(obj_dict)

    # verify that new resource is not deleted
    self.assertIn('new_resource', obj)
    # verify that new resource does not have new_attribute
    self.assertNotIn('new_attribute', obj.new_resource)
    # verify that the next version of new_resource has default data in the another_new_attribute
    self.assertEqual(obj.new_resource.another_new_attribute['key'], 'new_value')
    # verify that old attributes values of new_resource have not been thrown away
    self.assertEqual(obj.new_resource.num, 9)
    # verify that values from old attributes of SampleComplexEvent_V2 are still there
    self.assertEqual(obj.num, 9)
    self.assertEqual(obj.other_field, 'test value')
    # verify that on read version is not yet updated for the subsumed object
    self.assertEqual(obj.new_resource.persisted_version, 2)

    # simulate create/update
    obj_dict = io_serializer.serialize(obj, True)
    # verify that versions are unchanged
    self.assertEqual(obj_dict['persisted_version'], 3)
    # verify that versions are updated in the subsumed object
    self.assertEqual(obj_dict['new_resource']['persisted_version'], 3)
def _build_stream_config(self):
    """
    Create the 'parsed' and 'raw' streams and store their configuration.

    For each stream, in that order, this looks up the parameter dictionary id
    by name, creates a stream definition and reads it back, serializes the
    definition, and creates the stream on the 'science_data' exchange point.
    The result is stored in self._stream_config[stream_name] as a dict with
    the keys routing_key, exchange_point, stream_id, parameter_dictionary and
    stream_def_dict. Any previous self._stream_config is replaced.
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)
    dataset_management = DatasetManagementServiceClient()
    encoder = IonObjectSerializer()

    # (stream name, parameter dictionary name) for each stream named in driver.
    stream_specs = (
        ('parsed', 'ctd_parsed_param_dict'),
        ('raw', 'ctd_raw_param_dict'),
    )

    # Create streams and subscriptions for each stream named in driver.
    self._stream_config = {}

    for stream_name, param_dict_name in stream_specs:
        pd_id = dataset_management.read_parameter_dictionary_by_name(
            param_dict_name, id_only=True)

        stream_def_id = pubsub_client.create_stream_definition(
            name=stream_name, parameter_dictionary_id=pd_id)
        stream_def = pubsub_client.read_stream_definition(stream_def_id)
        stream_def_dict = encoder.serialize(stream_def)
        pd = stream_def.parameter_dictionary

        stream_id, stream_route = pubsub_client.create_stream(
            name=stream_name,
            exchange_point='science_data',
            stream_definition_id=stream_def_id)

        self._stream_config[stream_name] = dict(
            routing_key=stream_route.routing_key,
            exchange_point=stream_route.exchange_point,
            stream_id=stream_id,
            parameter_dictionary=pd,
            stream_def_dict=stream_def_dict)
def _build_stream_config(self):
    """
    Create the "parsed" and "raw" streams and store their configuration.

    For each stream, in that order, this looks up the parameter dictionary id
    by name, creates a stream definition and reads it back, serializes the
    definition, and creates the stream on the "science_data" exchange point.
    The result is stored in self._stream_config[stream_name] as a dict with
    the keys routing_key, exchange_point, stream_id, parameter_dictionary and
    stream_def_dict. Any previous self._stream_config is replaced.
    """
    # Create a pubsub client to create streams.
    pubsub_client = PubsubManagementServiceClient(node=self.container.node)
    dataset_management = DatasetManagementServiceClient()
    encoder = IonObjectSerializer()

    # (stream name, parameter dictionary name) for each stream named in driver.
    stream_specs = (
        ("parsed", "ctd_parsed_param_dict"),
        ("raw", "ctd_raw_param_dict"),
    )

    # Create streams and subscriptions for each stream named in driver.
    self._stream_config = {}

    for stream_name, param_dict_name in stream_specs:
        pd_id = dataset_management.read_parameter_dictionary_by_name(param_dict_name, id_only=True)

        stream_def_id = pubsub_client.create_stream_definition(name=stream_name, parameter_dictionary_id=pd_id)
        stream_def = pubsub_client.read_stream_definition(stream_def_id)
        stream_def_dict = encoder.serialize(stream_def)
        pd = stream_def.parameter_dictionary

        stream_id, stream_route = pubsub_client.create_stream(
            name=stream_name, exchange_point="science_data", stream_definition_id=stream_def_id
        )

        self._stream_config[stream_name] = dict(
            routing_key=stream_route.routing_key,
            exchange_point=stream_route.exchange_point,
            stream_id=stream_id,
            parameter_dictionary=pd,
            stream_def_dict=stream_def_dict,
        )
def _process_gateway_request(resource_id, operation, json_request, requester):
    """
    Send an agent request through the gateway and raise on a reported error.

    The request is serialized with IonObjectSerializer, dumped to JSON and
    passed to _agent_gateway_request under the path "<resource_id>/<operation>".

    resource_id  -- first path segment of the gateway request
    operation    -- second path segment of the gateway request
    json_request -- request dict; when requester is given it is modified in
                    place (json_request["agentRequest"]["requester"] is set)
    requester    -- requester value to insert, or None to leave the request as is

    Raises the exception class named by the gateway error if pyex has an
    attribute of that name, otherwise a plain Exception, in both cases with
    the gateway's error message.

    This block has no return statement, so it returns None.
    """
    if requester is not None:
        json_request["agentRequest"]["requester"] = requester

    serializer = IonObjectSerializer()
    payload = simplejson.dumps(serializer.serialize(json_request))

    response = _agent_gateway_request(resource_id + '/' + operation, payload)

    # "in" instead of dict.has_key(): has_key no longer exists on Python 3.
    if GATEWAY_ERROR in response['data']:
        log.error(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        #raise BadRequest(response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE])
        ex_cls = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_EXCEPTION]
        ex_msg = response['data'][GATEWAY_ERROR][GATEWAY_ERROR_MESSAGE]
        # Re-raise as the matching exception type from pyex when one exists.
        if hasattr(pyex, ex_cls):
            raise getattr(pyex, ex_cls)(ex_msg)
        else:
            raise Exception(ex_msg)

    try:
        if "type_" in response['data'][GATEWAY_RESPONSE]:
            del response['data'][GATEWAY_RESPONSE]["type_"]
    except Exception:
        # Best effort: a response without a strippable "type_" entry is left
        # unchanged, but the failure is recorded rather than silently dropped.
        log.debug("Could not strip 'type_' from gateway response", exc_info=True)
class CodecInterceptor(Interceptor):
    """
    Transforms IonObject <-> dict
    """

    def __init__(self):
        """Set up the serializer/deserializer pair used for every invocation."""
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    def outgoing(self, invocation):
        """Replace invocation.message with its serialized form and return invocation."""
        #log.debug("CodecInterceptor.outgoing: %s", invocation)
        #log.debug("Payload, pre-transform: %s", invocation.message)
        serialized = self._io_serializer.serialize(invocation.message)
        invocation.message = serialized
        #log.debug("Payload, post-transform: %s", invocation.message)
        return invocation

    def incoming(self, invocation):
        """Replace invocation.message with its deserialized form and return invocation."""
        #log.debug("CodecInterceptor.incoming: %s", invocation)
        #log.debug("Payload, pre-transform: %s", invocation.message)
        invocation.message = self._io_deserializer.deserialize(invocation.message)
        #log.debug("Payload, post-transform: %s", invocation.message)
        return invocation
class CodecInterceptor(Interceptor):
    """
    Transforms IonObject <-> dict
    """

    def __init__(self):
        """Set up the serializer/deserializer pair used for every invocation."""
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        registry = get_obj_registry()
        self._io_deserializer = IonObjectDeserializer(obj_registry=registry)

    def outgoing(self, invocation):
        """Serialize invocation.message in place, logging it before and after."""
        log.debug("CodecInterceptor.outgoing: %s", invocation)
        log.debug("Payload, pre-transform: %s", invocation.message)
        serialized = self._io_serializer.serialize(invocation.message)
        invocation.message = serialized
        log.debug("Payload, post-transform: %s", invocation.message)
        return invocation

    def incoming(self, invocation):
        """Deserialize invocation.message in place, logging it before and after."""
        log.debug("CodecInterceptor.incoming: %s", invocation)
        raw_message = invocation.message
        log.debug("Payload, pre-transform: %s", raw_message)
        decoded = self._io_deserializer.deserialize(raw_message)
        invocation.message = decoded
        log.debug("Payload, post-transform: %s", invocation.message)
        return invocation
def obj_to_tree(definition):
    """
    Build a DefinitionTree from a StreamDefinitionContainer.

    The container is serialized first; the tree is then traversed starting at
    the serialized container's 'data_stream_id' entry. Returns None when
    definition is not a StreamDefinitionContainer.
    """
    from pyon.core.object import IonObjectSerializer

    if isinstance(definition, StreamDefinitionContainer):
        flat_definition = IonObjectSerializer().serialize(definition)
        root_id = flat_definition['data_stream_id']
        return DefinitionTree.traverse(flat_definition, root_id)

    return None
def _serialize_port_assigments(self, port_assignments=None):
    """
    Serialize each value of a port assignment dict.

    port_assignments -- dict mapping device id to a platform port object.
                        Anything that is not a dict (including None) is
                        treated as "no assignments".

    Returns a new dict with the same keys, where each value is the result of
    IonObjectSerializer.serialize on the original value. An empty dict is
    returned when port_assignments is not a dict.
    """
    serializer = IonObjectSerializer()
    serialized_port_assignments = {}

    if isinstance(port_assignments, dict):
        # items() rather than iteritems(): the latter exists only on Python 2.
        for device_id, platform_port in port_assignments.items():
            serialized_port_assignments[device_id] = serializer.serialize(platform_port)

    return serialized_port_assignments
def obj_to_tree(definition):
    """
    Convert a StreamDefinitionContainer into a DefinitionTree.

    Non-container input yields None. Otherwise the container is serialized
    and DefinitionTree.traverse is started from the serialized container's
    'data_stream_id' value.
    """
    from pyon.core.object import IonObjectSerializer

    is_container = isinstance(definition, StreamDefinitionContainer)
    if not is_container:
        return None

    serialized = IonObjectSerializer().serialize(definition)
    return DefinitionTree.traverse(serialized, serialized['data_stream_id'])
def size(self):
    '''
    Return the size in bytes of this object's granule.

    The size is measured the expensive way: the granule is built, serialized
    and packed with msgpack, and the length of the packed result is returned.
    '''
    granule = self.to_granule()
    packed = msgpack.packb(IonObjectSerializer().serialize(granule), default=encode_ion)
    return len(packed)
class IonSerializerDictionaryRepresentation(Representation):
    """
    Representation that converts objects to and from dicts using
    IonObjectSerializer and IonObjectDeserializer.
    """

    def __init__(self, id_factory):
        """
        id_factory -- object whose create_id() supplies the '_id' value that
                      encode() adds when asked to.
        """
        self.encoder = IonObjectSerializer()
        self.decoder = IonObjectDeserializer(obj_registry=get_obj_registry())
        self.id_factory = id_factory

    def encode(self, obj, add_id=False):
        """
        Serialize obj and return the result.

        When add_id is true and the serialized form has no '_id' key, a new id
        from the id factory is stored under '_id'. An existing '_id' is kept.
        """
        out = self.encoder.serialize(obj)
        # Membership test directly on the dict; .keys() would build a list on Python 2.
        if add_id and '_id' not in out:
            out['_id'] = self.id_factory.create_id()
        return out

    def decode(self, data):
        """Deserialize data and return the result."""
        return self.decoder.deserialize(data)
def _process_gateway_request(service_name, operation, json_request, requester):
    """
    Send a service request through the gateway and return its response.

    When requester is not None it is written into
    json_request["serviceRequest"]["requester"], modifying json_request in
    place. The request is then serialized, dumped to JSON and passed to
    _service_gateway_request under the path "<service_name>/<operation>".
    """
    if requester is not None:
        json_request["serviceRequest"]["requester"] = requester

    serialized_request = IonObjectSerializer().serialize(json_request)
    payload = simplejson.dumps(serialized_request)

    request_path = service_name + '/' + operation
    return _service_gateway_request(request_path, payload)
def test_version_del_attrib(self):
    """
    Check reading SampleResource_V2 data back as SampleResource_V3.

    After the type name is switched to V3 and the dict is deserialized, the
    test asserts that new_attribute is gone, that another_new_attribute has
    its default value, that num and other_field keep their values, and that
    persisted_version moves from 2 to 3 only on the next serialize call.
    """
    io_serializer = IonObjectSerializer()

    # verify that extraneous fields given while creating an IonObject raises an error.
    with self.assertRaises(AttributeError):
        IonObject('SampleResource_V2', {'num': 9, 'other_field': 'test value', 'more_new_attribute': {'key': 'value'}})

    # simulate creating a version 2 of SampleResource that has "new_attribute"
    obj = IonObject('SampleResource_V2', {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}})
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible here; presumably it enables persisted_version handling - confirm
    # against IonObjectSerializer.serialize.
    obj_dict = io_serializer.serialize(obj, True)
    # verify that version is 2
    self.assertEqual(obj_dict['persisted_version'], 2)
    # verify that the simulated version 2 data does have new_attribute
    self.assertIn('new_attribute', obj_dict)

    # simulate incrementing to version 3 that does not have "new_attribute"
    obj_dict['type_'] = 'SampleResource_V3'

    # simulate reading after version increment to 3
    io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
    obj = io_deserializer.deserialize(obj_dict)

    # verify that new attribute is deleted
    self.assertNotIn('new_attribute', obj)
    # verify that the simulated next version data does have another_new_attribute
    self.assertEqual(obj.another_new_attribute['key'], 'new_value')
    # verify that old attributes are still there and retain their data
    self.assertEqual(obj.num, 9)
    self.assertEqual(obj.other_field, 'test value')
    # verify that persisted_version is not yet updated i.e. it is still 2
    self.assertEqual(obj_dict['persisted_version'], 2)

    # simulate update
    obj_dict = io_serializer.serialize(obj, True)
    # verify that version is updated
    self.assertEqual(obj_dict['persisted_version'], 3)
def test_event_version_del_attrib(self):
    """
    Check reading SampleEvent_V2 data back as SampleEvent_V3.

    After the type name is switched to V3 and the dict is deserialized, the
    test asserts that new_attribute is gone, that another_new_attribute has
    its default value, that num and other_field keep their values, and that
    persisted_version moves from 2 to 3 only on the next serialize call.
    """
    io_serializer = IonObjectSerializer()

    # verify that extraneous fields given while creating an IonObject raises an error.
    with self.assertRaises(AttributeError):
        IonObject('SampleEvent_V2', {'num': 9, 'other_field': 'test value', 'more_new_attribute': {'key': 'value'}})

    obj = IonObject('SampleEvent_V2', {'num': 9, 'other_field': 'test value', 'new_attribute': {'key': 'value'}})
    # NOTE(review): the meaning of the second positional argument (True) is not
    # visible here; presumably it enables persisted_version handling - confirm
    # against IonObjectSerializer.serialize.
    obj_dict = io_serializer.serialize(obj, True)
    self.assertEqual(obj_dict['persisted_version'], 2)

    # simulate a next version data of SampleEvent_V2
    obj_dict['type_'] = 'SampleEvent_V3'

    # verify that the simulated previous version data does have new_attribute
    self.assertIn('new_attribute', obj_dict)

    # simulate reading the next version that does not have new_attribute
    io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())
    obj = io_deserializer.deserialize(obj_dict)

    # verify that new attribute is deleted
    self.assertNotIn('new_attribute', obj)
    # verify that the simulated next version data does have another_new_attribute
    self.assertEqual(obj.another_new_attribute['key'], 'new_value')
    # verify that old attributes are still there
    self.assertEqual(obj.num, 9)
    self.assertEqual(obj.other_field, 'test value')
    # verify that on read version is not yet updated
    self.assertEqual(obj_dict['persisted_version'], 2)

    # simulate create/update
    obj_dict = io_serializer.serialize(obj, True)
    # verify that version is updated
    self.assertEqual(obj_dict['persisted_version'], 3)
class CodecInterceptor(Interceptor):
    """
    Transforms IonObject <-> dict
    """

    def __init__(self):
        """Set up the serializer/deserializer pair used for every invocation."""
        Interceptor.__init__(self)
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=obj_registry)

    def outgoing(self, invocation):
        """Serialize invocation.message in place, logging it before and after."""
        log.debug("CodecInterceptor.outgoing: %s", invocation)
        log.debug("Payload, pre-transform: %s", invocation.message)
        serialized = self._io_serializer.serialize(invocation.message)
        invocation.message = serialized
        log.debug("Payload, post-transform: %s", invocation.message)
        return invocation

    def incoming(self, invocation):
        """
        Deserialize invocation.message in place, logging it before and after.

        Before deserializing, the payload is passed through walk() with a
        visitor that turns tuples into lists.
        """
        log.debug("CodecInterceptor.incoming: %s", invocation)
        raw_message = invocation.message
        log.debug("Payload, pre-transform: %s", raw_message)

        # Horrible, hacky workaround for msgpack issue
        # See http://jira.msgpack.org/browse/MSGPACK-15
        #@todo replace this with use_list in msgpack.unpackb !!!
        def _tuple_to_list(item):
            # Tuples become lists; every other value passes through untouched.
            return list(item) if isinstance(item, tuple) else item

        listified = walk(raw_message, _tuple_to_list)
        invocation.message = self._io_deserializer.deserialize(listified)
        log.debug("Payload, post-transform: %s", invocation.message)
        return invocation
class PostgresPyonDataStore(PostgresDataStore): """ Base class common to both CouchDB and Couchbase datastores. """ def __init__(self, datastore_name=None, config=None, scope=None, profile=None): """ @param datastore_name Name of datastore within server. May be scoped to sysname @param config A server config dict with connection params @param scope Prefix for the datastore name (e.g. sysname) to separate multiple systems """ PostgresDataStore.__init__(self, datastore_name=datastore_name, config=config or CFG.get_safe("server.postgresql"), profile=profile or DataStore.DS_PROFILE.BASIC, scope=scope) # IonObject Serializers self._io_serializer = IonObjectSerializer() self._io_deserializer = IonObjectDeserializer( obj_registry=get_obj_registry()) # ------------------------------------------------------------------------- # Couch document operations def create(self, obj, object_id=None, attachments=None, datastore_name=""): """ Converts ion objects to python dictionary before persisting them using the optional suggested identifier and creates attachments to the object. 
Returns an identifier and revision number of the object """ if not isinstance(obj, IonObjectBase): raise BadRequest("Obj param is not instance of IonObjectBase") return self.create_doc(self._ion_object_to_persistence_dict(obj), object_id=object_id, datastore_name=datastore_name, attachments=attachments) def create_mult(self, objects, object_ids=None, allow_ids=None): if any([not isinstance(obj, IonObjectBase) for obj in objects]): raise BadRequest("Obj param is not instance of IonObjectBase") return self.create_doc_mult( [self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids) def update(self, obj, datastore_name=""): if not isinstance(obj, IonObjectBase): raise BadRequest("Obj param is not instance of IonObjectBase") return self.update_doc(self._ion_object_to_persistence_dict(obj)) def update_mult(self, objects): if any([not isinstance(obj, IonObjectBase) for obj in objects]): raise BadRequest("Obj param is not instance of IonObjectBase") return self.update_doc_mult( [self._ion_object_to_persistence_dict(obj) for obj in objects]) def read(self, object_id, rev_id="", datastore_name="", object_type=None): if not isinstance(object_id, str): raise BadRequest("Object id param is not string") doc = self.read_doc(object_id, rev_id, datastore_name=datastore_name, object_type=object_type) obj = self._persistence_dict_to_ion_object(doc) return obj def read_mult(self, object_ids, datastore_name="", strict=True): if any([not isinstance(object_id, str) for object_id in object_ids]): raise BadRequest("Object ids are not string: %s" % str(object_ids)) docs = self.read_doc_mult(object_ids, datastore_name, strict=strict) obj_list = [ self._persistence_dict_to_ion_object(doc) if doc is not None else None for doc in docs ] return obj_list def delete(self, obj, datastore_name="", object_type=None): if not isinstance(obj, IonObjectBase) and not isinstance(obj, str): raise BadRequest( "Obj param is not instance of IonObjectBase or string id") if type(obj) is str: 
self.delete_doc(obj, datastore_name=datastore_name, object_type=object_type) else: if '_id' not in obj: raise BadRequest("Doc must have '_id'") if '_rev' not in obj: raise BadRequest("Doc must have '_rev'") self.delete_doc(self._ion_object_to_persistence_dict(obj), datastore_name=datastore_name, object_type=object_type) def delete_mult(self, object_ids, datastore_name=None): return self.delete_doc_mult(object_ids, datastore_name) # ------------------------------------------------------------------------- # View operations def find_objects_mult(self, subjects, id_only=False, predicate=None, access_args=None): """ Returns a list of associations for a given list of subjects """ # TODO: Port this implementation to Postgres single query res_list = [[], []] if not subjects: return res_list for sub in subjects: res_ids, res_assocs = self.find_objects(subject=sub, id_only=id_only, predicate=predicate, access_args=access_args) res_list[0].extend(res_ids) res_list[1].extend(res_assocs) return res_list def find_subjects_mult(self, objects, id_only=False, predicate=None, access_args=None): """ Returns a list of associations for a given list of objects """ # TODO: Port this implementation to Postgres single query res_list = [[], []] if not objects: return res_list for obj in objects: res_ids, res_assocs = self.find_subjects(obj=obj, id_only=id_only, predicate=predicate, access_args=access_args) res_list[0].extend(res_ids) res_list[1].extend(res_assocs) return res_list def find_objects(self, subject, predicate=None, object_type=None, id_only=False, access_args=None, **kwargs): #log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s", subject, predicate, object_type, id_only) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not subject: raise BadRequest("Must provide subject") if object_type and not predicate: raise BadRequest("Cannot provide object type without a predicate") if type(subject) is str: 
subject_id = subject else: if "_id" not in subject: raise BadRequest("Object id not available in subject") else: subject_id = subject._id qual_ds_name = self._get_datastore_name() assoc_table_name = qual_ds_name + "_assoc" table_names = dict(ds=qual_ds_name, dsa=assoc_table_name) view_args = self._get_view_args(kwargs, access_args) if id_only: #query = "SELECT o, doc FROM %(dsa)s WHERE retired<>true " % table_names query = "SELECT %(dsa)s.o, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names else: query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names query_args = dict(s=subject_id, ot=object_type, p=predicate) query_clause = "AND s=%(s)s" if predicate: query_clause += " AND p=%(p)s" if object_type: query_clause += " AND ot=%(ot)s" query_clause = self._add_access_filter(access_args, qual_ds_name, query_clause, query_args) extra_clause = view_args.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() obj_assocs = [ self._persistence_dict_to_ion_object(row[-1]) for row in rows ] #log.debug("find_objects() found %s objects", len(obj_assocs)) if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids, obj_assocs else: res_objs = [ self._persistence_dict_to_ion_object(row[0]) for row in rows ] return res_objs, obj_assocs def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False, access_args=None, **kwargs): #log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s", subject_type, predicate, obj, id_only) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not obj: raise BadRequest("Must provide object") if subject_type and not predicate: raise BadRequest("Cannot provide subject type without a predicate") if type(obj) is str: object_id = obj else: if 
"_id" not in obj: raise BadRequest("Object id not available in object") else: object_id = obj._id qual_ds_name = self._get_datastore_name() assoc_table_name = qual_ds_name + "_assoc" table_names = dict(ds=qual_ds_name, dsa=assoc_table_name) view_args = self._get_view_args(kwargs, access_args) if id_only: #query = "SELECT s, doc FROM %(dsa)s WHERE retired<>true " % table_names query = "SELECT %(dsa)s.s, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % table_names else: query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % table_names query_args = dict(o=object_id, st=subject_type, p=predicate) query_clause = "AND o=%(o)s" if predicate: query_clause += " AND p=%(p)s" if subject_type: query_clause += " AND st=%(st)s" query_clause = self._add_access_filter(access_args, qual_ds_name, query_clause, query_args) extra_clause = view_args.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() obj_assocs = [ self._persistence_dict_to_ion_object(row[-1]) for row in rows ] #log.debug("find_subjects() found %s subjects", len(obj_assocs)) if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids, obj_assocs else: res_objs = [ self._persistence_dict_to_ion_object(row[0]) for row in rows ] return res_objs, obj_assocs def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True, anyside=None, query=None, **kwargs): if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not (subject or obj or predicate or anyside or query): raise BadRequest("Illegal parameters: No S/P/O or anyside") if anyside and (subject or obj): raise BadRequest( "Illegal parameters: anyside cannot be combined with S/O") if anyside and predicate and type(anyside) in (list, tuple): raise BadRequest( "Illegal parameters: anyside 
list cannot be combined with P") if query: query["query_args"]["id_only"] = id_only query["query_args"]["ds_sub"] = "assoc" # TODO: filter out retired return self.find_by_query(query) subject_id, object_id, anyside_ids = None, None, None if subject: if type(subject) is str: subject_id = subject else: if "_id" not in subject: raise BadRequest("Object id not available in subject") else: subject_id = subject._id if obj: if type(obj) is str: object_id = obj else: if "_id" not in obj: raise BadRequest("Object id not available in object") else: object_id = obj._id if anyside: if type(anyside) is str: anyside_ids = [anyside] elif type(anyside) in (list, tuple): if not all([type(o) in (str, list, tuple) for o in anyside]): raise BadRequest( "List of object ids or (object id, predicate) expected" ) anyside_ids = anyside else: if "_id" not in anyside: raise BadRequest("Object id not available in anyside") else: anyside_ids = [anyside._id] #log.debug("find_associations(subject=%s, predicate=%s, object=%s, anyside=%s)", subject_id, predicate, object_id, anyside_ids) qual_ds_name = self._get_datastore_name() table = qual_ds_name + "_assoc" view_args = self._get_view_args(kwargs) if id_only: query = "SELECT id FROM " + table else: query = "SELECT id, doc, s, st, p, o, ot FROM " + table query_clause = " WHERE retired<>true AND " query_args = dict(s=subject_id, o=object_id, p=predicate) if subject and obj: query_clause += "s=%(s)s AND o=%(o)s" if predicate: query_clause += " AND p=%(p)s" elif subject: query_clause += "s=%(s)s" if predicate: query_clause += " AND p=%(p)s" elif obj: query_clause += "o=%(o)s" if predicate: query_clause += " AND p=%(p)s" elif anyside: if predicate: query_clause += "p=%(p)s AND (s=%(any)s OR o=%(any)s)" query_args["any"] = anyside elif type(anyside_ids[0]) is str: # keys are IDs of resources for i, key in enumerate(anyside_ids): if i > 0: query_clause += " OR " argname = "id%s" % i query_args[argname] = key query_clause += "(s=%(" + argname + ")s OR 
o=%(" + argname + ")s)" else: # keys are tuples of (id, pred) for i, (key, pred) in enumerate(anyside_ids): if i > 0: query_clause += " OR " argname_id = "id%s" % i argname_p = "p%s" % i query_args[argname_id] = key query_args[argname_p] = pred query_clause += "(p=%(" + argname_p + ")s AND (s=%(" + argname_id + ")s OR o=%(" + argname_id + ")s))" elif predicate: if predicate == "*": query_clause += "p is not null" else: query_clause += "p=%(p)s" else: raise BadRequest("Illegal arguments") extra_clause = view_args.get("extra_clause", "") sql = query + query_clause + extra_clause #print "find_associations(): SQL=", sql, query_args with self.pool.cursor(**self.cursor_args) as cur: cur.execute(sql, query_args) rows = cur.fetchall() if id_only: assocs = [self._prep_id(row[0]) for row in rows] else: assocs = [ self._persistence_dict_to_ion_object(row[1]) for row in rows ] #log.debug("find_associations() found %s associations", len(assocs)) return assocs def _prepare_find_return(self, rows, res_assocs=None, id_only=True, **kwargs): if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids, res_assocs else: res_docs = [ self._persistence_dict_to_ion_object(row[-1]) for row in rows ] return res_docs, res_assocs def _add_access_filter(self, view_args, tablename, query_clause, query_args, add_where=True, tablealias=None): """Returns a Postgres SQL filter clause and referenced values for resource queries filtered by resource visibility and current actor role/facility membership/superuser status""" view_args = view_args if view_args is not None else {} current_actor_id = view_args.get("current_actor_id", None) superuser_actor_ids = view_args.get("superuser_actor_ids", None) or [] tablealias = tablealias or tablename access_filter = "" access_args = {} access_args["current_actor_id"] = current_actor_id assoc_tablename = tablename + "_assoc" if current_actor_id in superuser_actor_ids: # Current user is a superuser - no additional filter pass elif 
current_actor_id and current_actor_id != "anonymous": # Registered actor # - Return all PUBLIC, REGISTERED access_filter += tablealias + ".visibility NOT IN (3,4)" # 1, 2, null and other values # - Return all owned by user independent of visibility access_filter += " OR (" + tablealias + ".id IN (SELECT s FROM " + assoc_tablename + \ " WHERE p='hasOwner' AND o=%(current_actor_id)s))" # - Return all FACILITY if user is in same facility access_filter += " OR (" + tablealias + ".visibility=3 AND " + tablealias + ".id IN (SELECT o FROM " + assoc_tablename + \ " WHERE p='hasResource' AND st='Org' AND s IN (SELECT s FROM " + assoc_tablename + \ " WHERE p='hasMember' AND st='Org' AND o=%(current_actor_id)s)))" else: # Anonymous access # All public resources access_filter += tablealias + ".visibility NOT IN (2,3,4)" if query_clause and access_filter: query_clause += " AND (" + access_filter + ")" elif not query_clause and access_filter: if add_where: query_clause = " WHERE " + access_filter else: query_clause = access_filter query_args.update(access_args) return query_clause def _add_deleted_filter(self, tablename, ds_sub, query_clause, query_args, with_deleted=False): if with_deleted: return query_clause deleted_filter = "" if not ds_sub: deleted_filter = tablename + ".lcstate<>'DELETED'" elif ds_sub == "assoc": deleted_filter = tablename + ".retired<>true" if query_clause and deleted_filter: query_clause += " AND " + deleted_filter elif not query_clause and deleted_filter: query_clause = deleted_filter return query_clause def find_resources(self, restype="", lcstate="", name="", id_only=True, access_args=None): return self.find_resources_ext(restype=restype, lcstate=lcstate, name=name, id_only=id_only, access_args=access_args) def find_resources_ext(self, restype="", lcstate="", name="", keyword=None, nested_type=None, attr_name=None, attr_value=None, alt_id=None, alt_id_ns=None, limit=None, skip=None, descending=None, id_only=True, query=None, access_args=None): 
filter_kwargs = self._get_view_args( dict(limit=limit, skip=skip, descending=descending), access_args) if query: qargs = query["query_args"] if id_only is not None: qargs["id_only"] = id_only if limit is not None and limit != 0: qargs["limit"] = limit if skip is not None and skip != 0: qargs["skip"] = skip return self.find_by_query(query, access_args=access_args) elif name: if lcstate: raise BadRequest("find by name does not support lcstate") return self.find_res_by_name(name, restype, id_only, filter=filter_kwargs) elif keyword: return self.find_res_by_keyword(keyword, restype, id_only, filter=filter_kwargs) elif alt_id or alt_id_ns: return self.find_res_by_alternative_id(alt_id, alt_id_ns, id_only, filter=filter_kwargs) elif nested_type: return self.find_res_by_nested_type(nested_type, restype, id_only, filter=filter_kwargs) elif restype and attr_name: return self.find_res_by_attribute(restype, attr_name, attr_value, id_only=id_only, filter=filter_kwargs) elif restype and lcstate: return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs) elif restype: return self.find_res_by_type(restype, lcstate, id_only, filter=filter_kwargs) elif lcstate: return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs) elif not restype and not lcstate and not name: return self.find_res_by_type(None, None, id_only, filter=filter_kwargs) def find_res_by_type(self, restype, lcstate=None, id_only=False, filter=None): log.debug("find_res_by_type(restype=%s, lcstate=%s)", restype, lcstate) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if lcstate: raise BadRequest( 'lcstate not supported anymore in find_res_by_type') filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_, lcstate FROM " + qual_ds_name else: query = "SELECT id, name, type_, lcstate, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'DELETED' " 
query_args = dict(type_=restype, lcstate=lcstate) if restype: query_clause += "AND type_=%(type_)s" else: # Returns ALL documents, only limited by filter query_clause = "" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [ dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows ] log.debug("find_res_by_type() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_lcstate(self, lcstate, restype=None, id_only=False, filter=None): log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate, restype) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if '_' in lcstate: log.warn("Search for compound lcstate restricted to maturity: %s", lcstate) lcstate, _ = lcstate.split("_", 1) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_, lcstate, availability FROM " + qual_ds_name else: query = "SELECT id, name, type_, lcstate, availability, doc FROM " + qual_ds_name query_clause = " WHERE " query_args = dict(type_=restype, lcstate=lcstate) is_maturity = lcstate not in AvailabilityStates if is_maturity: query_clause += "lcstate=%(lcstate)s" else: query_clause += "availability=%(lcstate)s" if restype: query_clause += " AND type_=%(type_)s" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [ dict(id=self._prep_id(row[0]), name=row[1], type=row[2], lcstate=row[3] if is_maturity else row[4]) for row in rows ] 
log.debug("find_res_by_lcstate() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_name(self, name, restype=None, id_only=False, filter=None): log.debug("find_res_by_name(name=%s, restype=%s)", name, restype) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_ FROM " + qual_ds_name else: query = "SELECT id, name, type_, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'DELETED' " query_args = dict(name=name, type_=restype) query_clause += "AND name=%(name)s" if restype: query_clause += " AND type_=%(type_)s" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [ dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows ] log.debug("find_res_by_name() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_keyword(self, keyword, restype=None, id_only=False, filter=None): log.debug("find_res_by_keyword(keyword=%s, restype=%s)", keyword, restype) if not keyword or type(keyword) is not str: raise BadRequest('Argument keyword illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, type_ FROM " + qual_ds_name else: query = "SELECT id, type_, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'DELETED' " query_args = dict(type_=restype, kw=[keyword]) query_clause += "AND %(kw)s <@ json_keywords(doc)" if restype: query_clause += " AND 
type_=%(type_)s" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [ dict(id=self._prep_id(row[0]), type=row[1], keyword=keyword) for row in rows ] log.debug("find_res_by_keyword() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_nested_type(self, nested_type, restype=None, id_only=False, filter=None): log.debug("find_res_by_nested_type(nested_type=%s, restype=%s)", nested_type, restype) if not nested_type or type(nested_type) is not str: raise BadRequest('Argument nested_type illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, type_ FROM " + qual_ds_name else: query = "SELECT id, type_, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'DELETED' " query_args = dict(type_=restype, nest=[nested_type]) query_clause += "AND %(nest)s <@ json_nested(doc)" if restype: query_clause += " AND type_=%(type_)s" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [ dict(id=self._prep_id(row[0]), type=row[1], nested_type=nested_type) for row in rows ] log.debug("find_res_by_nested_type() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_attribute(self, restype, attr_name, attr_value=None, id_only=False, filter=None): log.debug( "find_res_by_attribute(restype=%s, attr_name=%s, attr_value=%s)", restype, 
attr_name, attr_value) if not attr_name or type(attr_name) is not str: raise BadRequest('Argument attr_name illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, type_, json_specialattr(doc) FROM " + qual_ds_name else: query = "SELECT id, type_, json_specialattr(doc), doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'DELETED' " query_args = dict(type_=restype, att=attr_name, val=attr_value) if attr_value: # Note: cannot make None test here (and allow empty string because of default service args "") query_clause += "AND json_specialattr(doc)=%(spc)s" query_args['spc'] = "%s=%s" % (attr_name, attr_value) else: query_clause += "AND json_specialattr(doc) LIKE %(spc)s" query_args['spc'] = "%s=%%" % (attr_name, ) if restype: query_clause += " AND type_=%(type_)s" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [ dict(id=self._prep_id(row[0]), type=row[1], attr_name=attr_name, attr_value=row[2].split("=", 1)[-1]) for row in rows ] log.debug("find_res_by_attribute() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_alternative_id(self, alt_id=None, alt_id_ns=None, id_only=False, filter=None): log.debug("find_res_by_alternative_id(restype=%s, alt_id_ns=%s)", alt_id, alt_id_ns) if alt_id and type(alt_id) is not str: raise BadRequest('Argument alt_id illegal') if alt_id_ns and type(alt_id_ns) is not str: raise BadRequest('Argument alt_id_ns illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} 
qual_ds_name = self._get_datastore_name() query = "SELECT id, type_, doc FROM " + qual_ds_name query_args = dict(aid=[alt_id], ans=[alt_id_ns]) query_clause = " WHERE lcstate<>'DELETED' " if not alt_id and not alt_id_ns: query_clause += "AND json_altids_ns(doc) is not null" elif alt_id and not alt_id_ns: query_clause += "AND %(aid)s <@ json_altids_id(doc)" elif alt_id_ns and not alt_id: query_clause += "AND %(ans)s <@ json_altids_ns(doc)" else: query_clause += "AND %(aid)s <@ json_altids_id(doc) AND %(ans)s <@ json_altids_ns(doc)" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() # Need to fake the return format of the Couch view for alt_ids. One record per alt_id, not one per resource. res_assocs = [] res_rows = [] for row in rows: doc_id = self._prep_id(row[0]) doc = row[-1] for aid in doc.get("alt_ids", []): aid_parts = aid.split(":", 1) aid_ns = aid_parts[0] if len(aid_parts) > 1 else "_" aid_id = aid_parts[-1] if alt_id_ns and alt_id: if alt_id_ns == aid_ns and alt_id == aid_id: res_assocs.append( dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id)) res_rows.append((doc_id, doc)) elif (not alt_id_ns and not alt_id) or (alt_id_ns and alt_id_ns == aid_ns) or (alt_id and alt_id == aid_id): res_assocs.append( dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id)) res_rows.append((doc_id, doc)) log.debug("find_res_by_alternative_id() found %s objects", len(res_assocs)) return self._prepare_find_return(res_rows, res_assocs, id_only=id_only) def find_by_view(self, design_name, view_name, key=None, keys=None, start_key=None, end_key=None, id_only=True, convert_doc=True, **kwargs): """ Generic find function using a defined index @param design_name design document @param view_name view name @param key specific key to find @param keys list of keys to find @param 
start_key find range start value @param end_key find range end value @param id_only if True, the 4th element of each triple is the document @param convert_doc if True, make IonObject out of doc @retval Returns a list of 3-tuples: (document id, index key, index value or document) """ res_rows = self.find_docs_by_view(design_name=design_name, view_name=view_name, key=key, keys=keys, start_key=start_key, end_key=end_key, id_only=id_only, **kwargs) res_rows = [(rid, key, self._persistence_dict_to_ion_object(doc) if convert_doc and isinstance(doc, dict) else doc) for rid, key, doc in res_rows] log.debug("find_by_view() found %s objects" % (len(res_rows))) return res_rows def find_by_query(self, query, access_args=None): """ Find resources given a datastore query expression dict. @param query a dict representation of a datastore query @retval list of resource ids or resource objects matching query (dependent on id_only value) """ qual_ds_name = self._get_datastore_name() query_ds_sub = query["query_args"].get("ds_sub", None) query_format = query["query_args"].get("format", "") pqb = PostgresQueryBuilder(query, qual_ds_name) if self.profile == DataStore.DS_PROFILE.RESOURCES and not query_ds_sub: table_alias = qual_ds_name if query_format != "complex" else "base" pqb.where = self._add_access_filter(access_args, qual_ds_name, pqb.where, pqb.values, add_where=False, tablealias=table_alias) if self.profile == DataStore.DS_PROFILE.RESOURCES: pqb.where = self._add_deleted_filter( pqb.table_aliases[0], query_ds_sub, pqb.where, pqb.values, with_deleted=query["query_args"].get("with_deleted", False) is True) with self.pool.cursor(**self.cursor_args) as cur: exec_query = pqb.get_query() cur.execute(exec_query, pqb.get_values()) rows = cur.fetchall() log.info("find_by_query() QUERY: %s (%s rows)", cur.query, cur.rowcount) query_res = {} query["_result"] = query_res query_res["statement_gen"] = exec_query query_res["statement_sql"] = cur.query query_res["rowcount"] = cur.rowcount 
id_only = query["query_args"].get("id_only", True) if query_format == "complex" and pqb.has_basic_cols: # Return format is list of lists if id_only: res_vals = [[self._prep_id(row[0])] + list(row[1:]) for row in rows] else: res_vals = [[self._persistence_dict_to_ion_object(row[1])] + list(rows[2:]) for row in rows] elif query_format == "complex": res_vals = [list(row) for row in rows] else: if id_only: res_vals = [self._prep_id(row[0]) for row in rows] else: res_vals = [ self._persistence_dict_to_ion_object(row[-1]) for row in rows ] return res_vals # ------------------------------------------------------------------------- # Internal operations def _ion_object_to_persistence_dict(self, ion_object): if ion_object is None: return None obj_dict = self._io_serializer.serialize(ion_object, update_version=True) return obj_dict def _persistence_dict_to_ion_object(self, obj_dict): if obj_dict is None: return None ion_object = self._io_deserializer.deserialize(obj_dict) return ion_object
class TransformManagementService(BaseTransformManagementService):
    """Provides the main orchestration for stream processing subscription,
    data process definition and computation request (scheduling).

    The transformation service handles content format transformation,
    mediation, qualification, verification and validation.
    """

    def __init__(self):
        BaseTransformManagementService.__init__(self)
        # Used by create_transform to turn an IonObject configuration into a dict.
        self.serializer = IonObjectSerializer()

    def on_start(self):
        """Start the service and optionally reschedule all known transforms.

        When the config value 'service.transform_management.restart' is
        truthy, every Transform resource found in the resource registry is
        passed to _restart_transform.
        """
        super(TransformManagementService, self).on_start()
        restart_flag = self.CFG.get_safe('service.transform_management.restart', False)
        if restart_flag:
            transform_ids, _ = self.clients.resource_registry.find_resources(restype=RT.Transform, id_only=True)
            for transform_id in transform_ids:
                self._restart_transform(transform_id)

    def _restart_transform(self, transform_id):
        """Schedule a new process for an existing transform and store its pid.

        The process definition is looked up through the transform's
        hasProcessDefinition association; if there is none, a warning is
        logged and nothing is scheduled.

        @param transform_id The unique transform identifier
        """
        transform = self.clients.resource_registry.read(transform_id)
        configuration = transform.configuration
        proc_def_ids, _ = self.clients.resource_registry.find_objects(
            subject=transform_id, predicate=PRED.hasProcessDefinition, id_only=True)
        if not proc_def_ids:
            log.warning('Transform did not have a correct process definition.')
            return
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=proc_def_ids[0],
            configuration=configuration
        )
        transform.process_id = pid
        self.clients.resource_registry.update(transform)

    def _strip_types(self, obj):
        """Remove the 'type_' key from obj and from nested dict values, in place.

        Non-dict arguments are ignored. Only values that are themselves dicts
        are descended into; dicts held inside lists are not visited.
        """
        if not isinstance(obj, dict):
            return
        # values() instead of iteritems(): the keys were never used, and
        # values() behaves the same on Python 2 and Python 3.
        for value in obj.values():
            if isinstance(value, dict):
                self._strip_types(value)
        obj.pop('type_', None)

    def create_transform(self,
                         name='',
                         description='',
                         in_subscription_id='',
                         out_streams=None,
                         process_definition_id='',
                         configuration=None):
        """Creates the transform and registers it with the resource registry

        @param name The transform name; must not match an existing resource name
        @param description Description stored on the Transform resource
        @param in_subscription_id The subscription id corresponding to the input subscription
        @param out_streams Dict whose values are the output stream ids; it is
                           also passed to the process as 'publish_streams'
        @param process_definition_id The process definition contains the module and class of the process to be spawned
        @param configuration Dict or IonObject with the process configuration.
                             A dict is updated in place with a 'process' entry.
        @return The transform_id to the transform
        @throws BadRequest when a resource with the given name already exists
        @throws NotFound when no process definition id was provided
        """
        # ------------------------------------------------------------------------------------
        # Resources and Initial Configs
        # ------------------------------------------------------------------------------------
        if isinstance(configuration, IonObjectBase):
            configuration = self.serializer.serialize(configuration)
            # strip the type
            self._strip_types(configuration)
        elif not configuration:
            configuration = {}

        # Handle the name uniqueness factor
        res, _ = self.clients.resource_registry.find_resources(name=name, id_only=True)
        if len(res) > 0:
            raise BadRequest('The transform resource with name: %s, already exists.' % name)

        if not process_definition_id:
            raise NotFound('No process definition was provided')

        # Read the subscription BEFORE creating the Transform resource: if the
        # subscription lookup raises, no orphan resource is left behind that
        # would make a retry fail the name uniqueness check above.
        subscription = self.clients.pubsub_management.read_subscription(subscription_id=in_subscription_id)
        listen_name = subscription.exchange_name

        # Transform Resource for association management and pid
        transform_res = Transform(name=name, description=description)
        transform_id, _ = self.clients.resource_registry.create(transform_res)
        transform_res = self.clients.resource_registry.read(transform_id)

        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------
        configuration['process'] = {
            'name': name,
            'type': 'stream_process',
            'listen_name': listen_name,
            'transform_id': transform_id
        }
        if out_streams:
            configuration['process']['publish_streams'] = out_streams
            stream_ids = list(out_streams.values())
        else:
            stream_ids = []
        transform_res.configuration = configuration

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            configuration=configuration
        )
        transform_res.process_id = pid

        # ------------------------------------------------------------------------------------
        # Handle Resources
        # ------------------------------------------------------------------------------------
        self.clients.resource_registry.update(transform_res)

        self.clients.resource_registry.create_association(transform_id, PRED.hasProcessDefinition, process_definition_id)
        self.clients.resource_registry.create_association(transform_id, PRED.hasSubscription, in_subscription_id)
        for stream_id in stream_ids:
            self.clients.resource_registry.create_association(transform_id, PRED.hasOutStream, stream_id)

        return transform_id

    def update_transform(self, configuration=None):
        """Not currently possible to update a transform

        @throws NotImplementedError
        """
        raise NotImplementedError

    def read_transform(self, transform_id=''):
        """Reads a transform from the resource registry

        @param transform_id The unique transform identifier
        @return Transform resource
        @throws NotFound when transform doesn't exist
        """
        # Lazy logger arguments: the message is only built when DEBUG is enabled.
        log.debug('(%s): Reading Transform: %s', self.name, transform_id)
        transform = self.clients.resource_registry.read(object_id=transform_id, rev_id='')
        return transform

    def delete_transform(self, transform_id=''):
        """Deletes and stops an existing transform process

        Cancels the transform's process, deletes its hasProcessDefinition,
        hasSubscription and hasOutStream associations, then deletes the
        Transform resource itself. The associated resources are not deleted.

        @param transform_id The unique transform identifier
        @retval True on success
        @throws NotFound when a transform doesn't exist
        """
        # get the transform resource (also verifies its existence before continuing)
        transform_res = self.read_transform(transform_id=transform_id)
        pid = transform_res.process_id

        # get the resources
        process_definition_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasProcessDefinition, RT.ProcessDefinition, True)
        in_subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        out_stream_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasOutStream, RT.Stream, True)

        # build a list of all the ids above
        # NOTE: currently unused; retained for the pending decision in the
        # @todo below on who deletes the associated resources.
        id_list = process_definition_ids + in_subscription_ids + out_stream_ids

        # stop the transform process
        #@note: the outcome of cancel_process is not checked here
        self.clients.process_dispatcher.cancel_process(pid)
        log.debug('(%s): Terminated Process (%s)', self.name, pid)

        # delete the associations
        for predicate in [PRED.hasProcessDefinition, PRED.hasSubscription, PRED.hasOutStream]:
            associations = self.clients.resource_registry.find_associations(transform_id, predicate)
            for association in associations:
                self.clients.resource_registry.delete_association(association)

        #@todo: should I delete the resources, or should dpms?
        # iterate through the list and delete each
        #for res_id in id_list:
        #    self.clients.resource_registry.delete(res_id)

        self.clients.resource_registry.delete(transform_id)
        return True

    # ---------------------------------------------------------------------------

    def execute_transform(self, process_definition_id='', data=None, configuration=None):
        """Run a process definition's class once on data and return the result.

        The module and class named in the process definition's executable are
        imported, the class is instantiated without arguments and its
        execute(data) is run in a greenlet.

        @param process_definition_id Id of the process definition to execute
        @param data Value passed to the instance's execute method
        @param configuration Not used by this method
        @retval Whatever execute(data) returns
        @throws Any exception raised by execute(data); a gevent timeout if no
                result is available within 10 seconds
        """
        process_definition = self.clients.process_dispatcher.read_process_definition(process_definition_id)
        module_name = process_definition.executable.get('module')
        class_name = process_definition.executable.get('class')

        module = __import__(module_name, fromlist=[class_name])
        cls = getattr(module, class_name)
        instance = cls()

        result = gevent.event.AsyncResult()

        def execute(data):
            # Hand a failure over to the waiting caller. Without this the
            # AsyncResult would never be set and the caller would only see
            # an unrelated timeout after 10 seconds.
            try:
                result.set(instance.execute(data))
            except Exception as ex:
                result.set_exception(ex)

        g = gevent.greenlet.Greenlet(execute, data)
        g.start()

        retval = result.get(timeout=10)
        return retval

    def activate_transform(self, transform_id=''):
        """Activate the subscription to bind (start) the transform

        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist.
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        if not subscription_ids:
            raise NotFound('No subscription found for transform: %s' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.activate_subscription(subscription_id)

        return True

    def deactivate_transform(self, transform_id=''):
        """Deactivates the subscriptions for the specified transform

        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        if not subscription_ids:
            raise NotFound('No subscription found for transform: %s' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.deactivate_subscription(subscription_id)

        return True

    def schedule_transform(self, transform_id=''):
        """Not currently implemented

        @throws NotImplementedError
        """
        raise NotImplementedError
class PostgresPyonDataStore(PostgresDataStore): """ Base class common to both CouchDB and Couchbase datastores. """ def __init__(self, datastore_name=None, config=None, scope=None, profile=None): """ @param datastore_name Name of datastore within server. May be scoped to sysname @param config A server config dict with connection params @param scope Prefix for the datastore name (e.g. sysname) to separate multiple systems """ PostgresDataStore.__init__(self, datastore_name=datastore_name, config=config or CFG.get_safe("server.postgresql"), profile=profile or DataStore.DS_PROFILE.BASIC, scope=scope) # IonObject Serializers self._io_serializer = IonObjectSerializer() self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry()) # ------------------------------------------------------------------------- # Couch document operations def create(self, obj, object_id=None, attachments=None, datastore_name=""): """ Converts ion objects to python dictionary before persisting them using the optional suggested identifier and creates attachments to the object. 
Returns an identifier and revision number of the object """ if not isinstance(obj, IonObjectBase): raise BadRequest("Obj param is not instance of IonObjectBase") return self.create_doc(self._ion_object_to_persistence_dict(obj), object_id=object_id, datastore_name=datastore_name, attachments=attachments) def create_mult(self, objects, object_ids=None, allow_ids=None): if any([not isinstance(obj, IonObjectBase) for obj in objects]): raise BadRequest("Obj param is not instance of IonObjectBase") return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids) def update(self, obj, datastore_name=""): if not isinstance(obj, IonObjectBase): raise BadRequest("Obj param is not instance of IonObjectBase") return self.update_doc(self._ion_object_to_persistence_dict(obj)) def update_mult(self, objects): if any([not isinstance(obj, IonObjectBase) for obj in objects]): raise BadRequest("Obj param is not instance of IonObjectBase") return self.update_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects]) def read(self, object_id, rev_id="", datastore_name="", object_type=None): if not isinstance(object_id, str): raise BadRequest("Object id param is not string") doc = self.read_doc(object_id, rev_id, datastore_name=datastore_name, object_type=object_type) obj = self._persistence_dict_to_ion_object(doc) return obj def read_mult(self, object_ids, datastore_name="", strict=True): if any([not isinstance(object_id, str) for object_id in object_ids]): raise BadRequest("Object ids are not string: %s" % str(object_ids)) docs = self.read_doc_mult(object_ids, datastore_name, strict=strict) obj_list = [self._persistence_dict_to_ion_object(doc) if doc is not None else None for doc in docs] return obj_list def delete(self, obj, datastore_name="", object_type=None): if not isinstance(obj, IonObjectBase) and not isinstance(obj, str): raise BadRequest("Obj param is not instance of IonObjectBase or string id") if type(obj) is str: 
self.delete_doc(obj, datastore_name=datastore_name, object_type=object_type) else: if '_id' not in obj: raise BadRequest("Doc must have '_id'") if '_rev' not in obj: raise BadRequest("Doc must have '_rev'") self.delete_doc(self._ion_object_to_persistence_dict(obj), datastore_name=datastore_name, object_type=object_type) def delete_mult(self, object_ids, datastore_name=None): return self.delete_doc_mult(object_ids, datastore_name) # ------------------------------------------------------------------------- # View operations def find_objects_mult(self, subjects, id_only=False, predicate=None, access_args=None): """ Returns a list of associations for a given list of subjects """ # TODO: Port this implementation to Postgres single query res_list = [[], []] if not subjects: return res_list for sub in subjects: res_ids, res_assocs = self.find_objects(subject=sub, id_only=id_only, predicate=predicate, access_args=access_args) res_list[0].extend(res_ids) res_list[1].extend(res_assocs) return res_list def find_subjects_mult(self, objects, id_only=False, predicate=None, access_args=None): """ Returns a list of associations for a given list of objects """ # TODO: Port this implementation to Postgres single query res_list = [[], []] if not objects: return res_list for obj in objects: res_ids, res_assocs = self.find_subjects(obj=obj, id_only=id_only, predicate=predicate, access_args=access_args) res_list[0].extend(res_ids) res_list[1].extend(res_assocs) return res_list def find_objects(self, subject, predicate=None, object_type=None, id_only=False, access_args=None, **kwargs): #log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s", subject, predicate, object_type, id_only) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not subject: raise BadRequest("Must provide subject") if object_type and not predicate: raise BadRequest("Cannot provide object type without a predicate") if type(subject) is str: 
subject_id = subject else: if "_id" not in subject: raise BadRequest("Object id not available in subject") else: subject_id = subject._id qual_ds_name = self._get_datastore_name() assoc_table_name = qual_ds_name+"_assoc" table_names = dict(ds=qual_ds_name, dsa=assoc_table_name) view_args = self._get_view_args(kwargs, access_args) if id_only: #query = "SELECT o, doc FROM %(dsa)s WHERE retired<>true " % table_names query = "SELECT %(dsa)s.o, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names else: query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % table_names query_args = dict(s=subject_id, ot=object_type, p=predicate) query_clause = "AND s=%(s)s" if predicate: query_clause += " AND p=%(p)s" if object_type: query_clause += " AND ot=%(ot)s" query_clause = self._add_access_filter(access_args, qual_ds_name, query_clause, query_args) extra_clause = view_args.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows] #log.debug("find_objects() found %s objects", len(obj_assocs)) if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids, obj_assocs else: res_objs = [self._persistence_dict_to_ion_object(row[0]) for row in rows] return res_objs, obj_assocs def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False, access_args=None, **kwargs): #log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s", subject_type, predicate, obj, id_only) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not obj: raise BadRequest("Must provide object") if subject_type and not predicate: raise BadRequest("Cannot provide subject type without a predicate") if type(obj) is str: object_id = obj else: if "_id" 
def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True,
                      anyside=None, query=None, **kwargs):
    """
    Find associations by subject, predicate and object, by either side
    (anyside), or by a datastore query expression.

    @param subject  subject id (str) or an object that carries "_id"
    @param predicate  predicate name; used alone, "*" matches any non-null predicate
    @param obj  object id (str) or an object that carries "_id"
    @param assoc_type  accepted for interface compatibility; not evaluated here
    @param id_only  if True return association ids, otherwise association objects
    @param anyside  id, object carrying "_id", list of ids, or list of
                    (id, predicate) pairs matched against subject OR object
    @param query  datastore query dict; if given, delegated to find_by_query()
    @param kwargs  passed to _get_view_args(); its "extra_clause" is appended to the SQL
    @retval list of association ids or association objects
    @throws BadRequest if the argument combination is illegal
    """
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    if not (subject or obj or predicate or anyside or query):
        raise BadRequest("Illegal parameters: No S/P/O or anyside")
    if anyside and (subject or obj):
        raise BadRequest("Illegal parameters: anyside cannot be combined with S/O")
    if anyside and predicate and type(anyside) in (list, tuple):
        raise BadRequest("Illegal parameters: anyside list cannot be combined with P")

    if query:
        # Note: the caller's query dict is modified in place
        query["query_args"]["id_only"] = id_only
        query["query_args"]["ds_sub"] = "assoc"
        # TODO: filter out retired
        return self.find_by_query(query)

    subject_id, object_id, anyside_ids = None, None, None
    if subject:
        if type(subject) is str:
            subject_id = subject
        elif "_id" not in subject:
            raise BadRequest("Object id not available in subject")
        else:
            subject_id = subject._id
    if obj:
        if type(obj) is str:
            object_id = obj
        elif "_id" not in obj:
            raise BadRequest("Object id not available in object")
        else:
            object_id = obj._id
    if anyside:
        if type(anyside) is str:
            anyside_ids = [anyside]
        elif type(anyside) in (list, tuple):
            if not all(type(o) in (str, list, tuple) for o in anyside):
                raise BadRequest("List of object ids or (object id, predicate) expected")
            anyside_ids = anyside
        elif "_id" not in anyside:
            raise BadRequest("Object id not available in anyside")
        else:
            anyside_ids = [anyside._id]

    qual_ds_name = self._get_datastore_name()
    table = qual_ds_name + "_assoc"
    view_args = self._get_view_args(kwargs)

    if id_only:
        select_sql = "SELECT id FROM " + table
    else:
        select_sql = "SELECT id, doc, s, st, p, o, ot FROM " + table
    query_clause = " WHERE retired<>true AND "
    query_args = dict(s=subject_id, o=object_id, p=predicate)

    if subject and obj:
        query_clause += "s=%(s)s AND o=%(o)s"
        if predicate:
            query_clause += " AND p=%(p)s"
    elif subject:
        query_clause += "s=%(s)s"
        if predicate:
            query_clause += " AND p=%(p)s"
    elif obj:
        query_clause += "o=%(o)s"
        if predicate:
            query_clause += " AND p=%(p)s"
    elif anyside:
        if predicate:
            # A list was rejected above, so exactly one id exists. Bind the
            # extracted id, not the raw argument (which may be a resource object).
            query_clause += "p=%(p)s AND (s=%(any)s OR o=%(any)s)"
            query_args["any"] = anyside_ids[0]
        else:
            alternatives = []
            if type(anyside_ids[0]) is str:
                # keys are IDs of resources
                for i, key in enumerate(anyside_ids):
                    argname = "id%s" % i
                    query_args[argname] = key
                    alternatives.append("(s=%(" + argname + ")s OR o=%(" + argname + ")s)")
            else:
                # keys are tuples of (id, pred)
                for i, (key, pred) in enumerate(anyside_ids):
                    argname_id = "id%s" % i
                    argname_p = "p%s" % i
                    query_args[argname_id] = key
                    query_args[argname_p] = pred
                    alternatives.append("(p=%(" + argname_p + ")s AND (s=%(" + argname_id +
                                        ")s OR o=%(" + argname_id + ")s))")
            # AND binds tighter than OR in SQL: group the alternatives so that
            # the retired filter applies to every one of them, not just the first.
            query_clause += "(" + " OR ".join(alternatives) + ")"
    elif predicate:
        if predicate == "*":
            query_clause += "p is not null"
        else:
            query_clause += "p=%(p)s"
    else:
        raise BadRequest("Illegal arguments")

    extra_clause = view_args.get("extra_clause", "")
    sql = select_sql + query_clause + extra_clause
    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(sql, query_args)
        rows = cur.fetchall()

    if id_only:
        assocs = [self._prep_id(row[0]) for row in rows]
    else:
        assocs = [self._persistence_dict_to_ion_object(row[1]) for row in rows]
    return assocs


def _prepare_find_return(self, rows, res_assocs=None, id_only=True, **kwargs):
    """
    Build the (results, res_assocs) pair returned by the find_res_by_* functions.

    @param rows  result rows; column 0 is the id, the last column is the document
    @param res_assocs  passed through unchanged as second element of the result
    @param id_only  if True return prepared ids, otherwise deserialized documents
    @retval tuple (list of ids or objects, res_assocs)
    """
    if id_only:
        res_ids = [self._prep_id(row[0]) for row in rows]
        return res_ids, res_assocs
    else:
        res_docs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
        return res_docs, res_assocs
Registered actor # - Return all PUBLIC, REGISTERED access_filter += tablealias + ".visibility NOT IN (3,4)" # 1, 2, null and other values # - Return all owned by user independent of visibility access_filter += " OR (" + tablealias + ".id IN (SELECT s FROM " + assoc_tablename + \ " WHERE p='hasOwner' AND o=%(current_actor_id)s))" # - Return all FACILITY if user is in same facility access_filter += " OR (" + tablealias + ".visibility=3 AND " + tablealias + ".id IN (SELECT o FROM " + assoc_tablename + \ " WHERE p='hasResource' AND st='Org' AND s IN (SELECT s FROM " + assoc_tablename + \ " WHERE p='hasMember' AND st='Org' AND o=%(current_actor_id)s)))" else: # Anonymous access # All public resources access_filter += tablealias + ".visibility NOT IN (2,3,4)" if query_clause and access_filter: query_clause += " AND (" + access_filter + ")" elif not query_clause and access_filter: if add_where: query_clause = " WHERE " + access_filter else: query_clause = access_filter query_args.update(access_args) return query_clause def _add_deleted_filter(self, tablename, ds_sub, query_clause, query_args, show_all=False): if show_all: return query_clause deleted_filter = "" if not ds_sub: deleted_filter = tablename + ".lcstate<>'DELETED'" elif ds_sub == "assoc": deleted_filter = tablename + ".retired<>true" if query_clause and deleted_filter: query_clause += " AND " + deleted_filter elif not query_clause and deleted_filter: query_clause = deleted_filter return query_clause def find_resources(self, restype="", lcstate="", name="", id_only=True, access_args=None): return self.find_resources_ext(restype=restype, lcstate=lcstate, name=name, id_only=id_only, access_args=access_args) def find_resources_ext(self, restype="", lcstate="", name="", keyword=None, nested_type=None, attr_name=None, attr_value=None, alt_id=None, alt_id_ns=None, limit=None, skip=None, descending=None, id_only=True, query=None, access_args=None): filter_kwargs = self._get_view_args(dict(limit=limit, skip=skip, 
descending=descending), access_args) if query: qargs = query["query_args"] if id_only is not None: qargs["id_only"] = id_only if limit is not None and limit != 0: qargs["limit"] = limit if skip is not None and skip != 0: qargs["skip"] = skip return self.find_by_query(query, access_args=access_args) elif name: if lcstate: raise BadRequest("find by name does not support lcstate") return self.find_res_by_name(name, restype, id_only, filter=filter_kwargs) elif keyword: return self.find_res_by_keyword(keyword, restype, id_only, filter=filter_kwargs) elif alt_id or alt_id_ns: return self.find_res_by_alternative_id(alt_id, alt_id_ns, id_only, filter=filter_kwargs) elif nested_type: return self.find_res_by_nested_type(nested_type, restype, id_only, filter=filter_kwargs) elif restype and attr_name: return self.find_res_by_attribute(restype, attr_name, attr_value, id_only=id_only, filter=filter_kwargs) elif restype and lcstate: return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs) elif restype: return self.find_res_by_type(restype, lcstate, id_only, filter=filter_kwargs) elif lcstate: return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs) elif not restype and not lcstate and not name: return self.find_res_by_type(None, None, id_only, filter=filter_kwargs) def find_res_by_type(self, restype, lcstate=None, id_only=False, filter=None): log.debug("find_res_by_type(restype=%s, lcstate=%s)", restype, lcstate) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if lcstate: raise BadRequest('lcstate not supported anymore in find_res_by_type') filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_, lcstate FROM " + qual_ds_name else: query = "SELECT id, name, type_, lcstate, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'DELETED' " query_args = dict(type_=restype, lcstate=lcstate) if restype: 
def find_res_by_lcstate(self, lcstate, restype=None, id_only=False, filter=None):
    """
    Find resources by lifecycle state, optionally restricted to a type.

    A compound state "X_Y" is reduced to its first part. Values contained in
    AvailabilityStates are matched against the availability column, all
    others against the lcstate column.

    @param filter  view args dict; provides access filter values and "extra_clause"
    @retval tuple (ids or objects, list of dict(id, name, type, lcstate))
    @throws BadRequest if id_only is not a bool
    """
    log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate, restype)
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    if '_' in lcstate:
        log.warn("Search for compound lcstate restricted to maturity: %s", lcstate)
        lcstate = lcstate.split("_", 1)[0]
    view_filter = {} if filter is None else filter
    ds_name = self._get_datastore_name()

    columns = "id, name, type_, lcstate, availability"
    if not id_only:
        columns += ", doc"
    select_sql = "SELECT " + columns + " FROM " + ds_name

    sql_args = dict(type_=restype, lcstate=lcstate)
    is_maturity = lcstate not in AvailabilityStates
    where = " WHERE " + ("lcstate" if is_maturity else "availability") + "=%(lcstate)s"
    if restype:
        where += " AND type_=%(type_)s"
    where = self._add_access_filter(view_filter, ds_name, where, sql_args)
    tail = view_filter.get("extra_clause", "")

    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(select_sql + where + tail, sql_args)
        rows = cur.fetchall()

    # Column 3 holds lcstate, column 4 availability
    state_col = 3 if is_maturity else 4
    res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2], lcstate=row[state_col])
                  for row in rows]
    log.debug("find_res_by_lcstate() found %s objects", len(res_assocs))
    return self._prepare_find_return(rows, res_assocs, id_only=id_only)


def find_res_by_name(self, name, restype=None, id_only=False, filter=None):
    """
    Find non-deleted resources by exact name, optionally restricted to a type.

    @param filter  view args dict; provides access filter values and "extra_clause"
    @retval tuple (ids or objects, list of dict(id, name, type))
    @throws BadRequest if id_only is not a bool
    """
    log.debug("find_res_by_name(name=%s, restype=%s)", name, restype)
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    view_filter = {} if filter is None else filter
    ds_name = self._get_datastore_name()

    columns = "id, name, type_" if id_only else "id, name, type_, doc"
    select_sql = "SELECT " + columns + " FROM " + ds_name

    sql_args = dict(name=name, type_=restype)
    where = " WHERE lcstate<>'DELETED' AND name=%(name)s"
    if restype:
        where += " AND type_=%(type_)s"
    where = self._add_access_filter(view_filter, ds_name, where, sql_args)
    tail = view_filter.get("extra_clause", "")

    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(select_sql + where + tail, sql_args)
        rows = cur.fetchall()

    res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows]
    log.debug("find_res_by_name() found %s objects", len(res_assocs))
    return self._prepare_find_return(rows, res_assocs, id_only=id_only)
def find_res_by_nested_type(self, nested_type, restype=None, id_only=False, filter=None):
    """
    Find non-deleted resources whose document contains the given nested type
    (as reported by the json_nested(doc) SQL function), optionally by type.

    @param filter  view args dict; provides access filter values and "extra_clause"
    @retval tuple (ids or objects, list of dict(id, type, nested_type))
    @throws BadRequest if nested_type is not a non-empty str or id_only not a bool
    """
    log.debug("find_res_by_nested_type(nested_type=%s, restype=%s)", nested_type, restype)
    if not nested_type or type(nested_type) is not str:
        raise BadRequest('Argument nested_type illegal')
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    view_filter = {} if filter is None else filter
    ds_name = self._get_datastore_name()

    columns = "id, type_" if id_only else "id, type_, doc"
    select_sql = "SELECT " + columns + " FROM " + ds_name

    # The nested type is bound as a one-element list for the <@ operator
    sql_args = dict(type_=restype, nest=[nested_type])
    where = " WHERE lcstate<>'DELETED' AND %(nest)s <@ json_nested(doc)"
    if restype:
        where += " AND type_=%(type_)s"
    where = self._add_access_filter(view_filter, ds_name, where, sql_args)
    tail = view_filter.get("extra_clause", "")

    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(select_sql + where + tail, sql_args)
        rows = cur.fetchall()

    res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], nested_type=nested_type) for row in rows]
    log.debug("find_res_by_nested_type() found %s objects", len(res_assocs))
    return self._prepare_find_return(rows, res_assocs, id_only=id_only)


def find_res_by_attribute(self, restype, attr_name, attr_value=None, id_only=False, filter=None):
    """
    Find non-deleted resources by their special attribute, compared as the
    string "name=value" against json_specialattr(doc).

    @param attr_value  exact value to match; if empty, any value of attr_name matches
    @param filter  view args dict; provides access filter values and "extra_clause"
    @retval tuple (ids or objects, list of dict(id, type, attr_name, attr_value))
    @throws BadRequest if attr_name is not a non-empty str or id_only not a bool
    """
    log.debug("find_res_by_attribute(restype=%s, attr_name=%s, attr_value=%s)", restype, attr_name, attr_value)
    if not attr_name or type(attr_name) is not str:
        raise BadRequest('Argument attr_name illegal')
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    view_filter = {} if filter is None else filter
    ds_name = self._get_datastore_name()

    columns = "id, type_, json_specialattr(doc)"
    if not id_only:
        columns += ", doc"
    select_sql = "SELECT " + columns + " FROM " + ds_name

    sql_args = dict(type_=restype, att=attr_name, val=attr_value)
    where = " WHERE lcstate<>'DELETED' "
    if attr_value:
        # Note: cannot test for None here; empty string is the default service arg
        where += "AND json_specialattr(doc)=%(spc)s"
        sql_args['spc'] = "%s=%s" % (attr_name, attr_value)
    else:
        where += "AND json_specialattr(doc) LIKE %(spc)s"
        sql_args['spc'] = "%s=%%" % (attr_name, )
    if restype:
        where += " AND type_=%(type_)s"
    where = self._add_access_filter(view_filter, ds_name, where, sql_args)
    tail = view_filter.get("extra_clause", "")

    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(select_sql + where + tail, sql_args)
        rows = cur.fetchall()

    res_assocs = []
    for row in rows:
        # Column 2 is "name=value"; report the part after the first "="
        found_value = row[2].split("=", 1)[-1]
        res_assocs.append(dict(id=self._prep_id(row[0]), type=row[1],
                               attr_name=attr_name, attr_value=found_value))
    log.debug("find_res_by_attribute() found %s objects", len(res_assocs))
    return self._prepare_find_return(rows, res_assocs, id_only=id_only)
qual_ds_name query_args = dict(aid=[alt_id], ans=[alt_id_ns]) query_clause = " WHERE lcstate<>'DELETED' " if not alt_id and not alt_id_ns: query_clause += "AND json_altids_ns(doc) is not null" elif alt_id and not alt_id_ns: query_clause += "AND %(aid)s <@ json_altids_id(doc)" elif alt_id_ns and not alt_id: query_clause += "AND %(ans)s <@ json_altids_ns(doc)" else: query_clause += "AND %(aid)s <@ json_altids_id(doc) AND %(ans)s <@ json_altids_ns(doc)" query_clause = self._add_access_filter(filter, qual_ds_name, query_clause, query_args) extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() # Need to fake the return format of the Couch view for alt_ids. One record per alt_id, not one per resource. res_assocs = [] res_rows = [] for row in rows: doc_id = self._prep_id(row[0]) doc = row[-1] for aid in doc.get("alt_ids", []): aid_parts = aid.split(":", 1) aid_ns = aid_parts[0] if len(aid_parts)>1 else "_" aid_id = aid_parts[-1] if alt_id_ns and alt_id: if alt_id_ns == aid_ns and alt_id == aid_id: res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id)) res_rows.append((doc_id, doc)) elif (not alt_id_ns and not alt_id) or (alt_id_ns and alt_id_ns == aid_ns) or (alt_id and alt_id == aid_id): res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id)) res_rows.append((doc_id, doc)) log.debug("find_res_by_alternative_id() found %s objects", len(res_assocs)) return self._prepare_find_return(res_rows, res_assocs, id_only=id_only) def find_by_view(self, design_name, view_name, key=None, keys=None, start_key=None, end_key=None, id_only=True, convert_doc=True, **kwargs): """ Generic find function using a defined index @param design_name design document @param view_name view name @param key specific key to find @param keys list of keys to find @param start_key find range start value @param end_key find range end value @param id_only if 
True, the 4th element of each triple is the document @param convert_doc if True, make IonObject out of doc @retval Returns a list of 3-tuples: (document id, index key, index value or document) """ res_rows = self.find_docs_by_view(design_name=design_name, view_name=view_name, key=key, keys=keys, start_key=start_key, end_key=end_key, id_only=id_only, **kwargs) res_rows = [(rid, key, self._persistence_dict_to_ion_object(doc) if convert_doc and isinstance(doc, dict) else doc) for rid, key, doc in res_rows] log.debug("find_by_view() found %s objects" % (len(res_rows))) return res_rows def find_by_query(self, query, access_args=None): """ Find resources given a datastore query expression dict. @param query a dict representation of a datastore query @retval list of resource ids or resource objects matching query (dependent on id_only value) """ qual_ds_name = self._get_datastore_name() query_ds_sub = query["query_args"].get("ds_sub", None) query_format = query["query_args"].get("format", "") pqb = PostgresQueryBuilder(query, qual_ds_name) if self.profile == DataStore.DS_PROFILE.RESOURCES and not query_ds_sub: table_alias = qual_ds_name if query_format != "complex" else "base" pqb.where = self._add_access_filter(access_args, qual_ds_name, pqb.where, pqb.values, add_where=False, tablealias=table_alias) if self.profile == DataStore.DS_PROFILE.RESOURCES: pqb.where = self._add_deleted_filter(pqb.table_aliases[0], query_ds_sub, pqb.where, pqb.values, show_all=query["query_args"].get("show_all", False)) with self.pool.cursor(**self.cursor_args) as cur: exec_query = pqb.get_query() cur.execute(exec_query, pqb.get_values()) rows = cur.fetchall() log.info("find_by_query() QUERY: %s (%s rows)", cur.query, cur.rowcount) query_res = {} query["_result"] = query_res query_res["statement_gen"] = exec_query query_res["statement_sql"] = cur.query query_res["rowcount"] = cur.rowcount id_only = query["query_args"].get("id_only", True) if query_format == "complex" and pqb.has_basic_cols: # 
Return format is list of lists if id_only: res_vals = [[self._prep_id(row[0])] + list(row[1:]) for row in rows] else: res_vals = [[self._persistence_dict_to_ion_object(row[1])] + list(rows[2:]) for row in rows] elif query_format == "complex": res_vals = [list(row) for row in rows] else: if id_only: res_vals = [self._prep_id(row[0]) for row in rows] else: res_vals = [self._persistence_dict_to_ion_object(row[-1]) for row in rows] return res_vals # ------------------------------------------------------------------------- # Internal operations def _ion_object_to_persistence_dict(self, ion_object): if ion_object is None: return None obj_dict = self._io_serializer.serialize(ion_object, update_version=True) return obj_dict def _persistence_dict_to_ion_object(self, obj_dict): if obj_dict is None: return None ion_object = self._io_deserializer.deserialize(obj_dict) return ion_object
class PostgresPyonDataStore(PostgresDataStore):
    """
    PostgreSQL datastore for IonObjects. Converts IonObjects to and from
    persistence dicts and delegates storage to the document operations of
    PostgresDataStore.
    """
    def __init__(self, datastore_name=None, config=None, scope=None, profile=None):
        """
        @param datastore_name  Name of datastore within server. May be scoped to sysname
        @param config  A server config dict with connection params; defaults to the
                       "server.postgresql" entry of CFG
        @param scope  Prefix for the datastore name (e.g. sysname) to separate multiple systems
        @param profile  Datastore profile; defaults to DataStore.DS_PROFILE.BASIC
        """
        PostgresDataStore.__init__(self, datastore_name=datastore_name,
                                   config=config or CFG.get_safe("server.postgresql"),
                                   profile=profile or DataStore.DS_PROFILE.BASIC,
                                   scope=scope)

        # IonObject Serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    # -------------------------------------------------------------------------
    # IonObject operations (wrap the document operations of the base class)

    def create(self, obj, object_id=None, attachments=None, datastore_name=""):
        """
        Converts ion objects to python dictionary before persisting them using the optional
        suggested identifier and creates attachments to the object.
        Returns an identifier and revision number of the object

        @throws BadRequest if obj is not an IonObjectBase
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id, datastore_name=datastore_name,
                               attachments=attachments)

    def create_mult(self, objects, object_ids=None, allow_ids=None):
        """
        Persist several IonObjects via create_doc_mult().

        @param object_ids  passed on to create_doc_mult()
        @param allow_ids  accepted for interface compatibility; not used here
        @throws BadRequest if any element is not an IonObjectBase
        """
        if not all(isinstance(obj, IonObjectBase) for obj in objects):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids)

    def update(self, obj, datastore_name=""):
        """
        Persist a changed IonObject via update_doc().

        @param datastore_name  accepted for interface compatibility; not passed on
        @throws BadRequest if obj is not an IonObjectBase
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc(self._ion_object_to_persistence_dict(obj))

    def update_mult(self, objects):
        """
        Persist several changed IonObjects via update_doc_mult().

        @throws BadRequest if any element is not an IonObjectBase
        """
        if not all(isinstance(obj, IonObjectBase) for obj in objects):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects])

    def read(self, object_id, rev_id="", datastore_name="", object_type=None):
        """
        Read one document via read_doc() and return it as IonObject.

        @throws BadRequest if object_id is not a str
        """
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")

        doc = self.read_doc(object_id, rev_id, datastore_name=datastore_name, object_type=object_type)
        obj = self._persistence_dict_to_ion_object(doc)

        return obj

    def read_mult(self, object_ids, datastore_name="", strict=True):
        """
        Read several documents via read_doc_mult() and return them as IonObjects.
        Entries that come back as None stay None in the result list.

        @throws BadRequest if any id is not a str
        """
        if not all(isinstance(object_id, str) for object_id in object_ids):
            raise BadRequest("Object ids are not string: %s" % str(object_ids))

        docs = self.read_doc_mult(object_ids, datastore_name, strict=strict)
        obj_list = [self._persistence_dict_to_ion_object(doc) if doc is not None else None for doc in docs]

        return obj_list
def delete_mult(self, object_ids, datastore_name=None):
    """
    Delete several documents by id; delegates to delete_doc_mult().
    """
    return self.delete_doc_mult(object_ids, datastore_name)

# -------------------------------------------------------------------------
# View operations

def find_objects_mult(self, subjects, id_only=False):
    """
    Returns a list of associations for a given list of subjects

    @retval two-element list [objects or object ids, associations], collected
            by calling find_objects() once per subject
    """
    # TODO: Port this implementation to Postgres single query
    found, assocs = [], []
    if subjects:
        for sub in subjects:
            sub_found, sub_assocs = self.find_objects(subject=sub, id_only=id_only)
            found.extend(sub_found)
            assocs.extend(sub_assocs)
    return [found, assocs]


def find_subjects_mult(self, objects, id_only=False):
    """
    Returns a list of associations for a given list of objects

    @retval two-element list [subjects or subject ids, associations], collected
            by calling find_subjects() once per object
    """
    # TODO: Port this implementation to Postgres single query
    found, assocs = [], []
    if objects:
        for obj in objects:
            obj_found, obj_assocs = self.find_subjects(obj=obj, id_only=id_only)
            found.extend(obj_found)
            assocs.extend(obj_assocs)
    return [found, assocs]


def find_objects(self, subject, predicate=None, object_type=None, id_only=False, **kwargs):
    """
    Find the objects associated with a subject, with non-retired associations only.

    @param subject  subject id (str) or an object that carries "_id"
    @param predicate  optional predicate to match
    @param object_type  optional object type; requires a predicate
    @param id_only  if True return object ids, otherwise the object documents
    @param kwargs  passed to _get_view_args(); its "extra_clause" is appended to the SQL
    @retval tuple (list of object ids or objects, list of association objects)
    @throws BadRequest for missing subject or illegal argument combinations
    """
    if type(id_only) is not bool:
        raise BadRequest('id_only must be type bool, not %s' % type(id_only))
    if not subject:
        raise BadRequest("Must provide subject")
    if object_type and not predicate:
        raise BadRequest("Cannot provide object type without a predicate")

    if type(subject) is str:
        subject_id = subject
    elif "_id" in subject:
        subject_id = subject._id
    else:
        raise BadRequest("Object id not available in subject")

    ds_name = self._get_datastore_name()
    tables = dict(ds=ds_name, dsa=ds_name + "_assoc")
    view_args = self._get_view_args(kwargs)

    if id_only:
        select_sql = "SELECT o, doc FROM %(dsa)s WHERE retired<>true " % tables
    else:
        # Join with the resource table to fetch the object documents as well
        select_sql = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.o=%(ds)s.id " % tables

    sql_args = dict(s=subject_id, ot=object_type, p=predicate)
    conditions = ["AND s=%(s)s"]
    if predicate:
        conditions.append("AND p=%(p)s")
    if object_type:
        conditions.append("AND ot=%(ot)s")
    sql = select_sql + " ".join(conditions) + view_args.get("extra_clause", "")

    with self.pool.cursor(**self.cursor_args) as cur:
        cur.execute(sql, sql_args)
        rows = cur.fetchall()

    # The association document is always the last column
    obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows]
    if id_only:
        return [self._prep_id(row[0]) for row in rows], obj_assocs
    return [self._persistence_dict_to_ion_object(row[0]) for row in rows], obj_assocs
find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False, **kwargs): #log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s", subject_type, predicate, obj, id_only) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not obj: raise BadRequest("Must provide object") if subject_type and not predicate: raise BadRequest("Cannot provide subject type without a predicate") if type(obj) is str: object_id = obj else: if "_id" not in obj: raise BadRequest("Object id not available in object") else: object_id = obj._id qual_ds_name = self._get_datastore_name() view_args = self._get_view_args(kwargs) if id_only: query = "SELECT s, doc FROM %(dsa)s WHERE retired<>true " % dict(dsa=qual_ds_name+"_assoc") else: query = "SELECT %(ds)s.doc, %(dsa)s.doc FROM %(dsa)s, %(ds)s WHERE retired<>true AND %(dsa)s.s=%(ds)s.id " % dict(ds=qual_ds_name, dsa=qual_ds_name+"_assoc") query_args = dict(o=object_id, st=subject_type, p=predicate) query_clause = "AND o=%(o)s" if predicate: query_clause += " AND p=%(p)s" if subject_type: query_clause += " AND st=%(st)s" extra_clause = view_args.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() obj_assocs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows] #log.debug("find_subjects() found %s subjects", len(obj_assocs)) if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids, obj_assocs else: res_objs = [self._persistence_dict_to_ion_object(row[0]) for row in rows] return res_objs, obj_assocs def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True, anyside=None, **kwargs): if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if not (subject or obj or predicate or anyside): raise BadRequest("Illegal parameters: No S/P/O or anyside") 
#if assoc_type: # raise BadRequest("Illegal parameters: assoc_type deprecated") if anyside and (subject or obj): raise BadRequest("Illegal parameters: anyside cannot be combined with S/O") if anyside and predicate and type(anyside) in (list, tuple): raise BadRequest("Illegal parameters: anyside list cannot be combined with P") subject_id, object_id, anyside_ids = None, None, None if subject: if type(subject) is str: subject_id = subject else: if "_id" not in subject: raise BadRequest("Object id not available in subject") else: subject_id = subject._id if obj: if type(obj) is str: object_id = obj else: if "_id" not in obj: raise BadRequest("Object id not available in object") else: object_id = obj._id if anyside: if type(anyside) is str: anyside_ids = [anyside] elif type(anyside) in (list, tuple): if not all([type(o) in (str, list, tuple) for o in anyside]): raise BadRequest("List of object ids or (object id, predicate) expected") anyside_ids = anyside else: if "_id" not in anyside: raise BadRequest("Object id not available in anyside") else: anyside_ids = [anyside._id] #log.debug("find_associations(subject=%s, predicate=%s, object=%s, anyside=%s)", subject_id, predicate, object_id, anyside_ids) qual_ds_name = self._get_datastore_name() table = qual_ds_name + "_assoc" view_args = self._get_view_args(kwargs) if id_only: query = "SELECT id FROM " + table else: query = "SELECT id, doc, s, st, p, o, ot FROM " + table query_clause = " WHERE retired<>true AND " query_args = dict(s=subject_id, o=object_id, p=predicate) if subject and obj: query_clause += "s=%(s)s AND o=%(o)s" if predicate: query_clause += " AND p=%(p)s" elif subject: query_clause += "s=%(s)s" if predicate: query_clause += " AND p=%(p)s" elif obj: query_clause += "o=%(o)s" if predicate: query_clause += " AND p=%(p)s" elif anyside: if predicate: query_clause += "p=%(p)s AND (s=%(any)s OR o=%(any)s)" query_args["any"] = anyside elif type(anyside_ids[0]) is str: # keys are IDs of resources for i, key in 
enumerate(anyside_ids): if i > 0: query_clause += " OR " argname = "id%s" % i query_args[argname] = key query_clause += "(s=%("+argname+")s OR o=%("+argname+")s)" else: # keys are tuples of (id, pred) for i, (key, pred) in enumerate(anyside_ids): if i > 0: query_clause += " OR " argname_id = "id%s" % i argname_p = "p%s" % i query_args[argname_id] = key query_args[argname_p] = pred query_clause += "(p=%("+argname_p+")s AND (s=%("+argname_id+")s OR o=%("+argname_id+")s))" elif predicate: if predicate == "*": query_clause += "p is not null" else: query_clause += "p=%(p)s" else: raise BadRequest("Illegal arguments") extra_clause = view_args.get("extra_clause", "") sql = query + query_clause + extra_clause #print "find_associations(): SQL=", sql, query_args with self.pool.cursor(**self.cursor_args) as cur: cur.execute(sql, query_args) rows = cur.fetchall() if id_only: assocs = [self._prep_id(row[0]) for row in rows] else: assocs = [self._persistence_dict_to_ion_object(row[1]) for row in rows] #log.debug("find_associations() found %s associations", len(assocs)) return assocs def _prepare_find_return(self, rows, res_assocs=None, id_only=True, **kwargs): if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids, res_assocs else: res_docs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows] return res_docs, res_assocs def find_resources(self, restype="", lcstate="", name="", id_only=True): return self.find_resources_ext(restype=restype, lcstate=lcstate, name=name, id_only=id_only) def find_resources_ext(self, restype="", lcstate="", name="", keyword=None, nested_type=None, attr_name=None, attr_value=None, alt_id=None, alt_id_ns=None, limit=None, skip=None, descending=None, id_only=True): filter_kwargs = self._get_view_args(dict(limit=limit, skip=skip, descending=descending)) if name: if lcstate: raise BadRequest("find by name does not support lcstate") return self.find_res_by_name(name, restype, id_only, filter=filter_kwargs) elif keyword: 
return self.find_res_by_keyword(keyword, restype, id_only, filter=filter_kwargs) elif alt_id or alt_id_ns: return self.find_res_by_alternative_id(alt_id, alt_id_ns, id_only, filter=filter_kwargs) elif nested_type: return self.find_res_by_nested_type(nested_type, restype, id_only, filter=filter_kwargs) elif restype and attr_name: return self.find_res_by_attribute(restype, attr_name, attr_value, id_only=id_only, filter=filter_kwargs) elif restype and lcstate: return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs) elif restype: return self.find_res_by_type(restype, lcstate, id_only, filter=filter_kwargs) elif lcstate: return self.find_res_by_lcstate(lcstate, restype, id_only, filter=filter_kwargs) elif not restype and not lcstate and not name: return self.find_res_by_type(None, None, id_only, filter=filter_kwargs) def find_res_by_type(self, restype, lcstate=None, id_only=False, filter=None): log.debug("find_res_by_type(restype=%s, lcstate=%s)", restype, lcstate) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if lcstate: raise BadRequest('lcstate not supported anymore in find_res_by_type') filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_, lcstate FROM " + qual_ds_name else: query = "SELECT id, name, type_, lcstate, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'RETIRED' " query_args = dict(type_=restype, lcstate=lcstate) if restype: query_clause += "AND type_=%(type_)s" else: # Returns ALL documents, only limited by filter query_clause = "" extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows] log.debug("find_res_by_type() found %s objects", len(res_assocs)) return 
self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_lcstate(self, lcstate, restype=None, id_only=False, filter=None): log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)", lcstate, restype) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) if '_' in lcstate: log.warn("Search for compound lcstate restricted to maturity: %s", lcstate) lcstate,_ = lcstate.split("_", 1) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_, lcstate, availability FROM " + qual_ds_name else: query = "SELECT id, name, type_, lcstate, availability, doc FROM " + qual_ds_name query_clause = " WHERE " query_args = dict(type_=restype, lcstate=lcstate) is_maturity = lcstate not in CommonResourceLifeCycleSM.AVAILABILITY if is_maturity: query_clause += "lcstate=%(lcstate)s" else: query_clause += "availability=%(lcstate)s" if restype: query_clause += " AND type_=%(type_)s" extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2], lcstate=row[3] if is_maturity else row[4]) for row in rows] log.debug("find_res_by_lcstate() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_name(self, name, restype=None, id_only=False, filter=None): log.debug("find_res_by_name(name=%s, restype=%s)", name, restype) if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, name, type_ FROM " + qual_ds_name else: query = "SELECT id, name, type_, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'RETIRED' " query_args = dict(name=name, 
type_=restype) query_clause += "AND name=%(name)s" if restype: query_clause += " AND type_=%(type_)s" extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [dict(id=self._prep_id(row[0]), name=row[1], type=row[2]) for row in rows] log.debug("find_res_by_name() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_keyword(self, keyword, restype=None, id_only=False, filter=None): log.debug("find_res_by_keyword(keyword=%s, restype=%s)", keyword, restype) if not keyword or type(keyword) is not str: raise BadRequest('Argument keyword illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, type_ FROM " + qual_ds_name else: query = "SELECT id, type_, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'RETIRED' " query_args = dict(type_=restype, kw=[keyword]) query_clause += "AND %(kw)s <@ json_keywords(doc)" if restype: query_clause += " AND type_=%(type_)s" extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], keyword=keyword) for row in rows] log.debug("find_res_by_keyword() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_nested_type(self, nested_type, restype=None, id_only=False, filter=None): log.debug("find_res_by_nested_type(nested_type=%s, restype=%s)", nested_type, restype) if not nested_type or type(nested_type) is not str: raise BadRequest('Argument nested_type illegal') if type(id_only) is not bool: raise BadRequest('id_only must be 
type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, type_ FROM " + qual_ds_name else: query = "SELECT id, type_, doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'RETIRED' " query_args = dict(type_=restype, nest=[nested_type]) query_clause += "AND %(nest)s <@ json_nested(doc)" if restype: query_clause += " AND type_=%(type_)s" extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], nested_type=nested_type) for row in rows] log.debug("find_res_by_nested_type() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_attribute(self, restype, attr_name, attr_value=None, id_only=False, filter=None): log.debug("find_res_by_attribute(restype=%s, attr_name=%s, attr_value=%s)", restype, attr_name, attr_value) if not attr_name or type(attr_name) is not str: raise BadRequest('Argument attr_name illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() if id_only: query = "SELECT id, type_, json_specialattr(doc) FROM " + qual_ds_name else: query = "SELECT id, type_, json_specialattr(doc), doc FROM " + qual_ds_name query_clause = " WHERE lcstate<>'RETIRED' " query_args = dict(type_=restype, att=attr_name, val=attr_value) if attr_value is not None: query_clause += "AND json_specialattr(doc)=%(spc)s" query_args['spc'] = "%s=%s" % (attr_name, attr_value) else: query_clause += "AND json_specialattr(doc) LIKE %(spc)s" query_args['spc'] = "%s=%%" % (attr_name, ) if restype: query_clause += " AND type_=%(type_)s" extra_clause = filter.get("extra_clause", "") with 
self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() res_assocs = [dict(id=self._prep_id(row[0]), type=row[1], attr_name=attr_name, attr_value=row[2].split("=",1)[-1]) for row in rows] log.debug("find_res_by_attribute() found %s objects", len(res_assocs)) return self._prepare_find_return(rows, res_assocs, id_only=id_only) def find_res_by_alternative_id(self, alt_id=None, alt_id_ns=None, id_only=False, filter=None): log.debug("find_res_by_alternative_id(restype=%s, alt_id_ns=%s)", alt_id, alt_id_ns) if alt_id and type(alt_id) is not str: raise BadRequest('Argument alt_id illegal') if alt_id_ns and type(alt_id_ns) is not str: raise BadRequest('Argument alt_id_ns illegal') if type(id_only) is not bool: raise BadRequest('id_only must be type bool, not %s' % type(id_only)) filter = filter if filter is not None else {} qual_ds_name = self._get_datastore_name() #if id_only: # query = "SELECT id, x[1], x[2] FROM (SELECT json_altids(doc) as x, * FROM " + qual_ds_name + ") AS A" #else: # query = "SELECT id, x[1], x[2], doc FROM (SELECT json_altids(doc) as x, * FROM " + qual_ds_name + ") AS A" #query_args = dict(aid=alt_id, ans=alt_id_ns) #query_clause = " WHERE lcstate<>'RETIRED' " # #if not alt_id and not alt_id_ns: # query_clause += " " #elif alt_id and not alt_id_ns: # query_clause += " AND x[2]=%(aid)s" #elif alt_id_ns and not alt_id: # query_clause += " AND x[1]=%(ans)s" #else: # query_clause += " AND x[1]=%(ans)s AND x[2]=%(aid)s" query = "SELECT id, type_, doc FROM " + qual_ds_name query_args = dict(aid=[alt_id], ans=[alt_id_ns]) query_clause = " WHERE lcstate<>'RETIRED' " if not alt_id and not alt_id_ns: query_clause += "AND json_altids_ns(doc) is not null" elif alt_id and not alt_id_ns: query_clause += "AND %(aid)s <@ json_altids_id(doc)" elif alt_id_ns and not alt_id: query_clause += "AND %(ans)s <@ json_altids_ns(doc)" else: query_clause += "AND %(aid)s <@ json_altids_id(doc) AND %(ans)s <@ 
json_altids_ns(doc)" extra_clause = filter.get("extra_clause", "") with self.pool.cursor(**self.cursor_args) as cur: cur.execute(query + query_clause + extra_clause, query_args) rows = cur.fetchall() # Need to fake the return format of the Couch view for alt_ids. One record per alt_id, not one per resource. res_assocs = [] res_rows = [] for row in rows: doc_id = self._prep_id(row[0]) doc = row[-1] for aid in doc.get("alt_ids", []): aid_parts = aid.split(":", 1) aid_ns = aid_parts[0] if len(aid_parts)>1 else "_" aid_id = aid_parts[-1] if alt_id_ns and alt_id: if alt_id_ns == aid_ns and alt_id == aid_id: res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id)) res_rows.append((doc_id, doc)) elif (not alt_id_ns and not alt_id) or (alt_id_ns and alt_id_ns == aid_ns) or (alt_id and alt_id == aid_id): res_assocs.append(dict(id=doc_id, alt_id_ns=aid_ns, alt_id=aid_id)) res_rows.append((doc_id, doc)) log.debug("find_res_by_alternative_id() found %s objects", len(res_assocs)) return self._prepare_find_return(res_rows, res_assocs, id_only=id_only) def find_by_view(self, design_name, view_name, key=None, keys=None, start_key=None, end_key=None, id_only=True, convert_doc=True, **kwargs): """ Generic find function using a defined index @param design_name design document @param view_name view name @param key specific key to find @param keys list of keys to find @param start_key find range start value @param end_key find range end value @param id_only if True, the 4th element of each triple is the document @param convert_doc if True, make IonObject out of doc @retval Returns a list of 3-tuples: (document id, index key, index value or document) """ res_rows = self.find_docs_by_view(design_name=design_name, view_name=view_name, key=key, keys=keys, start_key=start_key, end_key=end_key, id_only=id_only, **kwargs) res_rows = [(rid, key, self._persistence_dict_to_ion_object(doc) if convert_doc and isinstance(doc, dict) else doc) for rid, key, doc in res_rows] 
log.debug("find_by_view() found %s objects" % (len(res_rows))) return res_rows def find_resources_mult(self, query): """ Find resources given a datastore query expression dict. @param query a dict representation of a datastore query @retval list of resource ids or resource objects matching query (dependent on id_only value) """ qual_ds_name = self._get_datastore_name() pqb = PostgresQueryBuilder(query, qual_ds_name) with self.pool.cursor(**self.cursor_args) as cur: cur.execute(pqb.get_query(), pqb.get_values()) rows = cur.fetchall() log.info("find_resources_mult() QUERY: %s (%s rows)", cur.query, cur.rowcount) id_only = query["query_args"].get("id_only", True) if id_only: res_ids = [self._prep_id(row[0]) for row in rows] return res_ids else: res_docs = [self._persistence_dict_to_ion_object(row[-1]) for row in rows] return res_docs # ------------------------------------------------------------------------- # Internal operations def _ion_object_to_persistence_dict(self, ion_object): if ion_object is None: return None obj_dict = self._io_serializer.serialize(ion_object) return obj_dict def _persistence_dict_to_ion_object(self, obj_dict): if obj_dict is None: return None ion_object = self._io_deserializer.deserialize(obj_dict) return ion_object
class IngestionManagementService(BaseIngestionManagementService):
    """
    Manages ingestion configurations, the ingestion worker transforms that
    belong to them, and per-dataset ingestion configurations.

    id_p = cc.spawn_process('ingestion_worker', 'ion.services.dm.ingestion.ingestion_management_service', 'IngestionManagementService')
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """

    base_exchange_name = 'ingestion_queue'

    def __init__(self):
        """
        Derive the exchange space (XS) and exchange point (XP) names from
        CFG.core_xps.science_data, which must have the form "xs.xp".

        @throws StandardError if the configured value is not "xs.xp"
        """
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            # Unpacking raises ValueError unless there is exactly one '.'
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError('Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

        self.serializer = IonObjectSerializer()
        # Resolved in on_start() or lazily in create_ingestion_configuration()
        self.process_definition_id = None

    def on_start(self):
        """
        Create the event publisher and pick up an already registered
        'ingestion_worker_process' process definition, if there is one.
        """
        super(IngestionManagementService, self).on_start()
        self.event_publisher = EventPublisher(event_type="DatasetIngestionConfigurationEvent")

        res_list, _ = self.clients.resource_registry.find_resources(
            restype=RT.ProcessDefinition,
            name='ingestion_worker_process',
            id_only=True)
        if len(res_list):
            self.process_definition_id = res_list[0]

    def on_quit(self):
        """Delegate to the base class; the process definition is left registered."""
        #self.clients.process_dispatcher.delete_process_definition(process_definition_id=self.process_definition_id)
        super(IngestionManagementService, self).on_quit()

    def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None, number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        @retval ingestion_configuration_id str
        """
        # Register the worker process definition on first use
        if self.process_definition_id is None:
            process_definition = ProcessDefinition(
                name='ingestion_worker_process',
                description='Worker transform process for ingestion of datasets')
            process_definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
            process_definition.executable['class'] = 'IngestionWorker'
            self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
                process_definition=process_definition)

        # Give each ingestion configuration its own queue name to receive data on
        exchange_name = 'ingestion_queue'

        ##------------------------------------------------------------------------------------
        ## declare our intent to subscribe to all messages on the exchange point
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(
            query=query,
            exchange_name=exchange_name,
            name='Ingestion subscription',
            description='Subscription for ingestion workers')

        ##------------------------------------------------------------------------------------------

        # create an ingestion_configuration instance and update the registry
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration(name=self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)

        ingestion_configuration_id, _ = self.clients.resource_registry.create(ingestion_configuration)

        self._launch_transforms(
            ingestion_configuration.number_of_workers,
            subscription_id,
            ingestion_configuration_id,
            ingestion_configuration,
            self.process_definition_id
        )
        return ingestion_configuration_id

    def _launch_transforms(self, number_of_workers, subscription_id, ingestion_configuration_id, ingestion_configuration, process_definition_id):
        """
        This method spawns the transform processes without activating them...
        Note: activating the transforms does the binding

        @param number_of_workers number of worker transforms to create
        @param subscription_id subscription shared by all workers
        @param ingestion_configuration_id id the workers are associated with
        @param ingestion_configuration IngestionConfiguration to serialize for the workers
        @param process_definition_id process definition used for each worker
        @throws IngestionManagementServiceException if a transform could not be created
        """
        description = 'Ingestion worker'

        # Workers receive a plain dict: the serialized configuration without
        # its type marker, plus the id of the configuration resource.
        configuration = self.serializer.serialize(ingestion_configuration)
        configuration.pop('type_')
        configuration['configuration_id'] = ingestion_configuration_id

        # launch the transforms
        for i in xrange(number_of_workers):
            name = '(%s)_Ingestion_Worker_%s' % (ingestion_configuration_id, i + 1)
            # Pass the prepared dict; previously the raw IonObject was passed
            # and the dict built above was never used.
            transform_id = self.clients.transform_management.create_transform(
                name=name,
                description=description,
                in_subscription_id=subscription_id,
                out_streams={},
                process_definition_id=process_definition_id,
                configuration=configuration)

            # create association between ingestion configuration and the transforms that act as Ingestion Workers
            if not transform_id:
                raise IngestionManagementServiceException('Transform could not be launched by ingestion.')
            self.clients.resource_registry.create_association(ingestion_configuration_id, PRED.hasTransform, transform_id)

    def update_ingestion_configuration(self, ingestion_configuration=None):
        """Change the number of workers or the default policy for ingesting data on each stream

        @param ingestion_configuration    IngestionConfiguration
        """
        log.debug("Updating ingestion configuration")
        self.clients.resource_registry.update(ingestion_configuration)

    def read_ingestion_configuration(self, ingestion_configuration_id=''):
        """Get an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @retval ingestion_configuration    IngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading ingestion configuration object id: %s", ingestion_configuration_id)
        ingestion_configuration = self.clients.resource_registry.read(ingestion_configuration_id)
        if ingestion_configuration is None:
            raise NotFound("Ingestion configuration %s does not exist" % ingestion_configuration_id)
        return ingestion_configuration

    def delete_ingestion_configuration(self, ingestion_configuration_id=''):
        """Delete an existing ingestion configuration object.

        Deletes the associated transforms, then the hasTransform associations,
        then the ingestion configuration resource itself.

        @param ingestion_configuration_id    str
        @throws NotFound    if no transforms are associated with the configuration
        """
        log.debug("Deleting ingestion configuration: %s", ingestion_configuration_id)

        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        #delete the transforms associated with the ingestion_configuration_id
        # find_objects returns (objects, associations); unpack it like the
        # other callers in this class so that transform_ids is the id list.
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)

        if not transform_ids:
            raise NotFound('No transforms associated with this ingestion configuration!')

        log.debug('len(transform_ids): %s', len(transform_ids))

        for transform_id in transform_ids:
            # To Delete - we need to actually remove each of the transforms
            self.clients.transform_management.delete_transform(transform_id)

        # delete the associations too...
        associations = self.clients.resource_registry.find_associations(ingestion_configuration_id, PRED.hasTransform)
        log.info('associations: %s', associations)
        for association in associations:
            self.clients.resource_registry.delete_association(association)
            #@todo How should we deal with failure?

        self.clients.resource_registry.delete(ingestion_configuration_id)

    def activate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Activate an ingestion configuration and the transform processes that execute it

        @param ingestion_configuration_id    str
        @retval True
        @throws NotFound    The ingestion configuration id did not exist
        """
        log.debug("Activating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # read the transforms
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' % str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only activate one
        self.clients.transform_management.activate_transform(transform_ids[0])

        return True

    def deactivate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Deactivate one of the transform processes that uses an ingestion configuration

        @param ingestion_configuration_id    str
        @retval True
        @throws NotFound    The ingestion configuration id did not exist
        """
        log.debug("Deactivating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # use the deactivate method in transformation management service
        transform_ids, _ = self.clients.resource_registry.find_objects(ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' % str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only deactivate one
        self.clients.transform_management.deactivate_transform(transform_ids[0])

        return True

    def create_dataset_configuration(self, dataset_id='', archive_data=True, archive_metadata=True, ingestion_configuration_id=''):
        """Create a configuration for ingestion of a particular dataset and associate it to a ingestion configuration.

        @param dataset_id    str
        @param archive_data    bool
        @param archive_metadata    bool
        @param ingestion_configuration_id    str
        @retval dataset_ingestion_configuration_id    str
        @throws IngestionManagementServiceException    if dataset_id is empty or the
                stream does not have exactly one stream definition
        """
        if not dataset_id:
            raise IngestionManagementServiceException('Must pass a dataset id to create_dataset_configuration')

        log.debug("Creating dataset configuration")

        dataset = self.clients.dataset_management.read_dataset(dataset_id=dataset_id)

        stream_id = dataset.primary_view_key

        # Read the stream to get the stream definition
        #stream = self.clients.pubsub_management.read_stream(stream_id=stream_id)

        # Get the associated stream definition!
        stream_defs, _ = self.clients.resource_registry.find_objects(stream_id, PRED.hasStreamDefinition)

        # Raised for zero definitions as well as for more than one
        if len(stream_defs) != 1:
            raise IngestionManagementServiceException('The stream is associated with more than one stream definition!')

        stream_def_resource = stream_defs[0]
        # Get the container object out of the stream def resource and set the stream id field in the local instance
        stream_def_container = stream_def_resource.container
        stream_def_container.stream_resource_id = stream_id

        # Get the ingestion configuration
        ingestion_configuration = self.clients.resource_registry.read(ingestion_configuration_id)
        couch_storage = ingestion_configuration.couch_storage

        log.info('Adding stream definition for stream "%s" to ingestion database "%s"', stream_id, couch_storage.datastore_name)
        db = self.container.datastore_manager.get_datastore(ds_name=couch_storage.datastore_name, config=self.CFG)

        # put it in couch db!
        try:
            db.create(stream_def_container)
        finally:
            # Release the datastore handle even if the create fails
            db.close()

        #@todo Add business logic to create the right kind of dataset ingestion configuration
        config = DatasetIngestionByStream(
            archive_data=archive_data,
            archive_metadata=archive_metadata,
            stream_id=stream_id,
            dataset_id=dataset_id)

        dset_ingest_config = DatasetIngestionConfiguration(
            name='Dataset config %s' % dataset_id,
            description='configuration for dataset %s' % dataset_id,
            configuration=config,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM
        )

        dset_ingest_config_id, _ = self.clients.resource_registry.create(dset_ingest_config)

        self.clients.resource_registry.create_association(dset_ingest_config_id, PRED.hasIngestionConfiguration, ingestion_configuration_id)

        self.clients.resource_registry.create_association(dataset_id, PRED.hasIngestionConfiguration, ingestion_configuration_id)

        self.event_publisher.publish_event(
            origin=ingestion_configuration_id,  # Use the ingestion configuration ID as the origin!
            description=dset_ingest_config.description,
            configuration=config,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dset_ingest_config_id
        )

        return dset_ingest_config_id

    def update_dataset_config(self, dataset_ingestion_configuration=None):
        """Update the ingestion configuration for a dataset

        @param dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws IngestionManagementServiceException    if the dataset configuration is not
                associated with exactly one ingestion configuration
        """
        #@todo - make it an exception to change the dataset_id or the stream_id in the dataset config!

        log.info('dataset configuration to update: %s', dataset_ingestion_configuration)

        log.debug("Updating dataset config")
        dset_ingest_config_id, rev = self.clients.resource_registry.update(dataset_ingestion_configuration)

        ingest_config_ids, _ = self.clients.resource_registry.find_objects(dset_ingest_config_id, PRED.hasIngestionConfiguration, id_only=True)

        # Raised for zero associated configurations as well as for more than one
        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException('The dataset ingestion configuration is associated with more than one ingestion configuration!')

        ingest_config_id = ingest_config_ids[0]

        #@todo - what is it okay to update?
        self.event_publisher.publish_event(
            origin=ingest_config_id,
            description=dataset_ingestion_configuration.description,
            configuration=dataset_ingestion_configuration.configuration,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dset_ingest_config_id
        )

    def read_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Get an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @retval dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading dataset configuration")
        dataset_ingestion_configuration = self.clients.resource_registry.read(dataset_ingestion_configuration_id)

        return dataset_ingestion_configuration

    def delete_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Delete an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        @throws IngestionManagementServiceException    if the dataset configuration is not
                associated with exactly one ingestion configuration
        """
        # Read first: the configuration payload is needed for the event below
        dataset_ingestion_configuration = self.clients.resource_registry.read(dataset_ingestion_configuration_id)

        log.debug("Deleting dataset configuration")
        # NOTE(review): the resource is deleted before its associations are
        # looked up; this presumably relies on the registry keeping the
        # associations of a deleted resource - confirm.
        self.clients.resource_registry.delete(dataset_ingestion_configuration_id)

        ingest_config_ids, association_ids = self.clients.resource_registry.find_objects(dataset_ingestion_configuration_id, PRED.hasIngestionConfiguration, id_only=True)

        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException('The dataset ingestion configuration is associated with more than one ingestion configuration!')

        ingest_config_id = ingest_config_ids[0]

        self.clients.resource_registry.delete_association(association=association_ids[0])

        self.event_publisher.publish_event(
            origin=ingest_config_id,
            configuration=dataset_ingestion_configuration.configuration,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dataset_ingestion_configuration_id,
            deleted=True
        )
class MockDB_DataStore(DataStore):
    """
    Data store implementation utilizing in-memory dict of dicts
    to persist documents.

    self.root maps a data store name to a dict. For every stored document
    with id <id> that dict holds three kinds of entries:
      <id>                       the HEAD (latest) document
      <id>_version_<n>           the document as stored at revision n
      __<id>_version_counter     int, the latest revision number
    """

    def __init__(self, datastore_name='prototype'):
        """
        @param datastore_name  name used whenever a method is called without
                               an explicit datastore_name
        """
        self.datastore_name = datastore_name
        log.debug('Creating in-memory dict of dicts that will simulate data stores')
        self.root = {}

        # serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    def create_datastore(self, datastore_name="", create_indexes=True):
        """
        Create an empty data store.
        @param create_indexes  accepted but not used by this implementation
        @throws BadRequest if a data store with that name already exists
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Creating data store %s' % datastore_name)
        if self.datastore_exists(datastore_name):
            raise BadRequest("Data store with name %s already exists" % datastore_name)
        if datastore_name not in self.root:
            self.root[datastore_name] = {}

    def delete_datastore(self, datastore_name=""):
        """
        Remove a data store and everything in it. A missing data store is
        only logged, not treated as an error.
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Deleting data store %s' % datastore_name)
        if datastore_name in self.root:
            del self.root[datastore_name]
        else:
            log.info('Data store %s does not exist' % datastore_name)

    def list_datastores(self):
        """
        @retval the names of all data stores (keys of self.root)
        """
        log.debug('Listing all data stores')
        dsList = self.root.keys()
        log.debug('Data stores: %s' % str(dsList))
        return dsList

    def info_datastore(self, datastore_name=""):
        """
        @retval a fixed informational string when the data store exists
        @throws BadRequest if the data store does not exist
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing information about data store %s' % datastore_name)
        if datastore_name in self.root:
            info = 'Data store exists'
        else:
            raise BadRequest("Data store with name %s does not exist" % datastore_name)
        log.debug('Data store info: %s' % str(info))
        return info

    def datastore_exists(self, datastore_name=""):
        """
        @retval True if a data store with exactly this name exists.
        Note: unlike the other methods, an empty name is NOT replaced by the
        default data store name here.
        """
        return datastore_name in self.root

    def list_objects(self, datastore_name=""):
        """
        @retval list of ids of the HEAD documents in the data store
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all objects in data store %s' % datastore_name)
        objs = []
        for key, value in self.root[datastore_name].items():
            # Skip the bookkeeping entries (revision copies and counters)
            if key.find('_version_counter') == -1 and key.find('_version_') == -1:
                objs.append(key)
        log.debug('Objects: %s' % str(objs))
        return objs

    def list_object_revisions(self, object_id, datastore_name=""):
        """
        @retval list of the '<object_id>_version_<n>' keys stored for object_id
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all versions of object %s/%s' % (datastore_name, str(object_id)))
        res = []
        for key, value in self.root[datastore_name].items():
            if (key.find('_version_counter') == -1
                    and (key.find(object_id + '_version_') == 0)):
                res.append(key)
        log.debug('Versions: %s' % str(res))
        return res

    def create(self, obj, object_id=None, datastore_name=""):
        """
        Serialize an Ion object and store it as a new document.
        @retval [object_id, revision] as returned by create_doc
        @throws BadRequest if obj is not an IonObjectBase
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id, datastore_name=datastore_name)

    def create_doc(self, doc, object_id=None, datastore_name=""):
        """
        Store a new document dict at revision "1".
        The passed dict is modified in place: '_id' and '_rev' are set on it.
        @param object_id  id to use; a uuid4 hex string is generated if empty
        @retval [object_id, "1"]
        @throws BadRequest if doc already carries '_id' or '_rev', if the data
                store does not exist, or if object_id is already in use
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")
        if '_rev' in doc:
            raise BadRequest("Doc must not have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        if object_id:
            if object_id in datastore_dict:
                raise BadRequest("Object with id %s already exist" % object_id)

        # Assign an id to doc
        doc["_id"] = object_id or uuid4().hex
        object_id = doc["_id"]

        log.debug('Creating new object %s/%s' % (datastore_name, object_id))

        # Create key for version counter entry.  Will be used
        # on update to increment version number easily.
        version_counter_key = '__' + object_id + '_version_counter'
        version_counter = 1

        # Assign initial version to doc
        doc["_rev"] = str(version_counter)

        # Write HEAD, version and version counter dicts
        # (HEAD and the version entry reference the same dict object)
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc

        # Return list that identifies the id of the new doc and its version
        res = [object_id, str(version_counter)]
        log.debug('Create result: %s' % str(res))
        return res

    def create_mult(self, objects, object_ids=None):
        """
        Serialize and store several Ion objects in the default data store.
        @retval list of (True, object_id, revision) tuples
        @throws BadRequest if any element is not an IonObjectBase
        """
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc_mult(
            [self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids)

    def create_doc_mult(self, docs, object_ids=None):
        """
        Store several document dicts in the default data store.
        @param object_ids  optional ids, one per doc; generated when omitted
        @retval list of (True, object_id, revision) tuples, in input order
        @throws BadRequest if any doc carries '_id' or '_rev', or if
                object_ids is given with a different length than docs
        """
        if any(["_id" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_id'")
        if any(["_rev" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_rev'")
        if object_ids and len(object_ids) != len(docs):
            raise BadRequest("Invalid object_ids")

        # Assign an id to doc (recommended in CouchDB documentation)
        object_ids = object_ids or [uuid4().hex for i in xrange(len(docs))]

        res = []
        for doc, oid in zip(docs, object_ids):
            oid, rev = self.create_doc(doc, oid)
            res.append((True, oid, rev))
        return res

    def read(self, object_id, rev_id="", datastore_name=""):
        """
        Read a document and convert it into an Ion object.
        @param rev_id  revision to read; HEAD when empty or None
        @throws BadRequest if object_id is not a str
        @throws NotFound (from read_doc) if the document does not exist
        """
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")
        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s' % str(obj))
        return obj

    def read_doc(self, object_id, rev_id="", datastore_name=""):
        """
        Return the stored document dict (the stored object itself, not a copy).
        @param rev_id  revision to read; HEAD when empty or None
        @throws BadRequest if the data store does not exist
        @throws NotFound if the document or revision does not exist
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        try:
            key = object_id
            if rev_id != None and rev_id != "":
                log.debug('Reading version %s of object %s/%s' % (str(rev_id), datastore_name, str(object_id)))
                key += '_version_' + str(rev_id)
            else:
                log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id)))
            doc = datastore_dict[key]
        except KeyError:
            raise NotFound('Object with id %s does not exist.' % str(object_id))
        log.debug('Read result: %s' % str(doc))
        return doc

    def read_mult(self, object_ids, datastore_name=""):
        """
        Read the HEAD revision of several documents as Ion objects.
        @throws BadRequest if any id is not a str
        """
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object id param is not string")
        docs = self.read_doc_mult(object_ids, datastore_name)

        # Convert docs into Ion objects
        obj_list = [self._persistence_dict_to_ion_object(doc) for doc in docs]
        return obj_list

    def read_doc_mult(self, object_ids, datastore_name=""):
        """
        Return shallow copies of the HEAD document dicts for the given ids.
        @throws BadRequest if the data store does not exist
        @throws NotFound on the first id that does not exist
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        doc_list = []
        try:
            for object_id in object_ids:
                log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id)))
                doc = datastore_dict[object_id]
                doc_list.append(doc.copy())
        except KeyError:
            raise NotFound('Object with id %s does not exist.' % str(object_id))
        return doc_list

    def update(self, obj, datastore_name=""):
        """
        Serialize an Ion object and store it as the next revision.
        @retval [object_id, new_revision] as returned by update_doc
        @throws BadRequest if obj is not an IonObjectBase
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        # Forward datastore_name so the update lands in the requested store
        # (create() and delete() forward it the same way).
        return self.update_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def update_doc(self, doc, datastore_name=""):
        """
        Store doc as the next revision of the document identified by doc['_id'].
        The passed dict is modified in place: '_rev' is set to the new revision.
        @retval [object_id, new_revision]
        @throws BadRequest if '_id'/'_rev' are missing, the data store does not
                exist, or no version counter exists for the id
        @throws Conflict if doc['_rev'] is not the current revision
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")
        if '_rev' not in doc:
            raise BadRequest("Doc must have '_rev'")
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        try:
            object_id = doc["_id"]

            # Find the next doc version
            version_counter_key = '__' + object_id + '_version_counter'
            baseVersion = doc["_rev"]
            version_counter = datastore_dict[version_counter_key] + 1
            if baseVersion != str(version_counter - 1):
                raise Conflict('Object not based on most current version')
        except KeyError:
            # Also reached when the id has never been created in this store
            raise BadRequest("Object missing required _id and/or _rev values")

        log.debug('Saving new version of object %s/%s' % (datastore_name, doc["_id"]))
        doc["_rev"] = str(version_counter)

        # Overwrite HEAD and version counter dicts, add new version dict
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc
        res = [object_id, str(version_counter)]
        log.debug('Update result: %s' % str(res))
        return res

    def delete(self, obj, datastore_name=""):
        """
        Delete a document given either an Ion object or its string id.
        @throws BadRequest if obj is neither an IonObjectBase nor a str
        """
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest("Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            return self.delete_doc(obj, datastore_name=datastore_name)
        return self.delete_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def delete_doc(self, doc, datastore_name=""):
        """
        Delete the HEAD entry, all revision entries and the version counter
        of a document.
        @param doc  string id, or a dict carrying '_id'
        @throws BadRequest if the data store does not exist
        @throws NotFound if the document does not exist
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        if type(doc) is str:
            object_id = doc
        else:
            object_id = doc["_id"]

        log.info('Deleting object %s/%s' % (datastore_name, object_id))
        if object_id in datastore_dict:
            if self._is_in_association(object_id, datastore_name):
                obj = self.read(object_id, "", datastore_name)
                # Only a warning for now; the delete still proceeds
                log.warn("XXXXXXX Attempt to delete object %s that still has associations" % str(obj))
#                raise BadRequest("Object cannot be deleted until associations are broken")

            # Find all version dicts and delete them.
            # Iterate over a snapshot of the keys since entries are removed
            # from the dict inside the loop.
            for key in list(datastore_dict.keys()):
                if key.find(object_id + '_version_') == 0:
                    del datastore_dict[key]

            # Delete the HEAD dict
            del datastore_dict[object_id]

            # Delete the version counter dict
            del datastore_dict['__' + object_id + '_version_counter']
        else:
            raise NotFound('Object with id ' + object_id + ' does not exist.')
        log.info('Delete result: True')

    def _is_in_association(self, obj_id, datastore_name=""):
        """
        @retval True if any stored Association document has obj_id as its
                subject ('s') or object ('o')
        @throws BadRequest if obj_id is empty or the data store does not exist
        """
        log.debug("_is_in_association(%s)" % obj_id)
        if not obj_id:
            raise BadRequest("Must provide object id")
        if not datastore_name:
            datastore_name = self.datastore_name
        try:
            datastore_dict = self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name + ' does not exist.')

        for objname, obj in datastore_dict.iteritems():
            # Only look at HEAD documents: skip revision/counter entries
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and obj['type_'] == "Association":
                association = obj
                if association["s"] == obj_id or association["o"] == obj_id:
                    log.debug("association found(%s)" % association)
                    return True
        return False

    def find_objects(self, subject, predicate=None, object_type=None, id_only=False):
        """
        Find the objects associated with a subject in the default data store.
        @param subject      string id, or an object carrying '_id'
        @param predicate    if given, only associations with this predicate
        @param object_type  if given together with predicate, only
                            associations whose 'ot' equals it
        @retval (list of target ids or Ion objects, list of association dicts)
        @throws BadRequest on a bad id_only, missing subject, missing data
                store or a subject without '_id'
        """
        log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s" % (subject, predicate, object_type, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if type(subject) is str:
            subject_id = subject
        else:
            if "_id" not in subject:
                raise BadRequest("Object id not available in subject")
            else:
                subject_id = subject._id

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and obj['type_'] == "Association":
                if obj['s'] == subject_id:
                    if predicate and obj['p'] == predicate:
                        if (object_type and obj['ot'] == object_type) or not object_type:
                            assoc_list.append(obj)
                            target_id_list.append(obj['o'])
                            target_list.append(self.read(obj['o']))
                    elif not predicate:
                        # No predicate filter: object_type is not checked here
                        assoc_list.append(obj)
                        target_id_list.append(obj['o'])
                        target_list.append(self.read(obj['o']))

        log.debug("find_objects() found %s objects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False):
        """
        Find the subjects associated with an object in the default data store.
        @param subject_type  if given together with predicate, only
                             associations whose 'st' equals it
        @param predicate     if given, only associations with this predicate
        @param obj           string id, or an object carrying '_id'
        @retval (list of subject ids or Ion objects, list of association dicts)
        @throws BadRequest on a bad id_only, missing obj, missing data store
                or an obj without '_id'
        """
        log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s" % (subject_type, predicate, obj, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if type(obj) is str:
            object_id = obj
        else:
            if "_id" not in obj:
                raise BadRequest("Object id not available in object")
            else:
                object_id = obj._id

        assoc_list = []
        target_id_list = []
        target_list = []
        # Loop variable is named 'doc' so it does not shadow the 'obj' parameter
        for objname, doc in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(doc) is dict):
                continue
            if 'type_' in doc and doc['type_'] == "Association":
                if doc['o'] == object_id:
                    if predicate and doc['p'] == predicate:
                        if (subject_type and doc['st'] == subject_type) or not subject_type:
                            assoc_list.append(doc)
                            target_id_list.append(doc['s'])
                            target_list.append(self.read(doc['s']))
                    elif not predicate:
                        # No predicate filter: subject_type is not checked here
                        assoc_list.append(doc)
                        target_id_list.append(doc['s'])
                        target_list.append(self.read(doc['s']))

        log.debug("find_subjects() found %s subjects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_associations(self, subject=None, predicate=None, obj=None, assoc_type=None, id_only=True):
        """
        Find associations in the default data store, either by subject and
        object (optionally narrowed by assoc_type; predicate is ignored in
        this mode) or, when subject and obj are not both given, by predicate.
        @retval list of association ids (id_only) or association Ion objects
        @throws BadRequest on a bad id_only, if neither (subject and obj) nor
                predicate is given, or if the data store does not exist
        """
        log.debug("find_associations(subject=%s, predicate=%s, object=%s, assoc_type=%s)" % (subject, predicate, obj, assoc_type))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        if subject and obj or predicate:
            pass
        else:
            raise BadRequest("Illegal parameters")
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if subject and obj:
            if type(subject) is str:
                subject_id = subject
            else:
                if "_id" not in subject:
                    raise BadRequest("Object id not available in subject")
                else:
                    subject_id = subject._id
            if type(obj) is str:
                object_id = obj
            else:
                if "_id" not in obj:
                    raise BadRequest("Object id not available in object")
                else:
                    object_id = obj._id

            target_list = []
            # Loop variable is named 'doc' so it does not shadow the 'obj' parameter
            for objname, doc in datastore_dict.iteritems():
                if (objname.find('_version_') > 0) or (not type(doc) is dict):
                    continue
                if 'type_' in doc and doc['type_'] == "Association":
                    if doc['s'] == subject_id and doc['o'] == object_id:
                        if assoc_type:
                            if doc['at'] == assoc_type:
                                target_list.append(doc)
                        else:
                            target_list.append(doc)
        else:
            target_list = []
            for objname, doc in datastore_dict.iteritems():
                if (objname.find('_version_') > 0) or (not type(doc) is dict):
                    continue
                if 'type_' in doc and doc['type_'] == "Association":
                    if doc['p'] == predicate:
                        target_list.append(doc)

        if id_only:
            assocs = [row['_id'] for row in target_list]
        else:
            assocs = [self._persistence_dict_to_ion_object(row) for row in target_list]
        log.debug("find_associations() found %s associations" % (len(assocs)))
        return assocs

    def find_res_by_type(self, restype, lcstate=None, id_only=False):
        """
        Find documents by 'type_' in the default data store.
        With an empty restype every non-Association document matches and
        lcstate is not applied.
        @retval (list of ids or Ion objects, list with one [] per match)
        @throws BadRequest on a bad id_only or a missing data store
        """
        log.debug("find_res_by_type(restype=%s, lcstate=%s)" % (restype, lcstate))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'type_' in obj and (obj['type_'] == restype or (not restype and obj['type_'] != "Association")):
                if (lcstate and 'lcstate' in obj and obj['lcstate'] == lcstate) or not lcstate or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_type() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False):
        """
        Find documents by 'lcstate' in the default data store.
        @param lcstate  a state, or a key of
                        CommonResourceLifeCycleSM.STATE_ALIASES, in which case
                        the aliased states are matched
        @param restype  if given, only documents with this 'type_'
        @retval (list of ids or Ion objects, list with one [] per match)
        @throws BadRequest on a bad id_only or a missing data store
        """
        log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)" % (lcstate, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        if lcstate in CommonResourceLifeCycleSM.STATE_ALIASES:
            lcstate_match = CommonResourceLifeCycleSM.STATE_ALIASES[lcstate]
        else:
            lcstate_match = [lcstate]

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'lcstate' in obj and obj['lcstate'] in lcstate_match:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_lcstate() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def _pass(self):
        """No-op."""
        pass

    def find_res_by_name(self, name, restype=None, id_only=False):
        """
        Find documents by 'name' in the default data store.
        @param restype  if given, only documents with this 'type_'
        @retval (list of ids or Ion objects, list with one [] per match)
        @throws BadRequest on a bad id_only or a missing data store
        """
        log.debug("find_res_by_name(name=%s, restype=%s)" % (name, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' % type(id_only))
        try:
            datastore_dict = self.root[self.datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + self.datastore_name + ' does not exist.')

        assoc_list = []
        target_id_list = []
        target_list = []
        for objname, obj in datastore_dict.iteritems():
            if (objname.find('_version_') > 0) or (not type(obj) is dict):
                continue
            if 'name' in obj and obj['name'] == name:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_name() found %s resources" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_dir_entries(self, qname):
        """Not supported by this implementation."""
        raise NotImplementedError()

    def _ion_object_to_persistence_dict(self, ion_object):
        """
        @retval the serialized form of ion_object, or None if it is None
        """
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        """
        @retval the Ion object deserialized from obj_dict, or None if it is None
        """
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
class IngestionManagementService(BaseIngestionManagementService):
    """
    Service that manages ingestion configurations, the ingestion worker
    transforms that execute them, and per-dataset ingestion configurations.

    Example of spawning the service from a container shell:

    id_p = cc.spawn_process('ingestion_worker', 'ion.services.dm.ingestion.ingestion_management_service', 'IngestionManagementService')
    cc.proc_manager.procs['%s.%s' %(cc.id,id_p)].start()
    """

    base_exchange_name = 'ingestion_queue'

    def __init__(self):
        """
        Derive the exchange space (XS) and exchange point (XP) names from
        CFG.core_xps.science_data, which must have the form "xs.xp".
        @throws StandardError if the CFG value does not split into two parts
        """
        BaseIngestionManagementService.__init__(self)

        xs_dot_xp = CFG.core_xps.science_data
        try:
            self.XS, xp_base = xs_dot_xp.split('.')
            self.XP = '.'.join([bootstrap.get_sys_name(), xp_base])
        except ValueError:
            raise StandardError(
                'Invalid CFG for core_xps.science_data: "%s"; must have "xs.xp" structure' % xs_dot_xp)

        self.serializer = IonObjectSerializer()
        # Resolved in on_start, or created lazily in create_ingestion_configuration
        self.process_definition_id = None

    def on_start(self):
        """
        Create the event publisher and reuse an existing
        'ingestion_worker_process' process definition if one is registered.
        """
        super(IngestionManagementService, self).on_start()
        self.event_publisher = EventPublisher(
            event_type="DatasetIngestionConfigurationEvent")

        res_list, _ = self.clients.resource_registry.find_resources(
            restype=RT.ProcessDefinition,
            name='ingestion_worker_process',
            id_only=True)
        if len(res_list):
            self.process_definition_id = res_list[0]

    def on_quit(self):
        """Delegate to the base class; the process definition is left in place."""
        #self.clients.process_dispatcher.delete_process_definition(process_definition_id=self.process_definition_id)
        super(IngestionManagementService, self).on_quit()

    def create_ingestion_configuration(self, exchange_point_id='', couch_storage=None, hdf_storage=None, number_of_workers=0):
        """
        @brief Setup ingestion workers to ingest all the data from a single exchange point.
        @param exchange_point_id is the resource id for the exchange point to ingest from
               (currently not used; the configuration is named after self.XP)
        @param couch_storage is the specification of the couch database to use
        @param hdf_storage is the specification of the filesystem to use for hdf data files
        @param number_of_workers is the number of ingestion workers to create
        @retval ingestion_configuration_id str
        """
        if self.process_definition_id is None:
            process_definition = ProcessDefinition(
                name='ingestion_worker_process',
                description='Worker transform process for ingestion of datasets')
            process_definition.executable['module'] = 'ion.processes.data.ingestion.ingestion_worker'
            process_definition.executable['class'] = 'IngestionWorker'
            self.process_definition_id = self.clients.process_dispatcher.create_process_definition(
                process_definition=process_definition)

        # Give each ingestion configuration its own queue name to receive data on
        exchange_name = 'ingestion_queue'

        ##------------------------------------------------------------------------------------
        ## declare our intent to subscribe to all messages on the exchange point
        query = ExchangeQuery()

        subscription_id = self.clients.pubsub_management.create_subscription(
            query=query,
            exchange_name=exchange_name,
            name='Ingestion subscription',
            description='Subscription for ingestion workers')

        ##------------------------------------------------------------------------------------------

        # create an ingestion_configuration instance and update the registry
        # @todo: right now sending in the exchange_point_id as the name...
        ingestion_configuration = IngestionConfiguration(name=self.XP)
        ingestion_configuration.description = '%s exchange point ingestion configuration' % self.XP
        ingestion_configuration.number_of_workers = number_of_workers

        if hdf_storage is not None:
            ingestion_configuration.hdf_storage.update(hdf_storage)

        if couch_storage is not None:
            ingestion_configuration.couch_storage.update(couch_storage)

        ingestion_configuration_id, _ = self.clients.resource_registry.create(
            ingestion_configuration)

        self._launch_transforms(
            ingestion_configuration.number_of_workers,
            subscription_id,
            ingestion_configuration_id,
            ingestion_configuration,
            self.process_definition_id)
        return ingestion_configuration_id

    def _launch_transforms(self, number_of_workers, subscription_id, ingestion_configuration_id, ingestion_configuration, process_definition_id):
        """
        This method spawns the ingestion worker transform processes without
        activating them...Note: activating the transforms does the binding

        Creates number_of_workers transforms on the given subscription and
        associates each with the ingestion configuration via PRED.hasTransform.
        @throws IngestionManagementServiceException if a transform id comes back empty
        """
        description = 'Ingestion worker'

        configuration = self.serializer.serialize(ingestion_configuration)
        configuration.pop('type_')
        configuration['configuration_id'] = ingestion_configuration_id
        # NOTE(review): the serialized 'configuration' dict built above is never
        # used; create_transform below receives the unserialized
        # ingestion_configuration object. This looks unintended - confirm what
        # create_transform expects before switching the argument.

        # launch the transforms
        for i in xrange(number_of_workers):
            name = '(%s)_Ingestion_Worker_%s' % (ingestion_configuration_id, i + 1)
            transform_id = self.clients.transform_management.create_transform(
                name=name,
                description=description,
                in_subscription_id=subscription_id,
                out_streams={},
                process_definition_id=process_definition_id,
                configuration=ingestion_configuration)

            # create association between ingestion configuration and the transforms that act as Ingestion Workers
            if not transform_id:
                raise IngestionManagementServiceException(
                    'Transform could not be launched by ingestion.')
            self.clients.resource_registry.create_association(
                ingestion_configuration_id, PRED.hasTransform, transform_id)

    def update_ingestion_configuration(self, ingestion_configuration=None):
        """Change the number of workers or the default policy for ingesting data on each stream

        @param ingestion_configuration    IngestionConfiguration
        """
        log.debug("Updating ingestion configuration")
        # Returned (id, rev) pair is not needed by the caller
        _id, _rev = self.clients.resource_registry.update(
            ingestion_configuration)

    def read_ingestion_configuration(self, ingestion_configuration_id=''):
        """Get an existing ingestion configuration object.

        @param ingestion_configuration_id    str
        @retval ingestion_configuration    IngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading ingestion configuration object id: %s",
                  ingestion_configuration_id)
        ingestion_configuration = self.clients.resource_registry.read(
            ingestion_configuration_id)
        if ingestion_configuration is None:
            raise NotFound("Ingestion configuration %s does not exist" %
                           ingestion_configuration_id)
        return ingestion_configuration

    def delete_ingestion_configuration(self, ingestion_configuration_id=''):
        """Delete an existing ingestion configuration object, its worker
        transforms and the associations to them.

        @param ingestion_configuration_id    str
        @throws NotFound    if no transforms are associated with the ingestion configuration
        """
        log.debug("Deleting ingestion configuration: %s",
                  ingestion_configuration_id)

        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        #delete the transforms associated with the ingestion_configuration_id
        # find_objects returns (targets, associations); unpack so that
        # transform_ids is the list of ids, as in activate/deactivate below.
        transform_ids, _ = self.clients.resource_registry.find_objects(
            ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)

        if len(transform_ids) < 1:
            raise NotFound(
                'No transforms associated with this ingestion configuration!')

        log.debug('len(transform_ids): %s' % len(transform_ids))

        for transform_id in transform_ids:
            # To Delete - we need to actually remove each of the transforms
            self.clients.transform_management.delete_transform(transform_id)

        # delete the associations too...
        associations = self.clients.resource_registry.find_associations(
            ingestion_configuration_id, PRED.hasTransform)
        log.info('associations: %s' % associations)
        for association in associations:
            self.clients.resource_registry.delete_association(association)
            #@todo How should we deal with failure?

        self.clients.resource_registry.delete(ingestion_configuration_id)

    def activate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Activate an ingestion configuration and the transform processes that execute it

        @param ingestion_configuration_id    str
        @retval True
        @throws NotFound    The ingestion configuration id did not exist
                            (no transforms are associated with it)
        """
        log.debug("Activating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # read the transforms
        transform_ids, _ = self.clients.resource_registry.find_objects(
            ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' %
                           str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only activate one
        self.clients.transform_management.activate_transform(transform_ids[0])

        return True

    def deactivate_ingestion_configuration(self, ingestion_configuration_id=''):
        """Deactivate one of the transform processes that uses an ingestion configuration

        @param ingestion_configuration_id    str
        @retval True
        @throws NotFound    The ingestion configuration id did not exist
                            (no transforms are associated with it)
        """
        log.debug("Deactivating ingestion configuration")

        # check whether the ingestion configuration object exists
        #ingestion_configuration = self.read_ingestion_configuration(ingestion_configuration_id)
        #@todo Should we check to see if the ingestion configuration exists?

        # use the deactivate method in transformation management service
        transform_ids, _ = self.clients.resource_registry.find_objects(
            ingestion_configuration_id, PRED.hasTransform, RT.Transform, True)
        if len(transform_ids) < 1:
            raise NotFound('The ingestion configuration %s does not exist' %
                           str(ingestion_configuration_id))

        # since all ingestion worker transforms have the same subscription, only deactivate one
        self.clients.transform_management.deactivate_transform(
            transform_ids[0])

        return True

    def create_dataset_configuration(self, dataset_id='', archive_data=True, archive_metadata=True, ingestion_configuration_id=''):
        """Create a configuration for ingestion of a particular dataset and associate it to a ingestion configuration.

        @param dataset_id    str
        @param archive_data    bool
        @param archive_metadata    bool
        @param ingestion_configuration_id    str
        @retval dataset_ingestion_configuration_id    str
        @throws IngestionManagementServiceException    if dataset_id is empty, or the
                dataset's stream does not have exactly one stream definition
        """
        if not dataset_id:
            raise IngestionManagementServiceException(
                'Must pass a dataset id to create_dataset_configuration')

        log.debug("Creating dataset configuration")

        dataset = self.clients.dataset_management.read_dataset(
            dataset_id=dataset_id)

        stream_id = dataset.primary_view_key

        # Read the stream to get the stream definition
        #stream = self.clients.pubsub_management.read_stream(stream_id=stream_id)

        # Get the associated stream definition!
        stream_defs, _ = self.clients.resource_registry.find_objects(
            stream_id, PRED.hasStreamDefinition)

        # Exactly one is required; note the message is also used when none is found
        if len(stream_defs) != 1:
            raise IngestionManagementServiceException(
                'The stream is associated with more than one stream definition!')

        stream_def_resource = stream_defs[0]
        # Get the container object out of the stream def resource and set the stream id field in the local instance
        stream_def_container = stream_def_resource.container
        stream_def_container.stream_resource_id = stream_id

        # Get the ingestion configuration
        ingestion_configuration = self.clients.resource_registry.read(
            ingestion_configuration_id)
        couch_storage = ingestion_configuration.couch_storage

        log.info(
            'Adding stream definition for stream "%s" to ingestion database "%s"'
            % (stream_id, couch_storage.datastore_name))
        db = self.container.datastore_manager.get_datastore(
            ds_name=couch_storage.datastore_name, config=self.CFG)

        # put it in couch db!
        try:
            db.create(stream_def_container)
        finally:
            # Release the datastore handle even if the create fails
            db.close()

        #@todo Add business logic to create the right kind of dataset ingestion configuration
        config = DatasetIngestionByStream(
            archive_data=archive_data,
            archive_metadata=archive_metadata,
            stream_id=stream_id,
            dataset_id=dataset_id)

        dset_ingest_config = DatasetIngestionConfiguration(
            name='Dataset config %s' % dataset_id,
            description='configuration for dataset %s' % dataset_id,
            configuration=config,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM)

        dset_ingest_config_id, _ = self.clients.resource_registry.create(
            dset_ingest_config)

        self.clients.resource_registry.create_association(
            dset_ingest_config_id, PRED.hasIngestionConfiguration,
            ingestion_configuration_id)

        self.clients.resource_registry.create_association(
            dataset_id, PRED.hasIngestionConfiguration,
            ingestion_configuration_id)

        self.event_publisher.publish_event(
            origin=ingestion_configuration_id,  # Use the ingestion configuration ID as the origin!
            description=dset_ingest_config.description,
            configuration=config,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dset_ingest_config_id)

        return dset_ingest_config_id

    def update_dataset_config(self, dataset_ingestion_configuration=None):
        """Update the ingestion configuration for a dataset and publish an
        event with the ingestion configuration id as origin.

        @param dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws IngestionManagementServiceException    if the dataset configuration is
                not associated with exactly one ingestion configuration
        """
        #@todo - make it an exception to change the dataset_id or the stream_id in the dataset config!

        log.info('dataset configuration to update: %s' %
                 dataset_ingestion_configuration)

        log.debug("Updating dataset config")
        dset_ingest_config_id, rev = self.clients.resource_registry.update(
            dataset_ingestion_configuration)

        ingest_config_ids, _ = self.clients.resource_registry.find_objects(
            dset_ingest_config_id, PRED.hasIngestionConfiguration, id_only=True)

        # Exactly one is required; note the message is also used when none is found
        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException(
                'The dataset ingestion configuration is associated with more than one ingestion configuration!')

        ingest_config_id = ingest_config_ids[0]

        #@todo - what is it okay to update?
        self.event_publisher.publish_event(
            origin=ingest_config_id,
            description=dataset_ingestion_configuration.description,
            configuration=dataset_ingestion_configuration.configuration,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dset_ingest_config_id)

    def read_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Get an existing dataset configuration.

        @param dataset_ingestion_configuration_id    str
        @retval dataset_ingestion_configuration    DatasetIngestionConfiguration
        @throws NotFound    if ingestion configuration did not exist
        """
        log.debug("Reading dataset configuration")
        dataset_ingestion_configuration = self.clients.resource_registry.read(
            dataset_ingestion_configuration_id)

        return dataset_ingestion_configuration

    def delete_dataset_config(self, dataset_ingestion_configuration_id=''):
        """Delete an existing dataset configuration, remove its association to
        the ingestion configuration and publish a 'deleted' event.

        @param dataset_ingestion_configuration_id    str
        @throws NotFound    if ingestion configuration did not exist
        @throws IngestionManagementServiceException    if the dataset configuration is
                not associated with exactly one ingestion configuration
        """
        # Read first: the configuration payload is needed for the event below
        dataset_ingestion_configuration = self.clients.resource_registry.read(
            dataset_ingestion_configuration_id)

        log.debug("Deleting dataset configuration")
        self.clients.resource_registry.delete(
            dataset_ingestion_configuration_id)

        ingest_config_ids, association_ids = self.clients.resource_registry.find_objects(
            dataset_ingestion_configuration_id,
            PRED.hasIngestionConfiguration,
            id_only=True)

        # Exactly one is required; note the message is also used when none is found
        if len(ingest_config_ids) != 1:
            raise IngestionManagementServiceException(
                'The dataset ingestion configuration is associated with more than one ingestion configuration!')

        ingest_config_id = ingest_config_ids[0]

        self.clients.resource_registry.delete_association(
            association=association_ids[0])

        self.event_publisher.publish_event(
            origin=ingest_config_id,
            configuration=dataset_ingestion_configuration.configuration,
            type=DatasetIngestionTypeEnum.DATASETINGESTIONBYSTREAM,
            resource_id=dataset_ingestion_configuration_id,
            deleted=True)
class MockDB_DataStore(DataStore):
    """
    Data store implementation utilizing in-memory dict of dicts
    to persist documents.

    ``self.root`` maps a data store name to a dict. For every object id the
    inner dict holds three kinds of entries:

    * ``<id>``                        -- the HEAD document
    * ``<id>_version_<n>``            -- the document stored as revision n
    * ``__<id>_version_counter``      -- the latest revision number (int)
    """

    def __init__(self, datastore_name='prototype'):
        self.datastore_name = datastore_name
        log.debug(
            'Creating in-memory dict of dicts that will simulate data stores')
        self.root = {}

        # serializers
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=obj_registry)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _get_datastore_dict(self, datastore_name):
        """Return the dict backing datastore_name or raise BadRequest."""
        try:
            return self.root[datastore_name]
        except KeyError:
            raise BadRequest('Data store ' + datastore_name +
                             ' does not exist.')

    def _iter_head_docs(self, datastore_dict):
        """Yield the dict-valued entries that are not revision entries."""
        for objname, obj in datastore_dict.iteritems():
            # Revision keys contain '_version_' past position 0 and the
            # version counters are ints, so both kinds are skipped here.
            if objname.find('_version_') > 0 or type(obj) is not dict:
                continue
            yield obj

    def _resolve_object_id(self, ref, label):
        """Return ref itself if it is a str id, otherwise its ``_id``."""
        if type(ref) is str:
            return ref
        if "_id" not in ref:
            raise BadRequest("Object id not available in %s" % label)
        return ref._id

    # ------------------------------------------------------------------
    # Data store management
    # ------------------------------------------------------------------

    def create_datastore(self, datastore_name="", create_indexes=True):
        """Create an empty data store; BadRequest if it already exists."""
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Creating data store %s' % datastore_name)
        if self.datastore_exists(datastore_name):
            raise BadRequest("Data store with name %s already exists" %
                             datastore_name)
        if datastore_name not in self.root:
            self.root[datastore_name] = {}

    def delete_datastore(self, datastore_name=""):
        """Remove a data store; a missing store is only logged."""
        if not datastore_name:
            datastore_name = self.datastore_name
        log.info('Deleting data store %s' % datastore_name)
        if datastore_name in self.root:
            del self.root[datastore_name]
        else:
            log.info('Data store %s does not exist' % datastore_name)

    def list_datastores(self):
        """Return the names of all data stores."""
        log.debug('Listing all data stores')
        dsList = self.root.keys()
        log.debug('Data stores: %s' % str(dsList))
        return dsList

    def info_datastore(self, datastore_name=""):
        """Return an info string; BadRequest if the store does not exist."""
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing information about data store %s' % datastore_name)
        if datastore_name in self.root:
            info = 'Data store exists'
        else:
            raise BadRequest("Data store with name %s does not exist" %
                             datastore_name)
        log.debug('Data store info: %s' % str(info))
        return info

    def datastore_exists(self, datastore_name=""):
        """Return True if datastore_name is a known data store."""
        return datastore_name in self.root

    def list_objects(self, datastore_name=""):
        """Return the ids of all HEAD documents in the data store."""
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all objects in data store %s' % datastore_name)
        objs = []
        for key in self.root[datastore_name]:
            # '_version_' also matches the '_version_counter' keys
            if '_version_counter' not in key and '_version_' not in key:
                objs.append(key)
        log.debug('Objects: %s' % str(objs))
        return objs

    def list_object_revisions(self, object_id, datastore_name=""):
        """Return the revision keys (``<id>_version_<n>``) of object_id."""
        if not datastore_name:
            datastore_name = self.datastore_name
        log.debug('Listing all versions of object %s/%s' %
                  (datastore_name, str(object_id)))
        prefix = object_id + '_version_'
        res = []
        for key in self.root[datastore_name]:
            if '_version_counter' not in key and key.startswith(prefix):
                res.append(key)
        log.debug('Versions: %s' % str(res))
        return res

    # ------------------------------------------------------------------
    # Create
    # ------------------------------------------------------------------

    def create(self, obj, object_id=None, datastore_name=""):
        """Serialize an Ion object and persist it via create_doc."""
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id,
                               datastore_name=datastore_name)

    def create_doc(self, doc, object_id=None, datastore_name=""):
        """
        Persist doc as revision 1 under object_id (or a generated uuid).

        @retval [object_id, rev] with rev as a string
        @throws BadRequest if doc already has _id/_rev, the data store is
                unknown or object_id is already taken
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")
        if '_rev' in doc:
            raise BadRequest("Doc must not have '_rev'")
        datastore_dict = self._get_datastore_dict(datastore_name)

        if object_id and object_id in datastore_dict:
            raise BadRequest("Object with id %s already exist" % object_id)

        # Assign an id to doc
        doc["_id"] = object_id or uuid4().hex
        object_id = doc["_id"]
        log.debug('Creating new object %s/%s' % (datastore_name, object_id))

        # Create key for version counter entry. Will be used
        # on update to increment version number easily.
        version_counter_key = '__' + object_id + '_version_counter'
        version_counter = 1

        # Assign initial version to doc
        doc["_rev"] = str(version_counter)

        # Write HEAD, version and version counter dicts
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc

        # Return list that identifies the id of the new doc and its version
        res = [object_id, str(version_counter)]
        log.debug('Create result: %s' % str(res))
        return res

    def create_mult(self, objects, object_ids=None):
        """Serialize several Ion objects and persist them in one call."""
        if any([not isinstance(obj, IonObjectBase) for obj in objects]):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        return self.create_doc_mult(
            [self._ion_object_to_persistence_dict(obj) for obj in objects],
            object_ids)

    def create_doc_mult(self, docs, object_ids=None):
        """
        Persist several docs in the default data store.

        @retval list of (True, object_id, rev) tuples, one per doc
        """
        if any(["_id" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_id'")
        if any(["_rev" in doc for doc in docs]):
            raise BadRequest("Docs must not have '_rev'")
        if object_ids and len(object_ids) != len(docs):
            raise BadRequest("Invalid object_ids")

        # Assign an id to doc (recommended in CouchDB documentation)
        object_ids = object_ids or [uuid4().hex for i in xrange(len(docs))]

        res = []
        for doc, oid in zip(docs, object_ids):
            oid, rev = self.create_doc(doc, oid)
            res.append((True, oid, rev))
        return res

    # ------------------------------------------------------------------
    # Read
    # ------------------------------------------------------------------

    def read(self, object_id, rev_id="", datastore_name=""):
        """Read a document and convert it into an Ion object."""
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")
        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s' % str(obj))
        return obj

    def read_doc(self, object_id, rev_id="", datastore_name=""):
        """
        Return the stored document for object_id.

        An empty or None rev_id selects the HEAD document, anything else
        selects that specific revision.

        @throws BadRequest if the data store does not exist
        @throws NotFound if the object (or revision) does not exist
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        datastore_dict = self._get_datastore_dict(datastore_name)

        try:
            key = object_id
            if rev_id is not None and rev_id != "":
                log.debug('Reading version %s of object %s/%s' %
                          (str(rev_id), datastore_name, str(object_id)))
                key += '_version_' + str(rev_id)
            else:
                log.debug('Reading head version of object %s/%s' %
                          (datastore_name, str(object_id)))
            doc = datastore_dict[key]
        except KeyError:
            raise NotFound('Object with id %s does not exist.' %
                           str(object_id))
        log.debug('Read result: %s' % str(doc))
        return doc

    def read_mult(self, object_ids, datastore_name=""):
        """Read several HEAD documents and convert them to Ion objects."""
        if any([not isinstance(object_id, str) for object_id in object_ids]):
            raise BadRequest("Object id param is not string")
        docs = self.read_doc_mult(object_ids, datastore_name)
        # Convert docs into Ion objects
        obj_list = [self._persistence_dict_to_ion_object(doc) for doc in docs]
        return obj_list

    def read_doc_mult(self, object_ids, datastore_name=""):
        """Return shallow copies of the HEAD documents for object_ids."""
        if not datastore_name:
            datastore_name = self.datastore_name
        datastore_dict = self._get_datastore_dict(datastore_name)

        doc_list = []
        try:
            for object_id in object_ids:
                log.debug('Reading head version of object %s/%s' %
                          (datastore_name, str(object_id)))
                doc = datastore_dict[object_id]
                doc_list.append(doc.copy())
        except KeyError:
            raise NotFound('Object with id %s does not exist.' %
                           str(object_id))
        return doc_list

    # ------------------------------------------------------------------
    # Update
    # ------------------------------------------------------------------

    def update(self, obj, datastore_name=""):
        """Serialize an Ion object and store it as a new revision."""
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        # Forward datastore_name so the update lands in the requested store
        # (it used to be dropped, always hitting the default data store).
        return self.update_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def update_doc(self, doc, datastore_name=""):
        """
        Store doc as the next revision of the object named by doc['_id'].

        @retval [object_id, rev] with rev as a string
        @throws BadRequest if _id/_rev are missing, the data store is
                unknown or no version counter exists for the object
        @throws Conflict if doc['_rev'] is not the latest revision
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")
        if '_rev' not in doc:
            raise BadRequest("Doc must have '_rev'")
        datastore_dict = self._get_datastore_dict(datastore_name)

        try:
            object_id = doc["_id"]

            # Find the next doc version
            version_counter_key = '__' + object_id + '_version_counter'
            baseVersion = doc["_rev"]
            version_counter = datastore_dict[version_counter_key] + 1
            if baseVersion != str(version_counter - 1):
                raise Conflict('Object not based on most current version')
        except KeyError:
            raise BadRequest("Object missing required _id and/or _rev values")

        log.debug('Saving new version of object %s/%s' %
                  (datastore_name, doc["_id"]))
        doc["_rev"] = str(version_counter)

        # Overwrite HEAD and version counter dicts, add new version dict
        datastore_dict[object_id] = doc
        datastore_dict[version_counter_key] = version_counter
        datastore_dict[object_id + '_version_' + str(version_counter)] = doc
        res = [object_id, str(version_counter)]
        log.debug('Update result: %s' % str(res))
        return res

    # ------------------------------------------------------------------
    # Delete
    # ------------------------------------------------------------------

    def delete(self, obj, datastore_name=""):
        """Delete an object given either the Ion object or its str id."""
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest(
                "Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            return self.delete_doc(obj, datastore_name=datastore_name)
        return self.delete_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def delete_doc(self, doc, datastore_name=""):
        """
        Remove the HEAD entry, all revisions and the version counter.

        doc is either a str id or a dict carrying '_id'. An object that is
        still referenced by an association is deleted anyway; only a
        warning is logged.

        @throws BadRequest if the data store does not exist
        @throws NotFound if the object does not exist
        """
        if not datastore_name:
            datastore_name = self.datastore_name
        datastore_dict = self._get_datastore_dict(datastore_name)

        if type(doc) is str:
            object_id = doc
        else:
            object_id = doc["_id"]

        log.info('Deleting object %s/%s' % (datastore_name, object_id))
        if object_id not in datastore_dict:
            raise NotFound('Object with id ' + object_id +
                           ' does not exist.')

        if self._is_in_association(object_id, datastore_name):
            obj = self.read(object_id, "", datastore_name)
            log.warn(
                "XXXXXXX Attempt to delete object %s that still has associations"
                % str(obj))
            # raise BadRequest("Object cannot be deleted until associations are broken")

        # Find all version dicts and delete them. Iterate over a snapshot
        # of the keys because entries are removed inside the loop.
        version_prefix = object_id + '_version_'
        for key in list(datastore_dict.keys()):
            if key.startswith(version_prefix):
                del datastore_dict[key]
        # Delete the HEAD dict
        del datastore_dict[object_id]
        # Delete the version counter dict
        del datastore_dict['__' + object_id + '_version_counter']
        log.info('Delete result: True')

    def _is_in_association(self, obj_id, datastore_name=""):
        """Return True if obj_id is subject or object of any association."""
        log.debug("_is_in_association(%s)" % obj_id)
        if not obj_id:
            raise BadRequest("Must provide object id")
        if not datastore_name:
            datastore_name = self.datastore_name
        datastore_dict = self._get_datastore_dict(datastore_name)

        for obj in self._iter_head_docs(datastore_dict):
            if 'type_' in obj and obj['type_'] == "Association":
                association = obj
                if association["s"] == obj_id or association["o"] == obj_id:
                    log.debug("association found(%s)" % association)
                    return True
        return False

    # ------------------------------------------------------------------
    # Association and resource queries (default data store only)
    # ------------------------------------------------------------------

    def find_objects(self, subject, predicate=None, object_type=None,
                     id_only=False):
        """
        Find the objects that subject points to through associations.

        object_type is only applied together with a predicate; without a
        predicate every association of the subject matches.

        @retval (ids or Ion objects, list of association dicts)
        """
        log.debug(
            "find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s"
            % (subject, predicate, object_type, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not subject:
            raise BadRequest("Must provide subject")
        datastore_dict = self._get_datastore_dict(self.datastore_name)
        subject_id = self._resolve_object_id(subject, "subject")

        assoc_list = []
        target_id_list = []
        target_list = []
        for assoc in self._iter_head_docs(datastore_dict):
            if not ('type_' in assoc and assoc['type_'] == "Association"):
                continue
            if assoc['s'] != subject_id:
                continue
            if predicate:
                if assoc['p'] != predicate:
                    continue
                if object_type and assoc['ot'] != object_type:
                    continue
            assoc_list.append(assoc)
            target_id_list.append(assoc['o'])
            # NOTE: the target is read even when id_only is set, so a
            # dangling association surfaces as NotFound in both modes.
            target_list.append(self.read(assoc['o']))

        log.debug("find_objects() found %s objects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_subjects(self, subject_type=None, predicate=None, obj=None,
                      id_only=False):
        """
        Find the subjects that point to obj through associations.

        subject_type is only applied together with a predicate; without a
        predicate every association of the object matches.

        @retval (ids or Ion objects, list of association dicts)
        """
        log.debug(
            "find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s"
            % (subject_type, predicate, obj, id_only))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not obj:
            raise BadRequest("Must provide object")
        datastore_dict = self._get_datastore_dict(self.datastore_name)
        object_id = self._resolve_object_id(obj, "object")

        assoc_list = []
        target_id_list = []
        target_list = []
        for assoc in self._iter_head_docs(datastore_dict):
            if not ('type_' in assoc and assoc['type_'] == "Association"):
                continue
            if assoc['o'] != object_id:
                continue
            if predicate:
                if assoc['p'] != predicate:
                    continue
                if subject_type and assoc['st'] != subject_type:
                    continue
            assoc_list.append(assoc)
            target_id_list.append(assoc['s'])
            # NOTE: the subject is read even when id_only is set.
            target_list.append(self.read(assoc['s']))

        log.debug("find_subjects() found %s subjects" % (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_associations(self, subject=None, predicate=None, obj=None,
                          assoc_type=None, id_only=True):
        """
        Find associations either by (subject, obj) pair or by predicate.

        When both subject and obj are given the predicate is ignored and
        assoc_type (if set) filters on the 'at' field. Otherwise all
        associations with the given predicate are returned.

        @retval list of association ids, or Ion objects if not id_only
        @throws BadRequest if neither (subject and obj) nor predicate given
        """
        log.debug(
            "find_associations(subject=%s, predicate=%s, object=%s, assoc_type=%s)"
            % (subject, predicate, obj, assoc_type))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        if not ((subject and obj) or predicate):
            raise BadRequest("Illegal parameters")
        datastore_dict = self._get_datastore_dict(self.datastore_name)

        target_list = []
        if subject and obj:
            subject_id = self._resolve_object_id(subject, "subject")
            object_id = self._resolve_object_id(obj, "object")
            for assoc in self._iter_head_docs(datastore_dict):
                if not ('type_' in assoc and
                        assoc['type_'] == "Association"):
                    continue
                if assoc['s'] == subject_id and assoc['o'] == object_id:
                    if not assoc_type or assoc['at'] == assoc_type:
                        target_list.append(assoc)
        else:
            for assoc in self._iter_head_docs(datastore_dict):
                if not ('type_' in assoc and
                        assoc['type_'] == "Association"):
                    continue
                if assoc['p'] == predicate:
                    target_list.append(assoc)

        if id_only:
            assocs = [row['_id'] for row in target_list]
        else:
            assocs = [
                self._persistence_dict_to_ion_object(row)
                for row in target_list
            ]
        log.debug("find_associations() found %s associations" %
                  (len(assocs)))
        return assocs

    def find_res_by_type(self, restype, lcstate=None, id_only=False):
        """
        Find resources by type, optionally restricted by lcstate.

        A falsy restype matches every non-Association document and
        disables the lcstate filter.

        @retval (ids or Ion objects, list of empty lists)
        """
        log.debug("find_res_by_type(restype=%s, lcstate=%s)" %
                  (restype, lcstate))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        datastore_dict = self._get_datastore_dict(self.datastore_name)

        assoc_list = []
        target_id_list = []
        target_list = []
        for obj in self._iter_head_docs(datastore_dict):
            if 'type_' in obj and (obj['type_'] == restype or
                                   (not restype and
                                    obj['type_'] != "Association")):
                if (lcstate and 'lcstate' in obj and
                        obj['lcstate'] == lcstate) or not lcstate or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(
                        self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_type() found %s resources" %
                  (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_res_by_lcstate(self, lcstate, restype=None, id_only=False):
        """
        Find resources by life cycle state, optionally restricted by type.

        lcstate may be an alias from CommonResourceLifeCycleSM.STATE_ALIASES,
        in which case any of the aliased states matches.

        @retval (ids or Ion objects, list of empty lists)
        """
        # Log under this method's own name (was mislabelled find_res_by_type)
        log.debug("find_res_by_lcstate(lcstate=%s, restype=%s)" %
                  (lcstate, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        datastore_dict = self._get_datastore_dict(self.datastore_name)

        if lcstate in CommonResourceLifeCycleSM.STATE_ALIASES:
            lcstate_match = CommonResourceLifeCycleSM.STATE_ALIASES[lcstate]
        else:
            lcstate_match = [lcstate]

        assoc_list = []
        target_id_list = []
        target_list = []
        for obj in self._iter_head_docs(datastore_dict):
            if 'lcstate' in obj and obj['lcstate'] in lcstate_match:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(
                        self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_lcstate() found %s resources" %
                  (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def _pass(self):
        pass

    def find_res_by_name(self, name, restype=None, id_only=False):
        """
        Find resources by exact name, optionally restricted by type.

        @retval (ids or Ion objects, list of empty lists)
        """
        log.debug("find_res_by_name(name=%s, restype=%s)" % (name, restype))
        if type(id_only) is not bool:
            raise BadRequest('id_only must be type bool, not %s' %
                             type(id_only))
        datastore_dict = self._get_datastore_dict(self.datastore_name)

        assoc_list = []
        target_id_list = []
        target_list = []
        for obj in self._iter_head_docs(datastore_dict):
            if 'name' in obj and obj['name'] == name:
                if (restype and obj['type_'] == restype) or not restype:
                    target_id_list.append(obj['_id'])
                    target_list.append(
                        self._persistence_dict_to_ion_object(obj))
                    assoc_list.append([])

        log.debug("find_res_by_name() found %s resources" %
                  (len(target_list)))
        if id_only:
            return (target_id_list, assoc_list)
        else:
            return (target_list, assoc_list)

    def find_dir_entries(self, qname):
        """Not supported by the mock data store."""
        raise NotImplementedError()

    # ------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------

    def _ion_object_to_persistence_dict(self, ion_object):
        """Serialize an Ion object to a dict; None passes through."""
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        """Deserialize a dict to an Ion object; None passes through."""
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
class FileDataStore(object):
    """
    Minimal data store that persists each document as one JSON file.

    Files are named after the object id and live in ``self.datastore_dir``,
    which is set by start(). No revision history is kept: create_doc always
    reports revision 1 and update_doc always reports revision 2. The
    datastore_name, rev_id, attachments and del_associations arguments of
    the individual methods are accepted for interface compatibility but do
    not influence where or how documents are stored.
    """

    def __init__(self, container, datastore_name=""):
        self.container = container
        self.datastore_name = datastore_name

        # Object serialization/deserialization
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(
            obj_registry=get_obj_registry())

    def start(self):
        """Pick the storage directory depending on container capabilities."""
        if self.container.has_capability(self.container.CCAP.FILE_SYSTEM):
            self.datastore_dir = FileSystem.get_url(FS.FILESTORE,
                                                    self.datastore_name)
        else:
            self.datastore_dir = "./tmp/%s" % self.datastore_name

    def stop(self):
        pass

    def _get_filename(self, object_id):
        """Return the path of the file backing object_id."""
        return "%s/%s" % (self.datastore_dir, object_id)

    def create(self, obj, object_id=None, attachments=None,
               datastore_name=""):
        """
        Converts ion objects to python dictionary before persisting them
        using the optional suggested identifier and creates attachments
        to the object.
        Returns an identifier and revision number of the object
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id,
                               datastore_name=datastore_name,
                               attachments=attachments)

    def create_doc(self, doc, object_id=None, attachments=None,
                   datastore_name=""):
        """
        Persists the document using the optionally suggested doc_id.
        Returns the identifier and version number (always 1) of the
        document. An existing file for the same id is overwritten.

        @throws BadRequest if doc already carries an '_id'
        """
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")

        # Assign an id to doc (recommended in CouchDB documentation)
        doc["_id"] = object_id or uuid4().hex
        log.debug('Creating new object %s/%s' % (datastore_name, doc["_id"]))
        log.debug('create doc contents: %s', doc)

        filename = self._get_filename(doc["_id"])
        doc_json = json.dumps(doc)
        with open(filename, "w") as f:
            f.write(doc_json)

        return doc["_id"], 1

    def update(self, obj, datastore_name=""):
        """Serialize an Ion object and overwrite its stored document."""
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")
        # Forward datastore_name instead of silently dropping it
        return self.update_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def update_doc(self, doc, datastore_name=""):
        """
        Overwrite the file of doc['_id'] with doc.
        Returns the identifier and a fixed version number of 2.

        @throws BadRequest if doc has no '_id'
        """
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")

        log.debug('update doc contents: %s', doc)

        filename = self._get_filename(doc["_id"])
        doc_json = json.dumps(doc)
        with open(filename, "w") as f:
            f.write(doc_json)

        return doc["_id"], 2

    def read(self, object_id, rev_id="", datastore_name=""):
        """Read a document and convert it into an Ion object."""
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")

        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s', str(obj))
        return obj

    def read_doc(self, doc_id, rev_id="", datastore_name=""):
        """
        Load and return the JSON document stored for doc_id.

        @throws NotFound if no readable file exists for doc_id or the
                file holds a JSON null
        """
        log.debug('Reading head version of object %s/%s', datastore_name,
                  doc_id)

        filename = self._get_filename(doc_id)
        try:
            with open(filename, "r") as f:
                doc_json = f.read()
        except IOError:
            # A missing file used to escape as a raw IOError; report it the
            # same way delete_doc does for a missing object.
            raise NotFound('Object with id %s does not exist.' % str(doc_id))

        doc = json.loads(doc_json)
        if doc is None:
            raise NotFound('Object with id %s does not exist.' % str(doc_id))

        log.debug('read doc contents: %s', doc)
        return doc

    def delete(self, obj, datastore_name="", del_associations=False):
        """Delete an object given either the Ion object or its str id."""
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest(
                "Obj param is not instance of IonObjectBase or string id")
        if type(obj) is str:
            self.delete_doc(obj, datastore_name=datastore_name,
                            del_associations=del_associations)
        else:
            if '_id' not in obj:
                raise BadRequest("Doc must have '_id'")
            self.delete_doc(self._ion_object_to_persistence_dict(obj),
                            datastore_name=datastore_name,
                            del_associations=del_associations)

    def delete_doc(self, doc, datastore_name="", del_associations=False):
        """
        Remove the file backing doc (a str id or a dict with '_id').

        @throws NotFound if the file cannot be removed
        """
        doc_id = doc if type(doc) is str else doc["_id"]
        log.debug('Deleting object %s/%s', datastore_name, doc_id)
        filename = self._get_filename(doc_id)

        try:
            os.remove(filename)
        except OSError:
            raise NotFound('Object with id %s does not exist.' % doc_id)

    def _ion_object_to_persistence_dict(self, ion_object):
        """Serialize an Ion object to a dict; None passes through."""
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        """Deserialize a dict to an Ion object; None passes through."""
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object
class MockDB_DataStore(DataStore): """ Data store implementation utilizing in-memory dict of dicts to persist documents. """ def __init__(self, datastore_name='prototype'): self.datastore_name = datastore_name log.debug('Creating in-memory dict of dicts that will simulate data stores') self.root = {} # serializers self._io_serializer = IonObjectSerializer() self._io_deserializer = IonObjectDeserializer(obj_registry=obj_registry) def create_datastore(self, datastore_name="", create_indexes=True): if not datastore_name: datastore_name = self.datastore_name log.info('Creating data store %s' % datastore_name) if self.datastore_exists(datastore_name): raise BadRequest("Data store with name %s already exists" % datastore_name) if datastore_name not in self.root: self.root[datastore_name] = {} def delete_datastore(self, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name log.info('Deleting data store %s' % datastore_name) if datastore_name in self.root: del self.root[datastore_name] else: log.info('Data store %s does not exist' % datastore_name) def list_datastores(self): log.debug('Listing all data stores') dsList = self.root.keys() log.debug('Data stores: %s' % str(dsList)) return dsList def info_datastore(self, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name log.debug('Listing information about data store %s' % datastore_name) if datastore_name in self.root: info = 'Data store exists' else: raise BadRequest("Data store with name %s does not exist" % datastore_name) log.debug('Data store info: %s' % str(info)) return info def datastore_exists(self, datastore_name=""): return datastore_name in self.root def list_objects(self, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name log.debug('Listing all objects in data store %s' % datastore_name) objs = [] for key, value in self.root[datastore_name].items(): if key.find('_version_counter') == -1 and key.find('_version_') == -1: 
objs.append(key) log.debug('Objects: %s' % str(objs)) return objs def list_object_revisions(self, object_id, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name log.debug('Listing all versions of object %s/%s' % (datastore_name, str(object_id))) res = [] for key, value in self.root[datastore_name].items(): if (key.find('_version_counter') == -1 and (key.find(object_id + '_version_') == 0)): res.append(key) log.debug('Versions: %s' % str(res)) return res def create(self, obj, object_id=None, datastore_name=""): if not isinstance(obj, IonObjectBase): raise BadRequest("Obj param is not instance of IonObjectBase") return self.create_doc(self._ion_object_to_persistence_dict(obj), object_id=object_id, datastore_name=datastore_name) def create_doc(self, doc, object_id=None, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name if '_id' in doc: raise BadRequest("Doc must not have '_id'") if '_rev' in doc: raise BadRequest("Doc must not have '_rev'") try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if object_id: if object_id in datastore_dict: raise BadRequest("Object with id %s already exist" % object_id) # Assign an id to doc doc["_id"] = object_id or uuid4().hex object_id = doc["_id"] log.debug('Creating new object %s/%s' % (datastore_name, object_id)) # Create key for version counter entry. Will be used # on update to increment version number easily. 
versionCounterKey = '__' + object_id + '_version_counter' versionCounter = 1 # Assign initial version to doc doc["_rev"] = str(versionCounter) # Write HEAD, version and version counter dicts datastore_dict[object_id] = doc datastore_dict[versionCounterKey] = versionCounter datastore_dict[object_id + '_version_' + str(versionCounter)] = doc # Return list that identifies the id of the new doc and its version res = [object_id, str(versionCounter)] log.debug('Create result: %s' % str(res)) return res def create_mult(self, objects, object_ids=None): if any([not isinstance(obj, IonObjectBase) for obj in objects]): raise BadRequest("Obj param is not instance of IonObjectBase") return self.create_doc_mult([self._ion_object_to_persistence_dict(obj) for obj in objects], object_ids) def create_doc_mult(self, docs, object_ids=None): if any(["_id" in doc for doc in docs]): raise BadRequest("Docs must not have '_id'") if any(["_rev" in doc for doc in docs]): raise BadRequest("Docs must not have '_rev'") if object_ids and len(object_ids) != len(docs): raise BadRequest("Invalid object_ids") # Assign an id to doc (recommended in CouchDB documentation) object_ids = object_ids or [uuid4().hex for i in xrange(len(docs))] res = [] for doc, oid in zip(docs, object_ids): oid,rev = self.create_doc(doc, oid) res.append((True,oid,rev)) return res def read(self, object_id, rev_id="", datastore_name=""): if not isinstance(object_id, str): raise BadRequest("Object id param is not string") doc = self.read_doc(object_id, rev_id, datastore_name) # Convert doc into Ion object obj = self._persistence_dict_to_ion_object(doc) log.debug('Ion object: %s' % str(obj)) return obj def read_doc(self, object_id, rev_id="", datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') try: key = object_id if rev_id == "": log.debug('Reading head version of 
object %s/%s' % (datastore_name, str(object_id))) else: log.debug('Reading version %s of object %s/%s' % (str(rev_id), datastore_name, str(object_id))) key += '_version_' + str(rev_id) doc = datastore_dict[key] except KeyError: raise NotFound('Object with id %s does not exist.' % str(object_id)) log.debug('Read result: %s' % str(doc)) return doc def read_mult(self, object_ids, datastore_name=""): if any([not isinstance(object_id, str) for object_id in object_ids]): raise BadRequest("Object id param is not string") docs = self.read_doc_mult(object_ids, datastore_name) # Convert docs into Ion objects obj_list = [self._persistence_dict_to_ion_object(doc) for doc in docs] return obj_list def read_doc_mult(self, object_ids, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') doc_list = [] try: for object_id in object_ids: log.debug('Reading head version of object %s/%s' % (datastore_name, str(object_id))) doc = datastore_dict[object_id] doc_list.append(doc.copy()) except KeyError: raise NotFound('Object with id %s does not exist.' 
% str(object_id)) return doc_list def update(self, obj, datastore_name=""): if not isinstance(obj, IonObjectBase): raise BadRequest("Obj param is not instance of IonObjectBase") return self.update_doc(self._ion_object_to_persistence_dict(obj)) def update_doc(self, doc, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name if '_id' not in doc: raise BadRequest("Doc must have '_id'") if '_rev' not in doc: raise BadRequest("Doc must have '_rev'") try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') try: object_id = doc["_id"] # Find the next doc version versionCounterKey = '__' + object_id + '_version_counter' baseVersion = doc["_rev"] versionCounter = datastore_dict[versionCounterKey] + 1 if baseVersion != str(versionCounter - 1): raise Conflict('Object not based on most current version') except KeyError: raise BadRequest("Object missing required _id and/or _rev values") log.debug('Saving new version of object %s/%s' % (datastore_name, doc["_id"])) doc["_rev"] = str(versionCounter) # Overwrite HEAD and version counter dicts, add new version dict datastore_dict[object_id] = doc datastore_dict[versionCounterKey] = versionCounter datastore_dict[object_id + '_version_' + str(versionCounter)] = doc res = [object_id, str(versionCounter)] log.debug('Update result: %s' % str(res)) return res def delete(self, obj, datastore_name=""): if not isinstance(obj, IonObjectBase) and not isinstance(obj, str): raise BadRequest("Obj param is not instance of IonObjectBase or string id") if type(obj) is str: return self.delete_doc(obj, datastore_name=datastore_name) return self.delete_doc(self._ion_object_to_persistence_dict(obj), datastore_name=datastore_name) def delete_doc(self, doc, datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' 
does not exist.') if type(doc) is str: object_id = doc else: object_id = doc["_id"] log.info('Deleting object %s/%s' % (datastore_name, object_id)) if object_id in datastore_dict.keys(): if self._is_in_association(object_id, datastore_name): obj = self.read(object_id, datastore_name) log.warn("XXXXXXX Attempt to delete object %s that still has associations" % str(obj)) # raise BadRequest("Object cannot be deleted until associations are broken") # Find all version dicts and delete them for key in datastore_dict.keys(): if key.find(object_id + '_version_') == 0: del datastore_dict[key] # Delete the HEAD dict del datastore_dict[object_id] # Delete the version counter dict del datastore_dict['__' + object_id + '_version_counter'] else: raise NotFound('Object ' + object_id + ' does not exist.') log.info('Delete result: True') def find(self, criteria=[], datastore_name=""): docList = self.find_doc(criteria, datastore_name) results = [] # Convert each returned doc to its associated Ion object for doc in docList: obj = self._persistence_dict_to_ion_object(doc) log.debug('Ion object: %s' % str(obj)) results.append(obj) return results def find_doc(self, criteria=[], datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') results = [] log_string = "Searching for objects matching criteria list: " + str(criteria) log.debug(log_string) # Traverse entire data store, checking each HEAD version for equality # with specified criterion for obj_id in self.list_objects(datastore_name): try: doc = self.read_doc(obj_id, rev_id="", datastore_name=datastore_name) log.debug("Doc: %s" % str(doc)) if len(criteria) == 0: results.append(doc) else: criteria_satisfied = False for criterion in criteria: if isinstance(criterion, list): if len(criterion) != 3: raise BadRequest("Insufficient criterion values specified. 
Much match [<field>, <logical constant>, <value>]") for item in criterion: if not isinstance(item, str): raise BadRequest("All criterion values must be strings") key = criterion[0] logical_operation = criterion[1] value = criterion[2] if key in doc: if logical_operation == DataStore.EQUAL: if doc[key] == value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.NOT_EQUAL: if doc[key] != value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.GREATER_THAN: if doc[key] > value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.GREATER_THAN_OR_EQUAL: if doc[key] >= value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.LESS_THAN: if doc[key] < value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.LESS_THAN_OR_EQUAL: if doc[key] <= value: criteria_satisfied = True else: criteria_satisfied = False else: if criterion == DataStore.AND: # Can shortcut the query at this point if the # previous criterion failed if criteria_satisfied == False: break if criteria_satisfied: results.append(doc) except KeyError: pass log.debug('Find results: %s' % str(results)) if len(results) == 0: raise NotFound('No objects matched criteria %s' % criteria) return results def find_by_idref(self, criteria=[], association="", datastore_name=""): doc_list = self.find_by_idref_doc(criteria, association, datastore_name) results = [] # Convert each returned doc to its associated Ion object for doc in doc_list: obj = self._persistence_dict_to_ion_object(doc) log.debug('Ion object: %s' % str(obj)) results.append(obj) return results def find_by_idref_doc(self, criteria=[], association="", datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name 
+ ' does not exist.') ids = [] log_string = "Searching for objects matching criteria list: " + str(criteria) log.debug(log_string) # Traverse entire data store, checking each HEAD version for equality # with specified criterion for obj_id in self.list_objects(datastore_name): try: doc = self.read_doc(obj_id, rev_id="", datastore_name=datastore_name) log.debug("Doc: %s" % str(doc)) if len(criteria) == 0: if association in doc: for id in doc[association]: ids.append(id) else: criteria_satisfied = False for criterion in criteria: if isinstance(criterion, list): key = criterion[0] logical_operation = criterion[1] value = criterion[2] if key in doc: if logical_operation == DataStore.EQUAL: if doc[key] == value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.NOT_EQUAL: if doc[key] != value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.GREATER_THAN: if doc[key] > value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.GREATER_THAN_OR_EQUAL: if doc[key] >= value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.LESS_THAN: if doc[key] < value: criteria_satisfied = True else: criteria_satisfied = False elif logical_operation == DataStore.LESS_THAN_OR_EQUAL: if doc[key] <= value: criteria_satisfied = True else: criteria_satisfied = False else: if criterion == DataStore.AND: # Can shortcut the query at this point if the # previous criterion failed if criteria_satisfied == False: break if criteria_satisfied: if association in doc: for id in doc[association]: ids.append(id) except KeyError: pass results = [] for id in ids: doc = self.read_doc(id, "", datastore_name) results.append(doc) log.debug('Find results: %s' % str(results)) if len(results) == 0: raise NotFound('No objects matched criteria %s' % criteria) return results def resolve_idref(self, subject="", predicate="", obj="", 
datastore_name=""): res_list = self.resolve_idref_doc(subject, predicate, obj, datastore_name) results = [] # Convert each returned doc to its associated Ion object for item in res_list: subject_dict = item[0] object_dict = item[2] subject = self._persistence_dict_to_ion_object(subject_dict) log.debug('Subject Ion object: %s' % str(subject)) obj = self._persistence_dict_to_ion_object(object_dict) log.debug('Object Ion object: %s' % str(obj)) results.append([subject, item[1], obj]) return results def resolve_idref_doc(self, subject="", predicate="", obj="", datastore_name=""): if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if subject == "": if predicate == "": if obj == "": # throw exception raise BadRequest("Data store query does not specify subject, predicate or object") else: # Find all subjects with any association to object object_doc = self.read_doc(obj, "", datastore_name) res = [] all_doc_ids = self.list_objects(datastore_name) for subject_doc_id in all_doc_ids: if subject_doc_id == obj: continue subject_doc = self.read_doc(subject_doc_id, "", datastore_name) for key in subject_doc: if isinstance(subject_doc[key], list): if obj in subject_doc[key]: res.append([subject_doc, key, object_doc]) else: if obj == subject_doc[key]: res.append([subject_doc, key, object_doc]) if len(res) == 0: raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj)) else: return res else: # Find all subjects with association to object object_doc = self.read_doc(obj, "", datastore_name) res = [] all_doc_ids = self.list_objects(datastore_name) for subject_doc_id in all_doc_ids: if subject_doc_id == obj: continue subject_doc = self.read_doc(subject_doc_id, "", datastore_name) if predicate in subject_doc: if obj in subject_doc[predicate]: res.append([subject_doc, predicate, object_doc]) if len(res) == 0: 
raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj)) else: return res else: if predicate == "": if obj == "": # Find all objects with any association to subject # TODO would need some way to indicate a key is an association predicate pass else: # Find all associations between subject and object subject_doc = self.read_doc(subject, "", datastore_name) object_doc = self.read_doc(obj, "", datastore_name) res = [] for key in subject_doc: if isinstance(subject_doc[key], list): if obj in subject_doc[key]: res.append([subject_doc, key, object_doc]) else: if obj == subject_doc[key]: res.append([subject_doc, key, object_doc]) if len(res) == 0: raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj)) else: return res else: if obj == "": # Find all associated objects subject_doc = self.read_doc(subject, "", datastore_name) res = [] if predicate in subject_doc: for id in subject_doc[predicate]: object_doc = self.read_doc(id, "", datastore_name) res.append([subject_doc, predicate, object_doc]) return res raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj)) else: # Determine if association exists subject_doc = self.read_doc(subject, "", datastore_name) object_doc = self.read_doc(obj, "", datastore_name) if predicate in subject_doc: if obj in subject_doc[predicate]: return [[subject_doc, predicate, object_doc]] raise NotFound("Data store query for association %s/%s/%s failed" % (subject, predicate, obj)) def _is_in_association(self, obj_id, datastore_name=""): log.debug("_is_in_association(%s)" % obj_id) if not obj_id: raise BadRequest("Must provide object id") if not datastore_name: datastore_name = self.datastore_name try: datastore_dict = self.root[datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): 
continue if 'type_' in obj and obj['type_'] == "Association": association = obj if association["s"] == obj_id or association["o"] == obj_id: log.debug("association found(%s)" % association) return True return False def find_objects(self, subject, predicate=None, object_type=None, id_only=False): log.debug("find_objects(subject=%s, predicate=%s, object_type=%s, id_only=%s" % (subject, predicate, object_type, id_only)) if not subject: raise BadRequest("Must provide subject") try: datastore_dict = self.root[self.datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if type(subject) is str: subject_id = subject else: if "_id" not in subject: raise BadRequest("Object id not available in subject") else: subject_id = subject._id assoc_list = [] target_id_list = [] target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'type_' in obj and obj['type_'] == "Association": if obj['s'] == subject_id: if predicate and obj['p'] == predicate: if (object_type and obj['ot'] == object_type) or not object_type: assoc_list.append(obj) target_id_list.append(obj['o']) target_list.append(self.read(obj['o'])) elif not predicate: assoc_list.append(obj) target_id_list.append(obj['o']) target_list.append(self.read(obj['o'])) log.debug("find_objects() found %s objects" % (len(target_list))) if id_only: return (target_id_list, assoc_list) else: return (target_list, assoc_list) def find_subjects(self, subject_type=None, predicate=None, obj=None, id_only=False): log.debug("find_subjects(subject_type=%s, predicate=%s, object=%s, id_only=%s" % (subject_type, predicate, obj, id_only)) if not obj: raise BadRequest("Must provide object") try: datastore_dict = self.root[self.datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if type(obj) is str: object_id = obj else: if "_id" not in obj: raise BadRequest("Object id not 
available in object") else: object_id = obj._id assoc_list = [] target_id_list = [] target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'type_' in obj and obj['type_'] == "Association": if obj['o'] == object_id: if predicate and obj['p'] == predicate: if (subject_type and obj['st'] == subject_type) or not subject_type: assoc_list.append(obj) target_id_list.append(obj['s']) target_list.append(self.read(obj['s'])) elif not predicate: assoc_list.append(obj) target_id_list.append(obj['s']) target_list.append(self.read(obj['s'])) log.debug("find_subjects() found %s subjects" % (len(target_list))) if id_only: return (target_id_list, assoc_list) else: return (target_list, assoc_list) def find_associations(self, subject=None, predicate=None, obj=None, id_only=True): log.debug("find_associations(subject=%s, predicate=%s, object=%s)" % (subject, predicate, obj)) if subject and obj or predicate: pass else: raise BadRequest("Illegal parameters") try: datastore_dict = self.root[self.datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if subject and obj: if type(subject) is str: subject_id = subject else: if "_id" not in subject: raise BadRequest("Object id not available") else: subject_id = subject._id if type(obj) is str: object_id = obj else: if "_id" not in obj: raise BadRequest("Object id not available in object") else: object_id = obj._id target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'type_' in obj and obj['type_'] == "Association": if obj['s'] == subject_id and obj['o'] == object_id: target_list.append(obj) else: target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'type_' in obj and obj['type_'] == "Association": if obj['p'] == predicate: target_list.append(obj) if 
id_only: assocs = [row['_id'] for row in target_list] else: assocs = [self._persistence_dict_to_ion_object(row) for row in target_list] log.debug("find_associations() found %s associations" % (len(assocs))) return assocs def find_res_by_type(self, restype, lcstate=None, id_only=False): log.debug("find_res_by_type(restype=%s, lcstate=%s)" % (restype, lcstate)) try: datastore_dict = self.root[self.datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') assoc_list = [] target_id_list = [] target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'type_' in obj and (obj['type_'] == restype or (not restype and obj['type_'] != "Association")): if (lcstate and 'lcstate' in obj and obj['lcstate'] == lcstate) or not lcstate or not restype: target_id_list.append(obj['_id']) target_list.append(self._persistence_dict_to_ion_object(obj)) assoc_list.append([]) log.debug("find_res_by_type() found %s resources" % (len(target_list))) if id_only: return (target_id_list, assoc_list) else: return (target_list, assoc_list) def find_res_by_lcstate(self, lcstate, restype=None, id_only=False): log.debug("find_res_by_type(lcstate=%s, restype=%s)" % (lcstate, restype)) try: datastore_dict = self.root[self.datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') if lcstate in ResourceLifeCycleSM.STATE_ALIASES: lcstate_match = ResourceLifeCycleSM.STATE_ALIASES[lcstate] else: lcstate_match = [lcstate] assoc_list = [] target_id_list = [] target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'lcstate' in obj and obj['lcstate'] in lcstate_match: if (restype and obj['type_'] == restype) or not restype: target_id_list.append(obj['_id']) target_list.append(self._persistence_dict_to_ion_object(obj)) assoc_list.append([]) 
log.debug("find_res_by_lcstate() found %s resources" % (len(target_list))) if id_only: return (target_id_list, assoc_list) else: return (target_list, assoc_list) def _pass(self): pass def find_res_by_name(self, name, restype=None, id_only=False): log.debug("find_res_by_name(name=%s, restype=%s)" % (name, restype)) try: datastore_dict = self.root[self.datastore_name] except KeyError: raise BadRequest('Data store ' + datastore_name + ' does not exist.') assoc_list = [] target_id_list = [] target_list = [] for objname,obj in datastore_dict.iteritems(): if (objname.find('_version_')>0) or (not type(obj) is dict): continue if 'name' in obj and obj['name'] == name: if (restype and obj['type_'] == restype) or not restype: target_id_list.append(obj['_id']) target_list.append(self._persistence_dict_to_ion_object(obj)) assoc_list.append([]) log.debug("find_res_by_name() found %s resources" % (len(target_list))) if id_only: return (target_id_list, assoc_list) else: return (target_list, assoc_list) def find_dir_entries(self, qname): raise NotImplementedError() def _ion_object_to_persistence_dict(self, ion_object): if ion_object is None: return None obj_dict = self._io_serializer.serialize(ion_object) return obj_dict def _persistence_dict_to_ion_object(self, obj_dict): if obj_dict is None: return None ion_object = self._io_deserializer.deserialize(obj_dict) return ion_object
class TransformManagementService(BaseTransformManagementService):
    """Provides the main orchestration for stream processing
    subscription, data process definition and computation
    request (scheduling). The transformation service handles content format
    transformation, mediation, qualification, verification and validation
    """

    def __init__(self):
        BaseTransformManagementService.__init__(self)
        # Used to turn IonObject configurations into plain dictionaries
        self.serializer = IonObjectSerializer()

    def on_start(self):
        super(TransformManagementService, self).on_start()

    def _strip_types(self, obj):
        """Removes the 'type_' key from a dict and from every dict nested
        (as a value) inside it. The dict is modified in place; non-dict
        arguments are ignored.
        @param obj The (possibly nested) dictionary to clean
        """
        if not isinstance(obj, dict):
            return
        for value in obj.values():
            if isinstance(value, dict):
                self._strip_types(value)
        if "type_" in obj:
            del obj['type_']

    def create_transform(self,
                         name='',
                         description='',
                         in_subscription_id='',
                         out_streams=None,
                         process_definition_id='',
                         configuration=None):
        """Creates the transform and registers it with the resource registry
        @param name The name of the transform, must not be used by another resource
        @param description Description stored on the transform resource
        @param in_subscription_id The subscription id corresponding to the input subscription
        @param out_streams Mapping whose values are the output stream ids
        @param process_definition_id The process definition contains the module and class of the process to be spawned
        @param configuration {} or an IonObject (serialized and stripped of 'type_' keys)
        @return The transform_id to the transform
        @throws BadRequest when a resource with the given name already exists
        @throws NotFound when no process definition id is provided
        """

        # ------------------------------------------------------------------------------------
        # Resources and Initial Configs
        # ------------------------------------------------------------------------------------
        if isinstance(configuration, IonObjectBase):
            configuration = self.serializer.serialize(configuration)
            # strip the type
            self._strip_types(configuration)
        elif not configuration:
            configuration = {}
        else:
            # Work on a shallow copy so that adding the 'process' entry below
            # does not modify the dictionary owned by the caller
            configuration = dict(configuration)

        # Handle the name uniqueness factor
        res, _ = self.clients.resource_registry.find_resources(name=name, id_only=True)
        if len(res) > 0:
            raise BadRequest('The transform resource with name: %s, already exists.' % name)

        transform_name = name

        if not process_definition_id:
            raise NotFound('No process definition was provided')

        # Transform Resource for association management and pid
        transform_res = Transform(name=name, description=description)
        transform_id, _ = self.clients.resource_registry.create(transform_res)
        # Read the resource back so that the later update is made on the stored object
        transform_res = self.clients.resource_registry.read(transform_id)

        # ------------------------------------------------------------------------------------
        # Spawn Configuration and Parameters
        # ------------------------------------------------------------------------------------
        subscription = self.clients.pubsub_management.read_subscription(
            subscription_id=in_subscription_id)
        listen_name = subscription.exchange_name

        configuration['process'] = {
            'name': transform_name,
            'type': 'stream_process',
            'listen_name': listen_name,
            'transform_id': transform_id
        }
        if out_streams:
            configuration['process']['publish_streams'] = out_streams
            stream_ids = list(out_streams.values())
        else:
            stream_ids = []

        transform_res.configuration = configuration

        # ------------------------------------------------------------------------------------
        # Process Spawning
        # ------------------------------------------------------------------------------------
        # Spawn the process
        pid = self.clients.process_dispatcher.schedule_process(
            process_definition_id=process_definition_id,
            configuration=configuration)
        transform_res.process_id = pid

        # ------------------------------------------------------------------------------------
        # Handle Resources
        # ------------------------------------------------------------------------------------
        self.clients.resource_registry.update(transform_res)

        self.clients.resource_registry.create_association(
            transform_id, PRED.hasProcessDefinition, process_definition_id)
        self.clients.resource_registry.create_association(
            transform_id, PRED.hasSubscription, in_subscription_id)
        for stream_id in stream_ids:
            self.clients.resource_registry.create_association(
                transform_id, PRED.hasOutStream, stream_id)

        return transform_id

    def update_transform(self, configuration=None):
        """Not currently possible to update a transform
        @throws NotImplementedError
        """
        raise NotImplementedError

    def read_transform(self, transform_id=''):
        """Reads a transform from the resource registry
        @param transform_id The unique transform identifier
        @return Transform resource
        @throws NotFound when transform doesn't exist
        """
        log.debug('(%s): Reading Transform: %s', self.name, transform_id)
        transform = self.clients.resource_registry.read(object_id=transform_id, rev_id='')
        return transform

    def delete_transform(self, transform_id=''):
        """Deletes and stops an existing transform process
        @param transform_id The unique transform identifier
        @return True once the transform resource has been deleted
        @throws NotFound when a transform doesn't exist
        """
        # get the transform resource (also verifies its existence before continuing)
        transform_res = self.read_transform(transform_id=transform_id)
        pid = transform_res.process_id

        # get the resources
        # NOTE: the returned ids are not used yet; the lookups are retained for
        # the pending decision below on deleting the associated resources
        process_definition_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasProcessDefinition, RT.ProcessDefinition, True)
        in_subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        out_stream_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasOutStream, RT.Stream, True)

        # stop the transform process
        #@note: terminate_process does not raise or confirm if there termination was successful or not
        self.clients.process_dispatcher.cancel_process(pid)
        log.debug('(%s): Terminated Process (%s)', self.name, pid)

        # delete the associations
        for predicate in [PRED.hasProcessDefinition, PRED.hasSubscription, PRED.hasOutStream]:
            associations = self.clients.resource_registry.find_associations(
                transform_id, predicate)
            for association in associations:
                self.clients.resource_registry.delete_association(association)

        #@todo: should I delete the resources (process definition, subscription,
        # out streams), or should dpms?

        self.clients.resource_registry.delete(transform_id)
        return True

    # ---------------------------------------------------------------------------

    def execute_transform(self, process_definition_id='', data=None, configuration=None):
        """Runs the transform class named by a process definition once, on the
        given data, in a separate greenlet and returns its result.
        @param process_definition_id Id of the process definition whose
               executable names the 'module' and 'class' to instantiate
        @param data Argument passed to the instance's execute method
        @param configuration Currently unused
        @return The value returned by the instance's execute method
        @throws Whatever execute raises; a gevent timeout if no result is
                available within 10 seconds
        """
        process_definition = self.clients.process_dispatcher.read_process_definition(
            process_definition_id)
        module = process_definition.executable.get('module')
        cls = process_definition.executable.get('class')

        # Import the module and resolve the class object by name
        module = __import__(module, fromlist=[cls])
        cls = getattr(module, cls)
        instance = cls()

        result = gevent.event.AsyncResult()

        def execute(data):
            # Forward failures to the waiting caller; otherwise the error would
            # stay inside the greenlet and the caller would only see a timeout
            try:
                result.set(instance.execute(data))
            except Exception as ex:
                result.set_exception(ex)

        g = gevent.greenlet.Greenlet(execute, data)
        g.start()

        retval = result.get(timeout=10)

        return retval

    def activate_transform(self, transform_id=''):
        """Activate the subscription to bind (start) the transform
        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist.
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        if len(subscription_ids) < 1:
            raise NotFound('No subscription found for transform %s' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.activate_subscription(subscription_id)

        return True

    def deactivate_transform(self, transform_id=''):
        """Deactivates the subscriptions for the specified transform
        @param transform_id
        @retval True on success
        @throws NotFound if either the subscription doesn't exist or the transform object doesn't exist
        """
        subscription_ids, _ = self.clients.resource_registry.find_objects(
            transform_id, PRED.hasSubscription, RT.Subscription, True)
        if len(subscription_ids) < 1:
            raise NotFound('No subscription found for transform %s' % transform_id)

        for subscription_id in subscription_ids:
            self.clients.pubsub_management.deactivate_subscription(subscription_id)

        return True

    def schedule_transform(self, transform_id=''):
        """Not currently implemented
        @throws NotImplementedError
        """
        raise NotImplementedError
class FileDataStore(object):
    """
    Data store that persists every document as one JSON file named after the
    document id inside a per-datastore directory. Only the latest content of a
    document is kept: the rev_id and attachments arguments are accepted but
    not used by this implementation.
    """

    def __init__(self, container, datastore_name=""):
        self.container = container
        self.datastore_name = datastore_name

        # Object serialization/deserialization
        self._io_serializer = IonObjectSerializer()
        self._io_deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

    def start(self):
        """
        Determines the directory holding the document files: the FILESTORE
        location of the file system capability when the container has it,
        otherwise a directory below ./tmp.
        """
        if self.container.has_capability(self.container.CCAP.FILE_SYSTEM):
            self.datastore_dir = FileSystem.get_url(FS.FILESTORE, self.datastore_name)
        else:
            self.datastore_dir = "./tmp/%s" % self.datastore_name

    def stop(self):
        pass

    def _get_filename(self, object_id):
        """
        Returns the path of the file that stores the document with the given id.
        """
        return "%s/%s" % (self.datastore_dir, object_id)

    def create(self, obj, object_id=None, attachments=None, datastore_name=""):
        """
        Converts ion objects to python dictionary before persisting them using the optional
        suggested identifier and creates attachments to the object.
        Returns an identifier and revision number of the object.
        Raises BadRequest if obj is not an IonObjectBase.
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.create_doc(self._ion_object_to_persistence_dict(obj),
                               object_id=object_id, datastore_name=datastore_name,
                               attachments=attachments)

    def create_doc(self, doc, object_id=None, attachments=None, datastore_name=""):
        """
        Persists the document using the optionally suggested doc_id (a random
        uuid hex string is generated when none is given). The attachments
        argument is currently not stored.
        Returns the identifier and version number (always 1) of the document.
        Raises BadRequest if the document already carries an '_id'.
        """
        if '_id' in doc:
            raise BadRequest("Doc must not have '_id'")

        # Assign an id to doc (recommended in CouchDB documentation)
        doc["_id"] = object_id or uuid4().hex
        log.debug('Creating new object %s/%s', datastore_name, doc["_id"])
        log.debug('create doc contents: %s', doc)

        filename = self._get_filename(doc["_id"])
        # Serialize before opening so that a serialization error cannot leave
        # an empty or truncated file behind
        doc_json = json.dumps(doc)
        with open(filename, "w") as f:
            f.write(doc_json)

        return doc["_id"], 1

    def update(self, obj, datastore_name=""):
        """
        Converts the ion object to a dictionary and overwrites its stored document.
        Returns the identifier and version number of the object.
        Raises BadRequest if obj is not an IonObjectBase.
        """
        if not isinstance(obj, IonObjectBase):
            raise BadRequest("Obj param is not instance of IonObjectBase")

        return self.update_doc(self._ion_object_to_persistence_dict(obj),
                               datastore_name=datastore_name)

    def update_doc(self, doc, datastore_name=""):
        """
        Overwrites the file of the document identified by doc['_id'].
        Returns the identifier and version number (always 2) of the document.
        Raises BadRequest if the document has no '_id'.
        """
        if '_id' not in doc:
            raise BadRequest("Doc must have '_id'")

        log.debug('update doc contents: %s', doc)

        filename = self._get_filename(doc["_id"])
        doc_json = json.dumps(doc)
        with open(filename, "w") as f:
            f.write(doc_json)

        return doc["_id"], 2

    def read(self, object_id, rev_id="", datastore_name=""):
        """
        Reads the document with the given id and converts it to an ion object.
        Raises BadRequest if object_id is not a string and NotFound if the
        document does not exist.
        """
        if not isinstance(object_id, str):
            raise BadRequest("Object id param is not string")

        doc = self.read_doc(object_id, rev_id, datastore_name)

        # Convert doc into Ion object
        obj = self._persistence_dict_to_ion_object(doc)
        log.debug('Ion object: %s', str(obj))
        return obj

    def read_doc(self, doc_id, rev_id="", datastore_name=""):
        """
        Reads and returns the stored document (parsed JSON) with the given id.
        rev_id is ignored; only the latest content is available.
        Raises NotFound if the document file cannot be opened/read or holds
        no document.
        """
        log.debug('Reading head version of object %s/%s', datastore_name, doc_id)
        filename = self._get_filename(doc_id)

        try:
            with open(filename, "r") as f:
                doc_json = f.read()
        except IOError:
            # A missing file means the object was never created or was
            # deleted; report it like delete_doc does instead of leaking IOError
            raise NotFound('Object with id %s does not exist.' % str(doc_id))

        doc = json.loads(doc_json)
        if doc is None:
            raise NotFound('Object with id %s does not exist.' % str(doc_id))

        log.debug('read doc contents: %s', doc)
        return doc

    def delete(self, obj, datastore_name="", del_associations=False):
        """
        Deletes the stored document of the given ion object or object id.
        Raises BadRequest if obj is neither an IonObjectBase nor a string, or
        if the ion object has no '_id'; NotFound if the document does not exist.
        """
        if not isinstance(obj, IonObjectBase) and not isinstance(obj, str):
            raise BadRequest("Obj param is not instance of IonObjectBase or string id")

        if isinstance(obj, str):
            self.delete_doc(obj, datastore_name=datastore_name,
                            del_associations=del_associations)
        else:
            if '_id' not in obj:
                raise BadRequest("Doc must have '_id'")
            self.delete_doc(self._ion_object_to_persistence_dict(obj),
                            datastore_name=datastore_name,
                            del_associations=del_associations)

    def delete_doc(self, doc, datastore_name="", del_associations=False):
        """
        Removes the file of the document given as id string or as dictionary
        with an '_id' entry. del_associations is currently not used.
        Raises NotFound if the file cannot be removed.
        """
        doc_id = doc if isinstance(doc, str) else doc["_id"]
        log.debug('Deleting object %s/%s', datastore_name, doc_id)
        filename = self._get_filename(doc_id)

        try:
            os.remove(filename)
        except OSError:
            raise NotFound('Object with id %s does not exist.' % doc_id)

    def _ion_object_to_persistence_dict(self, ion_object):
        """
        Serializes an ion object into a dictionary; None is passed through.
        """
        if ion_object is None:
            return None

        obj_dict = self._io_serializer.serialize(ion_object)
        return obj_dict

    def _persistence_dict_to_ion_object(self, obj_dict):
        """
        Deserializes a dictionary into an ion object; None is passed through.
        """
        if obj_dict is None:
            return None

        ion_object = self._io_deserializer.deserialize(obj_dict)
        return ion_object