class Session( Browser ):
    """ Object to handle connection and client-server data transfer """

    def __init__(self, profile_data, model_data):
        # 1. load meta info: store all settings in a Meta class as the _meta attribute
        meta = Meta( profile_data, model_data )
        self._meta = meta

        # 2. init Cache and Remote backend
        self._cache = Cache( meta )
        self._remote = Remote( meta )
        self._remote.open() # authenticate at the remote

        # 3. load odML terminologies
        # TODO make odML load terms into our cache folder, not the default /tmp
        terms = terminology.terminologies.load(profile_data['odml_repository'])
        self.terminologies = terms.sections

        # 4. attach supported models
        self.models = dict( models_map )

        warnings.simplefilter('ignore', tb.NaturalNameWarning)
        print "Session initialized."

    def clear_cache(self):
        """ removes all objects from the cache """
        self._cache.clear_cache()

    @activate_remote
    def select(self, model_name, params={}, data_load=False, mode='obj'):
        """ requests objects of a given type from the server in bulk mode.

        caching:    caches files only
        cascade:    no
        data_load:  yes/no

        Arguments:

        model_name: type of the object (like 'block', 'segment' or 'section')
        params:     dict that can contain several categories of key-value pairs
        data_load:  fetch the data or not (applied if mode == 'obj')
        mode:       'obj' or 'json' - return mode, python objects or JSON

        Params can be:

        1. filters, like:
            'owner__username': '******'
            'segment__id__in': [19485, 56223, 89138]
            'n_definition__icontains': 'blafoo' # negative filter (note the 'n_' prefix)

        2. common params, like
            'at_time': '2013-02-22 15:34:57'
            'offset': 50
            'max_results': 20

        3. data params, to get only parts of the original object(s). These only
            work for the data-related objects (like 'analogsignal' or
            'spiketrain').

            start_time - start time of the required range (calculated using the
                same time unit as the t_start of the signal)
            end_time - end time of the required range (calculated using the
                same time unit as the t_start of the signal)
            duration - duration of the required range (calculated using the
                same time unit as the t_start of the signal)
            start_index - start index of the required data range (an index of
                the starting datapoint)
            end_index - end index of the required range (an index of the end
                datapoint)
            samples_count - number of points in the required range
            downsample - number of datapoints. This parameter indicates that
                downsampling is needed. The downsampling is applied on top of
                the data range selected by the other parameters (if specified).

        Examples:
        select('analogsignal', params={'id__in': [38551], 'downsample': 100})
        select('analogsignal', params={'segment__id': 93882, 'start_time': 500.0})
        select('section', params={'odml_type': 'experiment', 'date_created': '2013-02-22'})
        """
        if model_name in self._meta.cls_aliases.values(): # TODO put into a model_safe decorator
            model_name = [k for k, v in self._meta.cls_aliases.items() if v == model_name][0]

        if not model_name in self._meta.models_map.keys():
            raise TypeError('Objects of that type are not supported.')

        # fetch from remote + save in cache if possible
        json_objs = self._remote.get_list( model_name, params )

        if mode == 'json':
            # return pure JSON (no data) if requested
            objects = json_objs

        else:
            # convert to objects in 'obj' mode
            app = self._meta.app_prefix_dict[ model_name ]
            model = models_map[ model_name ]

            objects = []
            for json_obj in json_objs:
                data_refs = {} # a dict like {'signal': <array...>, ...}
                if data_load:
                    data_refs = self.__parse_data_from_json( json_obj )

                obj = Serializer.deserialize( json_obj, self._meta, data_refs )
                objects.append( obj )

        self._cache.save_data_map() # updates the on-disk cache with new datafiles
        self._cache.save_h5_map()

        return objects
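    # Illustrative usage sketch for select() (added for clarity, not part of
    # the original code). The profile/model dicts and the IDs are placeholders
    # taken from the docstring examples above, not values guaranteed to exist
    # on any particular server.
    #
    #   session = Session(profile_data, model_data)
    #
    #   # bulk-fetch object descriptions only, as raw JSON
    #   raw = session.select('segment', params={'max_results': 20}, mode='json')
    #
    #   # fetch a downsampled signal as a python object, including array data
    #   signals = session.select('analogsignal',
    #                            params={'id__in': [38551], 'downsample': 100},
    #                            data_load=True)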
    @activate_remote
    def pull(self, location, params={}, cascade=True, data_load=True):
        """ pulls an object from the specified location on the server.

        caching:    yes
        cascade:    True/False
        data_load:  True/False

        Arguments:

        location:   object location, either as a full URL like
                    'http://<host>/metadata/section/2394/', as a plain location
                    '/metadata/section/2394' or as a stripped version like
                    '/mtd/sec/2394'
        params:     dict that can contain several categories of key-value pairs
        cascade:    fetch related objects recursively (True/False)
        data_load:  fetch the data (True/False)

        Params can be:

        1. common params, like
            'at_time': '2013-02-22 15:34:57'

        2. data params, to get only parts of the original object(s). These only
            work for the data-related objects (like 'analogsignal' or
            'spiketrain').

            start_time - start time of the required range (calculated using the
                same time unit as the t_start of the signal)
            end_time - end time of the required range (calculated using the
                same time unit as the t_start of the signal)
            duration - duration of the required range (calculated using the
                same time unit as the t_start of the signal)
            start_index - start index of the required data range (an index of
                the starting datapoint)
            end_index - end index of the required range (an index of the end
                datapoint)
            samples_count - number of points in the required range
            downsample - number of datapoints. This parameter indicates that
                downsampling is needed. The downsampling is applied on top of
                the data range selected by the other parameters (if specified).
        """
        location = self._meta.parse_location( location )

        supp_models = [k for k in models_map.keys() if \
            not k in ['property', 'value']]
        if not location[1] in supp_models:
            raise TypeError('Objects of that type are not pull-supported.')

        processed = {} # collector of processed objects like
                       # {"metadata/section/2394/": <object..>, ...}
        to_clean = [] # collector of ids of objects for which to clean the parent
        stack = [ location ] # a stack of objects to sync

        while len( stack ) > 0:
            loc = stack[0]

            # find the object in the cache
            etag = None
            cached_obj = self._cache.get_obj_by_location( loc )
            if cached_obj is not None:
                obj_descr = self._meta.get_gnode_descr(cached_obj)
                if obj_descr and obj_descr['fields'].has_key('guid'):
                    etag = obj_descr['fields']['guid']

            # request the object from the server (with the ETag, if any)
            json_obj = self._remote.get(loc, params, etag)

            if json_obj == 304: # not modified, take the object from the cache
                obj = cached_obj
                print_status('%s loaded from cache.' % str(loc))

            else: # parse the response from the server

                # download related data
                data_refs = {} # a dict like {'signal': <array...>, ...}
                if data_load:
                    data_refs = self.__parse_data_from_json( json_obj )

                # parse JSON (+ data) into a python object
                obj = Serializer.deserialize( json_obj, self._meta, data_refs )

                # put metadata in the stack
                #if json_obj['fields'].has_key('metadata'):
                #    for value in json_obj['fields']['metadata']:
                #        cl_value = self._meta.clean_location( value )
                #        stack.append( cl_value )

                # or just download attached metadata here?
                # metadata = self._fetch_metadata_by_json(cls, json_obj)

                print_status("%s fetched from server." % loc)

            stack.remove( loc ) # do not forget to remove the processed object
            processed[ str(loc) ] = obj # add it to the processed objects

            app, cls, lid = loc[0], loc[1], loc[2]
            children = self._meta.app_definitions[cls]['children'] # child object types
            obj_descr = self._meta.get_gnode_descr(obj)

            if cascade and children and obj_descr:
                for child in children: # 'child' is like 'segment', 'event' etc.

                    field_name = child + '_set'
                    if obj_descr['fields'].has_key( field_name ) and \
                            obj_descr['fields'][ field_name ]:
                        for rel_link in obj_descr['fields'][ field_name ]:
                            cl_link = self._meta.parse_location( rel_link )
                            if not str(cl_link) in processed.keys() and not \
                                    str(cl_link) in [str(o) for o in stack]:
                                stack.insert( 0, cl_link )

        # building relationships for python objects
        for key, obj in processed.items():
            # TODO make some iterator below to avoid duplicated code
            loc = self._meta.parse_location( key )

            app, cls, lid = loc[0], loc[1], loc[2]
            children = self._meta.app_definitions[cls]['children']
            obj_descr = self._meta.get_gnode_descr(obj)

            if cascade and children and obj_descr:
                for child in children: # 'child' is like 'segment', 'event' etc.

                    field_name = child + '_set'
                    if obj_descr['fields'].has_key( field_name ) and \
                            obj_descr['fields'][ field_name ]:

                        rel_objs = []

                        for rel_link in obj_descr['fields'][ field_name ]:
                            cl_link = self._meta.parse_location( rel_link )
                            rel_objs.append( processed[str(cl_link)] )

                        if rel_objs: # parse children into parent attrs;
                            # the way to assign kids depends on the object type
                            self.__assign_child( child, obj, rel_objs )

        """ TODO add metadata to objects

        # parse related metadata
        if not json_obj['fields'].has_key('metadata') or \
            not json_obj['fields']['metadata']:

        else:
            mobj = Metadata()
            for p, v in raw_json['metadata']:
                prp = Serializer.deserialize(p, self)
                val = Serializer.deserialize(v, self)
                prp.append( val )

                # save both objects to cache
                self._cache.add_object( prp )
                self._cache.add_object( val )

                setattr( mobj, prp.name, prp )
        """

        print_status( 'Object(s) loaded.\n' )

        obj = processed[ str(location) ]
        self._cache.add_object(obj)

        self._cache.save_data_map()
        self._cache.save_h5_map()

        return obj
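    # Illustrative usage sketch for pull() (added for clarity, not part of the
    # original code), continuing the select() example above. The location
    # strings are the placeholders used in the docstring.
    #
    #   # fetch a section together with its children and data
    #   section = session.pull('/metadata/section/2394')
    #
    #   # shallow fetch: no related objects, no array data
    #   section = session.pull('/mtd/sec/2394', cascade=False, data_load=False)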
    @activate_remote
    def sync(self, obj_to_sync, cascade=False):
        """ syncs a given object to the server (updates it or creates a new
        one).

        cascade: True/False

        Arguments:

        obj_to_sync: a python object to sync. If the object has a gnode
                     attribute, it is updated on the server; if no gnode
                     attribute is found, a new object is submitted.
        cascade:     sync all children recursively (True/False)
        """
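        # Illustrative usage sketch (added for clarity, not part of the
        # original code); 'block' stands for a locally created or modified
        # object of a supported type:
        #
        #   session.sync(block, cascade=True)   # push the block and its children
        #   session.sync(section)               # push a single section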
        supp_models = [m for k, m in models_map.items() if \
            not k in ['property', 'value']]
        if not obj_to_sync.__class__ in supp_models:
            raise TypeError('Objects of that type are not supported.')

        processed = [] # collector of permalinks of processed objects
        to_clean = [] # collector of ids of objects for which to clean the parent
        stack = [ obj_to_sync ] # a stack of objects to sync

        self._cache.add_object(obj_to_sync) # if not yet there

        while len( stack ) > 0:

            obj = stack[0] # take the first object from the stack
            success = False # flag to indicate the success of the syncing
            cls = self._meta.get_type_by_obj( obj ) # type of the object, like 'segment'

            # bloody workaround for duplications because of NEO
            obj_descr = self._meta.get_gnode_descr(obj)
            if obj_descr and obj_descr['permalink'] in processed:
                stack.remove( obj )
                continue

            # 1. validate the class type
            if not obj.__class__ in supported_models:
                # skip this object completely
                stack.remove( obj )
                print_status('Object %s is not supported.\n' % cut_to_render( obj.__repr__() ))
                continue

            # 2. pre-push new/changed array data to the server (+ put in cache)
            # data_refs is a dict like {'signal': 'http://host:/neo/signal/148348', ...}
            try:
                data_refs = self.__push_data_from_obj( obj )
            except (errors.FileUploadError, errors.UnitsError), e:
                # skip this object completely
                stack.remove( obj )
                print_status('%s skipped: %s\n' % (cut_to_render(obj.__repr__(), 15), str(e)))
                continue
            # 3. pre-sync related metadata if it exists (+ put in cache)
            if hasattr(obj, 'metadata'):

                metadata = getattr(obj, 'metadata')
                if isinstance(metadata, Metadata):

                    to_sync = []
                    for name, prp in metadata.__dict__.items():
                        if prp.value:
                            if not self._meta.get_gnode_descr(prp.value):
                                to_sync.insert(0, prp.value) # sync the value if never synced

                        if not self._meta.get_gnode_descr(prp):
                            to_sync.insert(0, prp) # sync the property if never synced

                        if not prp.parent:
                            print_status('Cannot sync %s for %s: section is not defined.\n' % \
                                (name, cut_to_render( obj.__repr__() )))
                            # the property cannot be synced without a parent section;
                            # take it (and its value) out of the sync queue again
                            if prp in to_sync:
                                to_sync.remove( prp )
                            if prp.value and prp.value in to_sync:
                                to_sync.remove( prp.value )
                            continue # move to the next property

                        if not self._meta.get_gnode_descr(prp.parent):
                            to_sync.insert(0, prp.parent) # sync the parent section

                    if to_sync: # sync what is needed first
                        stack = to_sync + stack
                        continue

            # 4. sync the main object
            try:
                json_obj = Serializer.serialize(obj, self._meta, data_refs)

                # TODO ideally the JSON object representation should be unique
                # and this code excluded
                for k in list( json_obj['fields'].keys() ):
                    if k.endswith('_set') or k == 'shared_with':
                        json_obj['fields'].pop( k, None )

                raw_json = self._remote.save( json_obj )

                if not raw_json == 304:
                    # update the local in-memory object with the newly acquired params
                    self._meta.set_gnode_descr(obj, raw_json)

                # the list of children in the gnode attribute of every parent
                # of obj must be updated with the newly synced child. This
                # should be done here, not at the end of the sync, to keep
                # objects updated in case the sync fails.
                Serializer.update_parent_children(obj, self._meta)

                success = True
                obj_descr = self._meta.get_gnode_descr(obj)
                processed.append( obj_descr['permalink'] )
                print_status('Object at %s synced.' % obj_descr['location'])

            except (errors.UnitsError, errors.ValidationError, \
                    errors.SyncFailed, errors.BadRequestError), e:
                print_status('%s skipped: %s\n' % (cut_to_render(obj.__repr__(), 20), str(e)))

            stack.remove( obj ) # do not forget to remove the processed object

            # 5. if cascade, put the children objects onto the stack to sync
            children = self._meta.app_definitions[cls]['children'] # child object types
            obj_descr = self._meta.get_gnode_descr(obj)

            if cascade and children and obj_descr:
                for child in children: # 'child' is like 'segment', 'event' etc.

                    # cached children references
                    child_link_set = list( obj_descr['fields'][ child + '_set' ] )

                    for rel in getattr(obj, get_children_field_name( child )):

                        # detect children of the given type that were removed (using the cache)
                        rel_descr = self._meta.get_gnode_descr(rel)
                        if rel_descr and rel_descr['permalink'] in child_link_set:
                            child_link_set.remove( rel_descr['permalink'] )

                        # important to skip already scheduled or processed objects.
                        # ideally the check would also include 'and not obj in stack',
                        # but NEO objects cannot be compared with other object types
                        # (raises an error), so the workaround is to check whether
                        # the object was processed before processing it.
                        if rel_descr is None or not rel_descr['permalink'] in processed:
                            stack.append( rel )

                    par_name = get_parent_field_name(cls, child)
                    # collect permalinks of the removed objects as (link, par_field_name)
                    to_clean += [(x, par_name) for x in child_link_set]