def classify(self, subject):
    """Identify a structure type using flextypes above."""
    # chart the subject
    routes = list(catalog(subject))
    candidates = []
    # get the relevant structures
    structs = self._structures
    # compare each structure to the subject
    for key, struct in structs.items():
        strict_this = struct.get('meta', {}).get('strict', False)
        check_types = struct.get('meta', {}).get('check_types', True)
        template = list(catalog(struct['struct']))
        # make sure that all routes match the data structure
        if strict_this and self._routes_equality(template, routes, check_types=check_types):
            candidates.append(key)
        elif not strict_this and self._routes_subset(template, routes, check_types=check_types):
            candidates.append(key)
    #! removed a strict keyword that applied to all classifications and ran after multiple
    #! ... matches were made in order to find a more specific one. this was too much inference!
    if len(candidates) > 1:
        raise Exception('matched multiple data structures to %s' % subject)
    elif len(candidates) == 0:
        raise Exception('failed to classify %s' % subject)
    else:
        #! note that if you are debugging failed classifications above, then it is very useful to
        #! ... print the successful classification candidate and subjects here. this is useful
        #! ... in combination with the debug flag to PostData from PostDataLibrary. add a conditional
        #! ... to see why a particular post file is not being recognized and compare to ones that are.
        #! ... hopefully this will be less necessary now that the code is tested on legacy post data
        return candidates[0]
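#! a minimal sketch (assumption, not the real methods) of the strict/subset contrast used by
#! ... classify above: the actual _routes_equality and _routes_subset also honor StructureKey
#! ... wildcards and the check_types flag, while these toy versions only compare literal paths
def _routes_equality_sketch(template, routes):
    """Toy check (illustration only): subject and template have exactly the same paths."""
    return set(tuple(p) for p, t in template) == set(tuple(p) for p, v in routes)
def _routes_subset_sketch(template, routes):
    """Toy check (illustration only): every path in the subject appears in the template."""
    return set(tuple(p) for p, v in routes) <= set(tuple(p) for p, t in template)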
def cross(self, style, data):
    """Turn a raw definition into multiple constituent components."""
    # chart the subject
    routes = list(catalog(data))
    # get the relevant structure and expand it
    structure = self._structures[style]['struct']
    template = list(catalog(structure))
    # hold the results and crosses
    toc, crosses = {}, []
    # loop over routes in the subject
    while routes:
        route, value = routes.pop()
        # find the matching route guaranteed by classify
        index, = [ii for ii, i in enumerate(list(zip(*template))[0]) if route == i]
        path, typ = template[index]
        # the identifier is the path up to the name
        hinge = max([ii for ii, i in enumerate(path)
            if i.__class__.__name__ == 'StructureKey'])
        # replace the StructureKey with the name found in the route at the hinge
        identifier = tuple(path[:hinge] + [route[hinge]])
        if identifier not in toc: toc[identifier] = {}
        # the subpath defines the route inside the final object
        subpath = tuple(path[hinge + 1:])
        if typ.__class__.__name__ == 'KeyCombo':
            # if the terminus is a KeyCombo it will be crossed later
            crosses.append({'identifier': identifier, 'subpath': subpath,
                'rename': typ.name, 'values': value})
        else:
            if not subpath: toc[identifier] = value
            else: delveset(toc[identifier], *subpath, value=value)
    # apply crosses
    for cross in crosses:
        identifier = cross['identifier']
        subpath = cross['subpath']
        values = cross['values']
        rename = cross['rename']
        raw = toc.pop(identifier)
        for vv, val in enumerate(values):
            key = tuple(list(identifier) + [vv])
            toc[key] = copy.deepcopy(raw)
            delveset(toc[key], *tuple(list(subpath[:-1]) + [rename]), value=val)
    return toc
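#! classify and cross both lean on helpers from the datapack module. a minimal sketch of their
#! ... assumed behavior (illustration only, not the originals): catalog flattens a nested
#! ... dictionary into (path,leaf) pairs and delveset writes a leaf back in by path
def catalog_sketch(base, path=None):
    """Yield (path,value) pairs for every leaf in a nested dictionary."""
    if path is None: path = []
    if isinstance(base, dict) and base:
        for key, val in base.items():
            for pair in catalog_sketch(val, path + [key]):
                yield pair
    else:
        yield path, base
def delveset_sketch(base, *route, **kwargs):
    """Write kwargs['value'] into a nested dictionary at the path given by route."""
    for key in route[:-1]:
        base = base.setdefault(key, {})
    base[route[-1]] = kwargs['value']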
def get_post(self,sn,calcname=None,plotname=None,lookup=None):
    """
    Next-generation postprocessing data lookups.
    Note that this should probably replace portions of code in store.plotload and computer.
    UNDER DEVELOPMENT. Recommend parting out the interpreter function.
    """
    if (calcname and plotname) or (not calcname and not plotname):
        raise Exception('\n[ERROR] specify only one (calcname or plotname)')
    #---look up post by calculation name
    #---! this section dripped from store.plotload
    if calcname:
        #---get slice name
        #---! this would fail if we looped over slices
        slice_name = self.calc[calcname]['slice_name']
        #---get the group name
        if 'group' in self.calc[calcname]: group = self.calc[calcname]['group']
        else: group = None
        #---look up the slice
        sl = self.slices[sn][slice_name][group if group else 'all']
        #---base file name according to group conventions
        if not group:
            fn_base = re.findall(r'^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',sl['filekey'])[0]+'.%s'%calcname
        else: fn_base = '%s.%s'%(sl['filekey'],calcname)
        #---see how many there are
        candidates = glob.glob(self.path('post_data_spot')+fn_base+'*.spec')
        if len(candidates)==1: return re.sub(r'\.spec','.dat',candidates[0])
        else:
            options = {}
            for c in candidates:
                nnum = int(re.findall(r'^.+\.n([0-9]+)\.spec',c)[0])
                with open(c) as fp: options[c] = eval(fp.read())
            meta = self.load_specs()
            new_calcs = self.interpret_specs(meta['calculations'][calcname])
            #---use lookup to whittle these calculations
            if lookup:
                index = next(ii for ii,i in enumerate(new_calcs)
                    if all([delve(i,*key)==val for key,val in lookup.items()]))
            else: raise Exception('\n[ERROR] too many options so you need to specify via lookup kwarg')
            specs = new_calcs[index]
            #---dripped from the interpret function (needs its own function)
            for path,sub in [(i,j) for i,j in catalog(specs)
                if type(j)==str and re.match(r'^\+',j)]:
                source = delve(self.vars,*sub.strip('+').split('/'))
                point = delve(specs,*path[:-1])
                point[path[-1]] = source
            #---end drip
            particular = next(key for key,val in options.items() if val==specs['specs'])
            return re.sub(r'\.spec','.dat',particular)
    elif plotname:
        print('[DEVELOPMENT] need to handle plotnames here')
        import pdb;pdb.set_trace() #---legit
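#---usage sketch (assumption, names and values hypothetical): when several spec files match,
#---... the lookup kwarg selects one expanded calculation by matching paths into its specs, e.g.
#---... work.get_post(sn,calcname='undulations',lookup={('specs','grid_spacing'):0.5})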
def interpret_specs(self,details,return_stubs=False):
    """
    The YAML-formatted specifications file must be interpreted if it contains loops.
    """
    #---this loop interpreter allows for a loop key at any point over specs in list or dict
    #---trim a copy of the specs so all loop keys are terminal
    details_trim = deepcopy(details)
    #---get all paths to a loop
    nonterm_paths = list([tuple(j) for j in set([tuple(i[:i.index('loop')+1])
        for i,j in catalog(details_trim) if 'loop' in i[:-1]])])
    #---some loops end in a list instead of a sub-dictionary
    nonterm_paths_list = list([tuple(j) for j in set([tuple(i[:i.index('loop')+1])
        for i,j in catalog(details_trim) if i[-1]=='loop'])])
    #---for each non-terminal path we save everything below and replace it with a key
    nonterms = []
    for path in nonterm_paths:
        base = deepcopy(delve(details_trim,*path[:-1]))
        nonterms.append(base['loop'])
        pivot = delve(details_trim,*path[:-1])
        pivot['loop'] = list(base['loop'].keys())
    #---hypothesize over the reduced specifications dictionary
    sweeps = [{'route':i[:-1],'values':j} for i,j in catalog(details_trim) if 'loop' in i]
    #---! note that you cannot have loops within loops (yet?) but this would be the right place for it
    if sweeps==[]: new_calcs = [deepcopy(details)]
    else: new_calcs = hypothesis(sweeps,default=details_trim)
    new_calcs_stubs = deepcopy(new_calcs)
    #---replace non-terminal loop paths with their downstream dictionaries
    for ii,i in enumerate(nonterms):
        for nc in new_calcs:
            downkey = delve(nc,*nonterm_paths[ii][:-1])
            upkey = nonterm_paths[ii][-2]
            point = delve(nc,*nonterm_paths[ii][:-2])
            point[upkey] = nonterms[ii][downkey]
    #---loops over lists (instead of dictionaries) carry along the entire loop which must be removed
    for ii,i in enumerate(nonterm_paths_list):
        for nc in new_calcs:
            #---! this section is supposed to excise the redundant "loop" list if it still exists
            #---! however the PPI project had calculation metadata that didn't require it so we just try
            try:
                pivot = delve(nc,*i[:-2]) if len(i)>2 else nc
                val = delve(nc,*i[:-1])[i[-2]]
                pivot[i[-2]] = val
            except Exception: pass
    return new_calcs if not return_stubs else (new_calcs,new_calcs_stubs)
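#---usage sketch (assumption, keys and values hypothetical): a loop key anywhere inside a
#---... calculation expands it into one calculation per loop value, e.g.
#---... details = {'slice_name':'current','specs':{'grid_spacing':{'loop':[0.25,0.5]}}}
#---... new_calcs = work.interpret_specs(details)
#---... #---expect two calculations, one with grid_spacing 0.25 and one with 0.5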
def action(self,calculation_name=None):
    """
    Parse a specifications file to make changes to a workspace.
    This function interprets the specifications and acts on it. It manages the irreducible units of
    an omnicalc operation and ensures that the correct data are sent to analysis functions in the
    right order.
    """
    status('parsing specs file',tag='status')
    #---load the yaml specifications file
    specs = self.load_specs()
    #### status('done loading specs',tag='status')
    #---read simulations from the slices dictionary
    sns = list(specs['slices'].keys())
    #---variables are passed directly to self.vars
    self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}
    #---apply "+"-delimited internal references in the yaml file
    for path,sub in [(i,j[-1]) for i,j in catalog(specs) if type(j)==list
        and type(j[-1])==str and re.match(r'^\+',j[-1])]:
        source = delve(self.vars,*sub.strip('+').split('/'))
        point = delve(specs,*path[:-1])
        point[path[-1]][point[path[-1]].index(sub)] = source
    for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match(r'^\+',j)]:
        source = delve(self.vars,*sub.strip('+').split('/'))
        point = delve(specs,*path[:-1])
        point[path[-1]] = source
    #---loop over all simulations to create groups and slices
    self.save(quiet=True)
    for route in [('slices',i) for i in sns]:
        root,sn = delve(specs,*route),route[-1]
        #---create groups
        if 'groups' in root:
            for group,select in root['groups'].items():
                kwargs = {'group':group,'select':select,'sn':sn}
                self.create_group(**kwargs)
            root.pop('groups')
        #---slice the trajectory
        if 'slices' in root:
            for sl,details in root['slices'].items():
                #---! use a default group here?
                for group in details['groups']:
                    kwargs = {'sn':sn,'start':details['start'],
                        'end':details['end'],'skip':details['skip'],'slice_name':sl}
                    kwargs['group'] = group
                    if 'pbc' in details: kwargs['pbc'] = details['pbc']
                    self.create_slice(**kwargs)
            root.pop('slices')
        if root != {}: raise Exception('[ERROR] unprocessed specifications %s'%str(root))
        else: del root
    #---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
    self.save(quiet=True)
    checktime()
    #---meta is passed to self.meta
    if 'meta' in specs:
        for sn in specs['meta']: self.meta[sn] = specs['meta'][sn]
    #---collections are groups of simulations
    if 'collections' in specs: self.vars['collections'] = specs['collections']
    #---calculations are executed last and organized in this loop
    if 'calculations' in specs:
        status('starting calculations',tag='status')
        #---note that most variables including calc mirror the specs file
        self.calc = dict(specs['calculations'])
        #---infer the correct order for the calculation keys from their upstream dependencies
        upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
        #---if there are no specs required to get the upstream data object the user can either
        #---...use none/None as a placeholder or use the name as the key as in "upstream: name"
        for uu,uc in enumerate(upstream_catalog):
            if uc[-1]=='upstream': upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
        depends = {t[0]:[t[ii+1] for ii,i in enumerate(t)
            if ii<len(t)-1 and t[ii]=='upstream'] for t in upstream_catalog}
        calckeys = [i for i in self.calc if i not in depends]
        #---check that calckeys has enough elements
        list(set(calckeys+[i for j in depends.values() for i in j]))
        #---! come back to this!
        while any(depends):
            ii,i = depends.popitem()
            if all([j in calckeys for j in i]) and i!=[]: calckeys.append(ii)
            else: depends[ii] = i
        #---if a specific calculation name is given then only perform that calculation
        if calculation_name is not None: calckeys = [calculation_name]
        for calcname in calckeys:
            details = specs['calculations'][calcname]
            status('checking calculation %s'%calcname,tag='status')
            new_calcs = self.interpret_specs(details)
            #---perform calculations
            for calc in new_calcs:
                #---find the script with the function
                fns = []
                for (dirpath,dirnames,filenames) in os.walk('./'):
                    fns.extend([dirpath+'/'+fn for fn in filenames])
                search = [fn for fn in fns if re.match(r'^\.\/[^ate].+\/%s\.py$'%calcname,fn)]
                if len(search)==0: raise Exception('\n[ERROR] cannot find %s.py'%calcname)
                elif len(search)>1: raise Exception('\n[ERROR] redundant matches: %s'%str(search))
                else:
                    sys.path.insert(0,os.path.dirname(search[0]))
                    function = unpacker(search[0],calcname)
                    status('computing %s'%calcname,tag='loop')
                    computer(function,calc=calc,workspace=self)
                    self.save()
                checktime()
        self.save()
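#---the while loop above is a Kahn-style topological sort over the upstream graph. a standalone
#---... sketch of the same idea (assumption: depends maps each calculation to the calculations it
#---... needs; unlike the loop above, this version raises on circular dependencies rather than
#---... looping forever, and it tolerates empty dependency lists)
def order_calculations_sketch(depends):
    """Return calculation keys ordered so every calculation follows its upstream dependencies."""
    #---calculations that appear only as dependencies have no upstream requirements of their own
    remaining = dict((key,list(vals)) for key,vals in depends.items())
    for vals in depends.values():
        for v in vals:
            if v not in remaining: remaining[v] = []
    ordered = []
    while remaining:
        ready = sorted([key for key,vals in remaining.items() if all(v in ordered for v in vals)])
        if not ready: raise Exception('circular upstream dependencies: %s'%str(remaining))
        for key in ready:
            ordered.append(key)
            del remaining[key]
    return ordered
#---... for example, order_calculations_sketch({'b':['a'],'c':['a','b']}) gives ['a','b','c']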