Beispiel #1
0
 def classify(self, subject):
     """Match the subject against the registered flextype structures.

     Charts the subject with catalog and compares its routes against every
     registered structure; strict structures must match route-for-route while
     non-strict structures only need to contain the subject's routes.
     Returns the key of the single matching structure.
     """
     # chart the subject into a list of routes
     subject_routes = list(catalog(subject))
     matches = []
     # compare every registered structure against the subject
     for name, spec in self._structures.items():
         meta = spec.get('meta', {})
         check_types = meta.get('check_types', True)
         spec_routes = list(catalog(spec['struct']))
         # strict structures require exact route equality; loose ones accept a subset
         if meta.get('strict', False):
             hit = self._routes_equality(
                 spec_routes, subject_routes, check_types=check_types)
         else:
             hit = self._routes_subset(
                 spec_routes, subject_routes, check_types=check_types)
         if hit:
             matches.append(name)
     #! a strict keyword that re-ran after multiple matches to find a more
     #! ... specific one was removed earlier: it was too much inference
     if len(matches) > 1:
         raise Exception('matched multiple data structures to %s' % subject)
     if not matches:
         raise Exception('failed to classify %s' % (subject))
     #! when debugging failed classifications it is useful to print the
     #! ... successful candidate and subject here, combined with the debug flag
     #! ... to PostData from PostDataLibrary, to see why a post file is not
     #! ... recognized compared to ones that are
     return matches[0]
Beispiel #2
0
 def cross(self, style, data):
     """Expand a raw definition into its constituent components.

     Charts the incoming data, matches each route against the structure
     registered under style, and assembles a table of contents keyed by
     identifier; KeyCombo termini are expanded ("crossed") into enumerated
     copies afterwards. Returns the table of contents.
     """
     # chart the incoming data
     routes = list(catalog(data))
     # expand the structure registered under this style
     template = list(catalog(self._structures[style]['struct']))
     # hoist the template's route column; it is invariant over the loop
     template_paths = list(zip(*template))[0]
     toc, crosses = {}, []
     # consume each charted route (reversed matches the original pop order)
     for route, value in reversed(routes):
         # classify guarantees exactly one matching template entry
         index, = [ii for ii, candidate in enumerate(template_paths)
                   if candidate == route]
         path, typ = template[index]
         # the identifier is the path up to the last StructureKey
         hinge = max(ii for ii, part in enumerate(path)
                     if part.__class__.__name__ == 'StructureKey')
         # swap the StructureKey for the concrete name found at the hinge
         identifier = tuple(path[:hinge] + [route[hinge]])
         toc.setdefault(identifier, {})
         # the subpath locates the value inside the final object
         subpath = tuple(path[hinge + 1:])
         if typ.__class__.__name__ == 'KeyCombo':
             # KeyCombo termini are crossed after the main pass
             crosses.append({
                 'identifier': identifier,
                 'subpath': subpath,
                 'rename': typ.name,
                 'values': value
             })
         elif subpath:
             delveset(toc[identifier], *subpath, value=value)
         else:
             toc[identifier] = value
     # expand each cross into enumerated deep copies of its base entry
     for item in crosses:
         base = toc.pop(item['identifier'])
         target = tuple(list(item['subpath'][:-1]) + [item['rename']])
         for position, val in enumerate(item['values']):
             key = tuple(list(item['identifier']) + [position])
             toc[key] = copy.deepcopy(base)
             delveset(toc[key], *target, value=val)
     return toc
Beispiel #3
0
	def get_post(self,sn,calcname=None,plotname=None,lookup=None):

		"""
		Next-generation postprocessing data lookups.
		Look up the post-processed ".dat" file for simulation sn by either a
		calculation name or a plot name (exactly one must be supplied); lookup
		is an optional {path-tuple: value} filter used to disambiguate when
		several spec files match. Returns the path to the ".dat" file.
		Note that this should probably replace portions of code in store.plotload and computer.
		UNDER DEVELOPMENT. Recommend parting out the interpreter function.
		"""

		if calcname and plotname or (not calcname and not plotname):
			raise Exception('\n[ERROR] specify only one (calcname or plotname)')

		#---look up post by calculation name
		#---! this section dripped from store.plotload
		if calcname:

			#---get slice name
			#---! this would fail if we looped over slices
			slice_name = self.calc[calcname]['slice_name']

			#---get the group name if one was specified
			group = self.calc[calcname].get('group',None)

			#---look up the slice
			sl = self.slices[sn][slice_name][group if group else 'all']

			#---base file name according to group conventions
			if not group:
				fn_base = re.findall(r'^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',sl['filekey'])[0]+'.%s'%calcname
			else: fn_base = '%s.%s'%(sl['filekey'],calcname)

			#---see how many spec files match
			candidates = glob.glob(self.path('post_data_spot')+fn_base+'*.spec')

			#---note the escaped dot: the original '.spec' pattern matched any character
			if len(candidates)==1: return re.sub(r'\.spec','.dat',candidates[0])
			else:
				options = {}
				for c in candidates:
					#---nnum also validates that the candidate follows the naming convention
					nnum = int(re.findall(r'^.+\.n([0-9]+)\.spec',c)[0])
					#---! security note: eval on file contents; spec files are local trusted
					#---! ... artifacts but ast.literal_eval would be safer if the format allows
					with open(c) as fp: options[c] = eval(fp.read())
				meta = self.load_specs()
				new_calcs = self.interpret_specs(meta['calculations'][calcname])
				#---use lookup to whittle these calculations
				if lookup:
					index = next(ii for ii,i in enumerate(new_calcs) if
						all([delve(i,*key)==val for key,val in lookup.items()]))
				else: raise Exception('\n[ERROR] too many options so you need to specify via lookup kwarg')
				specs = new_calcs[index]
				#---dripped from interpret function (needs its own function): resolve
				#---..."+"-prefixed references against self.vars
				for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match(r'^\+',j)]:
					source = delve(self.vars,*sub.strip('+').split('/'))
					point = delve(specs,*path[:-1])
					point[path[-1]] = source
				#---end drip
				particular = next(key for key,val in options.items() if val==specs['specs'])
				return re.sub(r'\.spec','.dat',particular)

		elif plotname:
			#---parenthesized print works identically under python 2 and 3
			print("[DEVELOPMENT] need to handle plotnames here")
			import pdb;pdb.set_trace() #---legit
Beispiel #4
0
	def interpret_specs(self,details,return_stubs=False):

		"""
		The YAML-formatted specifications file must be interpreted if it contains loops.
		Expands any "loop" keys in details into a list of concrete calculation
		dictionaries (one per combination of looped values). If return_stubs is
		True, returns (new_calcs,new_calcs_stubs) where the stubs are the
		pre-expansion copies; otherwise returns new_calcs alone.
		"""

		#---this loop interpreter allows for a loop key at any point over specs in list or dict
		#---trim a copy of the specs so all loop keys are terminal
		details_trim = deepcopy(details)
		#---get all paths to a loop (the loop key sits above the terminus)
		nonterm_paths = list(set([tuple(i[:i.index('loop')+1])
			for i,j in catalog(details_trim) if 'loop' in i[:-1]]))
		#---some loops end in a list instead of a sub-dictionary
		nonterm_paths_list = list(set([tuple(i[:i.index('loop')+1])
			for i,j in catalog(details_trim) if i[-1]=='loop']))
		#---for each non-terminal path we save everything below and replace it with a key
		nonterms = []
		for path in nonterm_paths:
			base = deepcopy(delve(details_trim,*path[:-1]))
			nonterms.append(base['loop'])
			pivot = delve(details_trim,*path[:-1])
			#---list() keeps this correct under python 3 where keys() is a view
			pivot['loop'] = list(base['loop'].keys())
		#---hypothesize over the reduced specifications dictionary
		sweeps = [{'route':i[:-1],'values':j} for i,j in catalog(details_trim) if 'loop' in i]
		#---! note that you cannot have loops within loops (yet?) but this would be the right place for it
		if sweeps == []: new_calcs = [deepcopy(details)]
		else: new_calcs = hypothesis(sweeps,default=details_trim)
		new_calcs_stubs = deepcopy(new_calcs)
		#---replace non-terminal loop paths with their downstream dictionaries
		for ii,i in enumerate(nonterms):
			for nc in new_calcs:
				downkey = delve(nc,*nonterm_paths[ii][:-1])
				upkey = nonterm_paths[ii][-2]
				point = delve(nc,*nonterm_paths[ii][:-2])
				point[upkey] = nonterms[ii][downkey]
		#---loops over lists (instead of dictionaries) carry along the entire loop which must be removed
		for ii,i in enumerate(nonterm_paths_list):
			for nc in new_calcs:
				#---! this section is supposed to excise the redundant "loop" list if it still exists
				#---! however the PPI project had calculation metadata that didn't require it so we just try
				try:
					pivot = delve(nc,*i[:-2]) if len(i)>2 else nc
					val = delve(nc,*i[:-1])[i[-2]]
					pivot[i[-2]] = val
				#---narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
				except Exception: pass
		return new_calcs if not return_stubs else (new_calcs,new_calcs_stubs)
Beispiel #5
0
	def action(self,calculation_name=None):

		"""
		Parse a specifications file to make changes to a workspace.
		This function interprets the specifications and acts on it.
		It manages the irreducible units of an omnicalc operation and ensures
		that the correct data are sent to analysis functions in the right order.
		If calculation_name is given, only that calculation is performed.
		"""

		status('parsing specs file',tag='status')

		#---load the yaml specifications file
		specs = self.load_specs()

		#---read simulations from the slices dictionary
		sns = specs['slices'].keys()
		#---variables are passed directly to self.vars
		self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}

		#---apply "+"-delimited internal references in the yaml file
		#---...first the references that appear as the last element of a list
		#---! bug fix: the original condition "type(j)==list and type(j)==str" could
		#---! ... never hold, so list-terminal references were silently skipped
		for path,sub in [(i,j[-1]) for i,j in catalog(specs) if type(j)==list
			and j and type(j[-1])==str and re.match(r'^\+',j[-1])]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]][point[path[-1]].index(sub)] = source
		#---...then the references that appear as bare strings
		for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match(r'^\+',j)]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]] = source

		#---loop over all simulations to create groups and slices
		self.save(quiet=True)
		for route in [('slices',i) for i in sns]:
			root,sn = delve(specs,*route),route[-1]
			#---create groups
			if 'groups' in root:
				for group,select in root['groups'].items():
					kwargs = {'group':group,'select':select,'sn':sn}
					self.create_group(**kwargs)
				root.pop('groups')
			#---slice the trajectory
			if 'slices' in root:
				for sl,details in root['slices'].items():
					#---! use a default group here?
					for group in details['groups']:
						kwargs = {'sn':sn,'start':details['start'],
							'end':details['end'],'skip':details['skip'],'slice_name':sl}
						kwargs['group'] = group
						if 'pbc' in details: kwargs['pbc'] = details['pbc']
						self.create_slice(**kwargs)
				root.pop('slices')
			if root != {}: raise Exception('[ERROR] unprocessed specifications %s'%str(root))
			else: del root
		#---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
		self.save(quiet=True)
		checktime()

		#---meta is passed to self.meta
		if 'meta' in specs:
			for sn in specs['meta']:
				self.meta[sn] = specs['meta'][sn]

		#---collections are groups of simulations
		if 'collections' in specs: self.vars['collections'] = specs['collections']

		#---calculations are executed last and organized in this loop
		if 'calculations' in specs:
			status('starting calculations',tag='status')
			#---note that most variables including calc mirror the specs file
			self.calc = dict(specs['calculations'])
			#---infer the correct order for the calculation keys from their upstream dependencies
			upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
			#---if there are no specs required to get the upstream data object the user can either
			#---...use none/None as a placeholder or use the name as the key as in "upstream: name"
			for uu,uc in enumerate(upstream_catalog):
				if uc[-1]=='upstream': upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
			depends = {t[0]:[t[ii+1] for ii,i in enumerate(t) if ii<len(t)-1 and t[ii]=='upstream']
				for t in upstream_catalog}
			calckeys = [i for i in self.calc if i not in depends]
			#---topological sort: append a calculation once all of its upstreams are ready.
			#---! bug fix: the previous popitem loop could spin forever on unresolvable
			#---! ... dependencies; a stalled pass now raises instead of hanging
			while depends:
				progressed = False
				for key in list(depends.keys()):
					reqs = depends[key]
					if reqs != [] and all([j in calckeys for j in reqs]):
						calckeys.append(key)
						del depends[key]
						progressed = True
				if not progressed:
					raise Exception('[ERROR] cannot resolve upstream dependencies: %s'%str(depends))
			#---if a specific calculation name is given then only perform that calculation
			if calculation_name is not None: calckeys = [calculation_name]
			for calcname in calckeys:
				details = specs['calculations'][calcname]
				status('checking calculation %s'%calcname,tag='status')
				new_calcs = self.interpret_specs(details)
				#---perform calculations
				for calc in new_calcs:
					#---find the script with the function
					fns = []
					for (dirpath, dirnames, filenames) in os.walk('./'):
						fns.extend([dirpath+'/'+fn for fn in filenames])
					#---list() so len works under python 3 where filter returns an iterator
					search = list(filter(lambda x:re.match(r'^\.\/[^ate].+\/%s\.py$'%calcname,x),fns))
					if len(search)==0: raise Exception('\n[ERROR] cannot find %s.py'%calcname)
					elif len(search)>1: raise Exception('\n[ERROR] redundant matches: %s'%str(search))
					else:
						sys.path.insert(0,os.path.dirname(search[0]))
						function = unpacker(search[0],calcname)
						status('computing %s'%calcname,tag='loop')
						computer(function,calc=calc,workspace=self)
						self.save()
					checktime()
		self.save()