Example #1
0
	def get_post(self,sn,calcname=None,plotname=None,lookup=None):

		"""
		Next-generation postprocessing data lookups.
		Resolve the on-disk post-processing data file (".dat") for simulation sn, selected by
		exactly one of calcname or plotname. The optional lookup dict maps spec routes (key
		tuples) to values and disambiguates when several candidate spec files match.
		Note that this should probably replace portions of code in store.plotload and computer.
		UNDER DEVELOPMENT. Recommend parting out the interpreter function.
		"""

		#---exactly one of the two selectors is permitted
		if calcname and plotname or (not calcname and not plotname):
			raise Exception('\n[ERROR] specify only one (calcname or plotname)')

		#---look up post by calculation name
		#---! this section dripped from store.plotload
		if calcname:

			#---get slice name
			#---! this would fail if we looped over slices
			slice_name = self.calc[calcname]['slice_name']

			#---get the group name
			if 'group' in self.calc[calcname]: group = self.calc[calcname]['group']
			else: group = None

			#---look up the slice
			sl = self.slices[sn][slice_name][group if group else 'all']

			#---base file name according to group conventions
			#---NOTE(review): the groupless regex assumes filekeys shaped like "v123.0-100-1" -- confirm
			if not group: 
				fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',sl['filekey'])[0]+'.%s'%calcname
			else: fn_base = '%s.%s'%(sl['filekey'],calcname)

			#---see how many there are 
			candidates = glob.glob(self.path('post_data_spot')+fn_base+'*.spec')

			#---a unique candidate is returned directly with the spec suffix swapped for dat
			if len(candidates)==1: return re.sub('.spec','.dat',candidates[0])
			else:
				options = {}
				for c in candidates:
					#---NOTE(review): nnum appears unused below -- confirm it can be dropped
					nnum = int(re.findall('^.+\.n([0-9]+)\.spec',c)[0])
					#---NOTE(review): eval assumes spec files are trusted, locally-written literals
					with open(c) as fp: options[c] = eval(fp.read())
				meta = self.load_specs()
				new_calcs = self.interpret_specs(meta['calculations'][calcname])				
				#---use lookup to whittle these calculations
				if lookup:
					index = next(ii for ii,i in enumerate(new_calcs) if 
						all([delve(i,*key)==val for key,val in lookup.items()]))
				else: raise Exception('\n[ERROR] too many options so you need to specify via lookup kwarg')
				specs = new_calcs[index]
				#---driped from interpret function (needs its own function)
				#---...resolves "+"-prefixed references against self.vars before comparing specs
				for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match('^\+',j)]:
					source = delve(self.vars,*sub.strip('+').split('/'))
					point = delve(specs,*path[:-1])
					point[path[-1]] = source
				#---end drip
				#---select the spec file whose contents match the resolved specs
				particular = next(key for key,val in options.items() if val==specs['specs'])
				return re.sub('.spec','.dat',particular)

		elif plotname:
			print "[DEVELOPMENT] need to handle plotnames here"
			import pdb;pdb.set_trace() #---legit
Example #2
0
def hypothesis(sweep,default=None):
	"""
	Code for sweeping an arbitrarily deep dictionary over many dimensions in combinations.
	Adapted from hypothesize so that no default is necessary. Only changed the name slightly for readability.
	Each element of sweep carries a 'route' (a sequence of dictionary keys) and the 'values'
	to sweep at that route; one hypothesis dictionary is returned per combination of values.
	Note that the default dictionary, if supplied, is filled in-place with sweep scaffolding.
	"""
	#---construct the baseline hypothesis, creating intermediate levels along every route
	if default is None: default = {}
	for pathway in sweep:
		route = pathway['route']
		if route[0] not in default: default[route[0]] = {}
		for depth in range(1,len(route)-1):
			node = delve(default,*route[:depth])
			if route[depth] not in node: node[route[depth]] = {}
		#---seed the terminus of each multi-level route with the first sweep value
		if len(route)>1:
			delve(default,*route[:-1])[route[-1]] = list(pathway['values'])[0]
	#---top-level keys left as empty dictionaries become placeholders
	for key in default:
		if default[key] == {}: default[key] = None

	#---build the cartesian product of the sweep values without numpy
	#---note that this non-numpythonic way of doing this has not been rigorously tested
	#---note that the previous meshgrid method did not work on all types
	value_sets = [pathway['values'] for pathway in sweep]
	combos = [[first] for first in value_sets[0]]
	for values in value_sets[1:]:
		combos = [partial+[extra] for partial in combos for extra in list(values)]

	#---assemble a list of hypotheses from all possible combinations of the sweep values
	#---note that this code is general, and works for an arbitrarily deep dictionary
	hypotheses = []
	for combo in combos:
		#---each hypothesis starts from an independent copy of the baseline
		candidate = copy.deepcopy(default)
		for num,pathway in enumerate(sweep):
			route = pathway['route']
			#---a non-dictionary at the top of the route means we are already at the terminus
			if type(candidate[route[0]]) != dict:
				candidate[route[0]] = combo[num]
			else:
				#---walk a pointer down to the second-to-last level and set the final value
				pointer = candidate[route[0]]
				for key in route[1:-1]: pointer = pointer[key]
				pointer[route[-1]] = combo[num]
		hypotheses.append(candidate)
	return hypotheses
Example #3
0
	def get_timeseries(self,sn_full):
		"""
		Typically EDR times are stored in the toc for a particular spot. 
		This function first figures out which spot you want and then returns the edr data.
		Returns a list of ((sn_full,step,part),(start,stop)) pairs for every EDR part found,
		populating start/stop in the toc as a side effect. Raises if the simulation is found
		in zero or more than one edr spot.
		"""
		#---we apply the naming transformation to lookup the shortname in the toc, but below we will send out
		#---...the full name since the purpose of this function is to get filenames on disk for slicing
		sn = sn_full
		#---determine the spot, since the simulation could be in multiple spots
		spot_matches = [spotname for spotname,spot in self.spots.items() 
			if spotname[1]=='edr' and sn in self.toc[spotname]]
		if len(spot_matches)>1: 
			raise Exception('development. need a way to adjudicate spots that have redundant simulations')
		elif len(spot_matches)==0:
			#---! removed a stray ipdb breakpoint that fired before this exception was raised
			raise Exception('cannot find simulation %s in any of the spots: %s'%(sn,self.spots.keys()))
		else: spotname = spot_matches[0]
		edrtree = self.toc[spotname][sn]
		#---! current development only checks EDR files when they are needed instead of pre-populating
		for step in edrtree:
			for part in edrtree[step]:
				#---we have to send the full simulation name to the keyfinder
				fn = self.keyfinder(spotname)(sn_full,step,part)
				times = edrcheck(fn)
				keys = (spotname,sn,step,part)
				leaf = delve(self.toc,*keys)
				leaf['start'],leaf['stop'] = times
		#---naming convention
		#---according to the note above we pass the full simulation name back out for looking up files
		sequence = [((sn_full,step,part),tuple([edrtree[step][part][key] 
			for key in ['start','stop']]))
			for step in edrtree 
			for part in edrtree[step]]
		#---return a list of keys,times pairs
		return sequence
Example #4
0
	def verify(self,scrub=False):

		"""
		Check the post-processing filenames to make sure they are present.
		If scrub is set, missing files are deleted from the workspace dictionaries;
		otherwise missing files are only reported.
		!!! Needs finished.
		"""

		#---! verification is currently disabled pending reincorporation into the workflow
		status('passing through verify',tag='development')
		return

		#---! the following needs to be reincorprated into the workflow
		missing_files = []
		checks = []
		#---group files are collected as (route,filename) pairs so failures can be scrubbed below
		checks += [(('groups',sn,group),val[group]['fn']) 
			for sn,val in self.groups.items() for group in val]
		#---! fixed: slice entries were bare filenames which broke the (route,fn) unpacking below
		checks += [(('slices',sn,name,key),sl[name][key]) for sn,sl in self.slices.items() 
			for name in sl for key in ['gro',self.trajectory_format] if key in sl[name]]
		for route,fn in checks:
			if not os.path.isfile(self.postdir+fn): missing_files.append([route,fn])
		if missing_files != [] and not scrub: 
			status('missing files: %s'%str(missing_files),tag='warning')
		elif missing_files != []:
			status('scrubbing deleted files from the workspace: %s'%str(missing_files),tag='warning')
			for route,fn in missing_files:
				#---the route leads to the workspace sub-dictionary holding the stale entry
				del delve(self.__dict__,*route[:-1])[route[-1]]
		#---use status for reporting, consistent with the other branches (was a py2 print)
		else: status('verified',tag='status')
Example #5
0
	def interpret_specs(self,details,return_stubs=False):

		"""
		The YAML-formatted specifications file must be interpreted if it contains loops.
		Expand any "loop" keys found anywhere in details into a list of concrete calculation
		dictionaries, one per combination of loop values. Returns the expanded list, or a
		(calculations,stubs) pair when return_stubs is set.
		"""

		#---this loop interpreter allows for a loop key at any point over specs in list or dict
		#---trim a copy of the specs so all loop keys are terminal
		details_trim = deepcopy(details)
		#---get all paths to a loop
		#---...non-terminal loops have further structure below the "loop" key
		nonterm_paths = list([tuple(j) for j in set([tuple(i[:i.index('loop')+1]) 
			for i,j in catalog(details_trim) if 'loop' in i[:-1]])])
		#---some loops end in a list instead of a sub-dictionary
		nonterm_paths_list = list([tuple(j) for j in set([tuple(i[:i.index('loop')+1]) 
			for i,j in catalog(details_trim) if i[-1]=='loop'])])
		#---for each non-terminal path we save everything below and replace it with a key
		nonterms = []
		for path in nonterm_paths:
			base = deepcopy(delve(details_trim,*path[:-1]))
			nonterms.append(base['loop'])
			pivot = delve(details_trim,*path[:-1])
			#---NOTE(review): in python 3 dict.keys() is a view, not a list -- confirm downstream use
			pivot['loop'] = base['loop'].keys()
		#---hypothesize over the reduced specifications dictionary
		sweeps = [{'route':i[:-1],'values':j} for i,j in catalog(details_trim) if 'loop' in i]
		#---! note that you cannot have loops within loops (yet?) but this would be the right place for it
		#---with no loops present the original details pass through as the single calculation
		if sweeps == []: new_calcs = [deepcopy(details)]
		else: new_calcs = hypothesis(sweeps,default=details_trim)
		new_calcs_stubs = deepcopy(new_calcs)
		#---replace non-terminal loop paths with their downstream dictionaries
		for ii,i in enumerate(nonterms):
			for nc in new_calcs:
				downkey = delve(nc,*nonterm_paths[ii][:-1])
				upkey = nonterm_paths[ii][-2]
				point = delve(nc,*nonterm_paths[ii][:-2])
				point[upkey] = nonterms[ii][downkey]
		#---loops over lists (instead of dictionaries) carry along the entire loop which most be removed
		for ii,i in enumerate(nonterm_paths_list):
			for nc in new_calcs: 
				#---! this section is supposed to excise the redundant "loop" list if it still exists
				#---! however the PPI project had calculation metadata that didn't require it so we just try
				#---NOTE(review): the bare except is a deliberate best-effort per the comment above
				try:
					pivot = delve(nc,*i[:-2]) if len(i)>2 else nc
					val = delve(nc,*i[:-1])[i[-2]]
					pivot[i[-2]] = val
				except: pass
		return new_calcs if not return_stubs else (new_calcs,new_calcs_stubs)
Example #6
0
	def treeparser_edr(self):

		"""
		A special tree parser gets times from edr files.
		Scans every EDR file registered in the toc and records its (start,stop) times in place.
		"""

		#---only spotnames whose part name is "edr" are scanned
		edr_spots = [spot for spot in self.spots.keys() if spot[1]=='edr']
		#---assemble the full (filename,keys) target list before scanning so progress can be reported
		targets = [(self.keyfinder(spot)(sn,step,part),(spot,sn,step,part))
			for spot in edr_spots
			for sn in self.toc[spot].keys()
			for step in self.toc[spot][sn].keys()
			for part in self.toc[spot][sn][step].keys()]
		#---scan each target and stash the times on the corresponding toc leaf
		for num,(fn,keys) in enumerate(targets):
			status('scanning EDR files',i=num,looplen=len(targets),tag='scan')
			start_stop = edrcheck(fn)
			leaf = delve(self.toc,*keys)
			leaf['start'],leaf['stop'] = start_stop
Example #7
0
	def action(self,calculation_name=None):
	
		"""
		Parse a specifications file to make changes to a workspace.
		This function interprets the specifications and acts on it. 
		It manages the irreducible units of an omnicalc operation and ensures
		that the correct data are sent to analysis functions in the right order.
		If calculation_name is given, only that calculation is performed.
		"""

		status('parsing specs file',tag='status')

		#---load the yaml specifications file
		specs = self.load_specs()
		
		#---read simulations from the slices dictionary
		sns = specs['slices'].keys()
		#---variables are passed directly to self.vars
		self.vars = deepcopy(specs['variables']) if 'variables' in specs else {}

		#---apply "+"-delimited internal references in the yaml file
		#---...lists whose final element is a "+"-reference have that element replaced in-place
		#---! fixed: the original condition tested type(j)==list and type(j)==str, which can
		#---! ...never both hold, so list-terminal references were silently skipped
		for path,sub in [(i,j[-1]) for i,j in catalog(specs) if type(j)==list 
			and j and type(j[-1])==str and re.match('^\+',j[-1])]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]][point[path[-1]].index(sub)] = source
		#---...bare string "+"-references are replaced wholesale
		for path,sub in [(i,j) for i,j in catalog(specs) if type(j)==str and re.match('^\+',j)]:
			source = delve(self.vars,*sub.strip('+').split('/'))
			point = delve(specs,*path[:-1])
			point[path[-1]] = source
		
		#---loop over all simulations to create groups and slices
		self.save(quiet=True)
		for route in [('slices',i) for i in sns]:
			root,sn = delve(specs,*route),route[-1]
			#---create groups
			if 'groups' in root:
				for group,select in root['groups'].items():
					kwargs = {'group':group,'select':select,'sn':sn}
					self.create_group(**kwargs)
				root.pop('groups')
			#---slice the trajectory
			if 'slices' in root:
				for sl,details in root['slices'].items(): 
					#---! use a default group here?
					for group in details['groups']:
						kwargs = {'sn':sn,'start':details['start'],
							'end':details['end'],'skip':details['skip'],'slice_name':sl}
						kwargs['group'] = group
						if 'pbc' in details: kwargs['pbc'] = details['pbc']
						self.create_slice(**kwargs)
				root.pop('slices')
			#---anything left unprocessed in the slices block is a user error
			if root != {}: raise Exception('[ERROR] unprocessed specifications %s'%str(root))
			else: del root
		#---we only save after writing all slices. if the slicer fails autoreload will find preexisting files
		self.save(quiet=True)
		checktime()

		#---meta is passed to self.meta
		if 'meta' in specs:
			for sn in specs['meta']:
				self.meta[sn] = specs['meta'][sn]

		#---collections are groups of simulations
		if 'collections' in specs: self.vars['collections'] = specs['collections']

		#---calculations are executed last and organized in this loop
		if 'calculations' in specs:
			status('starting calculations',tag='status')
			#---note that most variables including calc mirror the specs file
			self.calc = dict(specs['calculations'])
			#---infer the correct order for the calculation keys from their upstream dependencies
			upstream_catalog = [i for i,j in catalog(self.calc) if 'upstream' in i]
			#---if there are no specs required to get the upstream data object the user can either 
			#---...use none/None as a placeholder or use the name as the key as in "upstream: name"
			for uu,uc in enumerate(upstream_catalog):
				if uc[-1]=='upstream': upstream_catalog[uu] = upstream_catalog[uu]+[delve(self.calc,*uc)]
			depends = {t[0]:[t[ii+1] for ii,i in enumerate(t) if ii<len(t)-1 and t[ii]=='upstream'] 
				for t in upstream_catalog}
			calckeys = [i for i in self.calc if i not in depends]
			#---check that the calckeys has enough elements 
			list(set(calckeys+[i for j in depends.values() for i in j]))			
			#---! come back to this!
			#---NOTE(review): unsatisfiable dependencies would make this loop spin forever -- confirm
			while any(depends):
				ii,i = depends.popitem()
				if all([j in calckeys for j in i]) and i!=[]: calckeys.append(ii)
				else: depends[ii] = i
			#---if a specific calculation name is given then only perform that calculation
			if calculation_name is not None: calckeys = [calculation_name]
			for calcname in calckeys:
				details = specs['calculations'][calcname]
				status('checking calculation %s'%calcname,tag='status')
				new_calcs = self.interpret_specs(details)
				#---perform calculations
				for calc in new_calcs:
					#---find the script with the funtion
					fns = []
					for (dirpath, dirnames, filenames) in os.walk('./'): 
						fns.extend([dirpath+'/'+fn for fn in filenames])
					#---wrapped in list since filter returns an iterator on python 3
					search = list(filter(lambda x:re.match('^\.\/[^ate].+\/%s\.py$'%calcname,x),fns))
					if len(search)==0: raise Exception('\n[ERROR] cannot find %s.py'%calcname)
					elif len(search)>1: raise Exception('\n[ERROR] redundant matches: %s'%str(search))
					else:
						sys.path.insert(0,os.path.dirname(search[0]))
						function = unpacker(search[0],calcname)
						status('computing %s'%calcname,tag='loop')
						computer(function,calc=calc,workspace=self)
						self.save()
					checktime()
		self.save()