Exemple #1
0
def computer(function,**kwargs):

	"""
	Compute function figures out how to run a calculation over a simulation.
	"""

	work = kwargs['workspace']
	calc = kwargs['calc']
	
	#---perform a calculation over all collections
	if 'collections' in calc: 
		cols = tuple([calc['collections']]) if type(calc['collections'])==str else calc['collections']
		sns = unique(flatten([work.vars['collections'][i] for i in cols]))
	else: sns = work.sns()
	
	#---get slices (required)
	slice_name = calc['slice_name']
	group = calc['group'] if 'group' in calc else None
	
	#---pass data to the function according to upstream data type
	incoming_type = calc['uptype']
	jobs,data = [],dict([(sn,{}) for sn in sns])
	combined_slices = []
	for sn in sns:
		new_job = {'sn':sn,'slice_name':slice_name,'group':group}
		if incoming_type == 'simulation':
			#---prepare combinations in a dictionary
			if slice_name not in work.slice(sn):
				raise Exception(
					'\n[ERROR] the slices yaml file is missing a slice named "%s" for simulation "%s"'%
					(slice_name,sn))
			try: mfp = work.slice(sn)[slice_name][group]['missing_frame_percent']
			except: 
				print "[WARNING] no missing frame percentage here"
				mfp = 0.0
			if mfp>work.missing_frame_tolerance:
				status('upstream slice failure: %s,%s,%s missing_frame_percent=%.1f'%(
					sn,slice_name,group,mfp),tag='warning')
				continue
			#---defaulting to 'all' group if group is None
			new_job['grofile'] = work.postdir+\
				work.slice(sn)[slice_name][group if group else 'all']['gro']
			#---! xtc must become a flag. recommend 'xtc' becomes work.cursor[1]
			#---defaulting to 'all' group if group is None
			new_job['trajfile'] = work.postdir+work.slice(sn)[slice_name][group if group else 'all']['xtc']
		if 'specs' not in calc: calc['specs'] = ''
		if 'upstream' in calc['specs']:
			#---if no loop on upstream you can use a list
			if type(calc['specs']['upstream'])==list: 
				upstream_ask = dict([(key,None) for key in calc['specs']['upstream']])
			elif type(calc['specs']['upstream'])==str: 
				upstream_ask = {calc['specs']['upstream']:None}
			else: upstream_ask = calc['specs']['upstream']
			for key,val in upstream_ask.items():
				upspecs = deepcopy(work.calc[key])
				#---identify the list of particular options along with the stubs
				options,stubs = work.interpret_specs(upspecs,return_stubs=True)
				#---identify paths and values over which we "whittle" the total list of specs
				whittles = [(i,j) for i,j in catalog(val)]
				#---if no loop on upstream pickles we interpret none and send blank specs
				if val in ['None','none',None]: 
					specs = [options[ss] for r,v in whittles for ss,s in enumerate(stubs)]
				else:
					#---select the correct option by matching all catalogued routes from the incoming
					#---...key to the original calculation
					specs = [options[ss] for r,v in whittles for ss,s in enumerate(stubs) 
						if delve(s['specs'],*r)==v]
				if len(specs)!=1 and 'loop' not in upspecs['slice_name']: 
					import pdb;pdb.set_trace()
					raise Exception('[ERROR] redundant upstream selection %s'%str(select))
				#---if there are multiple slices
				#---! note that we expect that if slice_names is a list it will be ordered here too
				for slicenum,spec in enumerate(specs):
					#---if the upstream calculation has a group then use it in the filename
					if not group:
						if 'group' in work.calc[key]: upgroup = work.calc[key]['group']
						else: upgroup = None
					else: upgroup = group
					if not upgroup: 
						sl = work.slice(sn)[spec['slice_name']]
						fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
							work.slice(sn)[upspecs['slice_name']]['all']['filekey']
							)[0]+'.%s'%key
					else: 
						sl = work.slice(sn)[spec['slice_name']][upgroup]
						fn_base = '%s.%s'%(sl['filekey'],key)
					#---! moved the following block left recently
					fn = work.select_postdata(fn_base,spec)
					if not fn: 
						print '[ERROR] missing %s'%fn
						import pdb;pdb.set_trace()
					outkey = key if len(specs)==1 else '%s%d'%(key,slicenum)
					#---before each calculation the master loop loads the filename stored here
					data[sn][outkey] = os.path.basename(fn)[:-4]+'dat'
			new_job['upstream'] = data[sn].keys()
		jobs.append(new_job)
	
	#---master loop
	for outgoing in jobs:
		sn,slice_name,group = outgoing['sn'],outgoing['slice_name'],outgoing['group']
		
		#---if we combine slices for this calculation we use the whole time span in the base filename
		if type(slice_name)==list:
			#---! simple method for making the combination file key
			start = min([work.slice(sn)[s]['all' if not group else group]['start'] for s in slice_name])
			end = max([work.slice(sn)[s]['all' if not group else group]['end'] for s in slice_name])
			skip = work.slice(sn)[s]['all' if not group else group]['skip']
			#---! this filekey construction means the user will have to anticipate the names of combos
			fn_base = '%s.%d-%d-%d.%s'%(work.prefixer(sn),start,end,skip,function.__name__)
		else:
			#---we index all calculations automatically in case we loop over specs later
			index,fn_key = -1,''
			if not group:
				fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
					work.slice(sn)[slice_name][
					'all' if not group else group]['filekey'])[0]+'.%s'%function.__name__
			else:
				try: fn_base = work.slice(sn)[slice_name][
					'all' if not group else group]['filekey']+'.%s'%function.__name__
				except:
					print "no group and cannot get base filename"
					import pdb;pdb.set_trace()
		prev = glob.glob(work.postdir+fn_base+'*.dat')
		if prev == []: index = 0
		else: index = max(map(lambda x:int(re.findall('^.+\/%s\.n([0-9]+)\.dat'%fn_base,x)[0]),prev))+1
		fn_key = '.n%d'%index
		fn = fn_base+fn_key+'.dat'
		#---safety check for file errors to prevent overwriting however this should be handled by indices
		if os.path.isfile(work.postdir+fn): raise Exception('[ERROR] %s exists'%(work.postdir+fn))
		
		#---check for specs file with the exact same specifications
		exists = True if index != -1 and work.select_postdata(fn_base,calc) != None else False
		if not exists:
			import ipdb;ipdb.set_trace()
			status("%s %s"%(function.__name__,str(outgoing)),tag='compute')
			outgoing['workspace'] = work
			outgoing['calc'] = calc
			if 'upstream' in outgoing:
				sn = outgoing['sn']
				outgoing['upstream'] = dict([(k,
					load(data[sn][k],work.postdir)) for k in outgoing['upstream']])
			result,attrs = function(**outgoing)
			"""
			spec files are carefully constructed
			they prevent redundant calculations
			they allow us to loop over many parameters while saving files with a single index
			the calculation dictionary in the specs file contains meta-parameters for looping
			we are careful not to save meta parameters to the spec file
			we only save parameters which are relevant to the calculation itself
			the calculation dictionary in the spec file must therefore separate these parameters
			in a sub-dictionary called 'specs'
			we prefer attrs to be small and specific
			since attrs is also used to uniquely specify the data
			all big data should be stored as a result via numpy
			"""
			#---if any calculation specifications are not in attributes we warn the user here
			if 'specs' in calc: unaccounted = [i for i in calc['specs'] if i not in attrs]
			else: unaccounted = []
			if 'upstream' in unaccounted and 'upstream' not in attrs: 
				status('automatically appending upstream data',tag='status')
				unaccounted.remove('upstream')
				attrs['upstream'] = calc['specs']['upstream']
			if any(unaccounted):
				print computer_error_attrs_passthrough+'\n\n'
				status('some calculation specs were not saved: %s'%
					str(unaccounted),tag='STATUS')
				import pdb;pdb.set_trace()
			store(result,fn,work.postdir,attrs=attrs)
			with open(work.postdir+fn_base+fn_key+'.spec','w') as fp: fp.write(json.dumps(attrs)+'\n')
	#---no modifications to work so no save
	return
Exemple #2
0
def plotload(plotname,work,specfile=None,choice_override=None,use_group=False):

	"""
	Load postprocessing data for making a plot.
	Note that we currently do not use the specs items.
	"""

	#---read plot specification
	if 0:
		if not specfile: specfile = work.paths['specs_file']
		#---load the yaml specifications file
		if type(specfile)==str: specfile = [specfile]
		raw_specs = ''
		for sfn in specfile: 
			with open(sfn,'r') as fp: raw_specs += '\n'+fp.read()
		specs = yaml.load(raw_specs)
	specs = work.load_specs()

	#---merge automatic plots here
	if 0:
		if 'autoplots' in specs:
			for key,val in specs['autoplots'].items():
				if key in specs['plots']: 
					raise Exception('\n[ERROR] redundant names in plots and autoplots: %s'%key+
						", which is populated with django so check calculator.Calculation")
				else: specs['plots'][key] = deepcopy(val)
	plotspecs = specs['plots'][plotname]

	#---load the calculation from the workspace
	calcnames = plotspecs['calculation']

	if type(calcnames)==str: calcnames = [calcnames]
	datasets = {name:[] for name in calcnames}
	calcsets = {name:[] for name in calcnames}
	
	#---loop over calcnames requested in the plot specs
	for calcname in calcnames:
		
		calcs = work.interpret_specs(work.calc[calcname])
		if len(calcs)==0: raise Exception('[ERROR] failed to retrieve calculations')
	
		#---get the group from either plotspecs or the calculation or exception
		if 'group' in plotspecs: group = plotspecs['group']
		elif 'group' in work.calc[calcname]: group = work.calc[calcname]['group']
		else: group = None
		#---get the collection from either plotspecs or the upstream calculation
		if 'collections' in plotspecs: 
			collections = plotspecs['collections']
			if type(collections)==str: collections = [collections]
		else: collections = calc['collections']
		sns = flatten([work.vars['collections'][c] for c in collections])

		#---compile all upstream data
		data = [{} for c in calcs]
	
		#---iterate over the loop over upstream calculations
		for calcnum,calcwhittle in enumerate(calcs):

			status('upstream data type: %s'%str(calcwhittle),tag='load')
			calc = deepcopy(work.calc[calcname])
			#---loop over simulations 
			for snum,sn in enumerate(sns):
				status(sn.ljust(26),tag='load',i=snum,looplen=len(sns))
				#---slices in plotspecs or lookup from variables with plus-syntax
				#---! need to allow blank slices here so that the machine looks to calcs to get them
				if 'slices' in plotspecs and not re.match('^\+',plotspecs['slices']): 
					work.cursor = (work.c,work.trajectory_format)
					sl = work.slice(sn)[plotspecs['slices']]['all' if not group else group]
				elif 'slices' in plotspecs: 
					sl = deepcopy(delve(work.vars,*plotspecs['slices'].strip('+').split('/')))
					#---the slice might not have a filekey if its a combo
					if 'filekey' not in sl:
						#---! pbc and groups will usually be absent here
						start,end,skip,pbc = [sl[i] for i in 'start,end,skip,pbc'.split(',')]
						sl['filekey'] = '%s.%d-%d-%d'%(work.prefixer(sn),start,end,skip)
				else: raise Exception('[ERROR] cannot infer slices')
				#---compute base filename
				if not group: 
					#---! deprecated
					fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',sl['filekey'])[0]+'.%s'%calcname
				elif use_group:
					#---special settings here for loading certain kinds of data eg protein_abstractor
					fn_base = '%s.%s.pbc%s.%s'%(sl['filekey'],group,sl['pbc'],calcname)
				else: fn_base = '%s.%s'%(sl['filekey'],calcname)
				#---fill in upstream details in our replicate of the calculation specs
				for route,val in [(i,j) for i,j in catalog(calcwhittle)]:
					#---! the plot has to mimic the specs structure exactly otherwise error below
					try: endpoint = delve(work.calc[calcname],*route)
					except:
						import pdb;pdb.set_trace()
					if type(endpoint)==dict and 'loop' in endpoint: 
						try: 
							penultimate = delve(calc,*route[:-1])
							penultimate[route[-1]] = val
						except: pass
				#---get the dat file and package it
				fn = work.select_postdata(fn_base,calc,debug=True)
				if fn == None: 
					print '[ERROR] cannot locate a file necessary for plotting via work.select_postdata\n'+\
						'[ERROR] you probably need to fix your meta.yaml file and run "make compute"\n'+\
						'[ERROR] check the post directory at "%s" and the variables fn_base,calc\n'
					import pdb;pdb.set_trace()
				dat_fn = os.path.basename(fn)[:-4]+'dat'
				data[calcnum][sn] = {'data':load(dat_fn,work.postdir),
					'slice':sl,'group':group,'fn_base':fn_base}
		#---if only one calculation of this type then we elevate package
		if len(calcs)==1: calcs,data = calcs[0],data[0]
		datasets[calcname],calcsets[calcname] = data,calcs
	#---if only one upstream calculation we return that directly
	if len(datasets)==1: return datasets.values()[0],calcsets.values()[0]
	else: return datasets,calcsets