def fit(self, x=None, y=None, der=True):
    '''
    fit the training data

    @return [nlZ, post]       if der = False
    @return [nlZ, dnlZ, post] if der = True (default)
    where nlZ  is the negative log marginal likelihood
          dnlZ is partial derivatives of nlZ w.r.t. each hyperparameter
          post is struct representation of the (approximate) posterior,
               which consists of post.alpha, post.L, post.sW
    '''
    if x is not None:
        self.x = x
    if y is not None:
        self.y = y
    # call inference method
    if isinstance(self.likfunc, lik.Erf):  # or isinstance(self.likfunc, lik.Logistic)
        uy = unique(self.y)
        ind = (uy != 1)
        if any(uy[ind] != -1):
            raise Exception('You attempt classification using labels different from {+1,-1}')
    if not der:
        post, nlZ = self.inffunc.proceed(self.meanfunc, self.covfunc, self.likfunc, self.x, self.y, 2)
        self.nlZ = nlZ
        self.posterior = deepcopy(post)
        return nlZ, post
    else:
        post, nlZ, dnlZ = self.inffunc.proceed(self.meanfunc, self.covfunc, self.likfunc, self.x, self.y, 3)
        self.nlZ = nlZ
        self.dnlZ = deepcopy(dnlZ)
        self.posterior = deepcopy(post)
        return nlZ, dnlZ, post
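# A minimal usage sketch for fit() above, assuming it is a method of a
# pyGPs-style GP model whose meanfunc/covfunc/likfunc/inffunc are already
# configured; 'model', 'x', and 'y' here are hypothetical stand-ins.
nlZ, dnlZ, post = model.fit(x, y, der=True)   # also populates model.posterior
nlZ, post = model.fit(x, y, der=False)        # skip hyperparameter derivatives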
def analyze(inffunc, meanfunc, covfunc, likfunc, x, y, der=False):
    '''
    Middle step; may also be useful for experts to analyze intermediate results.

    return [nlZ, post]       if der = False
    or     [nlZ, dnlZ, post] if der = True
    '''
    if not meanfunc:
        meanfunc = mean.meanZero()
    if not covfunc:
        raise Exception('Covariance function cannot be empty')
    if not likfunc:
        likfunc = lik.likGauss([0.1])
    if not inffunc:
        inffunc = inf.infExact()  # if covFITC then infFITC
    # call inference method
    if isinstance(likfunc, lik.likErf):  # or isinstance(likfunc, lik.likLogistic)
        uy = unique(y)
        ind = (uy != 1)
        if any(uy[ind] != -1):
            raise Exception('You attempt classification using labels different from {+1,-1}')
    if not der:
        vargout = inffunc.proceed(meanfunc, covfunc, likfunc, x, y, 2)
        post = vargout[0]
        nlZ = vargout[1]
        return [nlZ[0], post]        # report -log marg lik and post
    else:
        vargout = inffunc.proceed(meanfunc, covfunc, likfunc, x, y, 3)
        post = vargout[0]
        nlZ = vargout[1]
        dnlZ = vargout[2]
        return [nlZ[0], dnlZ, post]  # report -log marg lik, derivatives, and post
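# A minimal sketch of calling analyze() directly, assuming the mean/cov/lik/inf
# modules follow the old pyGPs-style naming used above; the cov.covSEiso name
# and the hyperparameter values are assumptions for illustration only.
import numpy as np
x = np.random.randn(20, 1)
y = np.sin(x) + 0.1 * np.random.randn(20, 1)
nlZ, post = analyze(inf.infExact(), mean.meanZero(), cov.covSEiso([0.0, 0.0]),
    lik.likGauss([0.1]), x, y, der=False)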
def fit(self, x=None, y=None, der=True):
    '''
    Fit the training data. Update the negative log marginal likelihood (nlZ),
    the partial derivatives of nlZ w.r.t. each hyperparameter (dnlZ), and the
    struct representation of the (approximate) posterior (post), which
    consists of post.alpha, post.L, post.sW.
    '''
    if x is not None:
        if x.ndim == 1:
            x = np.reshape(x, (x.shape[0], 1))
        self.x = x
    if y is not None:
        if y.ndim == 1:
            y = np.reshape(y, (y.shape[0], 1))
        self.y = y
    if self.usingDefaultMean and self.meanfunc is None:
        c = np.mean(y)
        self.meanfunc = mean.Const(c)  # adapt default prior mean wrt. training labels
    # call inference method
    if isinstance(self.likfunc, lik.Erf):  # or isinstance(self.likfunc, lik.Logistic)
        uy = unique(self.y)
        ind = (uy != 1)
        if any(uy[ind] != -1):
            raise Exception('You attempt classification using labels different from {+1,-1}')
    if not der:
        post, nlZ = self.inffunc.proceed(self.meanfunc, self.covfunc, self.likfunc, self.x, self.y, 2)
        self.nlZ = nlZ
        self.posterior = deepcopy(post)
        return nlZ, post
    else:
        post, nlZ, dnlZ = self.inffunc.proceed(self.meanfunc, self.covfunc, self.likfunc, self.x, self.y, 3)
        self.nlZ = nlZ
        self.dnlZ = deepcopy(dnlZ)
        self.posterior = deepcopy(post)
        return nlZ, dnlZ, post
def getPosterior(self, x=None, y=None, der=True):
    '''
    Fit the training data. Update the negative log marginal likelihood (nlZ),
    the partial derivatives of nlZ w.r.t. each hyperparameter (dnlZ), and the
    struct representation of the (approximate) posterior (post), which
    consists of post.alpha, post.L, post.sW.

    nlZ, dnlZ, post = getPosterior(x, y, der=True)
    nlZ, post       = getPosterior(x, y, der=False)

    :param x: training inputs in shape (n,D)
    :param y: training labels in shape (n,1)
    :param boolean der: flag for whether to compute derivatives

    :return: negative log marginal likelihood (nlZ), derivatives of nlZ (dnlZ),
             posterior structure (post)

    You can print post to see descriptions of the posterior,
    or see pyGPs.Core.inf for details.
    '''
    # check whether the number of inputs and labels match
    if x is not None and y is not None:
        assert x.shape[0] == y.shape[0], "number of inputs and labels does not match"
    # check the shape of inputs and transform to the correct shape if needed
    if x is not None:
        if x.ndim == 1:
            x = np.reshape(x, (x.shape[0], 1))
        self.x = x
    if y is not None:
        if y.ndim == 1:
            y = np.reshape(y, (y.shape[0], 1))
        self.y = y
    if self.usingDefaultMean and self.meanfunc is None:
        c = np.mean(y)
        self.meanfunc = mean.Const(c)  # adapt default prior mean wrt. training labels
    # call inference method
    if isinstance(self.likfunc, lik.Erf):  # or isinstance(self.likfunc, lik.Logistic)
        uy = unique(self.y)
        ind = (uy != 1)
        if any(uy[ind] != -1):
            raise Exception('You attempt classification using labels different from {+1,-1}')
    if not der:
        post, nlZ = self.inffunc.evaluate(self.meanfunc, self.covfunc, self.likfunc, self.x, self.y, 2)
        self.nlZ = nlZ
        self.posterior = deepcopy(post)
        return nlZ, post
    else:
        post, nlZ, dnlZ = self.inffunc.evaluate(self.meanfunc, self.covfunc, self.likfunc, self.x, self.y, 3)
        self.nlZ = nlZ
        self.dnlZ = deepcopy(dnlZ)
        self.posterior = deepcopy(post)
        return nlZ, dnlZ, post
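# Hypothetical usage of getPosterior(), following its docstring: x is (n,D)
# and y is (n,1); pyGPs.GPR() is assumed as the host model class.
import numpy as np
import pyGPs
model = pyGPs.GPR()
x = np.linspace(-3, 3, 30).reshape(-1, 1)
y = np.sin(x) + 0.1 * np.random.randn(30, 1)
nlZ, dnlZ, post = model.getPosterior(x, y, der=True)
print post    # prints a description of the (approximate) posterior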
def fit(self, x=None, y=None, der=True):
    '''
    Fit the training data. Update the negative log marginal likelihood (nlZ),
    the partial derivatives of nlZ w.r.t. each hyperparameter (dnlZ), and the
    struct representation of the (approximate) posterior (post), which
    consists of post.alpha, post.L, post.sW.
    '''
    if x is not None:
        if x.ndim == 1:
            x = np.reshape(x, (x.shape[0], 1))
        self.x = x
    if y is not None:
        if y.ndim == 1:
            y = np.reshape(y, (y.shape[0], 1))
        self.y = y
    if self.usingDefaultMean and self.meanfunc is None:
        c = np.mean(y)
        self.meanfunc = mean.Const(c)  # adapt default prior mean wrt. training labels
    # call inference method
    if isinstance(self.likfunc, lik.Erf):  # or isinstance(self.likfunc, lik.Logistic)
        uy = unique(self.y)
        ind = (uy != 1)
        if any(uy[ind] != -1):
            raise Exception('You attempt classification using labels different from {+1,-1}')
    if not der:
        post, nlZ = self.inffunc.evaluate(self.meanfunc, self.covfunc, self.likfunc,
            self.x, self.y, self.ScalePrior, 2)
        self.nlZ = nlZ
        self.posterior = deepcopy(post)
        return nlZ, post
    else:
        if self.ScalePrior:
            post, nlZ, dnlZ, dscale = self.inffunc.evaluate(self.meanfunc, self.covfunc,
                self.likfunc, self.x, self.y, self.ScalePrior, 3)
        else:
            post, nlZ, dnlZ = self.inffunc.evaluate(self.meanfunc, self.covfunc,
                self.likfunc, self.x, self.y, self.ScalePrior, 3)
            dscale = None
        self.nlZ = nlZ
        self.dnlZ = deepcopy(dnlZ)
        self.dscale = dscale
        self.posterior = deepcopy(post)
        return nlZ, dnlZ, post
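# A minimal sketch of the ScalePrior variant above, assuming 'model' is a
# hypothetical instance whose ScalePrior attribute toggles an extra derivative
# w.r.t. a scale hyperparameter; note dscale is stored on the model
# (model.dscale) rather than returned.
model.ScalePrior = True
nlZ, dnlZ, post = model.fit(x, y)
print model.dscale    # derivative of nlZ w.r.t. the prior scale, or None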
def query(self, expr):
    self.results = []
    count = 0
    for card in self.cards:
        count += 1
        if count % 100 == 0:
            sys.stdout.write("."); sys.stdout.flush()  # progress indicator
        try:
            # evaluate the query expression with the card dict as local scope,
            # so card fields can be referenced as bare names in expr
            result = eval(expr, globals(), card)
        except:
            # import traceback; traceback.print_exc(); break
            continue  # ignore cards the expression cannot evaluate, for now
        if result:
            self.results.append(card)
    print
    if self.options.unique:
        # remove duplicates by card name
        self.results = tools.unique(self.results, key=lambda c: c['name'])
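# Illustrative query expressions, assuming each card is a dict whose fields
# (e.g. 'power', 'name') become bare names inside expr via eval(); 'db' is a
# hypothetical instance of the class that owns query().
db.query("power >= 4")           # keep cards whose power is at least 4
db.query("'Dragon' in name")     # simple substring match on the name field
print len(db.results), "cards matched"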
def computer(function, **kwargs):
    """
    Compute function figures out how to run a calculation over a simulation.
    """
    work = kwargs['workspace']
    calc = kwargs['calc']
    #---perform a calculation over all collections
    if 'collections' in calc:
        cols = tuple([calc['collections']]) if type(calc['collections']) == str else calc['collections']
        sns = unique(flatten([work.vars['collections'][i] for i in cols]))
    else:
        sns = work.sns()
    #---get slices (required)
    slice_name = calc['slice_name']
    group = calc['group'] if 'group' in calc else None
    #---pass data to the function according to upstream data type
    incoming_type = calc['uptype']
    jobs, data = [], dict([(sn, {}) for sn in sns])
    combined_slices = []
    for sn in sns:
        new_job = {'sn': sn, 'slice_name': slice_name, 'group': group}
        if incoming_type == 'simulation':
            #---prepare combinations in a dictionary
            if slice_name not in work.slice(sn):
                raise Exception(
                    '\n[ERROR] the slices yaml file is missing a slice named "%s" for simulation "%s"' %
                    (slice_name, sn))
            try:
                mfp = work.slice(sn)[slice_name][group]['missing_frame_percent']
            except:
                print "[WARNING] no missing frame percentage here"
                mfp = 0.0
            if mfp > work.missing_frame_tolerance:
                status('upstream slice failure: %s,%s,%s missing_frame_percent=%.1f' % (
                    sn, slice_name, group, mfp), tag='warning')
                continue
            #---defaulting to 'all' group if group is None
            new_job['grofile'] = work.postdir +\
                work.slice(sn)[slice_name][group if group else 'all']['gro']
            #---! xtc must become a flag. recommend 'xtc' becomes work.cursor[1]
            #---defaulting to 'all' group if group is None
            new_job['trajfile'] = work.postdir + work.slice(sn)[slice_name][group if group else 'all']['xtc']
        if 'specs' not in calc:
            calc['specs'] = ''
        if 'upstream' in calc['specs']:
            #---if no loop on upstream you can use a list
            if type(calc['specs']['upstream']) == list:
                upstream_ask = dict([(key, None) for key in calc['specs']['upstream']])
            elif type(calc['specs']['upstream']) == str:
                upstream_ask = {calc['specs']['upstream']: None}
            else:
                upstream_ask = calc['specs']['upstream']
            for key, val in upstream_ask.items():
                upspecs = deepcopy(work.calc[key])
                #---identify the list of particular options along with the stubs
                options, stubs = work.interpret_specs(upspecs, return_stubs=True)
                #---identify paths and values over which we "whittle" the total list of specs
                whittles = [(i, j) for i, j in catalog(val)]
                #---if no loop on upstream pickles we interpret none and send blank specs
                if val in ['None', 'none', None]:
                    specs = [options[ss] for r, v in whittles for ss, s in enumerate(stubs)]
                else:
                    #---select the correct option by matching all catalogued routes from the incoming
                    #---...key to the original calculation
                    specs = [options[ss] for r, v in whittles for ss, s in enumerate(stubs)
                        if delve(s['specs'], *r) == v]
                if len(specs) != 1 and 'loop' not in upspecs['slice_name']:
                    import pdb; pdb.set_trace()
                    raise Exception('[ERROR] redundant upstream selection %s' % str(specs))
                #---if there are multiple slices
                #---! note that we expect that if slice_names is a list it will be ordered here too
                for slicenum, spec in enumerate(specs):
                    #---if the upstream calculation has a group then use it in the filename
                    if not group:
                        if 'group' in work.calc[key]:
                            upgroup = work.calc[key]['group']
                        else:
                            upgroup = None
                    else:
                        upgroup = group
                    if not upgroup:
                        sl = work.slice(sn)[spec['slice_name']]
                        fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
                            work.slice(sn)[upspecs['slice_name']]['all']['filekey'])[0] + '.%s' % key
                    else:
                        sl = work.slice(sn)[spec['slice_name']][upgroup]
                        fn_base = '%s.%s' % (sl['filekey'], key)
                    #---! moved the following block left recently
                    fn = work.select_postdata(fn_base, spec)
                    if not fn:
                        print '[ERROR] missing %s' % fn
                        import pdb; pdb.set_trace()
                    outkey = key if len(specs) == 1 else '%s%d' % (key, slicenum)
                    #---before each calculation the master loop loads the filename stored here
                    data[sn][outkey] = os.path.basename(fn)[:-4] + 'dat'
            new_job['upstream'] = data[sn].keys()
        jobs.append(new_job)
    #---master loop
    for outgoing in jobs:
        sn, slice_name, group = outgoing['sn'], outgoing['slice_name'], outgoing['group']
        #---if we combine slices for this calculation we use the whole time span in the base filename
        if type(slice_name) == list:
            #---! simple method for making the combination file key
            start = min([work.slice(sn)[s]['all' if not group else group]['start'] for s in slice_name])
            end = max([work.slice(sn)[s]['all' if not group else group]['end'] for s in slice_name])
            #---note: 's' here leaks from the comprehensions above (python 2 behavior)
            skip = work.slice(sn)[s]['all' if not group else group]['skip']
            #---! this filekey construction means the user will have to anticipate the names of combos
            fn_base = '%s.%d-%d-%d.%s' % (work.prefixer(sn), start, end, skip, function.__name__)
        else:
            #---we index all calculations automatically in case we loop over specs later
            index, fn_key = -1, ''
            if not group:
                fn_base = re.findall('^v[0-9]+\.[0-9]+-[0-9]+-[0-9]+',
                    work.slice(sn)[slice_name]['all' if not group else group]['filekey'])[0] +\
                    '.%s' % function.__name__
            else:
                try:
                    fn_base = work.slice(sn)[slice_name][
                        'all' if not group else group]['filekey'] + '.%s' % function.__name__
                except:
                    print "no group and cannot get base filename"
                    import pdb; pdb.set_trace()
        prev = glob.glob(work.postdir + fn_base + '*.dat')
        if prev == []:
            index = 0
        else:
            index = max(map(lambda x: int(re.findall('^.+\/%s\.n([0-9]+)\.dat' % fn_base, x)[0]), prev)) + 1
        fn_key = '.n%d' % index
        fn = fn_base + fn_key + '.dat'
        #---safety check for file errors to prevent overwriting however this should be handled by indices
        if os.path.isfile(work.postdir + fn):
            raise Exception('[ERROR] %s exists' % (work.postdir + fn))
        #---check whether a specs file with the exact same specifications exists
        exists = True if index != -1 and work.select_postdata(fn_base, calc) is not None else False
        if not exists:
            import ipdb; ipdb.set_trace()
        status("%s %s" % (function.__name__, str(outgoing)), tag='compute')
        outgoing['workspace'] = work
        outgoing['calc'] = calc
        if 'upstream' in outgoing:
            sn = outgoing['sn']
            outgoing['upstream'] = dict([(k, load(data[sn][k], work.postdir))
                for k in outgoing['upstream']])
        result, attrs = function(**outgoing)
        """
        spec files are carefully constructed
        they prevent redundant calculations
        they allow us to loop over many parameters while saving files with a single index
        the calculation dictionary in the specs file contains meta-parameters for looping
        we are careful not to save meta parameters to the spec file
        we only save parameters which are relevant to the calculation itself
        the calculation dictionary in the spec file must therefore separate these parameters
        in a sub-dictionary called 'specs'
        we prefer attrs to be small and specific
        since attrs is also used to uniquely specify the data
        all big data should be stored as a result via numpy
        """
        #---if any calculation specifications are not in attributes we warn the user here
        if 'specs' in calc:
            unaccounted = [i for i in calc['specs'] if i not in attrs]
        else:
            unaccounted = []
        if 'upstream' in unaccounted and 'upstream' not in attrs:
            status('automatically appending upstream data', tag='status')
            unaccounted.remove('upstream')
            attrs['upstream'] = calc['specs']['upstream']
        if any(unaccounted):
            print computer_error_attrs_passthrough + '\n\n'
            status('some calculation specs were not saved: %s' % str(unaccounted), tag='STATUS')
            import pdb; pdb.set_trace()
        store(result, fn, work.postdir, attrs=attrs)
        with open(work.postdir + fn_base + fn_key + '.spec', 'w') as fp:
            fp.write(json.dumps(attrs) + '\n')
    #---no modifications to work so no save
    return
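#---a hypothetical analysis function that computer() could drive, following the
#---calling convention above: it receives sn/grofile/trajfile/workspace/calc via
#---kwargs and must return (result, attrs); every name below is illustrative
def undulations(**kwargs):
    sn = kwargs['sn']                 # simulation name supplied by the job
    work = kwargs['workspace']        # workspace object passed through by computer()
    calc = kwargs['calc']             # calculation dictionary from the metadata
    result = {'heights': []}          # big data goes in result (stored via numpy)
    attrs = {'grid_spacing': 0.5}     # small, unique specs go in attrs (written to the .spec file)
    return result, attrs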
import os
import sys
whereami = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.join(".."))

import cardloader
import groupquery
import magicquery
import tools

# load all cards
# expects config.txt in parent directory!
loader = cardloader.CardLoader(sets=sys.argv[1:], path="../config.txt")
cards = loader.load_cards()

# remove duplicates
cards = tools.unique(cards, key=lambda c: c['name'])

# add a restriction: creatures must have power of 4 at least
# change this to change the output! ^_^
def restrict(card):
    return card['power'] >= 4

# only keep mono-color creatures
cards = [c for c in cards if c.type('creature')
    and not (c.multicolor or c.colorless) and restrict(c)]

print "Inspecting", len(cards), "mono-colored creatures"

colors = "RWBGU"
types = {}