def _write_rows(path, fmt, rows):
    """Write ``fmt % tuple(row)`` for every entry of *rows* to *path*.

    The file is opened in 'w' mode (replacing any existing file) and is
    closed even if formatting or writing a row raises.
    """
    with open(path, 'w') as ofp:
        ofp.writelines(fmt % tuple(row) for row in rows)


def LOO_Hg(ID_to_drop, Masterfile, Connectionsfile):
    """Run one leave-one-out fit and predict Hg for the left-out record.

    Steps, in order:
      1. Read Masterfile (comma-delimited, one header row) and log-transform
         the 'Wt' column.
      2. Look up species, event, length and observed Hg of ID_to_drop.
      3. Remove every ID returned by drop_ID(ID_to_drop, Connectionsfile)
         and write the remaining rows to 'Hgdata.dat'.
      4. Trim 'Hgspc.srt.master' / 'Hgevents.srt.master' down to the species
         and events still present, writing 'Hgspc.srt' / 'Hgevents.srt'.
      5. Build 'Hgdata.srt', 'Hgdata.datext', 'Hgdata.datextsrt' and
         'Hgdata.ndx' through gqsort_Hgdat / gqsort_Hgdatext.
      6. Run the external './NRparest' program, read 'BestSPs' and 'BestEPs',
         and evaluate calc_Hg for the left-out record.

    All working files are read and written in the current directory.

    Parameters
    ----------
    ID_to_drop : value compared against the integer 'ID' column
    Masterfile : path of the comma-delimited master data file; its header
        must contain 'ID', 'SpC', 'Event', 'length', 'Hg' and 'Wt'
    Connectionsfile : path passed through unchanged to drop_ID

    Returns
    -------
    (cHg, obsHg) : the modeled value (np.exp(calc_Hg(...)) - 1) / 1000.0 and
        the observed 'Hg' entry for ID_to_drop, both as NumPy arrays.

    Raises
    ------
    ValueError
        If ID_to_drop does not occur in the 'ID' column, or (from
        list.index) if a required header name is missing.
    """
    datfile = 'Hgdata.dat'
    srtfile = 'Hgdata.srt'
    extfile = 'Hgdata.datext'
    extsrtfile = 'Hgdata.datextsrt'
    ndxfile = 'Hgdata.ndx'

    # read in and parse the entire validation data set
    # (ndmin=2 keeps a single-row file two-dimensional so column slicing works)
    MasterData = np.loadtxt(Masterfile, skiprows=1, delimiter=',', ndmin=2)
    with open(Masterfile, 'r') as ifp:
        headers = ifp.readline().strip().split(',')
    MKind = headers.index('ID')
    wt_col = headers.index('Wt')
    ID = MasterData[:, MKind].astype(int)
    SpC = MasterData[:, headers.index('SpC')].astype(int)
    event = MasterData[:, headers.index('Event')].astype(int)
    length = MasterData[:, headers.index('length')].astype(float)
    Hg_obs = MasterData[:, headers.index('Hg')].astype(float)

    # adjust the weights by a log transformation; weights below 2 are set
    # to 1 first, so they end up as log(1) + 1 = 1.0
    Wt = MasterData[:, wt_col].astype(float)
    Wt[Wt < 2] = 1
    Wt = np.log(Wt) + 1.0
    MasterData[:, wt_col] = Wt

    # first find the event, species, and length of the ID that is to be
    # dropped - this is required later for the forward calculation
    index_to_drop = np.nonzero(ID == ID_to_drop)[0]
    if index_to_drop.size == 0:
        # fail before any file is written or the external fit is run
        raise ValueError('ID %s not found in %s' % (ID_to_drop, Masterfile))
    csp = SpC[index_to_drop]
    cev = event[index_to_drop]
    clen = length[index_to_drop]
    obsHg = Hg_obs[index_to_drop]

    # find all IDs that need to be dropped
    dropIDs = drop_ID(ID_to_drop, Connectionsfile)
    MKlist = np.setdiff1d(ID, dropIDs)  # sorted, unique IDs that are kept

    # SORT MasterData by ID (the ID column is also truncated to whole numbers)
    MasterData = MasterData[MasterData[:, MKind].argsort()]
    MasterData[:, MKind] = MasterData[:, MKind].astype(int)

    # Row lookup by binary search - requires that MasterData be sorted by
    # the ID column (done just above). Every entry of MKlist comes from ID,
    # so each search lands on a matching row. For more details see:
    # http://stackoverflow.com/questions/5505380/most-efficient-way-to-pull-specified-rows-from-a-2-d-array
    CurrMasterData = MasterData[np.searchsorted(MasterData[:, MKind], MKlist), :]

    # now, write out the datfile in proper formats
    # from Donato's code:
    # SPC, Event, length, Result, DL, WT, ID (new version of 11/11 requires
    # ID on the end)
    # NOTE(review): the positions 6, 7, 1, 4, 3, 2 are hard-coded while other
    # columns are found by header name - confirm they match the Masterfile
    # column layout.
    _write_rows(datfile,
                '%3d %7d %13.8f %13.8f %2d %13.8f %d\n',
                CurrMasterData[:, [6, 7, 1, 4, 3, 2, MKind]])

    # trim away any orphaned SpC or Events
    allSPC = np.unique(CurrMasterData[:, 6])
    allEVENT = np.unique(CurrMasterData[:, 7])

    # now read in the parameter starting values files and trim out
    # irrelevant parameters
    # (assumes each master file is sorted on its first column and contains
    # every code still present in the data - TODO confirm)
    spcdat = np.loadtxt('Hgspc.srt.master', ndmin=2)
    currspc = spcdat[np.searchsorted(spcdat[:, 0], allSPC), :]
    _write_rows('Hgspc.srt', '%10d %20f\n', currspc[:, :2])

    eventdat = np.loadtxt('Hgevents.srt.master', ndmin=2)
    currevent = eventdat[np.searchsorted(eventdat[:, 0], allEVENT), :]
    _write_rows('Hgevents.srt', '%10d %20f\n', currevent[:, :2])

    # memory cleanup
    del CurrMasterData

    # gqsort on Hgdata.dat, sorting by event, SPC, and DL
    gqsort_Hgdat(datfile, srtfile)

    # append a 1-based sequence number after sorting
    # analogous to MLEprep01.c and write out to extfile
    indat = np.loadtxt(srtfile, ndmin=2)
    _write_rows(extfile,
                '%3d %7d %13.8f %13.8f %2d %13.8f %8d\n',
                (tuple(row[:6]) + (seq,)
                 for seq, row in enumerate(indat, start=1)))

    # now sort by SPC
    gqsort_Hgdatext(extfile, extsrtfile)

    # Finally, strip out the index (last column) from extsrtfile and save it
    # in the .ndx file
    indat = np.loadtxt(extsrtfile, ndmin=2)
    _write_rows(ndxfile, '%8d\n', indat[:, -1:])

    # call the external C-code Newton-Raphson parameter estimation code
    # NOTE(review): the exit status is not checked; if the run fails, the
    # result files read below may be missing or left over from an earlier
    # run - confirm whether that should be treated as an error.
    os.system('./NRparest')

    # finally, read in the results and make the Hg prediction for the
    # left-out value
    SpCpars = np.loadtxt('BestSPs', ndmin=2)      # SpC parameters
    Eventpars = np.loadtxt('BestEPs', ndmin=2)    # Event parameters

    # pull the parameter values necessary for calculating Hg for the
    # left-out value
    spcind = np.nonzero(SpCpars[:, 0] == csp)[0]
    evind = np.nonzero(Eventpars[:, 0] == cev)[0]

    # calculate mercury for this index
    cHg = calc_Hg(SpCpars[spcind, 1], Eventpars[evind, 1], clen)
    cHg = (np.exp(cHg) - 1) / 1000.0

    # return the left-out modeled Hg concentration and the observed value
    return cHg, obsHg
for i in np.arange(numdat): ndx_ofp.write('%d\n' % (i+1)) ndx_ofp.close() if (len_LOO - cDL > 1): # call the external C-code Newton-Raphson parameter estimation code os.system('./NRparest') # finally, read in the results and make the Hg prediction for the left-out value # SpC parameters SpCpars = np.loadtxt('BestSPs') # Event parameters Eventpars = np.loadtxt('BestEPs') # calculate mercury for this index cHg = calc_Hg(SpCpars[1],Eventpars[1],clen) cHg = (np.exp(cHg)-1)/1000.0 # read in sigma from the summary data file set1 = open('summaryRESULTS.dat','r').readlines() tmp = set1[1].strip().split() # tmp is [max_sig max_loglike total_iters best_iteration] ofp = open(main_output_file,'a') ofp.write('%17d%17.8e%17.8e%17.8e%17.8e%17d%17d' %(cID, cHg_obs, cHg, float(tmp[0]), float(tmp[1]), float(tmp[2]), float(tmp[3])) + '\n') ofp.close()
os.exit() # call the external C-code Newton-Raphson parameter estimation code os.system('./NRparest') # finally, read in the results and make the Hg prediction for the left-out value # SpC parameters SpCpars = np.loadtxt('BestSPs') # Event parameters Eventpars = np.loadtxt('BestEPs') # pull the parameter values necessary for calculating Hg for the left-out value spcind = np.nonzero(SpCpars[:,0]==csp)[0] evind = np.nonzero(Eventpars[:,0]==cev)[0] # calculate mercury for this index cHg = calc_Hg(SpCpars[spcind,1],Eventpars[evind,1],clen) cHg = (np.exp(cHg)-1)/1000.0 # return the left-out modeled Hg concentraion. #return cHg, obsHg