def blend_predict(data, wvl, filelist, blendranges, inrange, refpredict, toblend, masterlist, name_subs, ranges,
                  ncs, maskfile, filenames, outputstr):
    """Predict with the four PLS sub-models, blend them, and write a CSV.

    ``ccam.pls_predict`` is called once each for the 'full', 'low', 'mid'
    and 'high' models (in that order), using ``ncs[key]`` components and the
    ``filenames['loadfile'][key]`` / ``filenames['means_file'][key]`` files.
    The four predictions are combined with ``ccam.submodels_blend``
    (``overwrite=False, noneg=False``) and target metadata is looked up with
    ``ccam.target_lookup``.

    The output file ``filenames['pred_csv_out'][outputstr]`` receives four
    header rows (model ranges, normalisations, component counts, column
    names) followed by one row per prediction.

    NOTE(review): ``which_elem`` is not a parameter of this function; it is
    resolved from an enclosing (presumably module-level) scope when the
    header row is built -- confirm it is defined wherever this is used.

    Returns nothing; the result is the file written to disk.
    """
    model_keys = ('full', 'low', 'mid', 'high')
    model_titles = ('Full', 'Low', 'Mid', 'High')

    # Run every sub-model, keeping both its prediction and its normalisation.
    predictions = {}
    norms = {}
    for key in model_keys:
        predictions[key], norms[key] = ccam.pls_predict(
            data, ncs[key], wvl, maskfile,
            loadfile=filenames['loadfile'][key],
            mean_file=filenames['means_file'][key])

    predicts = [predictions[key] for key in model_keys]
    blended = ccam.submodels_blend(predicts, blendranges, inrange, refpredict, toblend,
                                   overwrite=False, noneg=False)
    # The fourth value returned by the lookup is not needed for the table.
    targetlist, targetdists, targetamps, _nshots = ccam.target_lookup(filelist, masterlist, name_subs)

    def range_header(title, key):
        # e.g. "Low (0-30)"
        return title + ' (' + str(ranges[key][0]) + '-' + str(ranges[key][1]) + ')'

    out_path = filenames['pred_csv_out'][outputstr]
    print('Writing results to' + out_path)
    with open(out_path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(['', '', '', '']
                        + [range_header(title, key) for title, key in zip(model_titles, model_keys)]
                        + ['Blended'])
        writer.writerow(['', '', '', 'Norm='] + [norms[key] for key in model_keys])
        writer.writerow(['', '', '', 'nc='] + [str(ncs[key]) for key in model_keys])
        writer.writerow(['File', 'Target', 'Distance', 'Power'] + [which_elem] * 5)
        # One row per entry of the high-model prediction.
        for idx in range(len(predictions['high'])):
            writer.writerow([filelist[idx], targetlist[idx], targetdists[idx], targetamps[idx],
                             predictions['full'][idx], predictions['low'][idx],
                             predictions['mid'][idx], predictions['high'][idx], blended[idx]])
def pls_blend(self, comps_all):
    """Blend the sub-model predictions for every element in ``self.elems``.

    For element number ``i`` the blend settings are read from
    ``<self.blend_array_dir>/<element>_blend_array.csv`` via
    ``ccam.read_csv(path, 0, labelrow=True)``. Each row of that table is
    interpreted as: columns 0-1 the blend range, column 2 ``inrange``,
    column 3 ``refpredict``, and the remaining columns ``toblend``. The
    settings and column ``i`` of every array in ``comps_all`` are passed to
    ``ccam.submodels_blend``.

    Parameters
    ----------
    comps_all : sequence of 2-D arrays
        One array per sub-model; column ``i`` holds the predictions for
        ``self.elems[i]``.

    Returns
    -------
    Array shaped like ``comps_all[0]`` whose column ``i`` is the blended
    prediction for ``self.elems[i]``.
    """
    # Local import: build the path portably instead of hard-coding the
    # Windows '\\' separator.
    import os

    blended = numpy.zeros_like(comps_all[0])
    for i, elem in enumerate(self.elems):
        # Reconstruct the blend input settings from the blend array file.
        blend_path = os.path.join(self.blend_array_dir, elem + '_blend_array.csv')
        blendarray, _blend_labels = ccam.read_csv(blend_path, 0, labelrow=True)
        # Go through float first so values such as '1.0' convert to int.
        blendarray = numpy.array(numpy.array(blendarray, dtype='float'), dtype='int')

        predict = [comp[:, i] for comp in comps_all]
        ranges = blendarray[:, 0:2].tolist()
        inrange = blendarray[:, 2].tolist()
        refpredict = blendarray[:, 3].tolist()
        toblend = blendarray[:, 4:].tolist()

        blended[:, i] = ccam.submodels_blend(predict, ranges, inrange, refpredict, toblend)
    return blended
def RMSE_blend(inputvals, inrange, refpredict, predicts, actual):
    """Return the RMSE of a blended prediction for one set of blend settings.

    Parameters
    ----------
    inputvals : numpy array with at least 14 entries
        ``inputvals[0:4]`` are the four range boundaries (sorted here before
        use). ``inputvals[4:14]`` are five consecutive pairs naming the
        models to blend in each of the five resulting ranges; they are
        converted to integers.
    inrange, refpredict, predicts
        Passed through unchanged to ``ccam.submodels_blend``.
    actual : array
        Reference values the blended prediction is compared against.

    Returns
    -------
    ``sqrt(mean((blended - actual) ** 2))``; the value is also printed.

    Raises
    ------
    ValueError
        If the blend pairs cannot be converted to integers (for example when
        they contain NaN). Previously this case printed a vague message and
        then failed with an unrelated ``AttributeError``.
    """
    ranges = sorted(inputvals[0:4])
    pairs = [inputvals[start:start + 2].tolist() for start in range(4, 14, 2)]
    try:
        toblend = numpy.array(pairs, dtype='int').tolist()
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(
            'RMSE_blend: blend model indices (inputvals[4:14]) could not be '
            'converted to integers: ' + repr(pairs)) from err

    # Five contiguous ranges; the outermost limits are fixed at -20 and 120.
    blendranges = [[-20, ranges[0]],
                   [ranges[0], ranges[1]],
                   [ranges[1], ranges[2]],
                   [ranges[2], ranges[3]],
                   [ranges[3], 120]]
    blended = ccam.submodels_blend(predicts, blendranges, inrange, refpredict, toblend,
                                   overwrite=False, noneg=False)
    RMSE = numpy.sqrt(numpy.mean((blended - actual) ** 2))
    print(RMSE)
    return RMSE
if full is 12 to 20, blend the low and mid model using full as reference If full model is 20 to 25 blend mid and high using full as reference if full model is >25 use high Use full for all others Do not overwrite predictions that have already been set in a previous round of logic. """ predicts = [y_db_full, y_db_low, y_db_mid, y_db_high] ranges = [[-10, 12], [12, 20], [20, 25], [25, 100], [0, 100]] inrange = [0, 0, 0, 0, 0] refpredict = [0, 0, 0, 0, 0] toblend = [[1, 1], [1, 2], [2, 3], [3, 3], [0, 0]] blended2 = ccam.submodels_blend(predicts, ranges, inrange, refpredict, toblend, overwrite=False) truecomps = [ comps[:, compindex], comps[:, compindex], comps[:, compindex], comps[:, compindex], comps[:, compindex] ] predicts = [y_db_full, y_db_low, y_db_mid, y_db_high, blended2] plot_title = 'Final Model ' + which_elem + ' Predictions of Full Database' labels = ['Full', 'Low', 'Mid', 'High', 'Blended'] colors = ['c', 'r', 'g', 'b', 'k'] markers = ['o', '<', 'v', '^', '*'] ccam.plots.Plot1to1(truecomps, predicts,
def _rmse(predicted, actual):
    """Return the root-mean-square difference of two equally shaped arrays."""
    return numpy.sqrt(numpy.mean((predicted - actual) ** 2))


def _rmse_variance(rmse, n_samples):
    """Return the squared uncertainty estimate (rmse / sqrt(2 * (n - 1))) ** 2."""
    return (rmse / numpy.sqrt(2 * (n_samples - 1))) ** 2


def _rmse_p_value(rmse_a, var_a, n_a, rmse_b, var_b, n_b):
    """Return the two-sided t-test p-value (as a fraction) for two RMSE values.

    The degrees of freedom follow the Welch-Satterthwaite form
    (var_a + var_b)**2 / (var_a**2 / (n_a - 1) + var_b**2 / (n_b - 1)).
    """
    t_stat = (rmse_a - rmse_b) / numpy.sqrt(var_a + var_b)
    dof = ((var_a + var_b) ** 2) / ((var_a ** 2) / (n_a - 1) + (var_b ** 2) / (n_b - 1))
    return stats.t.sf(numpy.abs(t_stat), dof) * 2


def final_model_results(y, spect_index, namelist, compos, blend_settings, xminmax, yminmax, ranges, ncs, norms,
                        which_elem, filenames, outfilestr, dpi=1000):
    """Blend the sub-model predictions, plot them, and write result tables.

    Parameters
    ----------
    y : mapping
        Predictions keyed by 'full', 'low', 'mid' and 'high'.
    spect_index, namelist : sequences
        Written to the 'Index' and 'Target' columns of the prediction CSV;
        one CSV row is produced per entry of ``namelist``.
    compos : sequence of five arrays
        Reference compositions, indexed 0-4 in the order full, low, mid,
        high, blended. ``compos[0]`` supplies the 'True Comp' column.
    blend_settings : mapping
        Must provide 'blendranges', 'inrange', 'refpredict' and 'toblend',
        which are forwarded to ``ccam.submodels_blend``.
    xminmax, yminmax : two-element sequences
        Axis limits forwarded to ``ccam.plots.Plot1to1``. The lower y limit
        is replaced per plot by the minimum plotted prediction; the caller's
        ``yminmax`` object itself is left untouched.
    ranges, ncs, norms : mappings
        Per-model composition range, number of components and normalisation,
        keyed by 'full', 'low', 'mid', 'high'.
    which_elem : str
        Element name. Known oxides get a mathtext plot label; any other name
        is used verbatim.
    filenames : mapping
        Provides 'imgfiles', 'imgfiles_test' and 'pred_csv_out'.
    outfilestr : str
        Key into ``filenames['pred_csv_out']``. The value 'test' additionally
        enables the test-set statistics described below.
    dpi : int
        Resolution passed to the per-model plots.

    For ``outfilestr == 'test'`` the function also computes RMSEP values of
    the full and blended models (overall, per sub-model range, and per
    percentile bin of ``compos[0]``), t-test p-values comparing them, two
    extra plots, and appends a summary to ``Testset_RMSEP_summary.csv`` in
    the current working directory.

    NOTE(review): the source this was restored from had lost its
    indentation, so the exact extent of the ``outfilestr == 'test'`` branch
    is a reading of the code (supported by the summary file name and the
    fixed ``dpi=1000`` on those plots) -- confirm against the caller.

    Returns nothing; results are the image files and CSV files written.
    """
    element_labels = {
        'SiO2': r'SiO$_2$',
        'TiO2': r'TiO$_2$',
        'Al2O3': r'Al$_2$O$_3$',
        'FeOT': r'FeO$_T$',
        'MgO': r'MgO',
        'CaO': r'CaO',
        'Na2O': r'Na$_2$O',
        'K2O': r'K$_2$O',
    }
    model_keys = ('full', 'low', 'mid', 'high')
    submodel_position = (('low', 1), ('mid', 2), ('high', 3))

    def range_label(key):
        # e.g. "0-30"
        return str(ranges[key][0]) + '-' + str(ranges[key][1])

    imgnames = filenames['imgfiles']
    # Local copy so that adjusting the lower y limit does not leak to the caller.
    yminmax = list(yminmax)

    predicts = [y[key] for key in model_keys]
    print(blend_settings)
    blended2 = ccam.submodels_blend(predicts, blend_settings['blendranges'], blend_settings['inrange'],
                                    blend_settings['refpredict'], blend_settings['toblend'],
                                    overwrite=False, noneg=False)

    # Create plots of the full model results (NOTE: these plots will show
    # artificially "optimistic" results within the range where the model was
    # trained. These are meant to be used primarily to visualize how the
    # models will do when extrapolating, NOT for evaluation of model accuracy
    # within its training range).
    predicts = [y[key] for key in model_keys] + [blended2]

    # Fall back to the plain name so unknown elements no longer raise NameError.
    which_elem_temp = element_labels.get(which_elem, which_elem)
    plot_title = 'Final Model ' + which_elem_temp + ' Predictions of Full Database'
    labels = ['Full', 'Low ', 'Mid ', 'High ', 'Blended ']
    colors = ['k', 'c', 'g', 'b', 'r']
    markers = ['o', '<', 'v', '^', 'o']

    if outfilestr == 'test':
        plot_title = which_elem_temp
        imgnames = filenames['imgfiles_test']

        # ---- statistics per percentile bin of the true composition ----
        percentiles = [0, 20, 40, 60, 80]
        bins = numpy.percentile(compos[0], percentiles)
        index_bins = numpy.digitize(compos[0], bins)

        n_bins = []
        RMSEP_bins = []
        RMSEP_bins_full = []
        p_bins = []
        for i in range(len(bins)):
            in_bin = index_bins == i + 1
            n_in_bin = numpy.sum(in_bin)
            rmsep_blend_bin = _rmse(predicts[4][in_bin], compos[4][in_bin])
            rmsep_full_bin = _rmse(predicts[0][in_bin], compos[0][in_bin])
            if rmsep_full_bin < rmsep_blend_bin:
                # Diagnostic output: the full model beats the blend in this bin.
                print(i)
                print(rmsep_blend_bin)
                print(rmsep_full_bin)
                print('stop')
            var_blend_bin = _rmse_variance(rmsep_blend_bin, n_in_bin)
            var_full_bin = _rmse_variance(rmsep_full_bin, n_in_bin)
            n_bins.append(n_in_bin)
            RMSEP_bins.append(rmsep_blend_bin)
            RMSEP_bins_full.append(rmsep_full_bin)
            # NOTE(review): per-bin p-values are scaled to percent, while the
            # per-range p-values below are fractions; kept as in the original.
            p_bins.append(_rmse_p_value(rmsep_full_bin, var_full_bin, n_in_bin,
                                        rmsep_blend_bin, var_blend_bin, n_in_bin) * 100)

        # ---- statistics over the full range and each sub-model range ----
        index_full = numpy.where((compos[0] > 0) & (compos[0] < 100))
        index_blend = numpy.where((compos[4] > 0) & (compos[4] < 100))
        n_full = len(index_full[0])
        n_blend = len(index_blend[0])

        RMSEP_full = _rmse(predicts[0][index_full], compos[0][index_full])
        RMSEP_blend = _rmse(predicts[4][index_blend], compos[4][index_blend])
        p_full_blend = _rmse_p_value(RMSEP_full, _rmse_variance(RMSEP_full, n_full), n_full,
                                     RMSEP_blend, _rmse_variance(RMSEP_blend, n_blend), n_blend)

        range_rows = []
        for key, pos in submodel_position:
            selected = numpy.where((compos[pos] > ranges[key][0]) & (compos[pos] < ranges[key][1]))
            n_selected = len(selected[0])
            rmsep_full_here = _rmse(predicts[0][selected], compos[0][selected])
            rmsep_blend_here = _rmse(predicts[4][selected], compos[4][selected])
            rmsep_submodel = _rmse(predicts[pos][selected], compos[pos][selected])
            p_here = _rmse_p_value(rmsep_full_here, _rmse_variance(rmsep_full_here, n_selected), n_selected,
                                   rmsep_blend_here, _rmse_variance(rmsep_blend_here, n_selected), n_selected)
            range_rows.append([range_label(key), str(n_selected), str(rmsep_full_here),
                               str(rmsep_blend_here), str(p_here), str(rmsep_submodel)])

        # ---- full vs. blended comparison plots ----
        labels = ['Full Model', 'Low', 'Mid', 'High', 'Blended']
        f = operator.itemgetter(0, 4)
        yminmax[0] = numpy.min(f(predicts))
        ccam.plots.Plot1to1(list(f(compos)), list(f(predicts)), plot_title, list(f(labels)),
                            list(f(colors)), list(f(markers)), imgnames['blended_full'],
                            xminmax=xminmax, yminmax=yminmax, dpi=1000)

        rel_err = [numpy.abs(true - pred) / true * 100 for true, pred in zip(f(compos), f(predicts))]
        labels_rel_err = ['PLS1', 'Blended Submodels']
        ccam.plots.Plot1to1(list(f(compos)), rel_err, plot_title, labels_rel_err,
                            list(f(colors)), list(f(markers)), imgnames['blended_full_rel'],
                            xminmax=[numpy.min(list(f(compos))), 100],
                            yminmax=[numpy.min(rel_err), numpy.max(rel_err)],
                            loglog=True, one_to_one=False, ylabel='Relative Error (%)', dpi=1000)

        # ---- append to the running summary table ----
        summary_path = os.path.join(os.getcwd(), 'Testset_RMSEP_summary.csv')
        with open(summary_path, 'a', newline='') as writefile:
            writer = csv.writer(writefile, delimiter=',')
            writer.writerow([which_elem])
            writer.writerow(['Ranges'] + [bound for pair in blend_settings['blendranges'] for bound in pair])
            writer.writerow(['To blend:'] + [model for pair in blend_settings['toblend'] for model in pair])
            writer.writerow(['Composition Range', '# of samples', 'RMSEP full', 'RMSEP Blended',
                             'p-value', 'RMSEP Sub-Model'])
            writer.writerow([range_label('full'), str(n_full), str(RMSEP_full), str(RMSEP_blend),
                             str(p_full_blend)])
            for row in range_rows:
                writer.writerow(row)
            for i in range(len(p_bins)):
                # The last bin has no upper edge in ``bins``; it is closed at 100.
                if i + 1 < len(bins):
                    row = [str(round(bins[i], 2)) + '-' + str(round(bins[i + 1], 2))]
                else:
                    row = [str(round(bins[i], 2)) + '-100']
                row.append(n_bins[i])
                row.append(RMSEP_bins_full[i])
                row.append(RMSEP_bins[i])
                row.append(p_bins[i])
                print(i)
                print(row)
                writer.writerow(row)

    # ---- plots produced for every call ----
    yminmax[0] = numpy.min(predicts)
    ccam.plots.Plot1to1(compos[1:-1], predicts[1:-1], which_elem_temp, labels[1:-1], ['r', 'g', 'b'],
                        markers[1:-1], imgnames['all'], xminmax=xminmax, yminmax=yminmax, dpi=dpi)

    single_plots = [
        (4, 'blended', which_elem_temp),
        (0, 'full', plot_title),
        (1, 'low', plot_title),
        (2, 'mid', plot_title),
        (3, 'high', plot_title),
    ]
    for pos, img_key, title in single_plots:
        yminmax[0] = numpy.min(predicts[pos])
        ccam.plots.Plot1to1([compos[pos]], [predicts[pos]], title, [labels[pos]], [colors[pos]],
                            [markers[pos]], imgnames[img_key], xminmax=xminmax, yminmax=yminmax, dpi=dpi)

    # ---- per-sample prediction table ----
    with open(filenames['pred_csv_out'][outfilestr], 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        # The 'High' header now shows lower-upper (it used to repeat the lower bound).
        writer.writerow(['', '', '',
                         'Full (' + range_label('full') + ')',
                         'Low (' + range_label('low') + ')',
                         'Mid (' + range_label('mid') + ')',
                         'High (' + range_label('high') + ')',
                         'Blended'])
        writer.writerow(['', '', 'Norm='] + [norms[key] for key in model_keys])
        writer.writerow(['', '', 'nc='] + [str(ncs[key]) for key in model_keys])
        # Five value columns: four sub-models plus the blended prediction.
        writer.writerow(['Target', 'Index', 'True Comp'] + [which_elem] * 5)
        for i, name in enumerate(namelist):
            writer.writerow([name, spect_index[i], str(compos[0][i]),
                             y['full'][i], y['low'][i], y['mid'][i], y['high'][i], blended2[i]])