Ejemplo n.º 1
0
def blend_predict(data,wvl,filelist,blendranges,inrange,refpredict,toblend,masterlist,name_subs,ranges,ncs,maskfile,filenames,outputstr):
    """Predict with the four PLS sub-models, blend them, and write a CSV.

    Runs ``ccam.pls_predict`` once per sub-model ('full', 'low', 'mid',
    'high'), blends the four predictions with ``ccam.submodels_blend`` and
    writes one row per input file to ``filenames['pred_csv_out'][outputstr]``.

    Parameters
    ----------
    data, wvl, maskfile :
        Passed unchanged to ``ccam.pls_predict``.
    filelist, masterlist, name_subs :
        Passed to ``ccam.target_lookup``; ``filelist`` also supplies the
        first column of each output row.
    blendranges, inrange, refpredict, toblend :
        Blend settings, passed unchanged to ``ccam.submodels_blend``.
    ranges, ncs :
        Dicts keyed by 'full', 'low', 'mid', 'high'. ``ranges[k]`` is
        indexed with [0] and [1] for the column headers; ``ncs[k]`` is the
        component count given to ``ccam.pls_predict``.
    filenames :
        Dict with the keys 'loadfile' and 'means_file' (each keyed by
        sub-model) and 'pred_csv_out' (keyed by ``outputstr``).
    outputstr :
        Key selecting the output path in ``filenames['pred_csv_out']``.

    Returns
    -------
    None
    """
    submodels=('full','low','mid','high')
    predictions={}
    norms={}
    for submodel in submodels:
        predictions[submodel],norms[submodel]=ccam.pls_predict(
            data,ncs[submodel],wvl,maskfile,
            loadfile=filenames['loadfile'][submodel],
            mean_file=filenames['means_file'][submodel])

    y_full,y_low,y_mid,y_high=(predictions[s] for s in submodels)
    predicts=[y_full,y_low,y_mid,y_high]

    blended=ccam.submodels_blend(predicts,blendranges,inrange,refpredict,toblend,overwrite=False,noneg=False)

    # nshots is returned by the lookup but not used in the output.
    targetlist,targetdists,targetamps,nshots=ccam.target_lookup(filelist,masterlist,name_subs)

    outpath=filenames['pred_csv_out'][outputstr]
    print('Writing results to '+outpath)
    with open(outpath,'w',newline='') as writefile:
        writer=csv.writer(writefile,delimiter=',')

        header=['','','','']
        for submodel,title in zip(submodels,('Full','Low','Mid','High')):
            header.append(title+' ('+str(ranges[submodel][0])+'-'+str(ranges[submodel][1])+')')
        header.append('Blended')
        writer.writerow(header)

        writer.writerow(['','','','Norm=']+[norms[s] for s in submodels])
        writer.writerow(['','','','nc=']+[str(ncs[s]) for s in submodels])
        # NOTE(review): which_elem is not a parameter of this function; it is
        # presumably a module-level name in the original file - confirm.
        writer.writerow(['File','Target','Distance','Power']+[which_elem]*5)

        # One row per prediction; the row count follows the 'high' prediction.
        for i in range(len(y_high)):
            writer.writerow([filelist[i],targetlist[i],targetdists[i],targetamps[i],y_full[i],y_low[i],y_mid[i],y_high[i],blended[i]])
Ejemplo n.º 2
0
    def pls_blend(self, comps_all):
        """Blend sub-model predictions element by element.

        For each entry of ``self.elems`` the blend settings are read from
        ``<self.blend_array_dir>/<elem>_blend_array.csv`` and handed to
        ``ccam.submodels_blend`` together with that element's column of every
        array in ``comps_all``.

        Parameters
        ----------
        comps_all :
            Sequence of 2-D arrays (indexed as ``k[:, i]``), one per
            sub-model, with one column per element in ``self.elems``.

        Returns
        -------
        Array shaped like ``comps_all[0]`` holding the blended predictions.
        """
        # Local import so this method does not depend on the file's import block.
        import os

        blended = numpy.zeros_like(comps_all[0])
        for i, elem in enumerate(self.elems):
            # Reconstruct the blend input settings from the blend array file.
            # os.path.join replaces a hard-coded '\\' separator so the lookup
            # also works on non-Windows platforms.
            blend_file = os.path.join(self.blend_array_dir,
                                      elem + '_blend_array.csv')
            blendarray, _blend_labels = ccam.read_csv(blend_file,
                                                      0,
                                                      labelrow=True)
            # Values are parsed as float first, then truncated to int.
            blendarray = numpy.array(blendarray, dtype='float').astype('int')

            # Column layout of the file, as indexed here:
            # 0-1 range bounds, 2 inrange, 3 refpredict, 4+ models to blend.
            ranges = blendarray[:, 0:2].tolist()
            inrange = blendarray[:, 2].tolist()
            refpredict = blendarray[:, 3].tolist()
            toblend = blendarray[:, 4:].tolist()

            predict = [comp[:, i] for comp in comps_all]

            blended[:, i] = ccam.submodels_blend(predict, ranges, inrange,
                                                 refpredict, toblend)

        return blended
Ejemplo n.º 3
0
def RMSE_blend(inputvals,inrange,refpredict,predicts,actual):
    """Return the RMSE of a blended prediction for one set of blend settings.

    Parameters
    ----------
    inputvals :
        Array of at least 14 values. Elements 0-3 are the four range
        boundaries (sorted before use); elements 4-13 are five consecutive
        pairs of sub-model indices to blend, truncated to int.
    inrange, refpredict, predicts :
        Passed unchanged to ``ccam.submodels_blend``.
    actual :
        Reference values the blended result is compared against.

    Returns
    -------
    The root-mean-square difference between the blended prediction and
    ``actual``; the value is also printed.

    Raises
    ------
    ValueError
        If the blend index pairs cannot be converted to integers (for
        example when they contain NaN or infinity).
    """
    ranges=sorted(inputvals[0:4])
    pairs=[inputvals[start:start+2].tolist() for start in range(4,14,2)]
    try:
        toblend=numpy.array(pairs,dtype='int').tolist()
    except (ValueError,TypeError,OverflowError) as err:
        # The old code printed a vague message and then crashed with an
        # unrelated AttributeError; fail here with the real cause instead.
        raise ValueError('blend indices in inputvals[4:14] cannot be converted to int: '+repr(pairs)) from err

    # The outer bounds -20 and 120 are fixed; the sorted boundaries split the
    # interval between them into five consecutive ranges.
    blendranges=[[-20,ranges[0]],[ranges[0],ranges[1]],[ranges[1],ranges[2]],[ranges[2],ranges[3]],[ranges[3],120]]
    blended=ccam.submodels_blend(predicts,blendranges,inrange,refpredict,toblend,overwrite=False,noneg=False)
    RMSE=numpy.sqrt(numpy.mean((blended-actual)**2))
    print (RMSE)
    return RMSE
If the full model predicts 12 to 20, blend the low and mid models using full as reference.
If the full model predicts 20 to 25, blend the mid and high models using full as reference.
If the full model predicts >25, use high.
Use full for all others.
Do not overwrite predictions that have already been set in a previous round of logic.
"""

# Sub-model predictions in the order full, low, mid, high.
predicts = [y_db_full, y_db_low, y_db_mid, y_db_high]

# One entry per blending rule: the value range it applies to and the pair of
# entries in `predicts` to blend within that range.
ranges = [
    [-10, 12],
    [12, 20],
    [20, 25],
    [25, 100],
    [0, 100],
]
toblend = [
    [1, 1],
    [1, 2],
    [2, 3],
    [3, 3],
    [0, 0],
]
# Every rule uses index 0 for both the range check and the reference
# (presumably the full model, the first entry of `predicts` - confirm).
inrange = [0] * 5
refpredict = [0] * 5

blended2 = ccam.submodels_blend(
    predicts, ranges, inrange, refpredict, toblend, overwrite=False)

# The same true-composition column is paired with each of the five series.
truecomps = [comps[:, compindex] for _ in range(5)]
predicts = [y_db_full, y_db_low, y_db_mid, y_db_high, blended2]
plot_title = ''.join(
    ['Final Model ', which_elem, ' Predictions of Full Database'])
labels = [
    'Full',
    'Low',
    'Mid',
    'High',
    'Blended',
]
colors = [
    'c',
    'r',
    'g',
    'b',
    'k',
]
markers = [
    'o',
    '<',
    'v',
    '^',
    '*',
]

ccam.plots.Plot1to1(truecomps,
                    predicts,
Ejemplo n.º 5
0
def _rmsep(predicted,actual):
    """Return the root-mean-square difference between two arrays."""
    return numpy.sqrt(numpy.mean((predicted-actual)**2))


def _rmsep_p_value(rmsep_a,rmsep_b,n_a,n_b):
    """Return the two-sided t-test p-value (a fraction) comparing two RMSEPs.

    The variance of each RMSEP is estimated as (RMSEP/sqrt(2*(n-1)))**2 and
    the degrees of freedom use the Welch-Satterthwaite form.
    """
    s2_a=(rmsep_a/numpy.sqrt(2*(n_a-1)))**2
    s2_b=(rmsep_b/numpy.sqrt(2*(n_b-1)))**2
    t_stat=(rmsep_a-rmsep_b)/numpy.sqrt(s2_a+s2_b)
    dof=((s2_a+s2_b)**2)/((s2_a**2)/(n_a-1)+(s2_b**2)/(n_b-1))
    return stats.t.sf(numpy.abs(t_stat),dof)*2


def final_model_results(y,spect_index,namelist,compos,blend_settings,xminmax,yminmax,ranges,ncs,norms,which_elem,filenames,outfilestr,dpi=1000):
    """Blend the sub-model predictions, plot them and write the results CSV.

    Parameters
    ----------
    y :
        Dict of prediction arrays keyed by 'full', 'low', 'mid', 'high'.
    spect_index, namelist :
        Per-sample index and target name, written to the output CSV.
    compos :
        Sequence of five true-composition arrays, paired by position with the
        predictions [full, low, mid, high, blended].
    blend_settings :
        Dict with keys 'blendranges', 'inrange', 'refpredict', 'toblend',
        passed to ``ccam.submodels_blend``.
    xminmax, yminmax :
        Axis limits passed to ``ccam.plots.Plot1to1``. NOTE: ``yminmax[0]``
        is overwritten in place before each plot, so the caller's list is
        modified.
    ranges, ncs, norms :
        Dicts keyed by 'full', 'low', 'mid', 'high'; ``ranges[k]`` is indexed
        with [0] and [1].
    which_elem :
        Element/oxide name. Known names get a mathtext label for plot titles;
        any other name is used as-is.
    filenames :
        Dict with keys 'imgfiles', 'imgfiles_test' (used when
        ``outfilestr == 'test'``) and 'pred_csv_out' (keyed by ``outfilestr``).
    outfilestr :
        Output selector. The value 'test' additionally computes RMSEP
        statistics and appends them to ``Testset_RMSEP_summary.csv`` in the
        current working directory.
    dpi :
        Resolution passed to the plots drawn after the test-only section (the
        test-only plots always use 1000).

    Returns
    -------
    None
    """
    imgnames=filenames['imgfiles']
    predicts=[y['full'],y['low'],y['mid'],y['high']]
    print(blend_settings)
    blended2=ccam.submodels_blend(predicts,blend_settings['blendranges'],blend_settings['inrange'],blend_settings['refpredict'],blend_settings['toblend'],overwrite=False,noneg=False)
    # Create plots of the full model results (NOTE: these plots will show
    # artificially "optimistic" results within the range where the model was
    # trained. These are meant to be used primarily to visualize how the
    # models will do when extrapolating, NOT for evaluation of model accuracy
    # within its training range).
    predicts=[y['full'],y['low'],y['mid'],y['high'],blended2]

    elem_labels={
        'SiO2':r'SiO$_2$',
        'TiO2':r'TiO$_2$',
        'Al2O3':r'Al$_2$O$_3$',
        'FeOT':r'FeO$_T$',
        'MgO':r'MgO',
        'CaO':r'CaO',
        'Na2O':r'Na$_2$O',
        'K2O':r'K$_2$O',
    }
    # Fall back to the raw name so an unlisted element no longer leaves the
    # label unbound.
    which_elem_temp=elem_labels.get(which_elem,which_elem)

    plot_title='Final Model '+which_elem_temp+' Predictions of Full Database'
    labels=['Full','Low ','Mid ','High ','Blended ']
    colors=['k','c','g','b','r']
    markers=['o','<','v','^','o']

    if outfilestr=='test':
        plot_title=which_elem_temp
        imgnames=filenames['imgfiles_test']

        # Bin the samples by quintile of the true composition.
        percentiles=[0,20,40,60,80]
        bins=numpy.percentile(compos[0],percentiles)
        index_bins=numpy.digitize(compos[0],bins)

        index_full=numpy.where((compos[0]>0) & (compos[0]<100))
        index_low=numpy.where((compos[1]>ranges['low'][0]) & (compos[1]<ranges['low'][1]))
        index_mid=numpy.where((compos[2]>ranges['mid'][0]) & (compos[2]<ranges['mid'][1]))
        index_high=numpy.where((compos[3]>ranges['high'][0]) & (compos[3]<ranges['high'][1]))
        index_blend=numpy.where((compos[4]>0) & (compos[4]<100))

        n_full=len(index_full[0])
        n_low=len(index_low[0])
        n_mid=len(index_mid[0])
        n_high=len(index_high[0])
        n_blend=len(index_blend[0])

        n_bins=[]
        RMSEP_bins=[]
        RMSEP_bins_full=[]
        p_bins=[]
        for i in range(len(bins)):
            in_bin=index_bins==i+1
            n_bins.append(numpy.sum(in_bin))
            RMSEP_bins.append(_rmsep(predicts[4][in_bin],compos[4][in_bin]))
            RMSEP_bins_full.append(_rmsep(predicts[0][in_bin],compos[0][in_bin]))
            # Diagnostic output for bins where the full model beats the blend.
            if RMSEP_bins_full[i]<RMSEP_bins[i]:
                print(i)
                print(RMSEP_bins[i])
                print(RMSEP_bins_full[i])
                print('stop')
            # NOTE(review): per-bin p-values are scaled to percent (*100)
            # while the per-range p-values below are fractions - confirm that
            # this difference is intended.
            p_bins.append(_rmsep_p_value(RMSEP_bins_full[i],RMSEP_bins[i],n_bins[i],n_bins[i])*100)

        RMSEP_full=_rmsep(predicts[0][index_full],compos[0][index_full])
        RMSEP_full_low=_rmsep(predicts[0][index_low],compos[0][index_low])
        RMSEP_full_mid=_rmsep(predicts[0][index_mid],compos[0][index_mid])
        RMSEP_full_high=_rmsep(predicts[0][index_high],compos[0][index_high])

        RMSEP_low=_rmsep(predicts[1][index_low],compos[1][index_low])
        RMSEP_mid=_rmsep(predicts[2][index_mid],compos[2][index_mid])
        RMSEP_high=_rmsep(predicts[3][index_high],compos[3][index_high])

        RMSEP_blend=_rmsep(predicts[4][index_blend],compos[4][index_blend])
        RMSEP_blend_low=_rmsep(predicts[4][index_low],compos[4][index_low])
        RMSEP_blend_mid=_rmsep(predicts[4][index_mid],compos[4][index_mid])
        RMSEP_blend_high=_rmsep(predicts[4][index_high],compos[4][index_high])

        p_full_blend=_rmsep_p_value(RMSEP_full,RMSEP_blend,n_full,n_blend)
        p_fulllow_blendlow=_rmsep_p_value(RMSEP_full_low,RMSEP_blend_low,n_low,n_low)
        p_fullmid_blendmid=_rmsep_p_value(RMSEP_full_mid,RMSEP_blend_mid,n_mid,n_mid)
        p_fullhigh_blendhigh=_rmsep_p_value(RMSEP_full_high,RMSEP_blend_high,n_high,n_high)

        labels=['Full Model','Low','Mid','High','Blended']
        # Select the full-model and blended entries from each parallel list.
        f=operator.itemgetter(0,4)
        yminmax[0]=numpy.min(f(predicts))

        ccam.plots.Plot1to1(list(f(compos)),list(f(predicts)),plot_title,list(f(labels)),list(f(colors)),list(f(markers)),imgnames['blended_full'],xminmax=xminmax,yminmax=yminmax,dpi=1000)

        # Relative error in percent for the full and blended predictions.
        rel_err=[]
        for true_vals,pred_vals in zip(f(compos),f(predicts)):
            abs_err=numpy.abs(true_vals-pred_vals)
            rel_err.append(abs_err/true_vals*100)

        labels_rel_err=['PLS1','Blended Submodels']
        ccam.plots.Plot1to1(list(f(compos)),rel_err,plot_title,labels_rel_err,list(f(colors)),list(f(markers)),imgnames['blended_full_rel'],xminmax=[numpy.min(list(f(compos))),100],yminmax=[numpy.min(rel_err),numpy.max(rel_err)],loglog=True,one_to_one=False,ylabel='Relative Error (%)',dpi=1000)

        # Append (mode 'a') so that successive elements accumulate in one file.
        summary_path=os.path.join(os.getcwd(),'Testset_RMSEP_summary.csv')
        with open(summary_path,'a',newline='') as writefile:
            writer=csv.writer(writefile,delimiter=',')
            writer.writerow([which_elem])

            row=['Ranges']
            for blend_range in blend_settings['blendranges']:
                row.extend(blend_range)
            writer.writerow(row)

            row=['To blend:']
            for blend_pair in blend_settings['toblend']:
                row.extend(blend_pair)
            writer.writerow(row)

            writer.writerow(['Composition Range','# of samples','RMSEP full','RMSEP Blended','p-value','RMSEP Sub-Model'])
            writer.writerow([str(ranges['full'][0])+'-'+str(ranges['full'][1]),str(n_full),str(RMSEP_full),str(RMSEP_blend),str(p_full_blend)])
            writer.writerow([str(ranges['low'][0])+'-'+str(ranges['low'][1]),str(n_low),str(RMSEP_full_low),str(RMSEP_blend_low),str(p_fulllow_blendlow),str(RMSEP_low)])
            writer.writerow([str(ranges['mid'][0])+'-'+str(ranges['mid'][1]),str(n_mid),str(RMSEP_full_mid),str(RMSEP_blend_mid),str(p_fullmid_blendmid),str(RMSEP_mid)])
            writer.writerow([str(ranges['high'][0])+'-'+str(ranges['high'][1]),str(n_high),str(RMSEP_full_high),str(RMSEP_blend_high),str(p_fullhigh_blendhigh),str(RMSEP_high)])

            last_bin=len(bins)-1
            for i in range(len(p_bins)):
                # The last bin has no upper edge in `bins`; it is labelled
                # as ending at 100.
                if i<last_bin:
                    row=[str(round(bins[i],2))+'-'+str(round(bins[i+1],2))]
                else:
                    row=[str(round(bins[i],2))+'-100']
                row.append(n_bins[i])
                row.append(RMSEP_bins_full[i])
                row.append(RMSEP_bins[i])
                row.append(p_bins[i])
                print(i)
                print(row)
                writer.writerow(row)

    # The lower y limit is reset to the smallest plotted prediction before
    # each figure.
    yminmax[0]=numpy.min(predicts)
    ccam.plots.Plot1to1(compos[1:-1],predicts[1:-1],which_elem_temp,labels[1:-1],['r','g','b'],markers[1:-1],imgnames['all'],xminmax=xminmax,yminmax=yminmax,dpi=dpi)
    yminmax[0]=numpy.min(predicts[4])
    ccam.plots.Plot1to1([compos[4]],[predicts[4]],which_elem_temp,[labels[4]],[colors[4]],[markers[4]],imgnames['blended'],xminmax=xminmax,yminmax=yminmax,dpi=dpi)
    for idx,img_key in enumerate(('full','low','mid','high')):
        yminmax[0]=numpy.min(predicts[idx])
        ccam.plots.Plot1to1([compos[idx]],[predicts[idx]],plot_title,[labels[idx]],[colors[idx]],[markers[idx]],imgnames[img_key],xminmax=xminmax,yminmax=yminmax,dpi=dpi)

    with open(filenames['pred_csv_out'][outfilestr],'w',newline='') as writefile:
        writer=csv.writer(writefile,delimiter=',')
        # The 'High' header previously printed the lower bound twice.
        row=['','','','Full ('+str(ranges['full'][0])+'-'+str(ranges['full'][1])+')','Low ('+str(ranges['low'][0])+'-'+str(ranges['low'][1])+')','Mid ('+str(ranges['mid'][0])+'-'+str(ranges['mid'][1])+')','High ('+str(ranges['high'][0])+'-'+str(ranges['high'][1])+')','Blended']
        writer.writerow(row)
        row=['','','Norm=',norms['full'],norms['low'],norms['mid'],norms['high']]
        writer.writerow(row)
        row=['','','nc=',str(ncs['full']),str(ncs['low']),str(ncs['mid']),str(ncs['high'])]
        writer.writerow(row)
        # Five element labels: one per prediction column, including Blended.
        row=['Target','Index','True Comp',which_elem,which_elem,which_elem,which_elem,which_elem]
        writer.writerow(row)

        for i in range(0,len(namelist)):
            row=[namelist[i],spect_index[i],str(compos[0][i]),y['full'][i],y['low'][i],y['mid'][i],y['high'][i],blended2[i]]
            writer.writerow(row)