def blend_predict(data, wvl, filelist, blendranges, inrange, refpredict, toblend,
                  masterlist, name_subs, ranges, ncs, maskfile, filenames, outputstr,
                  which_elem=None):
    """Predict with the four sub-models, blend them, and write the results to CSV.

    ``ccam.pls_predict`` is run once per sub-model ('full', 'low', 'mid',
    'high'), the four prediction sets are combined with
    ``ccam.submodels_blend`` and one row per entry of the 'high' prediction
    is written to ``filenames['pred_csv_out'][outputstr]``.

    Parameters
    ----------
    data, wvl : passed unchanged to ``ccam.pls_predict`` for every sub-model.
    filelist : indexable; entry ``i`` is written in the 'File' column of row
        ``i`` and the whole list is handed to ``ccam.target_lookup``.
    blendranges, inrange, refpredict, toblend : forwarded to
        ``ccam.submodels_blend`` (called with ``overwrite=False`` and
        ``noneg=False``).
    masterlist, name_subs : forwarded to ``ccam.target_lookup``.
    ranges : mapping with keys 'full', 'low', 'mid', 'high'; items ``[0]`` and
        ``[1]`` of each value are printed in the first header row.
    ncs : mapping with the same four keys; each value is passed to
        ``ccam.pls_predict`` and written in the 'nc=' header row.
    maskfile : forwarded to ``ccam.pls_predict``.
    filenames : nested mapping; this function reads
        ``filenames['loadfile'][key]``, ``filenames['means_file'][key]`` and
        ``filenames['pred_csv_out'][outputstr]``.
    outputstr : key selecting the output CSV inside
        ``filenames['pred_csv_out']``.
    which_elem : optional label written above every prediction column.  When
        omitted, a module-level ``which_elem`` is used if one exists (that is
        the name the body historically referenced); otherwise the label cells
        are left blank.

    Returns
    -------
    None.  The only outputs are the CSV file and a progress message.
    """
    # The label used to be read from an undefined name, which raised NameError
    # only after three header rows were already on disk.  Resolve it up front.
    if which_elem is None:
        which_elem = globals().get('which_elem', '')

    submodels = ('full', 'low', 'mid', 'high')

    # One prediction vector and one normalisation value per sub-model.
    predictions = {}
    norm_values = {}
    for key in submodels:
        predictions[key], norm_values[key] = ccam.pls_predict(
            data, ncs[key], wvl, maskfile,
            loadfile=filenames['loadfile'][key],
            mean_file=filenames['means_file'][key])

    predicts = [predictions[key] for key in submodels]
    blended = ccam.submodels_blend(predicts, blendranges, inrange, refpredict,
                                   toblend, overwrite=False, noneg=False)

    # target_lookup returns four values; the last one is not used here.
    targetlist, targetdists, targetamps, _nshots = ccam.target_lookup(
        filelist, masterlist, name_subs)

    out_path = filenames['pred_csv_out'][outputstr]
    print('Writing results to' + out_path)

    # The number of data rows follows the 'high' sub-model prediction.
    n_rows = len(predictions['high'])

    with open(out_path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')

        # Header row 1: composition range covered by each sub-model.
        range_titles = [
            key.capitalize() + ' (' + str(ranges[key][0]) + '-' + str(ranges[key][1]) + ')'
            for key in submodels
        ]
        writer.writerow(['', '', '', ''] + range_titles + ['Blended'])

        # Header rows 2 and 3: normalisation and component count per sub-model.
        writer.writerow(['', '', '', 'Norm='] + [norm_values[key] for key in submodels])
        writer.writerow(['', '', '', 'nc='] + [str(ncs[key]) for key in submodels])

        # Header row 4: column names; every prediction column carries the label.
        writer.writerow(['File', 'Target', 'Distance', 'Power'] + [which_elem] * 5)

        # Index-based on purpose: a short input raises IndexError instead of
        # being silently truncated as zip() would do.
        for i in range(n_rows):
            writer.writerow([filelist[i], targetlist[i], targetdists[i], targetamps[i],
                             predictions['full'][i], predictions['low'][i],
                             predictions['mid'][i], predictions['high'][i],
                             blended[i]])
print 'Choosing spectra' spectra, names, spect_index, comps = ccam.choose_spectra(spectra, spect_index, names, comps, compindex, mincomp=0, maxcomp=100, keepfile=keepfile, removefile=removefile, which_removed=None) y_db_full, fullnorm = ccam.pls_predict(spectra, nc_full, wvl, maskfile, loadfile=loadfile_full, mean_file=means_file_full) y_db_low, lownorm = ccam.pls_predict(spectra, nc_low, wvl, maskfile, loadfile=loadfile_low, mean_file=means_file_low) y_db_mid, midnorm = ccam.pls_predict(spectra, nc_mid, wvl, maskfile, loadfile=loadfile_mid, mean_file=means_file_mid) y_db_high, highnorm = ccam.pls_predict(spectra,
def _predict_submodels(spectra, wvl, ncs, norms, maskfile, filenames):
    """Run ccam.pls_predict for 'full', 'low', 'mid' and 'high'; return {key: prediction}.

    ``norms[key]`` is overwritten in place with the second value returned by
    ``ccam.pls_predict`` for that sub-model.
    """
    predictions = {}
    for key in ('full', 'low', 'mid', 'high'):
        predictions[key], norms[key] = ccam.pls_predict(
            spectra, ncs[key], wvl, maskfile,
            loadfile=filenames['loadfile'][key],
            mean_file=filenames['means_file'][key])
    return predictions


def predict_elem(which_elem,maxnc,ranges,norms,ncs,testsetfile,predict,blend_settings,searchdir='F:\\ChemCam\\ops_ccam_team\\',searchdir_cal=r'F:\ChemCam\ops_ccam_team\CalTarget 95A',
                 searchdir_apxs=r'F:\ChemCam\ops_ccam_team\Best APXS Comparisons',
                 searchdir_val=r'F:\ChemCam\ops_ccam_team\Validation Targets',
                 maskfile=r'C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\DataProcessing\Working\Input\mask_minors_noise.csv',
                 masterlist=r'F:\ChemCam\ops_ccam_misc\MASTERLIST_SOL_0010_0801.csv',
                 name_subs=r'C:\Users\rbanderson\Documents\Projects\MSL\ChemCam\DataProcessing\Working\Input\target_name_subs.csv',
                 dbfile='C:\\Users\\rbanderson\\Documents\\Projects\\MSL\\ChemCam\\DataProcessing\\Working\\Input\\full_db_mars_corrected.csv',
                 removefile='C:\\Users\\rbanderson\\Documents\\Projects\\MSL\\ChemCam\\DataProcessing\\Working\\Input\\removelist.csv',
                 plstype='sklearn',xminmax=None,yminmax=None,blend_opt=True,blend_outfile=None,dpi=1000):
    """Evaluate the blended sub-model pipeline for one element and optionally predict CCS data.

    Steps, in order:
      1. build the file-name table with ``generate_filenames`` and draw the
         outlier and cross-validation plots;
      2. read the database, keep the spectra chosen by ``ccam.choose_spectra``
         and predict them with the four sub-models;
      3. split the spectra into test/train by matching ``names`` against the
         first column of ``testsetfile`` and predict each subset;
      4. if ``blend_opt`` is true, replace ``blend_settings`` with the result
         of ``blend_optimize`` on the training predictions;
      5. report results for the whole database ('db') and the test set
         ('test') via ``final_model_results``;
      6. if ``predict`` is true, read the four CCS directories and write one
         prediction CSV per directory through ``blend_predict``.

    Parameters
    ----------
    which_elem : str
        Name compared against ``labels[2:]`` from the database to locate the
        composition column; also appended to the hard-coded output path.
    maxnc, plstype : forwarded to ``generate_filenames``.
    ranges : forwarded to ``generate_filenames``, ``final_model_results`` and
        ``blend_predict``.
    norms : dict, modified in place -- keys 'full', 'low', 'mid', 'high' are
        overwritten with the values returned by ``ccam.pls_predict``.
    ncs : mapping with keys 'full', 'low', 'mid', 'high'.
    testsetfile : CSV path read with ``pandas.read_csv(..., header=None)``;
        its first column lists the names belonging to the test set.
    predict : when true, step 6 is executed.
    blend_settings : mapping with keys 'blendranges', 'inrange', 'refpredict'
        and 'toblend'; used as the starting point for ``blend_optimize``.
    searchdir, searchdir_cal, searchdir_apxs, searchdir_val : directories read
        with ``ccam.read_ccs`` in step 6.
    maskfile, masterlist, name_subs, dbfile, removefile : input file paths
        forwarded to the ``ccam`` helpers.
    xminmax, yminmax : two-element lists forwarded to the plotting helpers;
        ``None`` (the default) means ``[0, 100]``.
    blend_opt : when true, step 4 is executed.
    blend_outfile : not read by this function; the optimiser output goes to
        ``filenames['blend_outfile']``.
    dpi : forwarded to ``final_model_results``.

    Returns
    -------
    None.
    """
    # Fresh lists per call: a list literal in the signature would be shared by
    # every call and by any helper that happened to modify it.
    if xminmax is None:
        xminmax = [0, 100]
    if yminmax is None:
        yminmax = [0, 100]

    outpath='C:\\Users\\rbanderson\\Documents\\Projects\\MSL\\ChemCam\\DataProcessing\\Working\\external_test_set\\Output\\'+which_elem+'\\'
    print('############ '+which_elem+' ##############')
    filenames = generate_filenames(which_elem, outpath, plstype, maxnc, norms, ranges,
                                   xminmax, yminmax)

    print('Making outlier check plots')
    outlier_plots(filenames, norms, ncs, which_elem)

    print("Making 1 to 1 plots using CV results")
    cv_plots(filenames, ncs, norms, xminmax, yminmax, which_elem)

    print('Reading database')
    sys.stdout.flush()
    spectra, comps, spect_index, names, labels, wvl = ccam.read_db(dbfile, compcheck=True)
    oxides = labels[2:]
    compindex = numpy.where(oxides == which_elem)[0]

    print('Choosing spectra')
    spectra, names, spect_index, comps = ccam.choose_spectra(
        spectra, spect_index, names, comps, compindex,
        mincomp=0, maxcomp=100, keepfile=None, removefile=removefile,
        which_removed=None)

    # Predictions for every retained database spectrum.
    y_db = _predict_submodels(spectra, wvl, ncs, norms, maskfile, filenames)

    # Get the test set spectra: names listed in the first column of
    # testsetfile form the test set, everything else is training data.
    data = pandas.read_csv(testsetfile, header=None)
    testnames = data.iloc[:, 0]
    testind = numpy.in1d(names, testnames)
    trainind = numpy.in1d(names, testnames, invert=True)

    test_spectra = spectra[testind]
    train_spectra = spectra[trainind]
    test_comps = comps[testind, compindex]
    train_comps = comps[trainind, compindex]
    test_spect_index = spect_index[testind]
    testnames = names[testind]

    y_test = _predict_submodels(test_spectra, wvl, ncs, norms, maskfile, filenames)
    y_train = _predict_submodels(train_spectra, wvl, ncs, norms, maskfile, filenames)

    # One "true composition" entry per plotted series (four sub-models plus the
    # blend).  The database entries are indexed separately so each list item is
    # its own array, as before.
    truecomps = [comps[:, compindex] for _ in range(5)]
    truecomps_test = [test_comps] * 5
    truecomps_train = [train_comps] * 5

    # Optimize the blending settings on the training predictions only.
    if blend_opt:
        blend_settings = blend_optimize(y_train, blend_settings, truecomps_train,
                                        outfile=filenames['blend_outfile'])

    # NOTE(review): indentation was lost in the reviewed source; these two calls
    # are taken to run whether or not blend_opt is set -- confirm.
    final_model_results(y_db, spect_index, names, truecomps, blend_settings,
                        xminmax, yminmax, ranges, ncs, norms, which_elem,
                        filenames, 'db', dpi=dpi)
    final_model_results(y_test, test_spect_index, testnames, truecomps_test,
                        blend_settings, xminmax, yminmax, ranges, ncs, norms,
                        which_elem, filenames, 'test', dpi=dpi)

    if predict:
        # Read CCS data.  All four directories are read before any prediction
        # is written, matching the original order of operations.  The last
        # entry takes a while because it needs to read all the CCS files.
        ccs_sources = (
            ('apxs', searchdir_apxs),  # APXS comparison targets
            ('val', searchdir_val),    # validation targets
            ('cal', searchdir_cal),    # calibration targets
            ('all', searchdir),        # everything under the top-level directory
        )
        ccs_sets = []
        for tag, directory in ccs_sources:
            ccs_data, ccs_wvl, ccs_filelist = ccam.read_ccs(directory)
            ccs_sets.append((tag, ccs_data, ccs_wvl, ccs_filelist))

        for tag, ccs_data, ccs_wvl, ccs_filelist in ccs_sets:
            blend_predict(ccs_data, ccs_wvl, ccs_filelist,
                          blend_settings['blendranges'], blend_settings['inrange'],
                          blend_settings['refpredict'], blend_settings['toblend'],
                          masterlist, name_subs, ranges, ncs, maskfile, filenames, tag)
#data,wvl,filelist=ccam.read_ccs(searchdir) #data,wvl=ccam.mask(data,wvl,maskfile) #data_norm3=ccam_normalize.ccam_normalize(data,wvl,normtype=3) #data_norm1=ccam_normalize.ccam_normalize(data,wvl,normtype=1) targetlist = ccam.target_lookup(filelist, masterlist, name_subs) #y_full=ccam.pls_unk(data_norm1,nc_full,coeff_file=coeff_file_full,means_file=means_file_full) #y_low=ccam.pls_unk(data_norm1,nc_low,coeff_file=coeff_file_low,means_file=means_file_low) #y_midlow=ccam.pls_unk(data_norm1,nc_midlow,coeff_file=coeff_file_midlow,means_file=means_file_midlow) #y_midhigh=ccam.pls_unk(data_norm1,nc_midhigh,coeff_file=coeff_file_midhigh,means_file=means_file_midhigh) #y_high=ccam.pls_unk(data_norm1,nc_high,coeff_file=coeff_file_high,means_file=means_file_high) #print 'Full model prediction' # y_full, fullnorm = ccam.pls_predict(which_elem, nc_full, copy.copy(data), copy.copy(wvl), maskfile, fullfiles) y_low, lownorm = ccam.pls_predict(which_elem, nc_low, copy.copy(data), copy.copy(wvl), maskfile, lowfiles) y_midlow, midlownorm = ccam.pls_predict(which_elem, nc_midlow, copy.copy(data), copy.copy(wvl), maskfile, midlowfiles) y_midhigh, midhighnorm = ccam.pls_predict(which_elem, nc_midhigh, copy.copy(data), copy.copy(wvl), maskfile, midhighfiles) y_high, highnorm = ccam.pls_predict(which_elem, nc_high, copy.copy(data), copy.copy(wvl), maskfile, highfiles) y_combined = numpy.zeros_like(y_high) combined_description = 'If Full<' + str( low_cutoff) + 'use low, else if Full <' + str( mid_cutoff) + ' use midlow, else if Full <' + str(
compindex = numpy.where(oxides == which_elem)[0] print 'Choosing spectra' spectra, names, spect_index, comps = ccam.choose_spectra(spectra, spect_index, names, comps, compindex, mincomp=0, maxcomp=100, keepfile=keepfile, removefile=removefile, which_removed=None) y_db_full, fullnorm = ccam.pls_predict(spectra, nc_full, wvl, maskfile, loadfile=loadfile_full, mean_file=means_file_full) truecomps = comps[:, compindex] predicts = y_db_full plot_title = 'Final Model ' + which_elem + ' Predictions of Full Database' labels = 'Full' colors = 'c' markers = 'o' ccam.plots.Plot1to1(truecomps, predicts, plot_title, labels, colors,
print 'Choosing spectra' mincomp = 0 maxcomp = 100 spectra, names, spect_index, comps = ccam.choose_spectra(spectra, spect_index, names, comps, compindex, mincomp=mincomp, maxcomp=maxcomp, keepfile=keepfile, removefile=removefile, which_removed=None) y_db_full, fullnorm = ccam.pls_predict(which_elem, nc_full, spectra, wvl, maskfile, fullfiles) y_db_low, lownorm = ccam.pls_predict(which_elem, nc_low, spectra, wvl, maskfile, lowfiles) y_db_mid, midnorm = ccam.pls_predict(which_elem, nc_mid, spectra, wvl, maskfile, midfiles) y_db_high, highnorm = ccam.pls_predict(which_elem, nc_high, spectra, wvl, maskfile, highfiles) """ From low model 0 to 1, use the low model If low model between 1 and 3, then blend the low and mid models using low as reference If low model AND mid model are between 3 and 8, use the mid model If mid is 8 to 12, then blend mid and high using mid as reference if high >12 then use high Do not overwrite predictions that have already been set in a previous round of logic. """