예제 #1
0
def blend_predict(data,wvl,filelist,blendranges,inrange,refpredict,toblend,masterlist,name_subs,ranges,ncs,maskfile,filenames,outputstr):
    """Run the four PLS submodels, blend them and write one CSV of predictions.

    The 'full', 'low', 'mid' and 'high' submodels are each evaluated with
    ``ccam.pls_predict`` and the four prediction sets are combined with
    ``ccam.submodels_blend``.  One row per entry of the high-range prediction
    is then written to ``filenames['pred_csv_out'][outputstr]``.

    Parameters
    ----------
    data, wvl, maskfile
        Passed straight through to ``ccam.pls_predict``.
    filelist, masterlist, name_subs
        Passed to ``ccam.target_lookup``; ``filelist`` also supplies the first
        column of every output row.
    blendranges, inrange, refpredict, toblend
        Passed straight through to ``ccam.submodels_blend``.
    ranges
        Mapping with keys 'full', 'low', 'mid', 'high'; elements 0 and 1 of
        each value are printed in the column headings.
    ncs
        Mapping with the same four keys giving the component count handed to
        ``ccam.pls_predict``.
    filenames
        Nested mapping; ``['loadfile'][key]``, ``['means_file'][key]`` and
        ``['pred_csv_out'][outputstr]`` are read.
    outputstr
        Key selecting the output path inside ``filenames['pred_csv_out']``.

    Returns
    -------
    None
    """
    submodels = ('full', 'low', 'mid', 'high')

    predicts = []
    norms = []
    for key in submodels:
        prediction, norm = ccam.pls_predict(data,
                                            ncs[key],
                                            wvl,
                                            maskfile,
                                            loadfile=filenames['loadfile'][key],
                                            mean_file=filenames['means_file'][key])
        predicts.append(prediction)
        norms.append(norm)
    y_full, y_low, y_mid, y_high = predicts

    blended = ccam.submodels_blend(predicts, blendranges, inrange, refpredict,
                                   toblend, overwrite=False, noneg=False)

    # Only the first three lookup results are written out; nshots is unused.
    targetlist, targetdists, targetamps, nshots = ccam.target_lookup(
        filelist, masterlist, name_subs)

    outpath = filenames['pred_csv_out'][outputstr]
    print('Writing results to ' + outpath)
    with open(outpath, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')

        # Heading row: four blank cells, one labelled column per submodel,
        # then the blended column.
        row = ['', '', '', '']
        for key in submodels:
            row.append(key.capitalize() + ' (' + str(ranges[key][0]) + '-' +
                       str(ranges[key][1]) + ')')
        row.append('Blended')
        writer.writerow(row)

        writer.writerow(['', '', '', 'Norm='] + norms)
        writer.writerow(['', '', '', 'nc='] + [str(ncs[key]) for key in submodels])

        # NOTE(review): which_elem is neither a parameter nor a local of this
        # function, so it must already exist at module scope when this runs.
        writer.writerow(['File', 'Target', 'Distance', 'Power'] + [which_elem] * 5)

        # The row count follows the high-range prediction; index explicitly so
        # that a shorter companion sequence still raises instead of silently
        # truncating the output.
        for i in range(0, len(y_high)):
            writer.writerow([filelist[i], targetlist[i], targetdists[i],
                             targetamps[i], y_full[i], y_low[i], y_mid[i],
                             y_high[i], blended[i]])
예제 #2
0
def read_single_ccs(
        filename,
        skiprows=0,
        shots=False,
        masterlist=None,
        name_sub_file=None):  #,minsol=0,maxsol=10000,masterlist=None):
    """Read one CCS csv file and return either its mean spectrum or its shots.

    Parameters
    ----------
    filename : str
        Path of the file to read.  Its last 40 characters are used as the
        short file label that is returned.
    skiprows : int
        Passed to ``ccam.read_csv``.
    shots : bool
        ``False`` returns the last data column (treated here as the mean
        spectrum); ``True`` returns columns ``1:-2`` as individual shots.
    masterlist, name_sub_file
        Passed to ``ccam.target_lookup``; only used when ``shots`` is True.

    Returns
    -------
    tuple or None
        ``(means, wvl, filetrim)`` when ``shots is False``;
        ``(singleshots, wvl, files_singleshot, shotnums)`` when
        ``shots is True``, with ``singleshots`` transposed to one row per
        shot.  Any other value of ``shots`` yields ``None``.
    """
    # Same trailing slice that read_ccs uses to derive its file labels.
    filetrim = filename[-40:]

    if shots is True:
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            filename, masterlist, name_sub_file)
        # Collapse the lookup result to a plain int so it can be used as an
        # array dimension and a range() bound.
        n_shots = int(numpy.sum(numpy.array(nshots, dtype='int')))

    # read_csv is unpacked into (data, labels), matching how read_ccs consumes
    # the same call.
    tempdata, templabels = ccam.read_csv(filename, skiprows, labelrow=False)
    wvl = numpy.array(tempdata[:, 0], dtype='float')

    if shots is True:
        # The fixed 6144-row buffer makes a file with a different channel
        # count fail loudly on assignment.
        singleshots = numpy.zeros([6144, n_shots], dtype='float64')
        singleshots[:, 0:n_shots] = tempdata[:, 1:-2]

        # Build the label array from the whole label so no characters are
        # cut off by a too-narrow string dtype.
        files_singleshot = numpy.array([filetrim] * n_shots)
        shotnums = numpy.zeros([n_shots])
        shotnums[0:n_shots] = range(n_shots)

        return numpy.transpose(singleshots), wvl, files_singleshot, shotnums

    if shots is False:
        means = tempdata[:, -1]
        return means, wvl, filetrim
# Script fragment: the inputs used below (data, wvl, maskfile, nc_high,
# y_full, y_low, y_mid, fullnorm, lownorm, midnorm, the *min/*max bounds, ...)
# are not defined in this excerpt and must come from earlier in the script.

# High-range submodel prediction and the normalisation value returned with it.
y_high, highnorm = ccam.pls_predict(data,
                                    nc_high,
                                    wvl,
                                    maskfile,
                                    loadfile=loadfile_high,
                                    mean_file=means_file_high)

# Blend the four submodel predictions into a single result.
predicts = [y_full, y_low, y_mid, y_high]
blended = ccam.submodels_blend(predicts,
                               ranges,
                               inrange,
                               refpredict,
                               toblend,
                               overwrite=False)

# NOTE(review): three values are unpacked here, whereas other callers in this
# file unpack four (targets, dists, amps, nshots) -- confirm which
# target_lookup version this script was written against.
targetlist, targetdists, targetamps = ccam.target_lookup(
    filelist, masterlist, name_subs)

y_combined = numpy.zeros_like(y_high)
# Python 2 print statement; this fragment does not parse under Python 3.
print 'Writing results'
# NOTE(review): binary mode with csv.writer is presumably the Python 2
# convention; under Python 3 the file would need text mode -- confirm target.
with open(outputfile_apxs, 'wb') as writefile:
    writer = csv.writer(writefile, delimiter=',')
    # Heading row: four blank cells, then one column per submodel and the
    # blended column.
    row = [
        '', '', '', '', 'Full',
        'Low (' + str(lowmin) + '-' + str(lowmax) + ')',
        'Mid (' + str(midmin) + '-' + str(midmax) + ')',
        'High (' + str(highmin) + '-' + str(highmax) + ')', 'Blended'
    ]
    writer.writerow(row)
    # Normalisation value for each of the four submodels.
    row = ['', '', '', 'Norm=', fullnorm, lownorm, midnorm, highnorm]
    writer.writerow(row)
    row = [
예제 #4
0
def read_ccs(searchdir,
             skiprows=15,
             shots=False,
             masterlist=None,
             name_sub_file=None):  #,minsol=0,maxsol=10000,masterlist=None):
    """Recursively read every ``*CCS*csv`` file found under ``searchdir``.

    Files are de-duplicated in two steps: for each spacecraft-clock string
    (path characters ``[-36:-27]``) only the highest version digit (path
    character ``[-5:-4]``) is kept, and then only one path is kept per unique
    40-character trailing file name.

    Parameters
    ----------
    searchdir : str
        Directory walked with ``os.walk``.
    skiprows : int
        Passed to ``ccam.read_csv`` for every file.
    shots : bool
        ``False`` collects the last column of each file (treated here as the
        mean spectrum); ``True`` collects columns ``1:-2`` as single shots.
    masterlist, name_sub_file
        Passed to ``ccam.target_lookup``; only used when ``shots`` is True.

    Returns
    -------
    tuple or None
        ``(means, wvl, files)`` when ``shots is False``, ``means`` having one
        row per file; ``(singleshots, wvl, files_singleshot, shotnums)`` when
        ``shots is True``, ``singleshots`` having one row per shot.  ``wvl``
        comes from the last file read and is empty when nothing matched.
        Any other value of ``shots`` yields ``None``.
    """
    searchstring = '*CCS*csv'

    # Recursively search for CCS files in the specified directory
    filelist = []
    for root, dirnames, filenames in os.walk(searchdir):
        for filename in fnmatch.filter(filenames, searchstring):
            filelist.append(os.path.join(root, filename))
    filelist = numpy.array(filelist)

    # Split each path into its short name, spacecraft clock and version digit.
    files = numpy.zeros_like(filelist)
    sclocks = numpy.zeros_like(filelist)
    fileversion = numpy.zeros(len(filelist), dtype='int')
    for i, path in enumerate(filelist):
        files[i] = path[-40:]
        sclocks[i] = path[-36:-27]
        fileversion[i] = path[-5:-4]

    # Keep only the highest version for every spacecraft clock.  One pass
    # records the best version per clock, a second pass builds the mask.
    newest = {}
    for sclock, version in zip(sclocks, fileversion):
        if sclock not in newest or version > newest[sclock]:
            newest[sclock] = version
    keep = numpy.array(
        [version == newest[sclock]
         for sclock, version in zip(sclocks, fileversion)],
        dtype='bool')

    files = files[keep]
    filelist = filelist[keep]
    sclocks = sclocks[keep]

    # Drop any remaining paths that share the same short file name.
    files, unique_index = numpy.unique(files, return_index=True)
    filelist = filelist[unique_index]
    sclocks = sclocks[unique_index]

    if shots is True:
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            filelist, masterlist, name_sub_file)
        nshots = numpy.array(nshots, dtype='int')
        sum_shots = numpy.sum(nshots)

    print('Reading ' + str(len(filelist)) + ' files...')

    if shots is not True:
        means = numpy.zeros([len(filelist), 6144], dtype='float64')
    if shots is True:
        singleshots = numpy.zeros([6144, sum_shots], dtype='float64')
        # Use the dtype of the whole name array so that no label is truncated
        # to the width of the first one, and an empty file list is harmless.
        files_singleshot = numpy.zeros(sum_shots, dtype=files.dtype)
        shotnums = numpy.zeros([sum_shots])
        rowcount = 0

    # Defined up front so an empty search returns empty results instead of
    # failing on an unbound name.
    wvl = numpy.array([], dtype='float')

    for i in range(len(filelist)):

        if numpy.mod(i + 1, 100) == 0:
            print('Reading file #' + str(i + 1))

        tempdata, templabels = ccam.read_csv(filelist[i],
                                             skiprows,
                                             labelrow=False)

        wvl = numpy.array(tempdata[:, 0], dtype='float')
        if shots is False:
            means[i, :] = tempdata[:, -1]
        if shots is True:
            # Append this file's shots after those already stored.
            stop = rowcount + nshots[i]
            shotnums[rowcount:stop] = range(nshots[i])
            files_singleshot[rowcount:stop] = files[i]
            singleshots[:, rowcount:stop] = tempdata[:, 1:-2]
            rowcount = stop

    if shots is True:
        singleshots = numpy.transpose(singleshots)
        return singleshots, wvl, files_singleshot, shotnums
    if shots is False:
        return means, wvl, files
예제 #5
0
    def calc_comp(self):
        """Predict compositions for every CCS file and write the results.

        Reads the single-shot checkbox, finds the files under
        ``self.searchdir``, looks up target metadata, and for each file reads
        and masks the spectra, normalises them two ways (normtype 1 and 3,
        stored on ``self``), and runs ``self.pls_submodels``.  The per-file
        submodel outputs are stacked, blended with ``self.pls_blend`` and
        handed to ``self.write_results``.

        NOTE(review): ``app`` is read from module scope, and ``ccam.read_ccs``
        is called with ``singlefile=True`` -- confirm the installed ccam
        version accepts that keyword.
        """
        # Choose whether to do single shots
        self.shots = self.myWidget.singleshots_checkbox.isChecked()
        filelist, files = ccam.search_ccs(self.searchdir)
        self.myWidget.progressBar.setMaximum(len(filelist))
        targets, dists, amps, nshots = ccam.target_lookup(
            filelist, self.masterlist, self.name_sub_file)
        nshots = numpy.array(nshots, dtype='int')

        # Loop through each file in the file list
        for i in range(0, len(filelist)):
            # Keep the GUI responsive while the batch runs.
            app.processEvents()
            self.myWidget.progressBar.setValue(i)
            # Parenthesised single-argument form prints identically on
            # Python 2 and is valid on Python 3.
            print(filelist[i])

            if self.shots is True:
                singleshots, wvl, filename, shotnum = ccam.read_ccs(
                    filelist[i],
                    skiprows=0,
                    shots=self.shots,
                    masterlist=self.masterlist,
                    name_sub_file=self.name_sub_file,
                    singlefile=True)
                singleshots_masked, wvl_masked = ccam.mask(
                    singleshots, wvl, self.maskfile)
                self.spectra_masked_norm1 = ccam.normalize(singleshots_masked,
                                                           wvl_masked,
                                                           normtype=1)
                self.spectra_masked_norm3 = ccam.normalize(singleshots_masked,
                                                           wvl_masked,
                                                           normtype=3)

            if self.shots is False:
                # A mean spectrum counts as a single "shot" for the submodels.
                nshots[i] = 1
                meanspect, wvl, filename = ccam.read_ccs(
                    filelist[i],
                    skiprows=0,
                    shots=self.shots,
                    masterlist=self.masterlist,
                    name_sub_file=self.name_sub_file,
                    singlefile=True)
                meanspect_masked, wvl_masked = ccam.mask(
                    meanspect, wvl, self.maskfile)
                self.spectra_masked_norm1 = ccam.normalize(meanspect_masked,
                                                           wvl_masked,
                                                           normtype=1)
                self.spectra_masked_norm3 = ccam.normalize(meanspect_masked,
                                                           wvl_masked,
                                                           normtype=3)

            comps_temp = self.pls_submodels(nshots[i])
            if i == 0:
                # The first file seeds the accumulators unchanged.
                comps_all = comps_temp
                filename_all = filename

                if self.shots is True:
                    shotnum_all = shotnum
                    targets_all = numpy.tile(targets[i], nshots[i])
                    dists_all = numpy.tile(dists[i], nshots[i])
                    amps_all = numpy.tile(amps[i], nshots[i])

            else:
                # Append this file's output to each of the four submodel
                # accumulators.
                comps_all = [
                    numpy.vstack([comps_all[k], comps_temp[k]])
                    for k in range(4)
                ]
                filename_all = numpy.hstack([filename_all, filename])

                if self.shots is True:
                    # Repeat the per-file metadata once per shot so every
                    # output row carries its target, distance and power.
                    shotnum_all = numpy.hstack([shotnum_all, shotnum])
                    targets_all = numpy.hstack(
                        [targets_all,
                         numpy.tile(targets[i], nshots[i])])
                    dists_all = numpy.hstack(
                        [dists_all, numpy.tile(dists[i], nshots[i])])
                    amps_all = numpy.hstack(
                        [amps_all, numpy.tile(amps[i], nshots[i])])

        # NOTE(review): with an empty file list comps_all is never assigned
        # and the next line raises -- confirm callers guarantee at least one
        # file.
        blended_all = self.pls_blend(comps_all)
        self.myWidget.progressBar.setValue(len(filelist))
        if self.shots is False:
            # One row per file: the lookup results are already aligned.
            shotnum_all = 'placeholder'
            targets_all = targets
            dists_all = dists
            amps_all = amps
        self.write_results(blended_all, shotnum_all, targets_all, dists_all,
                           amps_all, filename_all)
예제 #6
0
def _write_nc_table(path, value_label, values, nc):
    """Write a two-column CSV: component count 1..nc and the matching value."""
    with open(path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(['NC', value_label])
        for i in range(0, nc):
            writer.writerow([i + 1, values[i]])


def _write_sample_table(path, header, id_columns, values, nc, n_rows):
    """Write one CSV row per sample: identifying cells, then values[i, :].

    The heading row is ``header`` followed by the numbers 1..nc.
    """
    with open(path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        row = list(header)
        row.extend(range(1, nc + 1))
        writer.writerow(row)
        for i in range(0, n_rows):
            row = [column[i] for column in id_columns]
            row.extend(values[i, :])
            writer.writerow(row)


def pls_cal(dbfile,maskfile,outpath,which_elem,testfold,nc,normtype=1,mincomp=0,maxcomp=100,plstype='mlpy',keepfile=None,removefile=None,cal_dir=None,masterlist_file=None,compfile=None,name_sub_file=None,foldfile=None,nfolds=7,seed=None,n_bag=None,skscale=False,n_boost=None,max_samples=0.1,n_elems=9):
    """Calibrate PLS models for one element and write all diagnostics to disk.

    The spectral database is read, filtered, masked and normalised; samples
    are assigned to folds; fold ``testfold`` is held out as the test set and
    the remaining folds are cross-validated by leaving one fold out at a time.
    A full model is then trained for every component count from 1 to ``nc``,
    the list of trained models is pickled, and RMSE tables, predictions,
    regression coefficients and run settings are written as CSV files whose
    names start with ``outpath + which_elem``.

    Parameters
    ----------
    dbfile : path of the spectral database, read with ``ccam.read_db``.
    maskfile : passed to ``ccam.mask``.
    outpath : prefix prepended to every output file name.
    which_elem : label of the composition column to model.
    testfold : fold number held out as the test set.
    nc : largest number of PLS components to train.
    normtype : passed to ``ccam.normalize``.
    mincomp, maxcomp : passed to ``ccam.choose_spectra`` and ``cal_rmses``.
    plstype : ``'mlpy'`` or ``'sklearn'``; any other value trains nothing and
        leaves the result arrays at zero.
    keepfile, removefile : passed to ``ccam.choose_spectra``.
    cal_dir : if given, calibration-target spectra are read from it and
        scored as well.
    masterlist_file, name_sub_file : passed to ``ccam.target_lookup``.
    compfile : passed to ``ccam.target_comp_lookup``.
    foldfile : fold definition file; if omitted, random folds are drawn.
    nfolds, seed : passed to ``ccam.random_folds``.
    n_bag : if given, wrap the sklearn model in a ``BaggingRegressor`` with
        this many estimators.
    skscale : if true, skip mean centring and pass ``scale=True`` to sklearn.
    n_boost : if given, wrap the sklearn model in an ``AdaBoostRegressor``
        with this many estimators.
    max_samples : passed to ``BaggingRegressor``.
    n_elems : passed to ``ccam.read_db``.

    Returns
    -------
    None

    NOTE(review): ``PLSRegression``, ``ensemble`` and ``cal_rmses`` are read
    from module scope.
    """
    use_bagging = n_bag is not None
    use_boosting = n_boost is not None
    have_cal = cal_dir is not None

    # The suffixes replace each other rather than accumulate: the last
    # matching option wins.  Kept as is because it determines file names.
    plstype_string = plstype
    if use_bagging:
        plstype_string = plstype + '_bag'
    if use_boosting:
        plstype_string = plstype + '_boost'
    if skscale:
        plstype_string = plstype + '_scale'

    comp_range = str(mincomp) + '-' + str(maxcomp)
    # Shared stem of almost every output file name.
    prefix = (outpath + which_elem + '_' + plstype_string + '_nc' + str(nc) +
              '_norm' + str(normtype) + '_' + comp_range)

    print('Reading database')
    sys.stdout.flush()
    spectra, comps, spect_index, names, labels, wvl = ccam.read_db(
        dbfile, compcheck=True, n_elems=n_elems)
    oxides = labels[2:]
    compindex = numpy.where(oxides == which_elem)[0]

    print('Choosing spectra')
    which_removed = prefix + '_removed.csv'
    spectra, names, spect_index, comps = ccam.choose_spectra(
        spectra, spect_index, names, comps, compindex,
        mincomp=mincomp, maxcomp=maxcomp, keepfile=keepfile,
        removefile=removefile, which_removed=which_removed)

    print('Masking spectra')
    spectra, wvl = ccam.mask(spectra, wvl, maskfile)

    print('Normalizing spectra')
    spectra = ccam.normalize(spectra, wvl, normtype=normtype)

    print('Assigning Folds')
    if foldfile is not None:
        # if a fold file is specified, use it
        folds = ccam.folds(foldfile, names)
    else:
        # otherwise, define random folds
        folds = ccam.random_folds(names, nfolds, seed=seed)

    unassigned = (folds == 0)
    assigned = (folds != 0)

    # Append the samples not assigned to any fold to the "removed" file.
    # Text mode with newline='' is what csv.writer needs on Python 3.
    names_nofold = names[unassigned]
    spect_index_nofold = spect_index[unassigned]
    with open(which_removed, 'a', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        for i in range(len(names_nofold)):
            writer.writerow([names_nofold[i], spect_index_nofold[i], 'No Fold'])

    # remove spectra that are not assigned to any fold
    spectra = spectra[assigned, :]
    spect_index = spect_index[assigned]
    names = names[assigned]
    comps = comps[assigned, :]
    folds = folds[assigned]

    print('Defining Training and Test Sets')
    in_train = (folds != testfold)
    in_test = (folds == testfold)

    spectra_train = spectra[in_train]
    spect_index_train = spect_index[in_train]
    names_train = names[in_train]
    comps_train = comps[in_train, compindex]
    folds_train = folds[in_train]
    folds_train_unique = numpy.unique(folds_train)

    spectra_test = spectra[in_test]
    spect_index_test = spect_index[in_test]
    names_test = names[in_test]
    comps_test = comps[in_test, compindex]
    folds_test = folds[in_test]

    print('Do Leave One Label Out (LOLO) cross validation with all folds but the test set')
    # arrays holding cross validation predictions and RMSEs
    train_predict_cv = numpy.zeros((len(names_train), nc))
    RMSECV = numpy.zeros(nc)

    for i in folds_train_unique:
        print('Holding out fold #' + str(i))
        held_out = (folds_train == i)
        kept_in = (folds_train != i)

        if skscale:
            # sklearn does its own scaling, so the data are used uncentred.
            X_cv_in = spectra_train[kept_in, :]
            X_cv_out = spectra_train[held_out, :]
            Y_cv_in = comps_train[kept_in]
            Y_cv_in_mean = 0
        else:
            # mean centre the spectra left in ...
            X_cv_in, X_cv_in_mean = ccam.meancenter(spectra_train[kept_in, :])
            # ... centre those left out with the same mean ...
            X_cv_out = ccam.meancenter(spectra_train[held_out, :],
                                       X_mean=X_cv_in_mean)[0]
            # ... and mean centre the compositions left in
            Y_cv_in, Y_cv_in_mean = ccam.meancenter(comps_train[kept_in])

        # step through each number of components
        for j in range(1, nc + 1):
            print('Training Model for ' + str(j) + ' components')
            if plstype == 'mlpy':
                PLS1model = ccam.mlpy_pls.PLS(j)
                PLS1model.learn(X_cv_in, Y_cv_in)
                # predict the samples held out
                train_predict_cv[held_out, j - 1] = (
                    PLS1model.pred(X_cv_out) + Y_cv_in_mean)

            if plstype == 'sklearn':
                PLS1model = PLSRegression(n_components=j, scale=skscale)
                if not use_bagging and not use_boosting:
                    PLS1model.fit(X_cv_in, Y_cv_in)
                    train_predict_cv[held_out, j - 1] = numpy.squeeze(
                        PLS1model.predict(X_cv_out) + Y_cv_in_mean)
                if use_bagging:
                    PLS1bagged = ensemble.BaggingRegressor(
                        PLS1model, n_estimators=n_bag,
                        max_samples=max_samples, verbose=1)
                    PLS1bagged.fit(X_cv_in, Y_cv_in)
                    train_predict_cv[held_out, j - 1] = numpy.squeeze(
                        PLS1bagged.predict(X_cv_out) + Y_cv_in_mean)
                if use_boosting:
                    PLS1boosted = ensemble.AdaBoostRegressor(
                        PLS1model, n_estimators=n_boost)
                    PLS1boosted.fit(X_cv_in, Y_cv_in)
                    train_predict_cv[held_out, j - 1] = numpy.squeeze(
                        PLS1boosted.predict(X_cv_out) + Y_cv_in_mean)

    # calculate RMSECV
    for i in range(0, nc):
        sqerr = (train_predict_cv[:, i] - comps_train) ** 2.0
        RMSECV[i] = numpy.sqrt(numpy.mean(sqerr))

    # mean centre the full model
    if skscale:
        X = spectra_train
        X_test = spectra_test
        X_all = spectra
        Y = comps_train
        Y_mean = 0
    else:
        X, X_mean = ccam.meancenter(spectra_train)
        X_test = ccam.meancenter(spectra_test, X_mean=X_mean)[0]
        X_all = ccam.meancenter(spectra, X_mean=X_mean)[0]
        Y, Y_mean = ccam.meancenter(comps_train)

    # arrays for results and RMSEs
    trainset_results = numpy.zeros((len(names_train), nc))
    testset_results = numpy.zeros((len(names_test), nc))
    results = numpy.zeros((len(names), nc))

    RMSEP = numpy.zeros(nc)
    RMSEC = numpy.zeros(nc)
    beta = numpy.zeros((len(X[0, :]), nc))
    Q_res = numpy.zeros((len(X[:, 0]), nc))
    T2 = numpy.zeros((len(X[:, 0]), nc))

    if have_cal:
        print('Reading cal target data')
        cal_data, cal_wvl, cal_filelist = ccam.read_ccs(cal_dir)
        cal_data, cal_wvl = ccam.mask(cal_data, cal_wvl, maskfile)
        cal_data = ccam.normalize(cal_data, cal_wvl, normtype=normtype)
        if skscale:
            cal_data_centered = cal_data
        else:
            cal_data_centered = ccam.meancenter(cal_data, X_mean=X_mean)[0]

        # One RMSEP-vs-nc array per calibration target, in the order in which
        # cal_rmses returns them: KGAMEDS, MACUSANITE, NAU2HIS, NAU2LOS,
        # NAU2MEDS, NORITE, PICRITE, SHERGOTTITE.
        cal_target_rmsep = [numpy.zeros(nc) for _ in range(8)]
        RMSEP_cal_good = numpy.zeros(nc)

        targets, dists, amps, nshots = ccam.target_lookup(
            cal_filelist, masterlist_file, name_sub_file)
        target_comps = ccam.target_comp_lookup(targets, compfile, which_elem)
        cal_results = numpy.zeros((len(targets), nc))

    def _store_predictions(j, predict):
        """Fill column j-1 of the train, test and all-sample result tables."""
        trainset_results[:, j - 1] = predict(X)
        testset_results[:, j - 1] = predict(X_test)
        results[:, j - 1] = predict(X_all)

    def _score_cal_targets(j, predictions):
        """Store cal-target predictions for j components and their RMSEPs."""
        cal_results[:, j - 1] = predictions
        # Unpacked explicitly so a result of the wrong length still raises.
        (cal_target_rmsep[0][j - 1], cal_target_rmsep[1][j - 1],
         cal_target_rmsep[2][j - 1], cal_target_rmsep[3][j - 1],
         cal_target_rmsep[4][j - 1], cal_target_rmsep[5][j - 1],
         cal_target_rmsep[6][j - 1], cal_target_rmsep[7][j - 1],
         RMSEP_cal_good[j - 1]) = cal_rmses(
             targets, nc, target_comps, j, cal_data_centered, Y_mean,
             mincomp, maxcomp, cal_results)

    model_list = []
    # Now step through each # of components with the full model
    for j in range(1, nc + 1):
        print('Training full model for ' + str(j) + ' components')
        if plstype == 'mlpy':
            PLS1model = ccam.mlpy_pls.PLS(j)
            PLS1model.learn(X, Y)
            beta[:, j - 1] = PLS1model.beta()
            model_list.append([PLS1model])
            _store_predictions(j, lambda block: PLS1model.pred(block) + Y_mean)
            if have_cal:
                _score_cal_targets(
                    j, PLS1model.pred(cal_data_centered) + Y_mean)

        if plstype == 'sklearn':
            PLS1model = PLSRegression(n_components=j, scale=skscale)

            if not use_bagging and not use_boosting:
                PLS1model.fit(X, Y)

                # Hotelling T2 per training sample; the inverse does not
                # depend on the sample, so it is computed once.
                T = PLS1model.x_scores_
                TtT_inv = numpy.linalg.inv(numpy.dot(T.transpose(), T))
                for k in range(len(X[:, 0])):
                    T2[k, j - 1] = numpy.dot(T[k, :],
                                             numpy.dot(TtT_inv, T[k, :]))

                # Q residual: squared reconstruction error per sample.
                E = X - numpy.dot(PLS1model.x_scores_,
                                  PLS1model.x_loadings_.transpose())
                Q_res[:, j - 1] = numpy.dot(E, E.transpose()).diagonal()

                _store_predictions(
                    j, lambda block: numpy.squeeze(
                        PLS1model.predict(block) + Y_mean))
                # NOTE(review): newer scikit-learn releases expose the
                # coefficients as ``coef_`` -- confirm the installed version.
                beta[:, j - 1] = numpy.squeeze(PLS1model.coefs)
                model_list.append([PLS1model])
                if have_cal:
                    _score_cal_targets(j, numpy.squeeze(
                        PLS1model.predict(cal_data_centered) + Y_mean))

            if use_bagging:
                PLS1bagged = ensemble.BaggingRegressor(
                    PLS1model, n_estimators=n_bag, max_samples=max_samples,
                    verbose=1)
                PLS1bagged.fit(X, Y)
                _store_predictions(
                    j, lambda block: numpy.squeeze(
                        PLS1bagged.predict(block) + Y_mean))
                # No single coefficient vector is stored for an ensemble.
                beta[:, j - 1] = None
                model_list.append([PLS1bagged])
                if have_cal:
                    _score_cal_targets(j, numpy.squeeze(
                        PLS1bagged.predict(cal_data_centered) + Y_mean))

            if use_boosting:
                PLS1boosted = ensemble.AdaBoostRegressor(
                    PLS1model, n_estimators=n_boost)
                PLS1boosted.fit(X, Y)
                _store_predictions(
                    j, lambda block: numpy.squeeze(
                        PLS1boosted.predict(block) + Y_mean))
                beta[:, j - 1] = None
                model_list.append([PLS1boosted])
                if have_cal:
                    _score_cal_targets(j, numpy.squeeze(
                        PLS1boosted.predict(cal_data_centered) + Y_mean))

        RMSEC[j - 1] = numpy.sqrt(
            numpy.mean((trainset_results[:, j - 1] - comps_train) ** 2.0))
        RMSEP[j - 1] = numpy.sqrt(
            numpy.mean((testset_results[:, j - 1] - comps_test) ** 2.0))

    # The pickle name deliberately omits the '_nc' part of the shared prefix.
    pickle_path = (outpath + which_elem + '_' + plstype_string + '_norm' +
                   str(normtype) + '_' + comp_range + '.pkl')
    with open(pickle_path, 'wb') as picklefile:
        pickle.dump(model_list, picklefile)

    # if cal_dir is specified, summarise and write the cal target RMSEs
    if have_cal:
        # Targets whose RMSEP for one component is exactly zero are not
        # counted in the average.
        n_good_cal = numpy.sum(numpy.array(cal_target_rmsep)[:, 0] != 0)
        print(n_good_cal)
        summed = cal_target_rmsep[0]
        for per_target in cal_target_rmsep[1:]:
            summed = summed + per_target
        RMSEP_cal = summed / n_good_cal
        RMSEP_single_cals = cal_target_rmsep + [RMSEP_cal]

        _write_sample_table(
            prefix + '_caltargets_predict.csv',
            ['File', 'Target', 'Laser Energy', 'True_Comp'],
            [cal_filelist, targets, amps, target_comps],
            cal_results, nc, len(targets))
        _write_nc_table(prefix + '_RMSEP_caltargets.csv',
                        'RMSEP Cal Targets (wt.%)', RMSEP_cal, nc)
        ccam.RMSE(RMSECV, RMSEP, RMSEC, which_elem + ' RMSEs',
                  prefix + '_RMSE_plot_cal.png', RMSEP_cals=RMSEP_single_cals)
        ccam.RMSE(RMSECV, RMSEP, RMSEC, which_elem + ' RMSEs',
                  prefix + '_RMSE_plot_cal_good.png', RMSEP_good=RMSEP_cal_good)

    # plot RMSEs
    ccam.RMSE(RMSECV, RMSEP, RMSEC, which_elem + ' RMSEs',
              prefix + '_RMSE_plot.png')

    # Write output info to files
    train_columns = [names_train, spect_index_train, folds_train, comps_train]
    n_train = len(names_train)

    _write_sample_table(prefix + '_Q_res.csv',
                        ["Sample", "Spectrum", "Fold", "True Comp"],
                        train_columns, Q_res, nc, n_train)

    element_comps = comps[:, compindex]
    quartile_path = outpath + which_elem + '_' + comp_range + '_quartiles.csv'
    with open(quartile_path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow([which_elem])
        for label, percent in (('Min', 0), ('1st Quartile', 25),
                               ('Median', 50), ('3rd Quartile', 75),
                               ('Max', 100)):
            writer.writerow([label, numpy.percentile(element_comps, percent)])

    _write_sample_table(prefix + '_HotellingT2.csv',
                        ["Sample", "Spectrum", "Fold", "True Comp"],
                        train_columns, T2, nc, n_train)

    _write_nc_table(prefix + '_RMSECV.csv', 'RMSECV (wt.%)', RMSECV, nc)
    _write_nc_table(prefix + '_RMSEC.csv', 'RMSEC (wt.%)', RMSEC, nc)
    _write_nc_table(prefix + '_RMSEP.csv', 'RMSEP (wt.%)', RMSEP, nc)

    predict_header = ['Sample', 'Spectrum', 'Fold', 'True_Comp']
    _write_sample_table(prefix + '_cv_predict.csv', predict_header,
                        train_columns, train_predict_cv, nc, n_train)
    _write_sample_table(prefix + '_train_predict.csv', predict_header,
                        train_columns, trainset_results, nc, n_train)
    _write_sample_table(prefix + '_test_predict.csv', predict_header,
                        [names_test, spect_index_test, folds_test, comps_test],
                        testset_results, nc, len(names_test))
    _write_sample_table(prefix + '_all_predict.csv', predict_header,
                        [names, spect_index, folds, element_comps],
                        results, nc, len(names))

    _write_sample_table(prefix + '_beta_coeffs.csv', ['wvl'], [wvl], beta,
                        nc, len(wvl))

    if not skscale:
        # The centring values only exist when this function did the centring.
        with open(prefix + '_meancenters.csv', 'w', newline='') as writefile:
            writer = csv.writer(writefile, delimiter=',')
            writer.writerow([which_elem + ' mean', Y_mean])
            for i in range(0, len(wvl)):
                writer.writerow([wvl[i], X_mean[i]])

    with open(prefix + '_inputinfo.csv', 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(['Spectral database =', dbfile])
        writer.writerow(['Spectra Kept =', keepfile])
        writer.writerow(['Spectra Removed =', which_removed])
        writer.writerow(['Fold Definition =', foldfile])
        writer.writerow(['Test Fold =', testfold])
        writer.writerow(['Mask File =', maskfile])
        writer.writerow(['Algorithm =', plstype_string])
        writer.writerow(['# of components =', nc])
        writer.writerow(['Normalization Type =', normtype])
        writer.writerow(['Composition Min. =', mincomp])
        writer.writerow(['Composition Max. =', maxcomp])
예제 #7
0
# Script fragment: searchdir, masterlist, name_subs, data_norm1, data_norm3
# and the nc_*/coeff_file_*/means_file_* settings are not defined in this
# excerpt and must come from earlier in the script.

# Normalisation type used by the mid- and high-range submodels.
midnorm = 1
highnorm = 3

# NOTE(review): presumably the composition thresholds that separate the
# low / mid / high submodel ranges -- confirm where they are consumed.
low_cutoff = 6
high_cutoff = 15

# Read every CCS file under searchdir.
data, wvl, filelist = ccam.read_ccs(searchdir)
# Disabled caching of the data that was just read:
#pickle.dump(data, open( "ccamdata.pkl", "wb" ))
#pickle.dump(wvl,open( "ccamwvl.pkl", "wb" ))
#pickle.dump(filelist,open( "ccamfilelist.pkl", "wb" ))

# Disabled reload of that cache from a machine-specific path:
#pickle.load(open( r"C:\Users\rbanderson\Documents\MSL\ChemCam\Data Processing\Working\ccam\ccamdata.pkl", "rb" ))
#pickle.load(open( r"C:\Users\rbanderson\Documents\MSL\ChemCam\Data Processing\Working\ccam\ccamwvl.pkl", "rb" ))
#pickle.load(open( r"C:\Users\rbanderson\Documents\MSL\ChemCam\Data Processing\Working\ccam\ccamfilelist.pkl", "rb" ))

# NOTE(review): three values are unpacked here, whereas other callers in this
# file unpack four (targets, dists, amps, nshots) -- confirm which
# target_lookup version this script was written against.
targetlist, distslist, amplist = ccam.target_lookup(filelist, masterlist,
                                                    name_subs)

# Predict with each submodel.  The full and low models are fed the norm-3
# data and the mid model the norm-1 data.
y_full = ccam.pls_unk(data_norm3,
                      nc_full,
                      coeff_file=coeff_file_full,
                      means_file=means_file_full)
y_low = ccam.pls_unk(data_norm3,
                     nc_low,
                     coeff_file=coeff_file_low,
                     means_file=means_file_low)
y_mid = ccam.pls_unk(data_norm1,
                     nc_mid,
                     coeff_file=coeff_file_mid,
                     means_file=means_file_mid)
y_high = ccam.pls_unk(data_norm3,
                      nc_high,
예제 #8
0
def _pls_cal_open_csv(path, append=False):
    """Open *path* for ``csv.writer`` in the mode this interpreter needs."""
    # Python 2's csv module requires a binary file; Python 3's requires a
    # text file with newline translation switched off.
    if sys.version_info[0] < 3:
        return open(path, 'ab' if append else 'wb')
    return open(path, 'a' if append else 'w', newline='')


def _pls_cal_write_rmse(path, label, rmse):
    """Write a two-column CSV: number of components versus RMSE."""
    with _pls_cal_open_csv(path) as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(['NC', label])
        for n_comp, value in enumerate(rmse, 1):
            writer.writerow([n_comp, value])


def _pls_cal_write_table(path, header, id_columns, values, n_rows):
    """Write identifying columns followed by one value column per component."""
    with _pls_cal_open_csv(path) as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(list(header) + list(range(1, values.shape[1] + 1)))
        for i in range(n_rows):
            row = [column[i] for column in id_columns]
            row.extend(values[i, :])
            writer.writerow(row)


def pls_cal(dbfile,
            foldfile,
            maskfile,
            outpath,
            which_elem,
            testfold,
            nc,
            normtype=3,
            mincomp=0,
            maxcomp=100,
            plstype='mlpy',
            keepfile=None,
            removefile=None,
            cal_dir=None,
            masterlist_file=None,
            compfile=None,
            name_sub_file=None):
    """Train and evaluate PLS1 models for one element with 1..nc components.

    The spectral database is read, filtered, masked and normalized through
    the ``ccam`` helpers, split into folds, and then used for (a) leave-one-
    fold-out cross validation on every fold except ``testfold`` and (b) a
    full model trained on all non-test folds and applied to the test fold.
    RMSECV, RMSEC and RMSEP are computed for every component count, plotted,
    and written to CSV files together with the predictions, regression
    coefficients, mean-centering vectors and a summary of the inputs.

    Args:
        dbfile: spectral database, passed to ``ccam.read_db``.
        foldfile: fold definition, passed to ``ccam.folds``.
        maskfile: mask definition, passed to ``ccam.mask``.
        outpath: prefix prepended verbatim to every output file name (no
            path separator is inserted).
        which_elem: label of the composition column to model; matched
            against ``labels[2:]`` returned by ``ccam.read_db``.
        testfold: fold value held out as the test set.
        nc: largest number of PLS components; models are built for 1..nc.
        normtype: normalization type passed to ``ccam.normalize``.
        mincomp, maxcomp: composition range passed to
            ``ccam.choose_spectra``; calibration targets outside it are
            zeroed out of the cal-target RMSEP.
        plstype: ``'mlpy'`` or ``'sklearn'``.
        keepfile, removefile: passed to ``ccam.choose_spectra``.
        cal_dir: if not None, read with ``ccam.read_ccs`` and used to
            compute per-calibration-target RMSEPs.
        masterlist_file, name_sub_file: passed to ``ccam.target_lookup``
            (only used when ``cal_dir`` is given).
        compfile: passed to ``ccam.target_comp_lookup`` (only used when
            ``cal_dir`` is given).

    Returns:
        None. All results are written to files whose names start with
        ``outpath + which_elem``.

    Raises:
        ValueError: if ``plstype`` is not one of the supported back ends
            (previously this silently produced all-zero results).
    """
    if plstype not in ('mlpy', 'sklearn'):
        raise ValueError("plstype must be 'mlpy' or 'sklearn', got %r" %
                         (plstype, ))

    # Every output name is built from the same two tags; the cal-target
    # files historically put the composition range before the model tag.
    model_tag = plstype + '_nc' + str(nc) + '_norm' + str(normtype)
    range_tag = str(mincomp) + '-' + str(maxcomp)
    out_prefix = outpath + which_elem + '_' + model_tag + '_' + range_tag
    cal_prefix = outpath + which_elem + '_' + range_tag + '_' + model_tag

    print('Reading database')
    sys.stdout.flush()
    spectra, comps, spect_index, names, labels, wvl = ccam.read_db(
        dbfile, compcheck=True)
    oxides = labels[2:]
    compindex = numpy.where(oxides == which_elem)[0]

    print('Choosing spectra')
    which_removed = out_prefix + '_removed.csv'
    spectra, names, spect_index, comps = ccam.choose_spectra(
        spectra,
        spect_index,
        names,
        comps,
        compindex,
        mincomp=mincomp,
        maxcomp=maxcomp,
        keepfile=keepfile,
        removefile=removefile,
        which_removed=which_removed)

    print('Masking spectra')
    spectra, wvl = ccam.mask(spectra, wvl, maskfile)

    print('Normalizing spectra')
    spectra = ccam.normalize(spectra, wvl, normtype=normtype)

    print('Assigning Folds')
    folds = ccam.folds(foldfile, names)

    # Append the samples that have no fold (fold value 0) to the
    # removed-spectra file that ccam.choose_spectra was pointed at.
    unassigned = (folds == 0)
    with _pls_cal_open_csv(which_removed, append=True) as writefile:
        writer = csv.writer(writefile, delimiter=',')
        for name, index in zip(names[unassigned], spect_index[unassigned]):
            writer.writerow([name, index, 'No Fold'])

    # Drop those samples from every parallel array.
    assigned = (folds != 0)
    spectra = spectra[assigned, :]
    spect_index = spect_index[assigned]
    names = names[assigned]
    comps = comps[assigned, :]
    folds = folds[assigned]

    print('Defining Training and Test Sets')
    in_train = (folds != testfold)
    in_test = (folds == testfold)

    spectra_train = spectra[in_train]
    spect_index_train = spect_index[in_train]
    names_train = names[in_train]
    comps_train = comps[in_train, compindex]
    folds_train = folds[in_train]
    folds_train_unique = numpy.unique(folds_train)

    spectra_test = spectra[in_test]
    spect_index_test = spect_index[in_test]
    names_test = names[in_test]
    comps_test = comps[in_test, compindex]
    folds_test = folds[in_test]

    print('Do Leave One Label Out (LOLO) cross validation with all folds but the test set')
    # One column of cross-validation predictions per component count.
    train_predict_cv = numpy.zeros((len(names_train), nc))
    RMSECV = numpy.zeros(nc)

    for fold in folds_train_unique:
        print('Holding out fold #' + str(fold))
        held_out = (folds_train == fold)
        kept_in = (folds_train != fold)

        # Mean-center the spectra left in, then center the held-out spectra
        # with the SAME mean so both live in the model's coordinate system.
        X_cv_in, X_cv_in_mean = ccam.meancenter(spectra_train[kept_in, :])
        X_cv_out = ccam.meancenter(spectra_train[held_out, :],
                                   X_mean=X_cv_in_mean)[0]

        # Mean-center the compositions left in.
        Y_cv_in, Y_cv_in_mean = ccam.meancenter(comps_train[kept_in])

        for j in range(1, nc + 1):
            print('Training PLS Model for ' + str(j) + ' components')
            if plstype == 'mlpy':
                PLS1model = mlpy.pls.PLS(j)
                PLS1model.learn(X_cv_in, Y_cv_in)
                train_predict_cv[held_out, j -
                                 1] = PLS1model.pred(X_cv_out) + Y_cv_in_mean
            else:
                # Fix: use j components here (the original always used nc,
                # making every column identical).
                PLS1model = PLSRegression(n_components=j)
                PLS1model.fit(X_cv_in, Y_cv_in)
                # predict() may return an (n, 1) array; flatten it so it
                # fits the 1-D column slice.
                train_predict_cv[held_out, j - 1] = numpy.ravel(
                    PLS1model.predict(X_cv_out)) + Y_cv_in_mean

    # RMSECV for each number of components.
    for i in range(0, nc):
        sqerr = (train_predict_cv[:, i] - comps_train)**2.0
        RMSECV[i] = numpy.sqrt(numpy.mean(sqerr))

    # Mean-center the full training set; the test set reuses its mean.
    X, X_mean = ccam.meancenter(spectra_train)
    X_test = ccam.meancenter(spectra_test, X_mean=X_mean)[0]

    Y, Y_mean = ccam.meancenter(comps_train)

    trainset_results = numpy.zeros((len(names_train), nc))
    testset_results = numpy.zeros((len(names_test), nc))
    RMSEP = numpy.zeros(nc)
    RMSEC = numpy.zeros(nc)
    beta = numpy.zeros((len(X_mean), nc))

    # Full model for each number of components.
    for j in range(1, nc + 1):
        print('Training full model for ' + str(j) + ' components')
        if plstype == 'mlpy':
            PLS1model = mlpy.pls.PLS(j)
            PLS1model.learn(X, Y)
            beta[:, j - 1] = PLS1model.beta()
            trainset_results[:, j - 1] = PLS1model.pred(X) + Y_mean
            testset_results[:, j - 1] = PLS1model.pred(X_test) + Y_mean
        else:
            # Fix: fit with j components and actually store the predictions
            # (the original fitted nc components, stored nothing, and
            # printed a debug marker, so RMSEC/RMSEP came from zeros).
            PLS1model = PLSRegression(n_components=j)
            PLS1model.fit(X, Y)
            trainset_results[:, j - 1] = numpy.ravel(
                PLS1model.predict(X)) + Y_mean
            testset_results[:, j - 1] = numpy.ravel(
                PLS1model.predict(X_test)) + Y_mean
            # NOTE(review): beta stays zero for the sklearn back end, as in
            # the original. PLSRegression.coef_ has version- and
            # scale-dependent shape/units, so it is not copied blindly;
            # the beta CSV and cal-target predictions are therefore not
            # meaningful for plstype='sklearn' until this is resolved.

        RMSEC[j - 1] = numpy.sqrt(
            numpy.mean((trainset_results[:, j - 1] - comps_train)**2.0))
        RMSEP[j - 1] = numpy.sqrt(
            numpy.mean((testset_results[:, j - 1] - comps_test)**2.0))

    # If cal_dir is specified, read cal target data and calculate RMSEs.
    if cal_dir is not None:
        cal_data, cal_wvl, cal_filelist = ccam.read_ccs(cal_dir)
        cal_data, cal_wvl = ccam.mask(cal_data, cal_wvl, maskfile)
        cal_data = ccam.normalize(cal_data, cal_wvl, normtype=normtype)

        # Other callers in this file receive a fourth value (nshots) from
        # target_lookup; slicing keeps this call working with either form.
        targets, _dists, amps = ccam.target_lookup(cal_filelist,
                                                   masterlist_file,
                                                   name_sub_file)[:3]
        target_comps = ccam.target_comp_lookup(targets, compfile, which_elem)
        cal_results = numpy.zeros((len(targets), nc))

        cal_target_names = ('KGAMEDS', 'MACUSANITE', 'NAU2HIS', 'NAU2LOS',
                            'NAU2MEDS', 'NORITE', 'PICRITE', 'SHERGOTTITE')
        target_masks = [(targets == name) for name in cal_target_names]
        rmsep_by_target = [numpy.zeros(nc) for _ in cal_target_names]

        # Targets whose true composition is outside the model range get
        # both truth and prediction forced to 0, so they add no error.
        # This does not depend on the component count, so do it once.
        out_of_range = (target_comps < mincomp) | (target_comps > maxcomp)
        comps_in_range = copy.copy(target_comps)
        comps_in_range[out_of_range] = 0

        for i in range(nc):
            cal_results[:, i] = ccam.pls_unk(cal_data,
                                             i + 1,
                                             beta=beta[:, i],
                                             X_mean=X_mean,
                                             Y_mean=Y_mean)
            cal_results[out_of_range, i] = 0
            for rmsep, mask in zip(rmsep_by_target, target_masks):
                rmsep[i] = numpy.sqrt(
                    numpy.mean(
                        (cal_results[mask, i] - comps_in_range[mask])**2))

        # Fix: count the named targets that really have in-range spectra.
        # The original used len(unique(compositions)) - 1, which is off by
        # one when nothing is out of range and undercounts when two
        # targets share a composition.
        n_good_cal = int(
            sum(numpy.any(mask & ~out_of_range) for mask in target_masks))
        RMSEP_cal = sum(rmsep_by_target) / n_good_cal
        RMSEP_single_cals = rmsep_by_target + [RMSEP_cal]

        _pls_cal_write_table(
            cal_prefix + '_caltargets_predict.csv',
            ['File', 'Target', 'Laser Energy', 'True_Comp'],
            [cal_filelist, targets, amps, target_comps], cal_results,
            len(targets))
        _pls_cal_write_rmse(cal_prefix + '_RMSECP_caltargets.csv',
                            'RMSECP Cal Targets (wt.%)', RMSEP_cal)
        ccam.plots.RMSE(RMSECV,
                        RMSEP,
                        RMSEC,
                        which_elem + ' RMSEs',
                        cal_prefix + '_RMSE_plot_cal.png',
                        RMSEP_cals=RMSEP_single_cals)

    # Plot RMSEs.
    # NOTE(review): this uses ccam_plots.ccam_plot_RMSE while the cal branch
    # above uses ccam.plots.RMSE, and the title here lacks the space before
    # 'RMSEs'. Both are kept as found; confirm which plotting entry point
    # is current.
    ccam_plots.ccam_plot_RMSE(RMSECV, RMSEP, RMSEC, which_elem + 'RMSEs',
                              out_prefix + '_RMSE_plot.png')

    # Write output info to files.
    print(out_prefix + '_RMSECV.csv')
    _pls_cal_write_rmse(out_prefix + '_RMSECV.csv', 'RMSECV (wt.%)', RMSECV)
    _pls_cal_write_rmse(out_prefix + '_RMSEC.csv', 'RMSEC (wt.%)', RMSEC)
    _pls_cal_write_rmse(out_prefix + '_RMSEP.csv', 'RMSEP (wt.%)', RMSEP)

    sample_header = ['Sample', 'Spectrum', 'Fold', 'True_Comp']
    train_columns = [names_train, spect_index_train, folds_train, comps_train]
    test_columns = [names_test, spect_index_test, folds_test, comps_test]
    _pls_cal_write_table(out_prefix + '_cv_predict.csv', sample_header,
                         train_columns, train_predict_cv, len(names_train))
    _pls_cal_write_table(out_prefix + '_train_predict.csv', sample_header,
                         train_columns, trainset_results, len(names_train))
    _pls_cal_write_table(out_prefix + '_test_predict.csv', sample_header,
                         test_columns, testset_results, len(names_test))
    _pls_cal_write_table(out_prefix + '_beta_coeffs.csv', ['wvl'], [wvl],
                         beta, len(wvl))

    with _pls_cal_open_csv(out_prefix + '_meancenters.csv') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow([which_elem + ' mean', Y_mean])
        for i in range(0, len(wvl)):
            writer.writerow([wvl[i], X_mean[i]])

    with _pls_cal_open_csv(out_prefix + '_inputinfo.csv') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerows([
            ['Spectral database =', dbfile],
            ['Spectra Kept =', keepfile],
            ['Spectra Removed =', which_removed],
            ['Fold Definition =', foldfile],
            # Fix: this row used to repeat maskfile instead of the fold.
            ['Test Fold =', testfold],
            ['Mask File =', maskfile],
            ['Algorithm =', plstype],
            ['# of components =', nc],
            ['Normalization Type =', normtype],
            ['Composition Min. =', mincomp],
            ['Composition Max. =', maxcomp],
        ])
예제 #9
0
def read_ccs(searchdir,
             skiprows=0,
             shots=False,
             masterlist=None,
             name_sub_file=None,
             singlefile=False):
    """Read CCS spectra files into arrays.

    Args:
        searchdir: directory handed to ``search_ccs``; when ``singlefile``
            is set it is instead the path of the one file to read.
        skiprows: passed to ``ccam.read_csv`` for every file.
        shots: when true, return every single-shot spectrum instead of one
            spectrum per file. Requires ``masterlist`` / ``name_sub_file``
            so ``ccam.target_lookup`` can report the shot count per file.
        masterlist, name_sub_file: passed to ``ccam.target_lookup``; only
            used when ``shots`` is true.
        singlefile: when true, read only ``searchdir`` and suppress the
            progress messages.

    Returns:
        ``(means, wvl, files)`` when ``shots`` is false, where ``means``
        has one row per file taken from the last column of that file
        (presumably the per-file mean spectrum -- confirm against the CCS
        layout).
        ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots``
        is true, where ``singleshots`` has one row per shot and the other
        two arrays give the source file and within-file shot number of
        each row.
        ``wvl`` is column 0 of the last file read, or an empty array when
        no files were found.
    """
    n_channels = 6144  # number of rows every CCS file is expected to have

    # The flags are tested by truthiness throughout. The original mixed
    # "is True", "is False" and "is not True", so a non-bool flag such as 1
    # or None fell between the branches and returned None or raised.
    if singlefile:
        filelist = numpy.array([searchdir])
        files = [filelist[0][-40:]]
    else:
        filelist, files = search_ccs(searchdir)

    if shots:
        _targets, _dists, _amps, nshots = ccam.target_lookup(
            filelist, masterlist, name_sub_file)
        nshots = numpy.array(nshots, dtype='int')
        sum_shots = numpy.sum(nshots)

    if not singlefile:
        print('Reading ' + str(len(filelist)) + ' files...')

    if shots:
        singleshots = numpy.zeros([n_channels, sum_shots], dtype='float64')
        # Size the string dtype from ALL file names. The original sized it
        # from files[0] only, silently truncating any longer name.
        files_singleshot = numpy.zeros(sum_shots,
                                       dtype=numpy.asarray(files).dtype)
        shotnums = numpy.zeros([sum_shots])
        rowcount = 0
    else:
        means = numpy.zeros([len(filelist), n_channels], dtype='float64')

    # Defined up front so an empty file list returns empty results instead
    # of failing on an unbound name.
    wvl = numpy.array([], dtype='float')

    for i, path in enumerate(filelist):
        if not singlefile and (i + 1) % 100 == 0:
            print('Reading file #' + str(i + 1))

        tempdata = ccam.read_csv(path, skiprows, labelrow=False)
        wvl = numpy.array(tempdata[:, 0], dtype='float')

        if shots:
            # Columns 1..-3 are copied as the individual shots; the final
            # two columns are excluded.
            stop = rowcount + nshots[i]
            shotnums[rowcount:stop] = numpy.arange(nshots[i])
            files_singleshot[rowcount:stop] = files[i]
            singleshots[:, rowcount:stop] = tempdata[:, 1:-2]
            rowcount = stop
        else:
            means[i, :] = tempdata[:, -1]

    if shots:
        # Stored as channels x shots while filling; callers get shots x
        # channels.
        return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
    return means, wvl, files