# Assumed context for these snippets: numpy and PyML imported as below;
# `ger` and `twed` are project-local kernel modules.
import numpy as np
import PyML as ml
import ger


def generateKernelMatrix(DataSet):
    """Creates a kernel matrix/Gram matrix from an input dataset,
    which is a list of examples."""
    n_samples = len(DataSet)
    # kernel values are numeric; a "string" dtype would silently truncate them
    kernelMatrix = np.empty([n_samples, n_samples], dtype="float")
    PatternIds = np.empty([n_samples, 1], dtype=object)
    Labels = np.empty([n_samples, 1], dtype=object)
    for i in xrange(n_samples):
        (label, tps, pID, A1) = DataSet[i]
        PatternIds[i, 0] = pID
        Labels[i, 0] = label
    for i in xrange(n_samples):
        for j in xrange(n_samples):
            (label1, tps, pID, A1) = DataSet[i]
            (label2, tps, pID, A2) = DataSet[j]
            kernelMatrix[i, j] = ger.GERPKernel(A1, A2)
    # np.concatenate takes a tuple of arrays plus an axis; the pattern
    # ids are prepended as the first column.
    kernelFileMatrix = np.concatenate((PatternIds, kernelMatrix), axis=1)
    labelMatrix = np.concatenate((PatternIds, Labels), axis=1)
    np.savetxt("labelText.txt", labelMatrix, fmt='%s', delimiter=',')
    np.savetxt("kernelText.txt", kernelFileMatrix, fmt='%s', delimiter=',')
    labels = ml.Labels("labelText.txt")
    kdata = ml.KernelData("kernelText.txt")
    kdata.attachLabels(labels)
    return kdata

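# A hedged usage sketch (not from the original source): `examples` is a
# placeholder for a list of (label, tps, pID, series) tuples like the ones
# unpacked above.
kdata = generateKernelMatrix(examples)   # square Gram matrix over `examples`
svm = ml.SVM()
svm.train(kdata)                         # PyML SVMs accept KernelData directly
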
def prepare_train_set(mgrs):
    assert len(mgrs) > 0
    l = PyML.Labels([i.get_param('epoch_name') for i in mgrs])
    train_data = zeros((len(mgrs), len(mgrs[0].get_samples()[0])))
    for i, mgr in enumerate(mgrs):
        # Only the first channel is taken into consideration
        train_data[i, :] = mgr.get_samples()[0]
    train_set = PyML.VectorDataSet(train_data, L=l)
    return train_set

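# A minimal usage sketch, assuming the managers expose get_samples() and
# get_param('epoch_name') as the function requires; `read_managers` is a
# hypothetical loader, not part of the original source.
mgrs = read_managers('epochs/')          # placeholder loader
train_set = prepare_train_set(mgrs)
svm = PyML.svm.SVM()
svm.train(train_set)                     # one training row per manager
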
def vecSplit(vecName='', fracTrain=0.3):
    """
    Splits a vector/csv-formatted SVMLIB file into training and test sets
    by row according to numTrain and numTest and writes out the resulting
    files.  Existing files are overwritten.

    - numTest + numTrain should equal the number of lines in vecName
      minus the one-line header.

    [08/16/2011]: a major change - instead of taking numTest and numTrain
    directly, fracTrain (the fraction of trials destined for training) was
    added as an invocation arg.  numTrain/numTest are now discovered.  An
    unmodified (commented out) version of the old function was left in
    the source.
    """
    ## Calc numbers of features for training and testing data
    vecData = ml.VectorDataSet(vecName, labelsColumn=1, idColumn=0)
    numTrain = int(vecData.numFeatures * fracTrain)
    numTest = vecData.numFeatures - numTrain

    ## Create filenames of the train and test data that will be written
    ## soon.  Remove 'vec' from vecName so more informative, less
    ## redundant names can be created.  PyML's save() expects a file
    ## name, so the names stay strings rather than open file handles.
    tmpName = str.split(vecName, 'vec')
    trainName = 'vec_train_{0}{1}'.format(numTrain, tmpName[-1])
    testName = 'vec_test_{0}{1}'.format(numTest, tmpName[-1])

    ## Randomly select features for either training or testing.
    sampler = np.asarray([1] * numTrain + [2] * numTest)
    np.random.shuffle(sampler)

    ## Create indices from 'sampler'
    featureIndex = np.arange(len(sampler))
    invertTrainIndex = featureIndex[sampler == 2]
    invertTestIndex = featureIndex[sampler == 1]
    print('trainIndex: {0}'.format(invertTrainIndex))
    print('testIndex: {0}'.format(invertTestIndex))

    ## Use trainIndex or testIndex to eliminate features; deepcopy the
    ## vecData first -- eliminateFeatures() operates in place.
    trainData = ml.VectorDataSet(vecData)
    trainData.eliminateFeatures(invertTrainIndex.tolist())
    trainData.save(trainName)

    testData = ml.VectorDataSet(vecData)
    testData.eliminateFeatures(invertTestIndex.tolist())
    testData.save(testName)

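# A hedged example call; the file name is a placeholder for any
# CSV-formatted SVMLIB vector file with a one-line header.
vecSplit('vec_word_subject01.csv', fracTrain=0.3)
# writes vec_train_<numTrain>_word_subject01.csv and
# vec_test_<numTest>_word_subject01.csv next to the input
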
def on_train_clicked(self, widget):
    # FOR NOW, only SVM is supported
    if self.sClassifier == "SVM":
        filen = self.getFilenameToRead("Open training data", filter='mat')
        if filen is not None:
            data = ml.VectorDataSet(filen, labelsColumn=0)
            self.clssfr = ml.SVM()
            self.clssfr.train(data)
            # train finished. need to update button status
            self.setDisabledBtns()
            self.showMessage("Training SVM is done.")
    else:
        self.showMessage("The classifier is not supported yet!")

def on_detect_clicked(self, widget):
    if self.clssfr is not None:
        filen = self.getFilenameToRead("Open hurricane data", filter='mdat')
        if filen is not None:
            fname = os.path.basename(filen)
            key, ext = os.path.splitext(fname)
            if ext == '.dat':
                key = key[1:]  # take 'g' out
                #testData = gdtool.createMat(self.datapath, self.trackpath, key, key)
                #result = self.clssfr.test(ml.VectorDataSet(testData, labelsColumn=0))
                tmpfn = 'f__tmpDetected__'
                if os.path.exists(tmpfn):
                    os.unlink(tmpfn)
                # for DEMO, undersample the normal data -- without
                # undersampling there are too many candidates
                gdtool.createMat(self.datapath, self.trackpath, key, key,
                                 store=tmpfn, undersample=True, genkeyf=True)
                bneedDel = True
            else:
                tmpfn = fname
                bneedDel = False

            result = self.clssfr.test(ml.VectorDataSet(tmpfn, labelsColumn=0))

            gdkeyfilen = ''.join([tmpfn, '.keys'])
            with open(gdkeyfilen, 'r') as f:
                gridkeys = pickle.load(f)
                self.stormlocs = pickle.load(f)

            predicted = result.getPredictedLabels()
            predicted = np.array(map(float, predicted))
            self.detected = np.array(gridkeys)[predicted == 1]

            if bneedDel:
                os.unlink(tmpfn)
                os.unlink(gdkeyfilen)

            snstroms = str(self.stormlocs.shape[0])
            sndetected = str(self.detected.shape[0])
            self.chkHurricane.set_label(snstroms + " Hurricanes")
            self.chkDetected.set_label(sndetected + " Detected")
            self.showMessage(''.join([sndetected, "/", snstroms,
                                      " grids are predicted to have a hurricane."]))

            if False:
                with open('demo.detected', 'w') as f:
                    pickle.dump(self.detected, f)
                    pickle.dump(self.stormlocs, f)

            # test data tested. update buttons
            self.setDisabledBtns()
            self.redraw()
    else:
        self.showMessage("There is no trained classifier!")

def runSVM(trainF, testF):
    """
    [6/27/2011]: This script serves little purpose as it takes too long
    to iterate over 0-10,000 to optimize C automatically.  It is more
    efficiently done by hand (for now at least).

    Go!
    """
    import PyML as ml
    import numpy as np

    znorm = ml.Standardizer()

    # Init the SVM
    s = ml.SVM()
    print(s)

    # Reformat the data to csv (aka Vector) so feature-based
    # normalization can occur, then normalize the train and test data
    test = ml.SparseDataSet(testF)
    vectestF = 'vec_' + testF
    test.save(vectestF, format='csv')
    vecTest = ml.VectorDataSet(vectestF, labelsColumn=1, idColumn=0)
    znorm.train(vecTest)

    trainedSVM = []
    # Returns a trained SVM
    if isinstance(trainF, str):
        train = ml.SparseDataSet(trainF)
        vectrainF = 'vec_' + trainF
        train.save(vectrainF, format='csv')
        vecTrain = ml.VectorDataSet(vectrainF, labelsColumn=1, idColumn=0)
        znorm.train(vecTrain)

        # Optimize C
        param = ml.modelSelection.Param(s, 'C', list(np.arange(0, 10000, .5)))
        m = ml.modelSelection.ModelSelector(param)
        m.train(vecTrain)
        trainedSVM = m.classifier  # ModelSelector keeps the winning classifier
        trainedSVM.save('svm_' + testF)
    else:
        trainedSVM = trainF

    cross = trainedSVM.stratifiedCV(vecTest, 10)  # CV expects a dataset
    results = trainedSVM.test(vecTest)
    return results

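# A hedged call sketch; the file names are placeholders for SVMLIB-formatted
# train/test sets.  The first argument may also be an already-trained SVM,
# per the isinstance branch above.
results = runSVM('train_svmData.txt', 'test_svmData.txt')
print(results)
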
def generateKernelMatrix(TestData, TrainingData, sigma=10,
                         lam=math.pow(10, -3), nu=0.5, kernel="twed"):
    """Creates a kernel matrix/Gram matrix from an input dataset,
    which is a list of examples."""
    n_samplesTest = len(TestData)
    n_samplesTrain = len(TrainingData)
    kernelMatrix = np.empty([n_samplesTest, n_samplesTrain], dtype="float")
    PatternIds = np.empty([n_samplesTest, 1], dtype=object)
    Labels = np.empty([n_samplesTest, 1], dtype=object)
    for i in xrange(n_samplesTest):
        (label, tps, pID, A1) = TestData[i]
        PatternIds[i, 0] = pID
        Labels[i, 0] = label
    for i in xrange(n_samplesTest):
        for j in xrange(n_samplesTrain):
            (label1, tps, pID, A1) = TestData[i]
            (label2, tps, pID, A2) = TrainingData[j]
            if kernel == "gerp":
                kernelMatrix[i, j] = ger.GERPKernel(A1, A2, sigma)
            elif kernel == "twed":
                kernelMatrix[i, j] = twed.TwedKernel(TestData[i], TrainingData[j],
                                                     lam, nu, sigma)
    kernelFileMatrix = np.concatenate((PatternIds, kernelMatrix), axis=1)
    labelMatrix = np.concatenate((PatternIds, Labels), axis=1)
    np.savetxt("labelText.txt", labelMatrix, fmt='%s', delimiter=',')
    np.savetxt("kernelText.txt", kernelFileMatrix, fmt='%s', delimiter=',')
    f1 = "labelText.txt"
    f2 = "kernelText.txt"
    labels = ml.Labels(f1)
    kdata = ml.KernelData(f2)
    kdata.attachLabels(labels)
    return kdata

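# A hedged usage sketch: build a square Gram matrix over the training
# examples and cross-validate on it.  `trainExamples` is a placeholder
# list of (label, tps, pID, series) tuples like the ones unpacked above.
kdata = generateKernelMatrix(trainExamples, trainExamples, kernel="twed")
svm = ml.SVM()
results = svm.stratifiedCV(kdata, 5)     # 5-fold stratified cross-validation
print(results)
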
def createVectorDataSet(A):
    """Converts a training example list to a vector dataset."""
    labels = []
    patterns = []
    X = []
    for (label, tps, pID, B) in A:
        labels.append(label)
        patterns.append(pID)
        X.append(B)
    data = ml.VectorDataSet(X, L=labels, patternID=patterns)
    return data

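# A hedged sketch with made-up three-feature examples:
examples = [('target', 0, 'p01', [0.1, 0.5, 0.9]),
            ('nontarget', 0, 'p02', [0.2, 0.1, 0.3])]
data = createVectorDataSet(examples)
print(data)   # PyML reports the number of patterns, features, and classes
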
def zSparse(fname):
    """
    Converts a sparse-formatted SVMLIB data file to Vector/CSV format,
    z-normalizes it on a feature basis, and writes out the result as
    'vec_' + fname.
    """
    znorm = ml.Standardizer()
    sparse = ml.SparseDataSet(fname)
    sparse.save('temp', format='csv')
    vec = ml.VectorDataSet('temp', labelsColumn=1, idColumn=0)
    znorm.train(vec)

    vecName = 'vec_' + fname
    # Verbal overwrite of priors
    if os.path.exists(vecName):
        print('Overwriting {0}.'.format(vecName))
        os.remove(vecName)
    vec.save(vecName)

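# Hypothetical usage; the file name is a placeholder for any SVMLIB file.
zSparse('train_svmData.txt')   # writes z-normalized vec_train_svmData.txt
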
def test_normalize(self):
    if not self.TEST_NORMALIZE:
        return
    mgr = get_fake_manager(3, 10)
    e = ch.Normalize(norm=2)
    new_mgr = e.process([mgr])[0]

    import PyML
    data = PyML.VectorDataSet(mgr.get_samples())
    data.normalize(2)
    for i in range(3):
        for j in range(10):
            self.assertAlmostEqual(new_mgr.get_samples()[i, j],
                                   data.getMatrix()[i, j])
    LOGGER.info("Normalize tested!")

def _p300_verify_svm_one_fold(t_train_mgrs, nt_train_mgrs, t_test_mgrs,
                              nt_test_mgrs, non_target_per_target,
                              C, Cmode, kernel):
    assert len(t_test_mgrs) * non_target_per_target == len(nt_test_mgrs)
    s = PyML.svm.SVM(C=C, Cmode=Cmode, arg=kernel)

    # Train classifier on a train set...
    train_data = t_train_mgrs + nt_train_mgrs
    train_vect = zeros((len(train_data), len(train_data[0].get_samples()[0])))
    train_labels_vect = []
    for i, mgr in enumerate(train_data):
        train_labels_vect.append(mgr.get_param('epoch_name'))
        train_vect[i, :] = mgr.get_samples()[0]
    s.train(PyML.VectorDataSet(train_vect, L=PyML.Labels(train_labels_vect)))

    # Test classifier on a test set: grab two elements of the target test
    # set and 2*non_target_per_target elements of the non-target test set...
    succ = 0
    fail = 0
    i = 0
    while i + 1 < len(t_test_mgrs):
        t1 = t_test_mgrs[i]
        ns1 = nt_test_mgrs[i * non_target_per_target:(i + 1) * non_target_per_target]
        whatever, t1_value = s.classify(PyML.VectorDataSet(t1.get_samples()), 0)
        ns1_value = max([s.classify(PyML.VectorDataSet(n1.get_samples()), 0)[1]
                         for n1 in ns1])

        t2 = t_test_mgrs[i + 1]
        ns2 = nt_test_mgrs[(i + 1) * non_target_per_target:(i + 2) * non_target_per_target]
        whatever, t2_value = s.classify(PyML.VectorDataSet(t2.get_samples()), 0)
        ns2_value = max([s.classify(PyML.VectorDataSet(n2.get_samples()), 0)[1]
                         for n2 in ns2])

        # Check if the decision was good...
        if t1_value > ns1_value and t2_value > ns2_value:
            succ += 1
        else:
            fail += 1
        i += 2
    return succ, fail

def on_load_clicked(self, widget):
    filen = self.getFilenameToRead("Load Classifier", filter='svm')
    if filen is not None:
        #db = shelve.open(filen)
        #if db.has_key('clssfr'):
        #    self.clssfr = db['clssfr']
        #else:
        #    self.showMessage("Cannot find a classifier!")
        #db.close()
        #with open(filen, 'wb') as f:
        #    self.clssfr = pickle.load(f)
        datfn = self.getFilenameToRead("Open Training Data", filter='mat')
        if datfn is not None:
            data = ml.VectorDataSet(datfn, labelsColumn=0)
            self.clssfr = loadSVM(filen, data)  ## Why do I need to feed data ???
            #self.clssfr = loadSVM(filen, None)  ## edited PyML for this
            # classifier has been loaded. need to update button status
            self.setDisabledBtns()
            self.showMessage("The classifier has been loaded!")

def make_sfr_radius_mass_plots(msF, merF, rfiter=3):
    for sk, fk in zip(snap_keys, fil_keys):
        parameters, pcd, pc, pcd = make_pc_dict(msF, sk, fk)
        pc1 = pc.X[:, 0].flatten()
        pc2 = pc.X[:, 1].flatten()
        pc3 = pc.X[:, 2].flatten()
        pc4 = pc.X[:, 3].flatten()
        pc5 = pc.X[:, 4].flatten()
        pc6 = pc.X[:, 5].flatten()
        pc7 = pc.X[:, 6].flatten()
        PCs = pandas.DataFrame(pc.X)

        asym = get_all_morph_val(msF, sk, fk, 'ASYM')
        gini = get_all_morph_val(msF, sk, fk, 'GINI')
        m20 = get_all_morph_val(msF, sk, fk, 'M20')
        cc = get_all_morph_val(msF, sk, fk, 'CC')
        Mstat = get_all_morph_val(msF, sk, fk, 'MID1_MPRIME')
        Istat = get_all_morph_val(msF, sk, fk, 'MID1_ISTAT')
        Dstat = get_all_morph_val(msF, sk, fk, 'MID1_DSTAT')

        sfid = get_all_snap_val(msF, sk, 'SubfindID')

        S_GM20 = SGM20(gini, m20)
        F_GM20 = FGM20(gini, m20)

        latest_NumMajorMergersLastGyr = get_mergerinfo_val(merF, sk, 'latest_NumMajorMergersLastGyr')
        boolean_merger1 = latest_NumMajorMergersLastGyr >= 1.0

        this_NumMajorMergersLastGyr = get_mergerinfo_val(merF, sk, 'this_NumMajorMergersLastGyr')
        boolean_merger2 = this_NumMajorMergersLastGyr >= 1.0

        mhalo = get_all_snap_val(msF, sk, 'Mhalo_Msun')
        mstar = get_all_snap_val(msF, sk, 'Mstar_Msun')
        log_mstar_mhalo = np.log10(mstar / mhalo)

        redshift = msF['nonparmorphs'][sk][fk]['CAMERA0']['REDSHIFT'].value[0]

        # set up RF data frame above, run or save input/output for each loop iteration
        rf_dict = {}
        PARAMS_MOD = True
        PARAMS_ONLY = False
        PCS_ONLY = False
        RUN_RF = True
        RF_ITER = rfiter
        rf_masscut = 10.0**(10.5)

        if PCS_ONLY is True:
            gi = np.where(np.isfinite(pc1) * np.isfinite(pc2) * np.isfinite(pc3)
                          * np.isfinite(pc4) * np.isfinite(pc5) * np.isfinite(pc6)
                          * np.isfinite(pc7) * (mstar >= rf_masscut) != 0)[0]
            print(gi.shape, pc1.shape)
            rf_dict['pc1'] = pc1[gi]
            rf_dict['pc2'] = pc2[gi]
            rf_dict['pc3'] = pc3[gi]
            rf_dict['pc4'] = pc4[gi]
            rf_dict['pc5'] = pc5[gi]
            rf_dict['pc6'] = pc6[gi]
            rf_dict['pc7'] = pc7[gi]
            rf_dict['mergerFlag'] = boolean_merger1[gi]
            rf_dict['SubfindID'] = sfid[gi]
            cols = ['pc1', 'pc2', 'pc3', 'pc4', 'pc5', 'pc6', 'pc7']
            rflabel = 'pcs'

        if PARAMS_ONLY is True:
            gi = np.where(np.isfinite(gini) * np.isfinite(m20) * np.isfinite(asym)
                          * np.isfinite(Mstat) * np.isfinite(Istat) * np.isfinite(Dstat)
                          * np.isfinite(cc) * (mstar >= rf_masscut) != 0)[0]
            print(gi.shape, pc1.shape)
            rf_dict['gini'] = gini[gi]
            rf_dict['m20'] = m20[gi]
            rf_dict['asym'] = asym[gi]
            rf_dict['Mstat'] = Mstat[gi]
            rf_dict['Istat'] = Istat[gi]
            rf_dict['Dstat'] = Dstat[gi]
            rf_dict['cc'] = cc[gi]
            rf_dict['mergerFlag'] = boolean_merger1[gi]
            rf_dict['SubfindID'] = sfid[gi]
            cols = ['gini', 'm20', 'asym', 'Mstat', 'Istat', 'Dstat', 'cc']
            rflabel = 'params'

        if PARAMS_MOD is True:
            gi = np.where(np.isfinite(S_GM20) * np.isfinite(F_GM20) * np.isfinite(asym)
                          * np.isfinite(Mstat) * np.isfinite(Istat) * np.isfinite(Dstat)
                          * np.isfinite(cc) * (mstar >= rf_masscut) != 0)[0]
            print(gi.shape, pc1.shape)
            rf_dict['dGM20'] = S_GM20[gi]
            rf_dict['fGM20'] = F_GM20[gi]
            rf_dict['asym'] = asym[gi]
            rf_dict['Mstat'] = Mstat[gi]
            rf_dict['Istat'] = Istat[gi]
            rf_dict['Dstat'] = Dstat[gi]
            rf_dict['cc'] = cc[gi]
            rf_dict['mergerFlag'] = boolean_merger1[gi]
            rf_dict['SubfindID'] = sfid[gi]
            cols = ['dGM20', 'fGM20', 'asym', 'Mstat', 'Istat', 'Dstat', 'cc']
            rflabel = 'paramsmod'

        if RUN_RF is True:
            if redshift < 4.2:
                df = pandas.DataFrame(rf_dict)
                print("Running Random Forest... ", sk, fk)
                result, labels, label_probability = PyML.randomForestMC(
                    df, iterations=RF_ITER, cols=cols)
                # result = summary statistics, feature importances
                #          (N iterations x N statistics/importances)
                # labels = labels following random forest (N galaxies x N iterations)
                # label_probability = probability of label following random forest
                #                     (N galaxies x N iterations)

                # save the output as files
                if not os.path.lexists('rfoutput'):
                    os.mkdir('rfoutput')

                labels['mergerFlag'] = df['mergerFlag']
                label_probability['mergerFlag'] = df['mergerFlag']
                labels['SubfindID'] = df['SubfindID']
                label_probability['SubfindID'] = df['SubfindID']

                df.to_pickle('rfoutput/' + rflabel + '_data_cut_{}_{}.pkl'.format(sk, fk))
                result.to_pickle('rfoutput/' + rflabel + '_result_cut_{}_{}.pkl'.format(sk, fk))
                labels.to_pickle('rfoutput/' + rflabel + '_labels_cut_{}_{}.pkl'.format(sk, fk))
                label_probability.to_pickle('rfoutput/' + rflabel + '_label_probability_cut_{}_{}.pkl'.format(sk, fk))
                PCs.to_pickle('rfoutput/' + rflabel + '_pc_cut_{}_{}.pkl'.format(sk, fk))

        bins = 18
        xlim = [9.7, 12.2]
        ylim = [-2.0, 3.0]
        rlim = [0.1, 1.7]

        plot_filen = 'pc1/sfr_radius_mass_' + sk + '_' + fk + '_pc1.pdf'
        if not os.path.lexists('pc1'):
            os.mkdir('pc1')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=pc1, vmin=-2, vmax=3, bins=bins)
        gth.make_colorbar(colorobj, title='PC1 morphology', ticks=[-2, -1, 0, 1, 2, 3])
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

        plot_filen = 'pc3/sfr_radius_mass_' + sk + '_' + fk + '_pc3.pdf'
        if not os.path.lexists('pc3'):
            os.mkdir('pc3')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=pc3, vmin=-1, vmax=3, bins=bins)
        gth.make_colorbar(colorobj, title='PC3 morphology', ticks=[-1, 0, 1, 2, 3])
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

        plot_filen = 'asym/sfr_radius_mass_' + sk + '_' + fk + '_asym.pdf'
        if not os.path.lexists('asym'):
            os.mkdir('asym')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=asym, vmin=0.0, vmax=0.4, bins=bins)
        gth.make_colorbar(colorobj, title='Asymmetry', ticks=[0.0, 0.20, 0.40])
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

        plot_filen = 'merger1/sfr_radius_mass_' + sk + '_' + fk + '_merger1.pdf'
        if not os.path.lexists('merger1'):
            os.mkdir('merger1')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=boolean_merger1, min_bin=3,
                                        gridf='fraction_grid', vmin=0.0, vmax=0.5, bins=bins)
        gth.make_colorbar(colorobj, title='fraction major merger',
                          ticks=[0.0, 0.25, 0.50], format='%.2f')
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

        plot_filen = 'merger3/sfr_radius_mass_' + sk + '_' + fk + '_merger3.pdf'
        if not os.path.lexists('merger3'):
            os.mkdir('merger3')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=boolean_merger1, min_bin=3,
                                        gridf='normed_proportion_grid', vmin=0.0, vmax=1.0, bins=bins)
        gth.make_colorbar(colorobj, title='proportion of major mergers',
                          ticks=[0.0, 0.5, 1.0], format='%.2f')
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

        plot_filen = 'mstar_mhalo/sfr_radius_mass_' + sk + '_' + fk + '_mstar_mhalo.pdf'
        if not os.path.lexists('mstar_mhalo'):
            os.mkdir('mstar_mhalo')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=log_mstar_mhalo, min_bin=3,
                                        gridf='median_grid', vmin=-2.0, vmax=-0.5, bins=bins)
        gth.make_colorbar(colorobj, title='median $log_{10} M_*/M_{h}$',
                          ticks=[-2, -1.5, -1, -0.5])
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

        plot_filen = 'mhalo/sfr_radius_mass_' + sk + '_' + fk + '_mhalo.pdf'
        if not os.path.lexists('mhalo'):
            os.mkdir('mhalo')
        f1 = pyplot.figure(figsize=(3.5, 5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,
                               wspace=0.0, hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF, merF, sk, fk, f1, xlim=xlim, ylim=ylim,
                                        rlim=rlim, Cval=np.log10(mhalo), min_bin=3,
                                        gridf='median_grid', vmin=11.5, vmax=14.0, bins=bins)
        gth.make_colorbar(colorobj, title='median $log_{10} M_{h}$',
                          ticks=[11.5, 12.0, 13.0, 14.0])
        f1.savefig(plot_filen, dpi=300)
        pyplot.close(f1)

    return locals()

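# A hedged invocation sketch; the HDF5 file names are placeholders for the
# morphology and merger-info catalogs the function expects as open handles.
import h5py
msF = h5py.File('nonparmorphs.hdf5', 'r')
merF = h5py.File('mergerinfo.hdf5', 'r')
out = make_sfr_radius_mass_plots(msF, merF, rfiter=3)  # writes PDFs and RF pickles
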
#AXIS = 'x'
AXIS = 'y'

# Hardcoded constants
tuneX = 17.8509864542659
tuneY = 6.74232980750181
ref_freq = 10  # 9.979248046875
fs = 150  # Hz

if __name__ == '__main__':
    if len(sys.argv) > 1:
        ref_freq = float(sys.argv[1])

    plt.close('all')
    pml = PyML.PyML()
    pml.setao(pml.loadFromExtern('../external/bessyIIinit.py', 'ao'))
    # pml.loadBPMOffsets('/opt/OPI/MapperApplications/conf/Orbit/SR/RefOrbit.Dat')

    active_bpmsx = pml.getActiveIdx('BPMx')
    active_bpmsy = pml.getActiveIdx('BPMy')
    active_cmsx = pml.getActiveIdx('HCM')
    active_cmsy = pml.getActiveIdx('VCM')

    sx = pml.getfamilydata('BPMx', 'Pos')
    sy = pml.getfamilydata('BPMy', 'Pos')
    cx = pml.getfamilydata('HCM', 'Pos')
    cy = pml.getfamilydata('VCM', 'Pos')

    namesX = pml.getfamilydata('BPMx', 'CommonNames')
    namesY = pml.getfamilydata('BPMy', 'CommonNames')

# [550,600,650,700] -- C: 550
# [510,520,530,540] -- C: 520
# [515,516,517,518,519,520,521,522,523,524,525] -- C: 524.000000
# possibleC = list(np.arange(523.6,524.6,.01)) -- C: 523.830000
#
# 'vec_train_scene_TESTSTUxTESTNSU_Wed02Mar2011_20-42-01rPRC_block_svmData.txt'
# [0.01,1,10,100,500,1000] -- C: 1
# [0.1,.2,.3,2,3,4,5,6] -- C: 0.3
# [0.2,.3,.4,.5,.6,.7] -- C: 0.4
# list(np.arange(.3,.5,.01)) -- C: 0.31000

# CV RUNNING
trainFile = 'vec_train_word_TESTSTUxTESTNSU_Wed02Mar2011_20-42-01rPRC_block_svmData.txt'
#possibleC = [515,516,517,518,519,520,521,522,523,524,525]
possibleC = list(np.arange(523.6, 524.6, .01))

trainData = ml.VectorDataSet(trainFile, labelsColumn=1, idColumn=0)  # assumes data is csv and znormed

startTime = strftime("%a%d%b%Y_%H:%M:%S")
s = ml.SVM()
param = ml.modelSelection.Param(s, 'C', possibleC)
m = ml.modelSelection.ModelSelector(param)
m.train(trainData)
stopTime = strftime("%a%d%b%Y_%H:%M:%S")

print(startTime, stopTime)
print(m)

def art_main(cidx, ref_freq, plotopt=True):
    print('I set cidx to {}'.format(cidx))
    plt.close('all')
    pml = PyML.PyML()
    pml.setao(pml.loadFromExtern('../../PyML/config/bessyIIinit.py', 'ao'))

    active_bpmsx = pml.getActiveIdx('BPMx')
    active_bpmsy = pml.getActiveIdx('BPMy')

    sx = pml.getfamilydata('BPMx', 'Pos')
    sy = pml.getfamilydata('BPMy', 'Pos')
    cx = pml.getfamilydata('HCM', 'Pos')[pml.getActiveIdx('HCM')]
    cy = pml.getfamilydata('VCM', 'Pos')[pml.getActiveIdx('VCM')]

    namesX = pml.getfamilydata('BPMx', 'CommonNames')
    namesY = pml.getfamilydata('BPMy', 'CommonNames')

    Smat_xx, Smat_yy = sktools.io.load_Smat(SMAT_FILE)
    Smat_xx = Smat_xx[active_bpmsx, :]
    Smat_yy = Smat_yy[active_bpmsy, :]

    phases_mat = scipy.io.loadmat(PHASE_FILE)
    phaseX = phases_mat['PhaseX'][:, 0]
    phaseY = phases_mat['PhaseZ'][:, 0]

    if AXIS == 'y':
        pos_cor = cy
        pos = sy[active_bpmsy]
        Smat = Smat_yy
        phase = phaseY
        tune = tuneY
        names = namesY[active_bpmsy]
    elif AXIS == 'x':
        pos_cor = cx
        pos = sx[active_bpmsx]
        Smat = Smat_xx
        phase = phaseX
        tune = tuneX
        names = namesX[active_bpmsx]

    S_inv = sktools.maths.inverse_with_svd(Smat, 32)

    t = np.arange(Fs * tmax) / Fs
    corr = np.zeros((Smat.shape[1], t.size))
    corr[cidx, :] = np.sin(2 * np.pi * ref_freq * t + np.random.random() * 2 * np.pi)
    values = Smat.dot(corr)

    acos, asin = sktools.maths.extract_sin_cos(values, Fs, ref_freq)
    step_size = 0.1
    acos_opt, asin_opt, _ = sktools.maths.optimize_rotation(acos, asin, step_size)

    phase_kick, coeff = skcore.get_kick(np.array(acos_opt), phase, tune,
                                        plotopt, plotopt)
    kick_idx = np.argmin(abs(phase - phase_kick))
    r1 = S_inv.dot(acos_opt)

    if plotopt:
        plt.figure('CMs')
        plt.plot(pos_cor, r1)
        plt.ylabel('Amplitude of correction')
        plt.xlabel('Position [in m]')
        plt.title('Correctors')
        plt.grid('on')
        if seaborn:
            sns.despine()

        plt.figure('Orbits + kick')
        plt.plot(pos, acos_opt, '-g')
        plt.axvline(pos[kick_idx], -2, 2)
        plt.ylabel('Distance to ref. orbit [in m]')
        plt.xlabel('Position [in m]')
        plt.grid('on')
        if seaborn:
            sns.despine()

    if pos[kick_idx] == pos_cor[cidx]:
        val = 0.0  # exact hit, so the distance is zero
        text = names[kick_idx] + ' Good job!'
    else:
        val = abs(pos_cor[cidx] - pos[kick_idx])
        if val > 240 / 2:
            val = 240 - val
        text = 'idx {} = {} found, d={}'.format(kick_idx, names[kick_idx], val)
    print(text)

    shouldidx = np.argmin(abs(pos - pos_cor[cidx]))
    print('It should have been idx {} = {}'.format(shouldidx, names[shouldidx]))

    if plotopt:
        for i in plt.get_fignums():
            plt.figure(num=i)
            if seaborn:
                sns.despine()
            plt.grid('on')
            # plt.savefig(str(i)+'.pdf')
        plt.show()

    return val

def optC(vecFile=''):
    """
    A function for optimizing C (the soft margin constant) for PyML
    datasets and SVMs.

    Optimization proceeds by first stepping further inside the range of
    {0.1...1} in increments of 0.1, or outside it by orders of magnitude.
    Max possible C values are 0.01 and 1000; best possible precision is
    to the first decimal place.

    PyML is very verbose, so progress in this optimization is recorded
    in 'optC.log' in the PWD.  This log file is appended to across
    invocations and so may grow without bound.
    """
    import PyML as ml
    import numpy as np
    from time import strftime

    trainData = ml.VectorDataSet(vecFile, labelsColumn=1, idColumn=0)
    log = open('optC.log', 'a')

    bestC = 1
    stepSize = .1  # init w/ reasonable value inside the stop criterion's range
    possibleC = np.array([.1, 1])
    # middle to start; possibleC can span no more than 1 power of 10,
    # otherwise this function will blow up or take an eternity.
    # possibleC must be cast as float; int breaks PyML.

    log.write('\n\n\n**Beginning new optimization.**\n')
    log.write('Dataset: {0}\n'.format(vecFile))
    log.write('First set of possible C values: {0}.\n'.format(possibleC))
    while True:
        log.flush()

        # try all 'possibleC'
        startTime = strftime("%a%d%b%Y_%H:%M:%S")
        s = ml.SVM()
        param = ml.modelSelection.Param(s, 'C', possibleC)
        m = ml.modelSelection.ModelSelector(param)
        m.train(trainData)
        stopTime = strftime("%a%d%b%Y_%H:%M:%S")
        log.write('Start/stop times of last iteration: {0}/{1}\n'.format(startTime, stopTime))

        bestC = m.classifier.C

        # The stop criterion is the level of precision desired.
        if stepSize < .1:
            log.write('SUCCESS. C is {0}\n'.format(bestC))
            break
        # C can not be greater than 1000 or less than .01
        elif bestC > 1000 or bestC < 0.01:
            log.write('WARNING: C is out of range. C is {0}\n'.format(bestC))
            break
        else:
            # Where was the best C in the last iteration?  Use that
            # location to define the next set of possible C values.
            indexC = possibleC.tolist().index(bestC)
            log.write('Best C for last iteration: {0}.\n'.format(bestC))
            if possibleC[indexC] == possibleC.max():
                stepSize = round(possibleC.max())
                possibleC = np.arange(stepSize, (stepSize * 10), stepSize)
                log.write('At max range, new values are: {0}.\n'.format(possibleC))
            elif indexC == 0:
                stepSize = possibleC.min() / 10
                possibleC = np.arange(stepSize, possibleC.min(), stepSize)
                log.write('At min range, new values are: {0}.\n'.format(possibleC))
            else:
                stepSize = stepSize / 10
                possibleC = np.arange(possibleC[indexC - 1], possibleC[indexC + 1], stepSize)
                log.write('Was in range, next values are: {0}.\n'.format(possibleC))

    log.close()
    return bestC

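# Hypothetical usage; the CSV file name is a placeholder for a z-normed
# vector file like those produced by zSparse() above.
best = optC('vec_train_word_svmData.txt')
print('Optimized C: {0}'.format(best))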