import numpy as np
import PyML as ml
# `ger` is a project-specific module providing GERPKernel(); assumed importable.
import ger


def generateKernelMatrix(DataSet):
    """Creates a kernel matrix/gram matrix from an input dataset, which is a
    list of (label, timepoints, patternID, series) examples."""

    n_samples = len(DataSet)
    kernelMatrix = np.empty([n_samples, n_samples], dtype="float")
    PatternIds = np.empty([n_samples, 1], dtype=object)
    Labels = np.empty([n_samples, 1], dtype=object)
    for i in xrange(n_samples):
        (label, tps, pID, A1) = DataSet[i]
        PatternIds[i, 0] = pID
        Labels[i, 0] = label

    for i in xrange(n_samples):
        for j in xrange(n_samples):
            (label1, tps, pID, A1) = DataSet[i]
            (label2, tps, pID, A2) = DataSet[j]
            kernelMatrix[i, j] = ger.GERPKernel(A1, A2)

    # np.concatenate takes a tuple of arrays; axis=1 prepends the id column.
    kernelFileMatrix = np.concatenate((PatternIds, kernelMatrix), axis=1)
    labelMatrix = np.concatenate((PatternIds, Labels), axis=1)

    np.savetxt("labelText.txt", labelMatrix, fmt='%s', delimiter=',')
    np.savetxt("kernelText.txt", kernelFileMatrix, fmt='%s', delimiter=',')

    labels = ml.Labels("labelText.txt")
    kdata = ml.KernelData("kernelText.txt")
    kdata.attachLabels(labels)

    return kdata
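
The returned KernelData plugs straight into a PyML classifier. A minimal
usage sketch, assuming `dataset` is a list of (label, tps, pID, series)
tuples as above; the loader name and the fold count are illustrative, not
part of the original example:

dataset = loadExamples()              # hypothetical loader
kdata = generateKernelMatrix(dataset)
s = ml.SVM()                          # PyML SVM over the precomputed kernel
results = s.stratifiedCV(kdata, 5)    # 5-fold stratified cross-validation
print(results)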
Example #2
import PyML
from numpy import zeros


def prepare_train_set(mgrs):
    assert (len(mgrs) > 0)
    l = PyML.Labels([i.get_param('epoch_name') for i in mgrs])
    train_data = zeros((len(mgrs), len(mgrs[0].get_samples()[0])))
    for i, mgr in enumerate(mgrs):
        # Only the first channel is taken into consideration
        train_data[i, :] = mgr.get_samples()[0]

    train_set = PyML.VectorDataSet(train_data, L=l)
    return train_set
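
A sketch of how the resulting dataset feeds a classifier; `mgrs` stands in
for a list of signal-manager objects exposing get_param('epoch_name') and
get_samples() as above:

train_set = prepare_train_set(mgrs)
s = PyML.SVM()
s.train(train_set)           # train on the labelled vector dataset
print(s.cv(train_set, 5))    # optional 5-fold cross-validation estimate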
Example #3
import numpy as np
import PyML as ml


def vecSplit(vecName='', fracTrain=0.3):
	"""
	Splits a vector/csv formatted SVMLIB file into training and test sets
	according to fracTrain and writes out the resulting files. Existing
	files are overwritten.

	- numTest + numTrain should equal the number of lines in vecName
	  minus the one line header.
	- Returns 'Done.'

	[08/16/2011]: a major change - instead of taking numTest and numTrain
	directly, fracTrain (the fraction of trials destined for training)
	was added as an invocation arg. numTrain/numTest are now discovered.

	An unmodified (commented out) version of the old function was left in
	the source.
	"""

	## Calc numbers of features for training and testing data.
	## Note: the split is over PyML *features* (columns), via
	## eliminateFeatures() below.
	vecData = ml.VectorDataSet(vecName, labelsColumn=1, idColumn=0)
	numTrain = int(vecData.numFeatures * fracTrain)
	numTest = vecData.numFeatures - numTrain

	## Create filenames for the train and test data that will
	## be written soon. Remove 'vec' from vecName so more
	## informative, less redundant names can be created.
	tmpName = str.split(vecName, 'vec')
	trainName = 'vec_train_{0}{1}'.format(numTrain, tmpName[-1])
	testName = 'vec_test_{0}{1}'.format(numTest, tmpName[-1])

	## Randomly assign each feature to either
	## training (1) or testing (2).
	sampler = np.asarray([1] * numTrain + [2] * numTest)
	np.random.shuffle(sampler)

	## Create indices from 'sampler'
	featureIndex = np.arange(len(sampler))
	invertTrainIndex = featureIndex[sampler == 2]
	invertTestIndex = featureIndex[sampler == 1]

	print('trainIndex: {0}'.format(invertTrainIndex))
	print('testIndex: {0}'.format(invertTestIndex))

	## Use trainIndex or testIndex to eliminate features;
	## copy-construct vecData first as eliminateFeatures()
	## operates in place.
	trainData = ml.VectorDataSet(vecData)
	trainData.eliminateFeatures(invertTrainIndex.tolist())
	trainData.save(trainName)

	testData = ml.VectorDataSet(vecData)
	testData.eliminateFeatures(invertTestIndex.tolist())
	testData.save(testName)

	return 'Done.'
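
Typical invocation, assuming 'vec_mydata.txt' is a CSV/vector formatted
PyML file with an id column and a label column (the filename is
illustrative):

vecSplit('vec_mydata.txt', fracTrain=0.3)
# writes files named like 'vec_train_30_mydata.txt' and
# 'vec_test_70_mydata.txt', with the counts discovered from the data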
Example #4
    def on_train_clicked(self, widget):
        # FOR NOW, only SVM is supported
        if self.sClassifier == "SVM":
            filen = self.getFilenameToRead("Open training data", filter='mat')
            if filen is not None:
                data = ml.VectorDataSet(filen, labelsColumn=0)
                self.clssfr = ml.SVM()
                self.clssfr.train(data)
                # train finished. need to update button status
                self.setDisabledBtns()
                self.showMessage("Training SVM is done.")
        else:
            self.showMessage("The classifier is not supported yet!")
Example #6
    def on_detect_clicked(self, widget):
        if self.clssfr is not None:
            filen = self.getFilenameToRead("Open hurricane data",
                                           filter='mdat')
            if filen is not None:
                fname = os.path.basename(filen)
                key, ext = os.path.splitext(fname)
                if ext == '.dat':
                    key = key[1:]  # take 'g' out

                    #testData = gdtool.createMat(self.datapath, self.trackpath, key, key)
                    #result = self.clssfr.test(ml.VectorDataSet(testData,labelsColumn=0))
                    tmpfn = 'f__tmpDetected__'
                    if os.path.exists(tmpfn): os.unlink(tmpfn)
                    # for DEMO, undersample the normal data -- without undersampling there are too many candidates
                    gdtool.createMat(self.datapath,
                                     self.trackpath,
                                     key,
                                     key,
                                     store=tmpfn,
                                     undersample=True,
                                     genkeyf=True)
                    bneedDel = True
                else:
                    tmpfn = fname
                    bneedDel = False
                result = self.clssfr.test(
                    ml.VectorDataSet(tmpfn, labelsColumn=0))

                gdkeyfilen = ''.join([tmpfn, '.keys'])
                with open(gdkeyfilen, 'r') as f:
                    gridkeys = pickle.load(f)
                    self.stormlocs = pickle.load(f)
                predicted = result.getPredictedLabels()
                predicted = np.array(map(float, predicted))
                self.detected = np.array(gridkeys)[predicted == 1]
                if bneedDel:
                    os.unlink(tmpfn)
                    os.unlink(gdkeyfilen)

                snstroms = str(self.stormlocs.shape[0])
                sndetected = str(self.detected.shape[0])
                self.chkHurricane.set_label(snstroms + " Hurricanes")
                self.chkDetected.set_label(sndetected + " Detected")

                self.showMessage(''.join([
                    sndetected, "/", snstroms,
                    " grids are predicted to have hurricane."
                ]))
                if False:
                    with open('demo.detected', 'w') as f:
                        pickle.dump(self.detected, f)
                        pickle.dump(self.stormlocs, f)

                # test data tested. update buttons
                self.setDisabledBtns()
                self.redraw()
        else:
            self.showMessage("There is no trained classifier!")
Example #7
def runSVM(trainF, testF):
    """
    [6/27/2011]: This script serves little purpose as it takes too long
    to iterate over 0-10,000 to optimize C automatically.
    It is more efficiently done by hand (for now at least).

    Go!
    """
    import PyML as ml
    import numpy as np
    znorm = ml.Standardizer()

    # Init the SVM
    s = ml.SVM()
    print(s)

    # Reformat the data to csv (aka Vector) so feature based
    # normalization can occur, then normalize the train and
    # test data
    test = ml.SparseDataSet(testF)
    vectestF = 'vec_' + testF
    test.save(vectestF, format='csv')
    vecTest = ml.VectorDataSet(vectestF, labelsColumn=1, idColumn=0)
    znorm.train(vecTest)

    trainedSVM = None
    # Returns a trained SVM
    if isinstance(trainF, str):
        train = ml.SparseDataSet(trainF)
        vectrainF = 'vec_' + trainF
        train.save(vectrainF, format='csv')
        vecTrain = ml.VectorDataSet(vectrainF, labelsColumn=1, idColumn=0)
        znorm.train(vecTrain)

        # Optimize C (C must be positive, so the grid starts at 0.5)
        param = ml.modelSelection.Param(s, 'C', list(np.arange(0.5, 10000, .5)))
        m = ml.modelSelection.ModelSelector(param)
        m.train(vecTrain)  # selects the best C and retrains
        trainedSVM = m.classifier
        trainedSVM.save('svm_' + testF)

        # 10-fold stratified CV estimate on the training data
        cross = trainedSVM.stratifiedCV(vecTrain, 10)
        print(cross)
    else:
        trainedSVM = trainF

    results = trainedSVM.test(vecTest)

    return results
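
Sketch of a call, assuming sparse SVMLIB files 'train.txt' and 'test.txt'
exist in the working directory (the names are illustrative):

results = runSVM('train.txt', 'test.txt')
print(results)   # PyML results object (accuracy, ROC, etc.)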
Example #8
def generateKernelMatrix(TestData,
                         TrainingData,
                         sigma=10,
                         lam=math.pow(10, -3),
                         nu=0.5,
                         kernel="twed"):
    """Creates a kernel matrix/gram matrix from an input dataset, which is a list of examples"""

    n_samplesTest = len(TestData)
    n_samplesTrain = len(TrainingData)
    kernelMatrix = np.empty([n_samplesTest, n_samplesTrain], dtype="float")
    PatternIds = np.empty([n_samplesTest, 1], dtype=object)
    Labels = np.empty([n_samplesTest, 1], dtype=object)
    for i in xrange(n_samplesTest):
        (label, tps, pID, A1) = TestData[i]
        PatternIds[i, 0] = pID
        Labels[i, 0] = label

    for i in xrange(n_samplesTest):
        for j in xrange(n_samplesTrain):
            (label1, tps, pID, A1) = TestData[i]
            (label2, tps, pID, A2) = TrainingData[j]
            if (kernel == "gerp"):
                kernelMatrix[i, j] = ger.GERPKernel(A1, A2, sigma)
            elif (kernel == "twed"):
                kernelMatrix[i, j] = twed.TwedKernel(TestData[i],
                                                     TrainingData[j],
                                                     lam, nu, sigma)

    kernelFileMatrix = np.concatenate((PatternIds, kernelMatrix), axis=1)
    labelMatrix = np.concatenate((PatternIds, Labels), axis=1)

    np.savetxt("labelText.txt", labelMatrix, fmt='%s', delimiter=',')
    np.savetxt("kernelText.txt", kernelFileMatrix, fmt="%s", delimiter=',')

    labels = ml.Labels("labelText.txt")
    kdata = ml.KernelData("kernelText.txt")
    kdata.attachLabels(labels)
    return kdata
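
A sketch of how this train/test variant might be driven; the dataset
variables are assumptions, and whether a rectangular test-vs-train
KernelData can be passed to PyML's SVM at test time depends on the PyML
version, so treat this as a shape illustration rather than a verified
recipe:

ktrain = generateKernelMatrix(TrainingData, TrainingData, kernel="twed")
s = ml.SVM()
s.train(ktrain)                  # square train-vs-train Gram matrix
ktest = generateKernelMatrix(TestData, TrainingData, kernel="twed")
results = s.test(ktest)          # rectangular test-vs-train Gram matrix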
Example #9
def createVectorDataSet(A):
    """Converts a traing example list to a vector dataset"""

    labels = []
    patterns = []
    X = []
    for (label, tps, pID, B) in A:
        labels.append(label)
        patterns.append(pID)
        X.append(B)
    data = ml.VectorDataSet(X, L=labels, patternID=patterns)
    return data
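
Minimal usage sketch; `examples` is a hypothetical list of
(label, tps, pID, series) tuples in the convention used throughout:

data = createVectorDataSet(examples)
s = ml.SVM()
print(s.stratifiedCV(data, 5))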
Example #10
def zSparse(fname):
	"""
	Converts a sparse formatted SVMLIB data file to Vector/CSV format,
	z-normalizes it on a feature basis, and writes out that file
	as 'vec_' + fname.
	"""

	znorm = ml.Standardizer()

	sparse = ml.SparseDataSet(fname)
	sparse.save('temp',format='csv')
	
	vec = ml.VectorDataSet('temp',labelsColumn=1,idColumn=0)
	znorm.train(vec)

	vecName = 'vec_' + fname
	
	# Verbal overwrite of priors
	if os.path.exists(vecName):
		print('Overwriting {0}.'.format(vecName))
		os.remove(vecName)

	vec.save(vecName)
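
Example call, assuming 'mydata.txt' is a sparse SVMLIB file (the name is
illustrative):

zSparse('mydata.txt')   # writes the z-normalized 'vec_mydata.txt'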
Example #11
    def test_normalize(self):
        if not self.TEST_NORMALIZE:
            return
        mgr = get_fake_manager(3, 10)
        e = ch.Normalize(norm=2)
        new_mgr = e.process([mgr])[0]

        import PyML
        data = PyML.VectorDataSet(mgr.get_samples())
        data.normalize(2)
        for i in range(3):
            for j in range(10):
                self.assertAlmostEqual(new_mgr.get_samples()[i, j],
                                       data.getMatrix()[i, j])

        LOGGER.info("Normalize tested!")
Example #12
def _p300_verify_svm_one_fold(t_train_mgrs, nt_train_mgrs, t_test_mgrs,
                              nt_test_mgrs, non_target_per_target, C, Cmode,
                              kernel):

    assert (len(t_test_mgrs) * non_target_per_target == len(nt_test_mgrs))
    s = PyML.svm.SVM(C=C, Cmode=Cmode, arg=kernel)

    # Train classifier on a train set...
    train_data = t_train_mgrs + nt_train_mgrs
    train_vect = zeros((len(train_data), len(train_data[0].get_samples()[0])))
    train_labels_vect = []
    for i, mgr in enumerate(train_data):
        train_labels_vect.append(mgr.get_param('epoch_name'))
        train_vect[i, :] = mgr.get_samples()[0]
    s.train(PyML.VectorDataSet(train_vect, L=PyML.Labels(train_labels_vect)))

    # test classifier on a test set
    # grab two elements of target test set and 2*non_target_per_target elements
    # of non-target test set ....
    succ = 0
    fail = 0
    i = 0
    while i + 1 < len(t_test_mgrs):
        t1 = t_test_mgrs[i]
        ns1 = nt_test_mgrs[i * non_target_per_target:(i + 1) *
                           non_target_per_target]
        whatever, t1_value = s.classify(PyML.VectorDataSet(t1.get_samples()),
                                        0)
        ns1_value = max([
            s.classify(PyML.VectorDataSet(n1.get_samples()), 0)[1]
            for n1 in ns1
        ])

        t2 = t_test_mgrs[i + 1]
        ns2 = nt_test_mgrs[(i + 1) * non_target_per_target:(i + 2) *
                           non_target_per_target]
        whatever, t2_value = s.classify(PyML.VectorDataSet(t2.get_samples()),
                                        0)
        ns2_value = max([
            s.classify(PyML.VectorDataSet(n2.get_samples()), 0)[1]
            for n2 in ns2
        ])

        # Check if the decision was good ...
        if t1_value > ns1_value and t2_value > ns2_value:
            succ += 1
        else:
            fail += 1
        i += 2
    return succ, fail
Example #13
    def on_load_clicked(self, widget):
        filen = self.getFilenameToRead("Load Classifier", filter='svm')
        if filen is not None:
            #db = shelve.open(filen)
            #if db.has_key('clssfr'):
            #    self.clssfr = db['clssfr']
            #else:
            #    self.showMessage("Cannot find a classifier!")
            #db.close()
            #with open(filen, 'wb') as f:
            #    self.clssfr = pickle.load(f)

            datfn = self.getFilenameToRead("Open Training Data", filter='mat')
            if datfn is not None:
                data = ml.VectorDataSet(datfn, labelsColumn=0)
                self.clssfr = loadSVM(filen,
                                      data)  ## Why do I need to feed data ???

            #self.clssfr = loadSVM(filen,None) ## edited PyML for this

            # classifier has been loaded. need to update button status
            self.setDisabledBtns()
            self.showMessage("The classifier has been loaded!")
Example #14
def make_sfr_radius_mass_plots(msF,merF,rfiter=3):

    for sk,fk in zip(snap_keys,fil_keys):

        # NOTE: 'pcd' is unpacked twice here; the second binding overwrites the first.
        parameters, pcd, pc, pcd = make_pc_dict(msF,sk,fk)
        pc1 = pc.X[:,0].flatten()
        pc2 = pc.X[:,1].flatten()
        pc3 = pc.X[:,2].flatten()
        pc4 = pc.X[:,3].flatten()
        pc5 = pc.X[:,4].flatten()
        pc6 = pc.X[:,5].flatten()
        pc7 = pc.X[:,6].flatten()
        PCs=pandas.DataFrame(pc.X)
        
        asym = get_all_morph_val(msF,sk,fk,'ASYM')
        gini = get_all_morph_val(msF,sk,fk,'GINI')
        m20 = get_all_morph_val(msF,sk,fk,'M20')
        cc = get_all_morph_val(msF,sk,fk,'CC')
        Mstat = get_all_morph_val(msF,sk,fk,'MID1_MPRIME')
        Istat = get_all_morph_val(msF,sk,fk,'MID1_ISTAT')
        Dstat = get_all_morph_val(msF,sk,fk,'MID1_DSTAT')

        sfid = get_all_snap_val(msF,sk,'SubfindID')
        
        S_GM20 = SGM20(gini,m20)
        F_GM20 = FGM20(gini,m20)
        
        
        latest_NumMajorMergersLastGyr = get_mergerinfo_val(merF,sk,'latest_NumMajorMergersLastGyr')
        boolean_merger1 = latest_NumMajorMergersLastGyr >= 1.0

        this_NumMajorMergersLastGyr = get_mergerinfo_val(merF,sk,'this_NumMajorMergersLastGyr')
        boolean_merger2 = this_NumMajorMergersLastGyr >= 1.0
        

        mhalo = get_all_snap_val(msF,sk,'Mhalo_Msun')
        mstar = get_all_snap_val(msF,sk,'Mstar_Msun')
        log_mstar_mhalo = np.log10( mstar/mhalo )

        redshift = msF['nonparmorphs'][sk][fk]['CAMERA0']['REDSHIFT'].value[0]
        
        #set up RF data frame above, run or save input/output for each loop iteration

        rf_dict = {}
        PARAMS_MOD=True
        PARAMS_ONLY=False
        PCS_ONLY=False
        RUN_RF=True
        RF_ITER=rfiter
        rf_masscut = 10.0**(10.5)

        
        if PCS_ONLY is True:
            gi = np.where(np.isfinite(pc1)*np.isfinite(pc2)*np.isfinite(pc3)*np.isfinite(pc4)*np.isfinite(pc5)*np.isfinite(pc6)*np.isfinite(pc7)*(mstar >= rf_masscut) != 0)[0]
            print(gi.shape, pc1.shape)
            rf_dict['pc1']=pc1[gi]
            rf_dict['pc2']=pc2[gi]
            rf_dict['pc3']=pc3[gi]
            rf_dict['pc4']=pc4[gi]
            rf_dict['pc5']=pc5[gi]
            rf_dict['pc6']=pc6[gi]
            rf_dict['pc7']=pc7[gi]
            rf_dict['mergerFlag']=boolean_merger1[gi]
            rf_dict['SubfindID']=sfid[gi]

            cols=['pc1','pc2','pc3','pc4','pc5','pc6','pc7']
            rflabel='pcs'
            
        if PARAMS_ONLY is True:
            gi = np.where(np.isfinite(gini)*np.isfinite(m20)*np.isfinite(asym)*np.isfinite(Mstat)*np.isfinite(Istat)*np.isfinite(Dstat)*np.isfinite(cc)*(mstar >= rf_masscut) != 0)[0]
            print(gi.shape, pc1.shape)
            rf_dict['gini']=gini[gi]
            rf_dict['m20']=m20[gi]
            rf_dict['asym']=asym[gi]
            rf_dict['Mstat']=Mstat[gi]
            rf_dict['Istat']=Istat[gi]
            rf_dict['Dstat']=Dstat[gi]
            rf_dict['cc']=cc[gi]
            rf_dict['mergerFlag']=boolean_merger1[gi]
            rf_dict['SubfindID']=sfid[gi]

            cols=['gini','m20','asym','Mstat','Istat','Dstat','cc']
            rflabel='params'
            
        if PARAMS_MOD is True:
            gi = np.where(np.isfinite(S_GM20)*np.isfinite(F_GM20)*np.isfinite(asym)*np.isfinite(Mstat)*np.isfinite(Istat)*np.isfinite(Dstat)*np.isfinite(cc)*(mstar >= rf_masscut) != 0)[0]
            print(gi.shape, pc1.shape)
            rf_dict['dGM20']=S_GM20[gi]
            rf_dict['fGM20']=F_GM20[gi]
            rf_dict['asym']=asym[gi]
            rf_dict['Mstat']=Mstat[gi]
            rf_dict['Istat']=Istat[gi]
            rf_dict['Dstat']=Dstat[gi]
            rf_dict['cc']=cc[gi]
            rf_dict['mergerFlag']=boolean_merger1[gi]
            rf_dict['SubfindID']=sfid[gi]

            cols=['dGM20','fGM20','asym','Mstat','Istat','Dstat','cc']
            rflabel='paramsmod'

            
        if RUN_RF is True:
            if redshift < 4.2:
            
                df=pandas.DataFrame(rf_dict)
            
                print("Running Random Forest... ", sk, fk)
                result, labels, label_probability = PyML.randomForestMC(df,iterations=RF_ITER,cols=cols)
                #result = summary statistics, feature importances (N iterations x N statistics/importances)
                #labels = labels following random forest (N galaxies x N iterations)
                #label_probability = probability of label following random forest (N galaxies x N iterations)

                #saves the output as a file
                if not os.path.lexists('rfoutput'):
                    os.mkdir('rfoutput')


                labels['mergerFlag']=df['mergerFlag']
                label_probability['mergerFlag']=df['mergerFlag']
                labels['SubfindID']=df['SubfindID']
                label_probability['SubfindID']=df['SubfindID']

                
                df.to_pickle('rfoutput/'+rflabel+'_data_cut_{}_{}.pkl'.format(sk,fk))
                result.to_pickle('rfoutput/'+rflabel+'_result_cut_{}_{}.pkl'.format(sk,fk))
                labels.to_pickle('rfoutput/'+rflabel+'_labels_cut_{}_{}.pkl'.format(sk,fk))
                label_probability.to_pickle('rfoutput/'+rflabel+'_label_probability_cut_{}_{}.pkl'.format(sk,fk))
                PCs.to_pickle('rfoutput/'+rflabel+'_pc_cut_{}_{}.pkl'.format(sk,fk))


        
        bins=18

        xlim=[9.7,12.2]
        ylim=[-2.0,3.0]
        rlim=[0.1,1.7]
        
        
        plot_filen = 'pc1/sfr_radius_mass_'+sk+'_'+fk+'_pc1.pdf'
        if not os.path.lexists('pc1'):
            os.mkdir('pc1')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=pc1,vmin=-2,vmax=3,bins=bins)
        gth.make_colorbar(colorobj,title='PC1 morphology',ticks=[-2,-1,0,1,2,3])

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)


        
        plot_filen = 'pc3/sfr_radius_mass_'+sk+'_'+fk+'_pc3.pdf'
        if not os.path.lexists('pc3'):
            os.mkdir('pc3')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=pc3,vmin=-1,vmax=3,bins=bins)
        gth.make_colorbar(colorobj,title='PC3 morphology',ticks=[-1,0,1,2,3])

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)
    

        
        plot_filen = 'asym/sfr_radius_mass_'+sk+'_'+fk+'_asym.pdf'
        if not os.path.lexists('asym'):
            os.mkdir('asym')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=asym,vmin=0.0,vmax=0.4,bins=bins)
        gth.make_colorbar(colorobj,title='Asymmetry',ticks=[0.0,0.20,0.40])

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)


        
        plot_filen = 'merger1/sfr_radius_mass_'+sk+'_'+fk+'_merger1.pdf'
        if not os.path.lexists('merger1'):
            os.mkdir('merger1')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=boolean_merger1,min_bin=3,gridf='fraction_grid',vmin=0.0,vmax=0.5,bins=bins)
        gth.make_colorbar(colorobj,title='fraction major merger',ticks=[0.0,0.25,0.50],format='%.2f')

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)

        
        plot_filen = 'merger3/sfr_radius_mass_'+sk+'_'+fk+'_merger3.pdf'
        if not os.path.lexists('merger3'):
            os.mkdir('merger3')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=boolean_merger1,min_bin=3,gridf='normed_proportion_grid',vmin=0.0,vmax=1.0,bins=bins)
        gth.make_colorbar(colorobj,title='proportion of major mergers',ticks=[0.0,0.5,1.0],format='%.2f')

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)



        plot_filen = 'mstar_mhalo/sfr_radius_mass_'+sk+'_'+fk+'_mstar_mhalo.pdf'
        if not os.path.lexists('mstar_mhalo'):
            os.mkdir('mstar_mhalo')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=log_mstar_mhalo,min_bin=3,gridf='median_grid',vmin=-2.0,vmax=-0.5,bins=bins)
        gth.make_colorbar(colorobj,title='median $log_{10} M_*/M_{h}$',ticks=[-2,-1.5,-1,-0.5])

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)




        plot_filen = 'mhalo/sfr_radius_mass_'+sk+'_'+fk+'_mhalo.pdf'
        if not os.path.lexists('mhalo'):
            os.mkdir('mhalo')
        
        f1 = pyplot.figure(figsize=(3.5,5.0), dpi=300)
        pyplot.subplots_adjust(left=0.15, right=0.98, bottom=0.08, top=0.88,wspace=0.0,hspace=0.0)
        colorobj = plot_sfr_radius_mass(msF,merF,sk,fk,f1,xlim=xlim,ylim=ylim,rlim=rlim,Cval=np.log10(mhalo),min_bin=3,gridf='median_grid',vmin=11.5,vmax=14.0,bins=bins)
        gth.make_colorbar(colorobj,title='median $log_{10} M_{h}$',ticks=[11.5,12.0,13.0,14.0])

        f1.savefig(plot_filen,dpi=300)
        pyplot.close(f1)

        
        
    return locals()
Example #15
#AXIS = 'x'
AXIS = 'y'

# Hardcoded constants
tuneX = 17.8509864542659
tuneY = 6.74232980750181
ref_freq = 10  # 9.979248046875
fs = 150  # Hz

if __name__ == '__main__':
    if len(sys.argv) > 1:
        ref_freq = float(sys.argv[1])

    plt.close('all')
    pml = PyML.PyML()
    pml.setao(pml.loadFromExtern('../external/bessyIIinit.py', 'ao'))
#    pml.loadBPMOffsets('/opt/OPI/MapperApplications/conf/Orbit/SR/RefOrbit.Dat')

    active_bpmsx = pml.getActiveIdx('BPMx')
    active_bpmsy = pml.getActiveIdx('BPMy')
    active_cmsx = pml.getActiveIdx('HCM')
    active_cmsy = pml.getActiveIdx('VCM')
    sx = pml.getfamilydata('BPMx', 'Pos')
    sy = pml.getfamilydata('BPMy', 'Pos')
    cx = pml.getfamilydata('HCM', 'Pos')
    cy = pml.getfamilydata('VCM', 'Pos')

    namesX = pml.getfamilydata('BPMx', 'CommonNames')
    namesY = pml.getfamilydata('BPMy', 'CommonNames')
Example #16
# [550,600,650,700] -- C: 550
# [510,520,530,540] -- C: 520
# [515,516,517,518,519,520,521,522,523,524,525] -- C: 524.000000
# possibleC =  list(np.arange(523.6,524.6,.01)) -- C: 523.830000
#

# 'vec_train_scene_TESTSTUxTESTNSU_Wed02Mar2011_20-42-01rPRC_block_svmData.txt'
# [0.01,1,10,100,500,1000] -- C: 1
# [0.1,.2,.3,2,3,4,5,6] -- C: 0.3
# [0.2,.3,.4,.5,.6,.7] -- C: 0.4
# list(np.arange(.3,.5,.01)) -- C: 0.31000 # CV RUNNING

trainFile = 'vec_train_word_TESTSTUxTESTNSU_Wed02Mar2011_20-42-01rPRC_block_svmData.txt'

#possibleC = [515,516,517,518,519,520,521,522,523,524,525]
possibleC = list(np.arange(523.6, 524.6, .01))
trainData = ml.VectorDataSet(trainFile, labelsColumn=1, idColumn=0)
# assumes data is csv and znormed

startTime = strftime("%a%d%b%Y_%H:%M:%S")

s = ml.SVM()
param = ml.modelSelection.Param(s, 'C', possibleC)
m = ml.modelSelection.ModelSelector(param)
m.train(trainData)

stopTime = strftime("%a%d%b%Y_%H:%M:%S")

print(startTime, stopTime)
print(m)
Example #17
def art_main(cidx, ref_freq, plotopt=True):
    print('I set cidx to {}'.format(cidx))
    plt.close('all')
    pml = PyML.PyML()
    pml.setao(pml.loadFromExtern('../../PyML/config/bessyIIinit.py', 'ao'))

    active_bpmsx = pml.getActiveIdx('BPMx')
    active_bpmsy = pml.getActiveIdx('BPMy')

    sx = pml.getfamilydata('BPMx', 'Pos')
    sy = pml.getfamilydata('BPMy', 'Pos')

    cx = pml.getfamilydata('HCM', 'Pos')[pml.getActiveIdx('HCM')]
    cy = pml.getfamilydata('VCM', 'Pos')[pml.getActiveIdx('VCM')]

    namesX = pml.getfamilydata('BPMx', 'CommonNames')
    namesY = pml.getfamilydata('BPMy', 'CommonNames')

    Smat_xx, Smat_yy = sktools.io.load_Smat(SMAT_FILE)
    Smat_xx = Smat_xx[active_bpmsx, :]
    Smat_yy = Smat_yy[active_bpmsy, :]

    phases_mat = scipy.io.loadmat(PHASE_FILE)
    phaseX = phases_mat['PhaseX'][:, 0]
    phaseY = phases_mat['PhaseZ'][:, 0]

    if AXIS == 'y':
        pos_cor = cy
        pos = sy[active_bpmsy]
        Smat = Smat_yy
        phase = phaseY
        tune = tuneY
        names = namesY[active_bpmsy]
    elif AXIS == 'x':
        pos_cor = cx
        pos = sx[active_bpmsx]
        Smat = Smat_xx
        phase = phaseX
        tune = tuneX
        names = namesX[active_bpmsx]

    S_inv = sktools.maths.inverse_with_svd(Smat, 32)

    t = np.arange(Fs * tmax) / Fs
    corr = np.zeros((Smat.shape[1], t.size))
    corr[cidx, :] = np.sin(2 * np.pi * ref_freq * t +
                           np.random.random() * 2 * np.pi)
    values = Smat.dot(corr)

    acos, asin = sktools.maths.extract_sin_cos(values, Fs, ref_freq)

    step_size = 0.1
    acos_opt, asin_opt, _ = sktools.maths.optimize_rotation(
        acos, asin, step_size)
    phase_kick, coeff = skcore.get_kick(np.array(acos_opt), phase, tune,
                                        plotopt, plotopt)
    kick_idx = np.argmin(abs(phase - phase_kick))
    r1 = S_inv.dot(acos_opt)

    if plotopt:
        plt.figure('CMs')
        plt.plot(pos_cor, r1)
        plt.ylabel('Amplitude of correction')
        plt.xlabel('Position [in m]')
        plt.title('Correctors')
        plt.grid('on')
        if seaborn:
            sns.despine()

        plt.figure('Orbits + kick')
        plt.plot(pos, acos_opt, '-g')
        plt.axvline(pos[kick_idx], -2, 2)
        plt.ylabel('Distance to ref. orbit [in m]')
        plt.xlabel('Position [in m]')
        plt.grid('on')
        if seaborn:
            sns.despine()

    if pos[kick_idx] == pos_cor[cidx]:
        val = 0.0  # exact hit; keep 'val' defined for the return below
        text = names[kick_idx] + ' Good job!'
    else:
        val = abs(pos_cor[cidx] - pos[kick_idx])
        if val > 240 / 2:
            val = 240 - val
        text = 'idx {} = {} found, d={}'.format(kick_idx, names[kick_idx], val)
    print(text)
    shouldidx = np.argmin(abs(pos - pos_cor[cidx]))
    print('It should have been idx {} = {}'.format(shouldidx,
                                                   names[shouldidx]))
    if plotopt:
        for i in plt.get_fignums():
            plt.figure(num=i)
            if seaborn:
                sns.despine()
            plt.grid('on')
            # plt.savefig(str(i) + '.pdf')
        plt.show()
    return val
Example #18
def optC(vecFile=''):
	"""
	A function for optimizing C (the soft margin constant)
	for PyML datasets and SVMs.  Optimization proceeds by
	first stepping further inside the range of
	{0.1...1} in increments of 0.1 or outside it, by
	orders of magnitude.

	Max possible C values are 0.01 and 1000; best possible
	precision is to the first decimal place.

	PyML is very verbose, so progress in this optimization
	is recorded in 'optC.log' in the PWD. This log file is
	appended to across invocations and so may grow without bound.
	"""
	import PyML as ml
	import numpy as np
	from time import strftime
	
	trainData = ml.VectorDataSet(vecFile,labelsColumn=1,idColumn=0)
	log = open('optC.log','a')

	bestC = 1
	stepSize = .1 
		## init w reasonable but in the stop criterion's range

	possibleC = np.array([.1,1])
		## middle to start; possibleC can span no more
		## than 1 power of 10 otherwise this function will
		## blow up or take an eternity

		## possibleC must be cast as float; int breaks PyML
	log.write('\n\n\n**Beginning new optimization.**\n')
	log.write('Dataset: {0}\n'.format(vecFile))
	log.write('First set of possible C values: {0}.\n'.format(possibleC))

	while True:
		log.flush()

		## try all 'possibleC'
		startTime = strftime("%a%d%b%Y_%H:%M:%S")
		s = ml.SVM()
		param = ml.modelSelection.Param(s, 'C', possibleC)
		m = ml.modelSelection.ModelSelector(param)
		m.train(trainData)
		stopTime = strftime("%a%d%b%Y_%H:%M:%S")
		
		log.write('Start/stop times last iteration: '
				'{0}/{1}\n'.format(startTime, stopTime))
		
		bestC = m.classifier.C

		## The stop criterion is the 
		## level of precision desired.
		if stepSize < .1:
			log.write('SUCCESS. C is {0}\n'.format(bestC))
			break
		## C can not be greater than 1000
		## or less than .01
		elif bestC > 1000 or bestC < 0.01:
			log.write('WARNING: C is out of range. C is {0}\n'.format(bestC))
			break
		else:
			## Where was best C for last iteration?
			## Use that location to define next set of 
			## possible C values.
			indexC = possibleC.tolist().index(bestC)
			log.write('Best C for last iteration: {0}.\n'.format(bestC))
			
			if possibleC[indexC] == possibleC.max():
				stepSize = round(possibleC.max())
				possibleC = np.arange(stepSize,(stepSize*10),stepSize)
				log.write('At max range, new values are: {0}.\n'.format(possibleC))
			elif indexC == 0:
				stepSize = possibleC.min()/10
				possibleC = np.arange(stepSize,possibleC.min(),stepSize)	
				log.write('At min range, new values are: {0}.\n'.format(possibleC))
			else:
				stepSize = stepSize/10
				possibleC = np.arange(possibleC[indexC-1],possibleC[indexC+1],stepSize)
				log.write('Was in range, next values are: {0}.\n'.format(possibleC))
	log.close()
	return bestC
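
Illustrative call; the filename is hypothetical, and the file must be
CSV/vector formatted with id and label columns, as the function expects:

bestC = optC('vec_train_mydata.txt')
print('Optimized soft-margin constant: {0}'.format(bestC))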