def lda_copycat(confused_words_list, sample_size, ftype, data_dir, mlf_file, hlist_path=None):
    """Gather per-frame features for each confused word and run ``lda`` on them.

    For every word, ``sample_size`` file names containing ``_<word>_`` are
    drawn from ``data_dir`` with ``np.random.choice`` (default settings, so
    the draw is with replacement).  Each sampled phrase is loaded, the frames
    belonging to the word are cut out using the boundaries obtained from
    ``mlf_file``, and every frame becomes one row of the result.

    Args:
        confused_words_list: Words to collect; the word at position ``i``
            is stored under label ``i + 1``.
        sample_size: Number of file names drawn per word.
        ftype: ``'ark'`` loads with ``read_ark_file``; any other value loads
            with ``read_htk_file``.
        data_dir: Directory whose listing provides the candidate files.
        mlf_file: Passed to ``mlf_to_dict`` to obtain the word boundaries.
        hlist_path: Forwarded to ``read_htk_file``.

    Returns:
        A DataFrame with the columns ``label`` and ``data`` (one
        ``np.array`` per frame).  The same frame is passed to ``lda``
        before it is returned.
    """
    boundaries = mlf_to_dict(mlf_file)

    # The directory listing does not depend on the word: read it once.
    filenames = os.listdir(data_dir)

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end.  Appending to the DataFrame per frame copies the whole frame
    # every time, and DataFrame.append no longer exists in pandas >= 2.0.
    rows = []

    for label, word in enumerate(confused_words_list, start=1):
        marker = '_{}_'.format(word)
        word_filenames = np.array([name for name in filenames if marker in name])
        if word_filenames.size == 0:
            # np.random.choice raises on an empty population.
            print('skip {}: no matching files'.format(word))
            continue
        word_filenames_sample = np.random.choice(word_filenames, size=sample_size)

        count = 0
        for fname in word_filenames_sample:
            try:
                phrase = fname.replace('.htk', '')
                # NOTE(review): the 'ark' reader gets the bare file name, not
                # the path joined with data_dir - confirm this is intended.
                if ftype == 'ark':
                    phrase_data = read_ark_file(fname)
                else:
                    phrase_data = read_htk_file(hlist_path, os.path.join(data_dir, fname))
                # Ratio between the end value of the last boundary entry and
                # the number of loaded frames; used to map boundary values
                # to frame indices.
                last_entry = list(boundaries[phrase].items())[-1]
                mult = float(last_entry[1][0][2]) / len(phrase_data)
                word_bounds = boundaries[phrase][word][0]
                first = int(word_bounds[1] / mult)
                last = int(word_bounds[2] / mult)
                frames = [np.array(frame) for frame in phrase_data[first:last]]
            except Exception as exc:
                # Best effort, as before: a phrase that cannot be processed
                # is skipped, but the reason is no longer hidden.
                print('skip {}: {!r}'.format(fname, exc))
                continue

            rows.extend({'label': label, 'data': frame} for frame in frames)
            count += 1
            # Same cap as before: stop once more than 100 phrases were used.
            if count > 100:
                break

    df = pd.DataFrame(rows, columns=['label', 'data'])
    lda(df)

    return df
Exemple #2
0
def train(train_set):
    """Fit the two-class LDA on a ``(positive, negative)`` pair of sample sets.

    Both sets are converted with ``lda.data2mat`` and handed to ``lda.lda``.

    Returns:
        The ``(w, project_points)`` pair produced by ``lda.lda``.
    """
    positives, negatives = train_set

    positive_mat = lda.data2mat(positives)
    negative_mat = lda.data2mat(negatives)

    weights, projections = lda.lda(positive_mat, negative_mat)
    return weights, projections
def train(train_set):
    """Fit the two-class LDA on a ``(positive, negative)`` pair of sample sets.

    Both sets are converted with ``lda.data2mat`` and handed to ``lda.lda``.

    Returns:
        The ``(w, project_points)`` pair produced by ``lda.lda``.
    """
    positives, negatives = train_set

    positive_mat = lda.data2mat(positives)
    negative_mat = lda.data2mat(negatives)

    weights, projections = lda.lda(positive_mat, negative_mat)
    return weights, projections
    def merit(g0):
        """Figure of merit for the green compensation ``g0``.

        Returns the penalised ``etaF_star`` of the LDA solution (multiplied
        by ``Npenalty`` of the atom number when ``'Number'`` was supplied),
        or ``1e4`` when the LDA fails with one of the known "too much green"
        errors.  Any other exception is re-raised.
        """
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # Inside the optimization loop the lda is told to ignore a
            # density distribution that goes beyond the extents; that is
            # checked once the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er, a_s=a_s,
                           globalMu='halfMott', extents=extents,
                           ignoreExtents=True, select='htse')

            etaFstar = penalty(lda0.etaF_star, 5)
            if 'Number' not in kwargs:
                return etaFstar
            return etaFstar * Npenalty(lda0.Number)
        except Exception as e:
            # Each of these is caused by too much green (bottom of the band
            # sloping down, atoms accumulating on the beams, or the chemical
            # potential above the evaporation threshold / the bottom of the
            # band along 100): return a large value to assign a penalty.
            too_much_green = (
                'Bottom of the band has a negative slope',
                'Radial density profile along 111 has a positive slope',
                'Chemical potential exceeds the evaporation threshold',
                'Chemical potential exceeds the bottom of the band along 100',
            )
            for known in too_much_green:
                if known in e.message:
                    return 1e4

            if 'vanish' in e.message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise
    def merit(g0):
        """Figure of merit for the green compensation ``g0``.

        Returns the penalised ``etaF_star`` of the LDA solution (multiplied
        by ``Npenalty`` of the atom number when ``'Number'`` was supplied),
        or ``1e4`` when the LDA fails with one of the known "too much green"
        errors.  Any other exception is re-raised.
        """
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # Inside the optimization loop the lda is told to ignore a
            # density distribution that goes beyond the extents; that is
            # checked once the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er, a_s=a_s,
                           globalMu='halfMott', extents=extents,
                           ignoreExtents=True, select='htse')

            etaFstar = penalty(lda0.etaF_star, 5)
            if 'Number' not in kwargs:
                return etaFstar
            return etaFstar * Npenalty(lda0.Number)
        except Exception as e:
            # Each of these is caused by too much green (bottom of the band
            # sloping down, atoms accumulating on the beams, or the chemical
            # potential above the evaporation threshold / the bottom of the
            # band along 100): return a large value to assign a penalty.
            too_much_green = (
                'Bottom of the band has a negative slope',
                'Radial density profile along 111 has a positive slope',
                'Chemical potential exceeds the evaporation threshold',
                'Chemical potential exceeds the bottom of the band along 100',
            )
            for known in too_much_green:
                if known in e.message:
                    return 1e4

            if 'vanish' in e.message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise
Exemple #6
0
def main():
    """Compare k-NN accuracy on raw, PCA-projected and LDA-projected data.

    Reads the CSV named by ``data_base1``, treats the ``defects`` column as
    the target, projects the data with the project's ``pca`` and ``lda``
    helpers and prints the mean accuracy returned by their ``knn`` methods
    for k in (1, 3, 5), using a 3-split ``StratifiedKFold``.
    """
    # Raw strings: '\D', '\K' and '\C' are not valid escape sequences, so the
    # plain literals only kept their backslashes by accident and are flagged
    # by newer Python versions.  The string values are unchanged.
    data_base1 = r'List03\Databases\KC1.csv'
    data_base2 = r'List03\Databases\CM1.csv'  # alternative data set
    columns_names = "loc,v(g),ev(g),iv(g),n,v,l,d,i,e,b,t,lOCode,lOComment,lOBlank,locCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,defects".split(
        ',')
    df = pd.read_csv(data_base1, names=columns_names)  # change to data_base2 for CM1
    data = df.iloc[:, :-1].copy()  # data without the target column
    target = df['defects']  # target
    class_values = df['defects'].unique()  # distinct class values
    k_components = 3  # components kept by PCA; other values tried: [1,3,5,9,15,20]

    # PCA, LDA instances
    pca_instance = pca.pca(data, target)
    # NOTE(review): the LDA helper receives the full frame (target column
    # included) while PCA receives `data` - confirm this is intended.
    lda_instance = lda.lda(df, target, class_values)

    # PCA ---------------------------------------------------------------------
    cov_matriz = pca_instance.cov_matriz()
    eigenvalues, eigenvectors = pca_instance.get_eigen_value_vector(cov_matriz)
    eigen_vec = pca_instance.get_eigenvecs(eigenvalues, eigenvectors,
                                           k_components)
    pca_instance.normalize()
    new_dataset = pca_instance.change_base(eigen_vec,
                                           pca_instance.normalize_data)

    # LDA ---------------------------------------------------------------------
    mean_vectors = lda_instance.calc_mean_vect()
    data_class = lda_instance.get_data_per_class()
    s_w = lda_instance.calc_sw(mean_vectors, data_class)
    s_b = lda_instance.calc_sb(mean_vectors)
    eig_pairs = lda_instance.get_eigs(s_w, s_b)
    # One component fewer than the number of classes is kept.
    lda_components = lda_instance.get_k_eigenvcs(eig_pairs,
                                                 len(class_values) - 1)
    new_space = pd.DataFrame(lda_instance.transform(lda_components))

    skf = StratifiedKFold(n_splits=3)  # number of folds
    knns = [1, 3, 5]
    print("Components PCA :%.1d" % k_components)
    for j in knns:
        print("KNN = %.1d" % j)
        print("PCA")
        accuracy_pca = pca_instance.knn(new_dataset, j, skf)
        accuracy_without_pca = pca_instance.knn(data, j, skf)
        print("Acurracy with PCA:%.3f " % np.mean(accuracy_pca))
        print("Acurracy without PCA:%.3f\n" % np.mean(accuracy_without_pca))

        print("LDA")
        accuracy_lda = lda_instance.knn(new_space, j, skf)
        accuracy_without_lda = lda_instance.knn(data, j, skf)
        print("Acurracy with LDA:%.3f " % np.mean(accuracy_lda))
        print("Acurracy without LDA:%.3f\n" % np.mean(accuracy_without_lda))
def val_uci(train_x, train_y, test_x, test_y, fs, dim, cls, f):
    """Score a classifier on a test split, optionally after PCA or LDA.

    When ``fs`` is ``'pca'`` or ``'lda'`` the training features are reduced
    to ``dim`` dimensions and the test features are projected with the
    transposed matrix returned by the reducer; any other ``fs`` leaves the
    features untouched.  ``cls(train_x, train_y)`` must return a callable
    that maps test features to predictions.

    The accuracy in percent, formatted as ``'%-8.2f'``, is printed and
    written to ``f``.
    """
    if fs in ('pca', 'lda'):
        if fs == 'pca':
            train_x, basis = pca(train_x, dim)
        else:
            train_x, basis = lda(train_x, train_y, dim)
        test_x = test_x.dot(basis.T)

    classifier = cls(train_x, train_y)
    predicted = classifier(test_x)

    hits = (predicted == test_y).sum()
    acc = '%-8.2f' % (hits / predicted.shape[0] * 100)

    print(acc)
    f.write(acc)
def get_trap_results(**kwargs):
    """
    Evaluate the trap directly for known parameters, without optimizing.

    Keyword arguments and their defaults: ``s0`` (7.), ``wL`` (47.),
    ``wC`` (40.), ``a_s`` (650.), ``T_Er`` (0.2), ``extents`` (40.) and
    ``g0`` (4.304).

    Returns the list ``[g0, EtaEvap, Number, Entropy/Number, radius,
    radius/wL, DeltaEvap]`` taken from the LDA solution.
    """
    lookup = kwargs.get

    s0 = lookup('s0', 7.)
    wL = lookup('wL', 47.)
    wC = lookup('wC', 40.)
    # Not used below; kept so the wL / wC division is still evaluated here.
    alpha = wL / wC
    a_s = lookup('a_s', 650.)
    T_Er = lookup('T_Er', 0.2)
    extents = lookup('extents', 40.)

    gOptimal = lookup('g0', 4.304)

    trap = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    solution = lda.lda(potential=trap, Temperature=T_Er, a_s=a_s,
                       globalMu='halfMott', extents=extents)

    results = [gOptimal, solution.EtaEvap, solution.Number]
    results.append(solution.Entropy / solution.Number)
    results.append(solution.getRadius())
    results.append(solution.getRadius() / wL)
    results.append(solution.DeltaEvap)
    return results
def get_trap_results(**kwargs):
    """
    Evaluate the trap directly for known parameters, without optimizing.

    Keyword arguments and their defaults: ``s0`` (7.), ``wL`` (47.),
    ``wC`` (40.), ``a_s`` (650.), ``T_Er`` (0.2), ``extents`` (40.) and
    ``g0`` (4.304).

    Returns the list ``[g0, EtaEvap, Number, Entropy/Number, radius,
    radius/wL, DeltaEvap]`` taken from the LDA solution.
    """
    lookup = kwargs.get

    s0 = lookup('s0', 7.)
    wL = lookup('wL', 47.)
    wC = lookup('wC', 40.)
    # Not used below; kept so the wL / wC division is still evaluated here.
    alpha = wL / wC
    a_s = lookup('a_s', 650.)
    T_Er = lookup('T_Er', 0.2)
    extents = lookup('extents', 40.)

    gOptimal = lookup('g0', 4.304)

    trap = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    solution = lda.lda(potential=trap, Temperature=T_Er, a_s=a_s,
                       globalMu='halfMott', extents=extents)

    results = [gOptimal, solution.EtaEvap, solution.Number]
    results.append(solution.Entropy / solution.Number)
    results.append(solution.getRadius())
    results.append(solution.getRadius() / wL)
    results.append(solution.DeltaEvap)
    return results
def build_model(lines_num=-1):
    """Run the text pipeline on ``rawdata.csv`` and pickle the LDA result.

    The file is passed through ``tokenize``, ``stop_words``, ``stem`` and
    ``doc_term_matrix``; the resulting corpus and dictionary go to ``lda``
    and its return value is pickled to the file ``outfile``.

    Args:
        lines_num: Forwarded unchanged to ``tokenize``, ``stop_words``,
            ``stem`` and ``lda`` (default ``-1``).
    """
    # The context manager closes rawdata.csv; the handle used to be left open.
    # NOTE(review): assumes tokenize() finishes reading before it returns.
    with open('rawdata.csv', 'r') as dataset:
        [bidList, rawList] = tokenize(dataset, lines_num)

    stopped_result = stop_words(rawList, lines_num)

    stem_result = stem(stopped_result, lines_num)

    [corpus, dictionary] = doc_term_matrix(stem_result)
    # it seems like without stem the words make more sense, still working on it

    ################
    ####  LDA  #####
    ################
    ldaList = lda(corpus, dictionary, lines_num)

    print("load data...")

    with open('outfile', 'wb') as fp:
        pickle.dump(ldaList, fp)
Exemple #11
0
def _write_two_dim_features(path, features, labels, nsamples):
  """Write one ``features[n, 0],features[n, 1],labels[n]`` line per sample."""
  # The context manager closes the file even if a write fails.
  with open(path, 'wt') as out:
    for n in range(nsamples):
      out.write(str(features[n, 0]) + ',' + str(features[n, 1]) + ',' + str(labels[n]) + '\n')


def extract_first_two_dims(d):
  """Dump the first two dimensions of data set ``d`` in three variants.

  ``d`` names a callable in ``dataset.__dict__`` that returns ``(x, y)``.
  Three files are written, each with ``dim0,dim1,label`` lines:

  * ``<d>_ori.feature`` - the original features,
  * ``<d>_pca.feature`` - the output of ``pca.pca(x, 2)``,
  * ``<d>_lda.feature`` - the output of ``lda.lda(x, y, 2)``.
  """
  x, y = dataset.__dict__[d]()

  # Unpacking still requires x to be two-dimensional.
  nsamples, _ = x.shape

  # original features
  _write_two_dim_features(d + '_ori.feature', x, y, nsamples)

  # pca features
  pca_x, _ = pca.pca(x, 2)
  _write_two_dim_features(d + '_pca.feature', pca_x, y, nsamples)

  # lda features
  lda_x, _ = lda.lda(x, y, 2)
  _write_two_dim_features(d + '_lda.feature', lda_x, y, nsamples)
# Script entry point (Python 2 syntax: file() and print statements).
# Builds per-document LDA topic probabilities and splits them 80/20 into
# training and test inputs.
if __name__ == '__main__': 
    # define training set
    # Every line of every file under ./data, right-stripped and split on tabs.
    # The handles opened by file() are never closed explicitly.
    data=[line.rstrip().split('\t') for infile in os.listdir('./data') for line in file('./data/'+infile)]
    tdata=dataformat.trimmed_data(data)
    # timeline is not used in the lines visible here.
    timeline=dataformat.getTimeline(tdata)
    fdata=dataformat.formatted_dataset(tdata)
    print "\n data formatted"
    # First argument of the network below; the same value (2) is passed to
    # generateLDAModel further down - presumably the number of topics,
    # TODO confirm.
    neurons=2
    # create network
    ffn = FeedForwardNetwork(neurons, 3, 2) # using one-per-class coding
    # using lda

    # The first field of every formatted record is what the LDA wrapper
    # receives as its document collection.
    d=[]
    for line in fdata:
     d.append(line[0])
    obj=lda.lda()
    obj.generateCorpusAndDic(d)
    obj.generateLDAModel(2)
    inputs=obj.getDocTopicProb()
    
    print "\n lda done"
    # The first 80% of the LDA outputs are used for training.
    size=int (len(inputs) * .8)
    train_inputs=inputs[:size]
    # Training targets: the second field of the first `size` records.
    train_outputs=[]
    counter=0
    while(counter<size):
     train_outputs.append(fdata[counter][1])
     counter+=1
    
    # The remaining outputs form the test inputs; test_outputs is still
    # empty at the end of the visible code.
    test_inputs=inputs[size:]
    test_outputs=[]
Exemple #13
0
# Load the E. coli data: the first 7 columns are features, the rest labels.
ecoli = np.loadtxt('../9 Unsupervised/shortecoli.data')
labels = ecoli[:,7:]
data = ecoli[:,:7]
# Centre each feature and scale it by its maximum.
data -= np.mean(data,axis=0)
data /= data.max(axis=0)

# list(...) gives shuffle a mutable sequence on Python 3 as well; on
# Python 2 range() already returned a list, so nothing changes there.
order = list(range(np.shape(data)[0]))
np.random.shuffle(order)
data = data[order]
# Apply the same permutation to the labels.  Only the data used to be
# shuffled, so the class indices below and the LDA call saw rows paired
# with the wrong labels.
labels = labels[order]
w0 = np.where(labels==1)
w1 = np.where(labels==2)
w2 = np.where(labels==3)

import lda
newData,w = lda.lda(data,labels,2)

# Figure 1: first two original features, one marker per class.
pl.plot(data[w0,0],data[w0,1],'ok')
pl.plot(data[w1,0],data[w1,1],'^k')
pl.plot(data[w2,0],data[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')
# Figure 2: the same classes after the LDA projection.
pl.figure(2)
pl.plot(newData[w0,0],newData[w0,1],'ok')
pl.plot(newData[w1,0],newData[w1,1],'^k')
pl.plot(newData[w2,0],newData[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')

import pca
x,y,evals,evecs = pca.pca(data,2)
Exemple #14
0
def single_spi(**kwargs):
    """Solve the LDA for each (atom number, muPlus) pair and collect results.

    Recognised keyword arguments are popped from ``kwargs``; whatever is
    left over is forwarded to ``plot_spis``.

    Keyword arguments (defaults in parentheses):
        savedir (None), numlist, mulist, bestForce (-1),
        params_s (7.), params_g (3.666), params_wIR (47.),
        params_wGR (47./1.175), params_aS (300.), params_Tdens (0.6),
        params_Tspi (0.6), extents (30.), spiextents (25.),
        sthextents (30.), entextents (25.), finegrid (False),
        params_figfname (popped only when the inhomogeneity figure is made).

    Returns:
        ``spis[bestForce]``, one of the per-muPlus result dictionaries.
    """
    take = kwargs.pop

    savedir = take('savedir', None)
    numlist = take('numlist', [1.2e5, 1.3e5, 1.4e5, 1.5e5, 1.6e5])
    mulist = take('mulist', [-0.15, -0.075, 0., 0.10, 0.18])
    bestForce = take('bestForce', -1)

    # Potential parameters.
    s = take('params_s', 7.)
    g = take('params_g', 3.666)
    wIR = take('params_wIR', 47.)
    wGR = take('params_wGR', 47./1.175)

    aS = take('params_aS', 300.)
    Tdens = take('params_Tdens', 0.6)
    Tspi = take('params_Tspi', 0.6)

    extents = take('extents', 30.)
    spiextents = take('spiextents', 25.)
    sthextents = take('sthextents', 30.)
    entextents = take('entextents', 25.)
    finegrid = take('finegrid', False)

    bands = scubic.bands3dvec(np.array([[s], [s], [s]]), NBand=0)
    t0 = np.mean((bands[1] - bands[0])/12.)  # tunneling 0 in recoils

    # Temperatures are given in units of t0; convert them to recoils.
    Tdens_Er = Tdens*t0
    Tspi_Er = Tspi*t0  # not used below

    print("========================================")
    print(" Single Spi")
    print(" gr={:0.3f}, aS={:03d}".format(g, int(aS)))
    print(" Tdens={:0.2f}, Tspi={:0.2f}".format(Tdens, Tspi))

    select = 'qmc'
    spis = []

    for tag, muPlus in enumerate(mulist):
        numgoal = numlist[tag]
        print("")
        print("num = %.3g, muPlus = %.3f" % (numgoal, muPlus))
        pot = scubic.sc(allIR=s, allGR=g, allIRw=wIR, allGRw=wGR)

        lda0 = lda.lda(
            potential=pot, Temperature=Tdens_Er, a_s=aS,
            extents=extents,
            Natoms=numgoal, halfMottPlus=muPlus,
            verbose=True,
            select=select,
            ignoreExtents=False, ignoreSlopeErrors=True,
            ignoreMuThreshold=True)

        bulk = lda0.getBulkSpi(Tspi=Tspi, inhomog=True,
                               spiextents=spiextents, sthextents=sthextents,
                               entextents=entextents, do_k111=False)
        (spibulk, spi, sthbulk, sth, r111, n111, U111, t111, mut111,
         entrbulk, entr111,
         lda_num, density111, k111, k111htse_list) = bulk

        if finegrid:
            fine = lda0.getSpiFineGrid(Tspi=Tspi, numpoints=320,
                                       inhomog=True, spiextents=spiextents,
                                       entextents=entextents)
            r111_fine, spi111_fine, n111_fine, k111_fine, mu111_Er = fine
        else:
            r111_fine = spi111_fine = n111_fine = k111_fine = mu111_Er = None

        record = {
            'gr': g,
            'muPlus': muPlus,
            'SpiBulk': spibulk,
            'spi111': spi,
            'SthBulk': sthbulk,
            'sth111': sth,
            'r111': r111,
            'n111': n111,
            'U111': U111,
            'mut111': mut111,
            't111': t111,
            'entrbulk': entrbulk,
            'entr111': entr111,
            'k111': k111,
            'k111htse_list': k111htse_list,
            'Number': lda0.Number,
            'ldanum': lda_num,
            # dens111 is the one obtained from QMC
            'dens111': density111,
            'Tdens': Tdens,
            'Tspi': Tspi,
            'aS': aS,
            'savedir': savedir,
            'r111_fine': r111_fine,
            'spi111_fine': spi111_fine,
            'n111_fine': n111_fine,
            'k111_fine': k111_fine,
            'mu111_Er': mu111_Er,
            'v0111': lda0.pot.S0(lda0.X111, lda0.Y111, lda0.Z111)[0],
        }
        spis.append(record)

        # The inhomogeneity check figure is only made at high temperature.
        if Tspi > 0.85 and Tdens > 0.85:
            checked = lda.CheckInhomog(lda0, closefig=True,
                                       n_ylim=(-0.1, 2.0))
            fig111, binresult, peak_dens, radius1e, peak_t, output = checked

            default_name = 'Inhomog/{:0.3f}gr_{:03d}_{}_T{:0.4f}Er.png'.format(
                g, tag, select, Tspi)
            figfname = savedir + default_name

            # An explicit file name, if given, replaces the default one.
            figfname = take('params_figfname', figfname)

            fig111.savefig(figfname, dpi=300)

    print("")
    print("Atom number = {:5.3g}".format(spis[0]['Number']))
    print("Entropy     = {:0.2f}".format(spis[0]['entrbulk']))

    # The remaining keyword arguments go to the plotting routine.
    plot_spis(spis, bestForce=bestForce, **kwargs)

    return spis[bestForce]
Exemple #15
0
# Accuracy of classifiers.KMeans on PCA-reduced features: one output row
# per reduced dimension (k * 10), one column per value of c.
# The context manager closes the result file even if a step fails.
with open('olhwdb.kmeans.pca.res', 'wt') as f:
  for k in range(1, 11):
    # The projection depends only on k, so it is computed once per row
    # instead of once per value of c.
    train_x, w = pca.pca(train.x, k * 10)
    test_x = test.x.dot(w.T)
    for c in range(1, 4):
      cls = classifiers.KMeans(train_x, train.y, c)
      out = cls(test_x)
      acc = '%-6.2f'%((out == test.y).sum() / out.shape[0] * 100)
      print(acc)
      f.write(acc)
    f.write('\n')
'''

# Accuracy of classifiers.KMeans on LDA-reduced features: one output row
# per reduced dimension (k * 10), one column per value of c.
# The context manager closes the result file even if a step fails.
with open('olhwdb.kmeans.lda.res', 'wt') as f:
    for k in range(1, 11):
        # The projection depends only on k, so it is computed once per row
        # instead of once per value of c.
        train_x, w = lda.lda(train.x, train.y, k * 10)
        test_x = test.x.dot(w.T)
        for c in range(1, 4):
            cls = classifiers.KMeans(train_x, train.y, c)
            out = cls(test_x)
            acc = '%-6.2f' % ((out == test.y).sum() / out.shape[0] * 100)
            print(acc)
            f.write(acc)
        f.write('\n')
def dmu_dr( rpoints, **kwargs ):
    s       = kwargs.pop('params_s', 7.) 
    g       = kwargs.pop('params_g', 3.666)
    wIR     = kwargs.pop('params_wIR', 47.) 
    wGR     = kwargs.pop('params_wGR', 47./1.175) 
    extents = kwargs.pop('params_extents', 31.)
    direc   = '111'
    mu0     = 'halfMott'
    muBrent = kwargs.pop('params_muBrent',(-0.2,0.3)) 
    muBrentShift = kwargs.pop('params_muBrentShift', 0.)



    aS     = kwargs.pop('params_aS', 300.) 
    muPlus = kwargs.pop('params_muPlus', 0.00 )
    Natoms = kwargs.pop('params_Natoms', None)
    

    select = 'nlce'
    #print 
    #print "muPlus = ", muPlus
    pot = scubic.sc(allIR=s, allGR=g, allIRw=wIR, allGRw=wGR)

    Tlist = kwargs.pop('Tlist', [0.036])
    outdict = {} 
    for TT, Tval in enumerate(Tlist):
        print TT,
        sys.stdout.flush()
        logger.warning('working on Tval = {:0.4f}'.format(Tval) )
        if Natoms is None:
            lda0 = lda.lda(potential = pot, Temperature=Tval, a_s=aS, \
                           override_npoints = 240,\
                           extents=extents, \
                           globalMu=mu0, halfMottPlus=muPlus,\
                           verbose=False, \
                           select = select,\
                           ignoreExtents=False, ignoreSlopeErrors=True, \
                           ignoreMuThreshold=True)
        else:
            lda0 = lda.lda(potential = pot, Temperature=Tval, a_s=aS, \
                           override_npoints = 240,\
                           extents=extents, \
                           Natoms = Natoms,\
                           muBrent=muBrent, muBrentShift=muBrentShift,\
                           verbose=False, \
                           select = select,\
                           ignoreExtents=False, ignoreSlopeErrors=True, \
                           ignoreMuThreshold=True)
    
        r111, n111 = lda0.getDensity( lda0.globalMu, lda0.T)  
        localMu_t = lda0.get_localMu_t( lda0.globalMu )
    
        localMu_t_f = extrap1d( interp1d( r111, localMu_t ) )
        dmu_dr = deriv( rpoints, localMu_t_f )
        dmu_dr111 = deriv( r111, localMu_t_f ) 
    
        t0 = lda0.tunneling_111.min()
        # Need to also get the value of T/t0 and the overall S/N 
        _spibulk, _spi, _r111, _n111, _U111, _t111, _entrbulk, _entr111,\
        _lda_num, _density111, _k111, _k111htse_list = \
            lda0.getBulkSpi(Tspi=Tval/t0, inhomog=True, \
               spiextents=extents, entextents=extents, do_k111=False) 
    
        Tdict = {
                   'r111':r111 ,\
                   'n111':n111 ,\
                   'Ut111':lda0.onsite_111 / lda0.tunneling_111 ,\
                   'localMu_t':localMu_t ,\
                   'dmu_dr': dmu_dr ,\
                   'dmu_dr111': dmu_dr111 ,\
                   'num':lda0.Number,\
                   'T/t0': Tval/t0 ,\
                   'S/N':_entrbulk ,\
                          } 
        outdict[ Tval ] = Tdict

    return outdict 
Exemple #17
0
import numpy as np
import os
import matplotlib.pyplot as plt
from util import imread, show_eigenface, show_reconstruction, performance
from pca import pca
from lda import lda

# Script entry point: PCA followed by LDA on the Yale face training images,
# then display of the resulting faces and of reconstructed images.
if __name__ == '__main__':
    filepath = os.path.join('Yale_Face_Database', 'Training')
    # Image height and width handed to imread and to the display helpers.
    H, W = 231, 195
    X, y = imread(filepath, H, W)

    # PCA down to 31 dimensions first; LDA is then run on the PCA projection.
    eigenvalues_pca, eigenvectors_pca, X_mean = pca(X, num_dim=31)
    X_pca = eigenvectors_pca.T @ (X - X_mean)
    eigenvalues_lda, eigenvectors_lda = lda(X_pca, y)

    # Transform matrix: the PCA basis composed with the LDA basis.
    U = eigenvectors_pca @ eigenvectors_lda
    print('U shape: {}'.format(U.shape))

    # show top 25 eigenface
    show_eigenface(U, 25, H, W)

    # reduce dim (projection)
    # NOTE(review): X is projected without subtracting X_mean here, while
    # the PCA step above uses (X - X_mean) and the recovery below adds
    # X_mean back - confirm whether this should be U.T @ (X - X_mean).
    Z = U.T @ X

    # recover
    X_recover = U @ Z + X_mean
    show_reconstruction(X, X_recover, 10, H, W)

    # accuracy (nothing follows in the visible code)
def optimal_FixedRadius(**kwargs):
    """
    Find the green compensation that gives a sample of fixed relative radius.

    For fixed values of s0, wL and wC this searches for the value of green
    that makes the sample radius a fixed fraction of the lattice beam
    waist.  The fraction is hardcoded in the function (0.32).

    Keyword arguments and their defaults: ``s0`` (7.), ``wL`` (47.),
    ``wC`` (40.), ``a_s`` (650.), ``T_Er`` (0.2), ``extents`` (40.).

    Returns the list ``[gOptimal, EtaEvap, Number, Entropy/Number, radius,
    radius/wL]`` for the optimal green compensation.
    """
    fraction = 0.32

    lookup = kwargs.get
    s0 = lookup('s0', 7.)
    wL = lookup('wL', 47.)
    wC = lookup('wC', 40.)
    alpha = wL / wC
    a_s = lookup('a_s', 650.)
    T_Er = lookup('T_Er', 0.2)
    extents = lookup('extents', 40.)

    def merit(g0):
        """Squared distance between the target radius and the LDA radius."""
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # Inside the optimization loop the lda is told to ignore a
            # density distribution that goes beyond the extents; that is
            # checked once the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er, a_s=a_s,
                           globalMu='halfMott', extents=extents,
                           ignoreExtents=True)

            miss = fraction*wL - lda0.getRadius()
            return miss**2.
        except Exception as e:
            # Each of these is caused by too much green (bottom of the band
            # sloping down, atoms accumulating on the beams, or the chemical
            # potential above the evaporation threshold / the bottom of the
            # band along 100): return a large value to assign a penalty.
            too_much_green = (
                'Bottom of the band has a negative slope',
                'Radial density profile along 111 has a positive slope',
                'Chemical potential exceeds the evaporation threshold',
                'Chemical potential exceeds the bottom of the band along 100',
            )
            for known in too_much_green:
                if known in e.message:
                    return 1e6

            if 'vanish' in e.message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    # Search interval for the green compensation.
    upper = min(s0, (4.*s0-2.*np.sqrt(s0))/(4.*(alpha**2.)))
    g0bounds = (1., upper)

    res = minimize_scalar(merit, bounds=g0bounds, tol=1e-4,
                          method='bounded')
    gOptimal = res.x

    # Re-solve at the optimum, this time without ignoring the extents.
    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)

    results = [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number]
    results.append(ldaOpt.Entropy / ldaOpt.Number)
    results.append(ldaOpt.getRadius())
    results.append(ldaOpt.getRadius() / wL)
    return results
def optimal(**kwargs):
    """
    Optimize the evaporation figure of merit over the green compensation.

    For fixed values of s0, wL and wC the figure of merit, eta_F, is
    optimized using the green compensation as the variable.

    Keyword arguments and their defaults: ``s0`` (7.), ``wL`` (47.),
    ``wC`` (40.), ``a_s`` (650.), ``T_Er`` (0.2), ``extents`` (40.).  When
    ``Number`` is given, atom numbers above it are penalised.

    Returns the list ``[gOptimal, EtaEvap, Number, Entropy/Number, radius,
    radius/wL, DeltaEvap]`` for the optimal green compensation.
    """
    lookup = kwargs.get
    s0 = lookup('s0', 7.)
    wL = lookup('wL', 47.)
    wC = lookup('wC', 40.)
    alpha = wL / wC
    a_s = lookup('a_s', 650.)
    T_Er = lookup('T_Er', 0.2)
    extents = lookup('extents', 40.)

    # N0 only exists when a target number was given; Npenalty is only
    # called in that case.
    if 'Number' in kwargs:
        N0 = kwargs['Number']

    def Npenalty(Num):
        """Factor that grows exponentially once ``Num`` exceeds ``N0``."""
        p = 4.
        if Num <= N0:
            return 1.
        return np.exp((Num - N0)/1e5)**p

    def penalty(x, p):
        """
        Penalize EtaF < 1, which amounts to spilling out along the
        lattice beams; values of 1 and above are returned unchanged.
        """
        if x >= 1.:
            return x
        return np.exp(-(x-1.))**p

    def merit(g0):
        """Penalised ``etaF_star`` for the green compensation ``g0``."""
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # Inside the optimization loop the lda is told to ignore a
            # density distribution that goes beyond the extents; that is
            # checked once the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er, a_s=a_s,
                           globalMu='halfMott', extents=extents,
                           ignoreExtents=True, select='htse')

            etaFstar = penalty(lda0.etaF_star, 5)
            if 'Number' not in kwargs:
                return etaFstar
            return etaFstar * Npenalty(lda0.Number)
        except Exception as e:
            # Each of these is caused by too much green (bottom of the band
            # sloping down, atoms accumulating on the beams, or the chemical
            # potential above the evaporation threshold / the bottom of the
            # band along 100): return a large value to assign a penalty.
            too_much_green = (
                'Bottom of the band has a negative slope',
                'Radial density profile along 111 has a positive slope',
                'Chemical potential exceeds the evaporation threshold',
                'Chemical potential exceeds the bottom of the band along 100',
            )
            for known in too_much_green:
                if known in e.message:
                    return 1e4

            if 'vanish' in e.message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    # Search interval for the green compensation.
    g0bounds = (0., min(s0, s0/(alpha**2.)))
    res = minimize_scalar(merit, bounds=g0bounds, tol=4e-2,
                          method='bounded')
    gOptimal = res.x

    # Re-solve at the optimum, this time without ignoring the extents.
    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)

    results = [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number]
    results.append(ldaOpt.Entropy / ldaOpt.Number)
    results.append(ldaOpt.getRadius())
    results.append(ldaOpt.getRadius() / wL)
    results.append(ldaOpt.DeltaEvap)
    return results
# Feature scaling for the cancer data: standardization is disabled,
# normalization is the variant in use.
# standardize
# X_cancer = standardize(X_cancer)

# normalize
X_cancer = normalize(X_cancer)

# y_cancer = y_cancer.reshape(-1,1)

#%%
# Repeated k-fold test on the wine dataset.
import warnings

# Silences every warning from here on.
warnings.filterwarnings("ignore")
# Classifier options: `ld` is built with lda(), `clf` is the
# LinearDiscriminantAnalysis counterpart.  The commented pair below is the
# logistic-regression alternative.
ld = lda()
clf = LinearDiscriminantAnalysis()
# ld = LogReg(learning_rate=0.001)
# clf = LogisticRegression(solver='liblinear',C=1000)

# `score` collects the accuracy of `ld` per fold.  `clf` and `scorec` are
# prepared here but not used within the visible lines.
score = []
scorec = []
# 100 repetitions of the splits produced by kfold_index(5, X_wine).
for i in range(100):
    for train_index, test_index in kfold_index(5, X_wine):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X_wine[train_index], X_wine[test_index]
        y_train, y_test = y_wine[train_index], y_wine[test_index]
        # Fit on the training fold, predict the test fold, record accuracy.
        ld.fit(X_train, y_train)
        y_pred = ld.predict(X_test)
        score.append(evaluate_acc(y_test, y_pred))
Exemple #21
0
def resultBtn_click():
    """Compute the article features from the text widgets and display them.

    Reads the article body from ``textContent`` and the title from
    ``textTitle``, splits both into words (surrounding punctuation trimmed,
    empty tokens dropped) and derives word counts, TextBlob subjectivity and
    polarity values and five LDA values.  Every feature is printed as
    ``name = value`` and all of them, separated by single spaces, are
    inserted at the end of ``resultText``.

    Several features are fixed placeholders rather than measured values:
    the link/video/image counts, the non-stop-word rate, the
    workday/weekend flags and the avg/min/max positive polarity.  The combo
    boxes (``comboImages``, ``comboVideos``, ``comboLinks``, ``comboDay``)
    are not consulted.

    The rate of unique non-stop words is reported as 0.0 when the content
    contains no non-stop words, instead of failing with a
    ZeroDivisionError.  Other helpers (e.g. ``averageWordLength``) are
    called unchanged and keep whatever behaviour they have on empty input.
    """

    def _words(raw):
        # Split on whitespace, trim surrounding punctuation, drop empties.
        trimmed = (token.strip(string.punctuation) for token in raw.split())
        return [token for token in trimmed if token]

    content = _words(str(textContent.get("1.0", 'end-1c')))
    title = _words(str(textTitle.get("1.0", 'end-1c')))

    # Placeholder counts; they are not derived from the widgets.
    numLinks = 5   # Number of Links
    numVideos = 0  # Number of Videos
    numImages = 2  # Number of Images

    countwordsT = countWords(title)       # Number of Words Title
    countwordsC = countWords(content)     # Number of Words Content
    countunique = countUnique(content)    # Number of Unique Words
    nonstopCount = nonStopCount(content)  # Number of non-Stop Words

    # Rate of non-Stop Words (fixed placeholder value).
    rateNonStopWords = 0.999999995192

    # Rate of Unique non-Stop Words; guarded so that text without any
    # non-stop word (e.g. an empty box) does not divide by zero.
    if nonstopCount:
        rateUniqueNonStopWords = nonStopCount(uniqueWords(content)) / nonstopCount
    else:
        rateUniqueNonStopWords = 0.0

    average_token_length = averageWordLength(content)  # Average Words Length

    # One blob per text; both sentiment values are read from the same blob.
    content_blob = TextBlob(' '.join(content))
    title_blob = TextBlob(' '.join(title))
    global_subjectivity = content_blob.subjectivity
    title_subjectivity = title_blob.subjectivity
    global_sentiment_polarity = content_blob.polarity
    title_sentiment_polarity = title_blob.polarity

    # NOTE(review): the LDA values are computed from the file 'content.txt',
    # not from the widget text read above -- confirm the file is kept in
    # sync with the text box.
    LDA = lda('content.txt')

    print('n_tokens_title =', countwordsT)
    print('n_tokens_content =', countwordsC)

    print('n_unique_tokens =', countunique)
    print('n_non_stop_words =', rateNonStopWords)
    print('n_non_stop_unique_tokens =', rateUniqueNonStopWords)

    print('num_href =', numLinks)
    print('num_imgs =', numImages)
    print('num_videos =', numVideos)
    print('average_token_length =', average_token_length)

    num_keywords = num_keyword(" ".join(title))
    print('num_keywords =', num_keywords)

    # Fixed placeholder flags.
    is_workday = 1
    is_weekend = 0
    print('is_workday=', is_workday)
    print('is_weekend=', is_weekend)

    # Second element of each of the first five LDA entries.
    lda_values = [LDA[index][1] for index in range(5)]
    print('LDA00 =', lda_values[0])
    print('LDA01 =', lda_values[1])
    print('LDA02 =', lda_values[2])
    print('LDA03 =', lda_values[3])
    print('LDA04 =', lda_values[4])

    print('global_subjectivity =', global_subjectivity)
    print('global_sentiment_polarity=', global_sentiment_polarity)

    # Fixed placeholder polarity statistics.
    avg_positive_polarity = 0.35
    min_positive_polarity = 0.1
    max_positive_polarity = 0.75
    abs_title_sub = abs_title_subjectivity(title_subjectivity)
    abs_title_sentiment_polarity = abs(title_sentiment_polarity)
    print('avg_positive_polarity=', avg_positive_polarity)
    print('min_positive_polarity=', min_positive_polarity)
    print('max_positive_polarity=', max_positive_polarity)
    print('title_subjectivity=', title_subjectivity)
    print('title_sentiment_polarity=', title_sentiment_polarity)
    print('abs_title_subjectivity=', abs_title_sub)
    print('abs_title_sentiment_polarity=', abs_title_sentiment_polarity)

    # Order of the output vector.  Note that videos come before images
    # here, unlike in the printed listing above.
    features = [
        countwordsT, countwordsC, countunique,
        rateNonStopWords, rateUniqueNonStopWords,
        numLinks, numVideos, numImages,
        average_token_length, num_keywords,
        is_workday, is_weekend,
    ]
    features.extend(lda_values)
    features.extend([
        global_subjectivity, global_sentiment_polarity,
        avg_positive_polarity, min_positive_polarity, max_positive_polarity,
        title_subjectivity, title_sentiment_polarity,
        abs_title_sub, abs_title_sentiment_polarity,
    ])
    result = ' '.join(str(value) for value in features)
    resultText.insert(END, result)
# Scale the four feature columns of `iris` and split off the label column.
# The (1,5) broadcast and the column slices below mean `iris` is expected
# to have five columns: features in columns 0-3, class label in column 4.
#
# `imax` stacks the per-column maxima and minima and takes the column-wise
# maximum of the two rows.  Because no absolute value is taken, this is the
# same as the per-column maximum.
# NOTE(review): abs() on both rows may have been intended -- confirm.
imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
iris[:,:4] = iris[:,:4]/imax[:4]
labels = iris[:,4:]
iris = iris[:,:4]

# Shuffle the rows, applying the same permutation to features and labels.
# This relies on Python 2, where range() returns a list that
# np.random.shuffle can permute in place.
order = range(np.shape(iris)[0])
np.random.shuffle(order)
iris = iris[order,:]
labels = labels[order,0]

# Row indices of each of the three classes (labels 0, 1 and 2).
w0 = np.where(labels==0)
w1 = np.where(labels==1)
w2 = np.where(labels==2)

import lda
# NOTE(review): the third argument is presumably the number of output
# dimensions; `w` is whatever lda.lda returns second -- confirm in lda.py.
newData,w = lda.lda(iris,labels,2)
print np.shape(newData)
# Current figure: first two scaled features, one marker style per class.
pl.plot(iris[w0,0],iris[w0,1],'ok')
pl.plot(iris[w1,0],iris[w1,1],'^k')
pl.plot(iris[w2,0],iris[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')
# Figure 2: first two columns of the LDA output, same markers and axis
# limits so that both figures can be compared directly.
pl.figure(2)
pl.plot(newData[w0,0],newData[w0,1],'ok')
pl.plot(newData[w1,0],newData[w1,1],'^k')
pl.plot(newData[w2,0],newData[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')

import pca
# PCA on the same scaled features; the results are not used further in
# this section.
x,y,evals,evecs = pca.pca(iris,2)
import itertools
import random
import sklearn
import time
from lda import lda
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

# Model settings.  float() forces true division for alpha under Python 2.
# nr_runs, top_words and top_topics are not used in this section.
nr_topics = 20
alpha = 50/float(nr_topics)
beta = 0.1
nr_runs = 1
top_words = 50
top_topics = 5
# From here on the name `lda` refers to the model instance; the imported
# class of the same name is no longer reachable through it.
lda = lda(alpha, beta, nr_topics)

# Do all 5 folds
for i in range(0, 5):
	# Set fold number
	lda.reset_to_next_fold(i)
	# Get info about documents
	dataset = lda.dataset
	# Get word counts per document
	word_counts = lda.doc_word

	# THESE ARE THE SETS NEEDED FOR THIS FOLD
	# All three are read from attributes of the model object after the
	# fold switch above; training_set is the same attribute as `dataset`.
	print "Building test and training set..."
	training_set = lda.dataset
	training_labels = lda.labels_dataset
	test_set = lda.testset
def optimal(**kwargs):
    """
    Find the green compensation that minimizes the evaporation merit.

    This function takes fixed values of s0, wL, wC and optimizes the
    evaporation figure of merit, eta_F, by using the green compensation
    g0 (passed to scubic.sc as allGR) as the only variable.  The search is
    a bounded scalar minimization over 0 <= g0 <= min(s0, s0/alpha**2),
    with alpha = wL/wC.

    Keyword arguments (default in parentheses):
        s0 (7.), wL (47.), wC (40.) -- passed to scubic.sc as allIR,
            allIRw and allGRw.
        a_s (650.), T_Er (0.2), extents (40.) -- passed to lda.lda as
            a_s, Temperature and extents.
        Number -- optional.  When given, the merit is multiplied by a
            penalty that grows once the atom number of the trial
            solution exceeds this value.

    Returns:
        [gOptimal, EtaEvap, Number, Entropy/Number, radius, radius/wL,
        DeltaEvap], all taken from an lda.lda solution evaluated at the
        optimal g0.  That final solution is built without ignoreExtents,
        unlike the trial solutions inside the search.

    Raises:
        Any exception raised while evaluating a trial g0 whose message is
        not one of the known "too much green" messages is propagated.  If
        the message contains 'vanish' the current extents are printed
        first.  Exceptions from the final lda.lda call are propagated
        unchanged.
    """

    s0 = kwargs.get('s0', 7.)
    wL = kwargs.get('wL', 47.)
    wC = kwargs.get('wC', 40.)
    alpha = wL / wC
    a_s = kwargs.get('a_s', 650.)
    T_Er = kwargs.get('T_Er', 0.2)
    extents = kwargs.get('extents', 40.)

    penalize_number = 'Number' in kwargs
    if penalize_number:
        N0 = kwargs['Number']

    # Error messages that are all caused by too much green.  A trial g0
    # that triggers one of them is given a large merit value instead of
    # aborting the search.
    too_much_green = (
        'Bottom of the band has a negative slope',
        # the chemical potential comes too close to the threshold and
        # atoms accumulate on the beams
        'Radial density profile along 111 has a positive slope',
        # the chemical potential is above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # the chemical potential is above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )

    def Npenalty(Num):
        """Factor >= 1 that grows exponentially once Num exceeds N0."""
        p = 4.
        if Num > N0:
            return np.exp((Num - N0) / 1e5)**p
        return 1.

    def penalty(x, p):
        """
        This function is used to penalize EtaF < 1, which amounts to
        spilling out along the lattice beams.
        """
        if x < 1.:
            return np.exp(-(x - 1.))**p
        return x

    def merit(g0):
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # The lda within the optimization loop is told to ignore errors
            # of the density distribution going beyond the extents.
            # This will be checked after the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er,
                           a_s=a_s, globalMu='halfMott', extents=extents,
                           ignoreExtents=True, select='htse')

            etaFstar = penalty(lda0.etaF_star, 5)
            if penalize_number:
                return etaFstar * Npenalty(lda0.Number)
            return etaFstar
        except Exception as e:
            # str(e) instead of e.message: the attribute was deprecated by
            # PEP 352 and does not exist on Python 3, where reading it
            # would replace the real error with an AttributeError.
            msg = str(e)
            if any(known in msg for known in too_much_green):
                return 1e4  # large value to assign a penalty
            if 'vanish' in msg:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    g0bounds = (0., min(s0, s0 / (alpha**2.)))
    res = minimize_scalar(merit, bounds=g0bounds, tol=4e-2,
                          method='bounded')
    gOptimal = res.x

    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)
    return [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number,
            ldaOpt.Entropy / ldaOpt.Number, ldaOpt.getRadius(),
            ldaOpt.getRadius() / wL, ldaOpt.DeltaEvap]
Exemple #25
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from lda import lda

# Driver script: the calls below are made in this order on one model object.

# model initialization
# NOTE(review): K is presumably the number of topics -- confirm in lda.py.
ldaModel = lda(K = 10)
# load training corpus
ldaModel.preprocessing(corpus = './data/corpus.txt')
# variational EM algorithm
ldaModel.EM()
# topic words (called with maxTopicWordsNum = 10)
topicWords = ldaModel.topicWords(maxTopicWordsNum = 10)
# topic inference given documents (here: the held-out sentences file)
inference = ldaModel.topicInference(documents = './data/held_out_sentences.txt')
Exemple #26
0
def Spi_vs_N(aS=200., Spi_inhomog=False, Tspi=0.9,
             savedir='dataplots/VaryN_Spi',
             mulist=[-0.15, -0.075, 0., 0.10, 0.18],
             bestForce=-1,
             spiextents=25.,
             entextents=25.,
             finegrid=False,
             ):
    """
    Scan the chemical potential offset and collect the Spi results.

    For every value in mulist an lda.lda solution is built on the same
    scubic.sc potential (with halfMottPlus set to that value), its bulk
    Spi data are obtained with getBulkSpi, and an inhomogeneity check
    figure is saved as a PNG.  The collected results are handed to
    plot_spis.

    The figure path is savedir concatenated directly with
    'Inhomog/<g>gr_<index>_<select>_T<T>Er.png'; no path separator is
    inserted between the two parts.

    Arguments:
        aS          -- passed to lda.lda as a_s and stored in each result.
        Spi_inhomog -- passed as inhomog to getBulkSpi, getSpiFineGrid
                       and plot_spis.
        Tspi        -- passed to getBulkSpi / getSpiFineGrid.
        savedir     -- prefix of the saved figure paths, also stored in
                       each result.
        mulist      -- values used as halfMottPlus, one solution each.
        bestForce   -- passed to plot_spis and used as the index of the
                       returned entry.
        spiextents, entextents -- passed to getBulkSpi / getSpiFineGrid.
        finegrid    -- when true, getSpiFineGrid (320 points) is evaluated
                       as well; otherwise the three fine-grid entries of
                       each result are None.

    Returns:
        The result dictionary at index bestForce of the collected list.
    """
    # Fixed settings shared by every point of the scan.
    s = 7.
    g = 3.666
    wIR = 47.
    wGR = 47. / 1.175
    T = 0.035
    extents = 30.
    mu0 = 'halfMott'
    select = 'nlce'

    spis = []

    for tag, muPlus in enumerate(mulist):
        print("")
        print("muPlus =  %s" % (muPlus,))

        pot = scubic.sc(allIR=s, allGR=g, allIRw=wIR, allGRw=wGR)
        lda0 = lda.lda(potential=pot, Temperature=T, a_s=aS,
                       extents=extents,
                       globalMu=mu0, halfMottPlus=muPlus,
                       verbose=True,
                       select=select,
                       ignoreExtents=False, ignoreSlopeErrors=True,
                       ignoreMuThreshold=True)

        bulk = lda0.getBulkSpi(Tspi=Tspi, inhomog=Spi_inhomog,
                               spiextents=spiextents,
                               entextents=entextents)
        (spibulk, spi, r111, n111, U111, t111,
         entrbulk, entr111, lda_num, density111) = bulk

        # The fine grid is optional; its three slots stay empty otherwise.
        if not finegrid:
            fine = (None, None, None)
        else:
            fine = lda0.getSpiFineGrid(Tspi=Tspi, numpoints=320,
                                       inhomog=Spi_inhomog,
                                       spiextents=spiextents,
                                       entextents=entextents)
        r111_fine, spi111_fine, n111_fine = fine

        record = {
            'SpiBulk': spibulk,
            'spi111': spi,
            'r111': r111,
            'n111': n111,
            'U111': U111,
            't111': t111,
            'entrbulk': entrbulk,
            'entr111': entr111,
            'Number': lda0.Number,
            'ldanum': lda_num,
            'dens111': density111,
            'Tn': T,
            'Tspi': Tspi,
            'aS': aS,
            'savedir': savedir,
            'r111_fine': r111_fine,
            'spi111_fine': spi111_fine,
            'n111_fine': n111_fine,
        }
        spis.append(record)

        # Figure to check inhomogeneity; only the figure itself is used.
        fig111, _binresult, _peak_dens, _radius1e, _peak_t, _output = \
            lda.CheckInhomog(lda0, closefig=True, n_ylim=(-0.1, 2.0))

        figname = 'Inhomog/{:0.3f}gr_{:03d}_{}_T{:0.4f}Er.png'.format(
            g, tag, select, T)
        fig111.savefig(savedir + figname, dpi=300)

    plot_spis(spis, inhomog=Spi_inhomog, bestForce=bestForce)

    return spis[bestForce]
Exemple #27
0
import sys
import numpy as np
from matplotlib import pyplot as plt

import lda

if __name__ == '__main__':
    # Script entry point: the first command-line argument is the data file
    # handed to lda.read_data, which yields the two sample matrices.
    pos_mat, neg_mat = lda.read_data(sys.argv[1])

    lda.plot_data(pos_mat, neg_mat)

    print(pos_mat)
    print(neg_mat)

    # The negative matrix is passed first here.
    w, project_points = lda.lda(neg_mat, pos_mat)

    # Flip the direction vector when its second component is negative.
    if w[1] < 0:
        w = [-component for component in w]
    print('w =  %s' % (w,))
    print('project_points =  %s' % (project_points,))

    # draw vector w as a line starting at the origin
    xs, ys = zip((0, 0), w)
    plt.plot(xs, ys, color='blue')

    plt.axis([0, 1, 0, 1])

    plt.show()
def optimal_FixedRadius(**kwargs):
    """
    Find the green compensation that gives a sample of fixed radius.

    This function takes fixed values of s0, wL, wC and finds the value of
    green (g0, passed to scubic.sc as allGR) that would be required to
    make a sample with a radius that is a fixed fraction of the lattice
    beam waist wL.  It does so with a bounded scalar minimization of
    (fraction*wL - radius)**2 over
    1 <= g0 <= min(s0, (4*s0 - 2*sqrt(s0)) / (4*alpha**2)),
    with alpha = wL/wC.

    Keyword arguments (default in parentheses):
        s0 (7.), wL (47.), wC (40.) -- passed to scubic.sc as allIR,
            allIRw and allGRw.
        a_s (650.), T_Er (0.2), extents (40.) -- passed to lda.lda as
            a_s, Temperature and extents.
        fraction (0.32) -- target ratio of the sample radius to wL.

    Returns:
        [gOptimal, EtaEvap, Number, Entropy/Number, radius, radius/wL],
        all taken from an lda.lda solution evaluated at the optimal g0.
        That final solution is built without ignoreExtents, unlike the
        trial solutions inside the search.

    Raises:
        Any exception raised while evaluating a trial g0 whose message is
        not one of the known "too much green" messages is propagated.  If
        the message contains 'vanish' the current extents are printed
        first.  Exceptions from the final lda.lda call are propagated
        unchanged.
    """
    fraction = kwargs.get('fraction', 0.32)

    s0 = kwargs.get('s0', 7.)
    wL = kwargs.get('wL', 47.)
    wC = kwargs.get('wC', 40.)
    alpha = wL / wC
    a_s = kwargs.get('a_s', 650.)
    T_Er = kwargs.get('T_Er', 0.2)
    extents = kwargs.get('extents', 40.)

    # Error messages that are all caused by too much green.  A trial g0
    # that triggers one of them is given a large merit value instead of
    # aborting the search.
    too_much_green = (
        'Bottom of the band has a negative slope',
        # the chemical potential comes too close to the threshold and
        # atoms accumulate on the beams
        'Radial density profile along 111 has a positive slope',
        # the chemical potential is above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # the chemical potential is above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )

    def merit(g0):
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # The lda within the optimization loop is told to ignore errors
            # of the density distribution going beyond the extents.
            # This will be checked after the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er,
                           a_s=a_s, globalMu='halfMott', extents=extents,
                           ignoreExtents=True)

            return (fraction * wL - lda0.getRadius())**2.
        except Exception as e:
            # str(e) instead of e.message: the attribute was deprecated by
            # PEP 352 and does not exist on Python 3, where reading it
            # would replace the real error with an AttributeError.
            msg = str(e)
            if any(known in msg for known in too_much_green):
                return 1e6  # large value to assign a penalty
            if 'vanish' in msg:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    g0bounds = (1., min(s0, (4. * s0 - 2. * np.sqrt(s0)) / (4. * (alpha**2.))))

    res = minimize_scalar(merit, bounds=g0bounds, tol=1e-4,
                          method='bounded')
    gOptimal = res.x

    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)
    return [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number,
            ldaOpt.Entropy / ldaOpt.Number, ldaOpt.getRadius(),
            ldaOpt.getRadius() / wL]