def lda_copycat(confused_words_list, sample_size, ftype, data_dir, mlf_file, hlist_path=None):
    """Collect per-frame features of the confused words and run ``lda`` on them.

    Words are labelled 1, 2, ... in list order.  For each word,
    ``sample_size`` file names containing ``_<word>_`` are drawn from
    ``data_dir`` with ``np.random.choice``.  Every sampled file is read, the
    frames between the word's boundaries (from ``mlf_file``) are cut out, and
    one ``{'label', 'data'}`` row per frame is recorded.

    Args:
        confused_words_list: Words to sample.
        sample_size: Number of file names drawn per word.
        ftype: ``'ark'`` selects ``read_ark_file``; any other value selects
            ``read_htk_file``.
        data_dir: Directory whose listing supplies the candidate file names.
        mlf_file: Passed to ``mlf_to_dict`` to obtain the boundaries.
        hlist_path: Forwarded to ``read_htk_file``.

    Returns:
        pandas.DataFrame with columns ``label`` and ``data``; it is passed to
        ``lda`` before being returned.

    Note:
        Rows are gathered in a list and the frame is built once, because
        ``DataFrame.append`` copies the whole frame on every call and no
        longer exists in pandas 2.x.  ``label`` therefore holds plain ints.
    """
    boundaries = mlf_to_dict(mlf_file)
    # The listing does not depend on the word, so read the directory once.
    filenames = os.listdir(data_dir)
    rows = []

    for label, word in enumerate(confused_words_list, start=1):
        # Sample from all phrases containing the confused word.
        marker = '_{}_'.format(word)
        word_filenames = np.array([name for name in filenames if marker in name])
        word_filenames_sample = np.random.choice(word_filenames, size=sample_size)

        # NOTE(review): SOURCE lost its indentation; this assumes ``count``
        # counts frames and is checked once per file - confirm.
        count = 0
        for fname in word_filenames_sample:
            try:
                phrase = fname.replace('.htk', '')
                if ftype == 'ark':
                    phrase_data = read_ark_file(fname)
                else:
                    phrase_data = read_htk_file(hlist_path, os.path.join(data_dir, fname))
                phrase_bounds = boundaries[phrase]
                # Ratio between the last boundary value of the phrase and the
                # number of frames read; converts boundaries to frame indices.
                mult = float(list(phrase_bounds.items())[-1][1][0][2]) / len(phrase_data)
                start = int(phrase_bounds[word][0][1] / mult)
                stop = int(phrase_bounds[word][0][2] / mult)
                for frame in phrase_data[start:stop]:
                    rows.append({'label': label, 'data': np.array(frame)})
                    count += 1
            except Exception:
                # Best effort: an unreadable or unlabelled file is skipped.
                # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
                print('skip')
            if count > 100:
                break

    df = pd.DataFrame(rows, columns=['label', 'data'])
    lda(df)
    return df
def train(train_set):
    """Fit the LDA projection on a ``(positive, negative)`` pair of sets.

    Both sets are converted with ``lda.data2mat`` and handed to ``lda.lda``.

    Returns:
        tuple: ``(w, project_points)`` as produced by ``lda.lda``.
    """
    positives, negatives = train_set
    positive_matrix = lda.data2mat(positives)
    negative_matrix = lda.data2mat(negatives)
    direction, projections = lda.lda(positive_matrix, negative_matrix)
    return direction, projections
def merit(g0):
    """Figure of merit evaluated for the green compensation ``g0``.

    Builds the ``scubic.sc`` potential for ``g0``, runs ``lda.lda`` on it and
    returns ``penalty(lda0.etaF_star, 5)``, multiplied by
    ``Npenalty(lda0.Number)`` when ``'Number'`` is a key of ``kwargs``.

    ``s0``, ``wL``, ``wC``, ``T_Er``, ``a_s``, ``extents``, ``kwargs``,
    ``penalty`` and ``Npenalty`` are free variables taken from the
    surrounding scope.

    Returns:
        The penalised figure of merit, or ``1e4`` when the evaluation fails
        with one of the known "too much green" errors.

    Raises:
        Exception: every other error is re-raised unchanged.  For errors
            mentioning ``'vanish'`` the extents are printed first.
    """
    # Error texts that are all caused by too much green; each is answered
    # with a large return value that acts as a penalty.
    too_much_green = (
        'Bottom of the band has a negative slope',
        # the chemical potential comes too close to the threshold and atoms
        # accumulate on the beams
        'Radial density profile along 111 has a positive slope',
        # the chemical potential is above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # the chemical potential is above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )
    try:
        pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

        # The lda within the optimization loop is told to ignore errors
        # of the density distribution going beyond the extents.
        # This will be checked after the optimization is done.
        lda0 = lda.lda(potential=pot, Temperature=T_Er,
                       a_s=a_s, globalMu='halfMott', extents=extents,
                       ignoreExtents=True, select='htse')

        etaFstar = penalty(lda0.etaF_star, 5)
        if 'Number' in kwargs:
            return etaFstar * Npenalty(lda0.Number)
        return etaFstar
    except Exception as e:
        # ``str(e)`` rather than ``e.message``: the attribute only exists on
        # Python 2, and on Python 3 the lookup itself would raise and hide
        # the real error.
        message = str(e)
        if any(text in message for text in too_much_green):
            return 1e4
        if 'vanish' in message:
            # this is caused by insufficient extents
            print("extents = %.1f" % extents)
        raise
def merit(g0):
    """Evaluate the figure of merit for one value of the green compensation.

    The potential is created with ``scubic.sc`` and evaluated with
    ``lda.lda``.  The value returned is ``penalty(lda0.etaF_star, 5)``; when
    ``kwargs`` contains ``'Number'`` it is additionally multiplied by
    ``Npenalty(lda0.Number)``.

    The names ``s0``, ``wL``, ``wC``, ``T_Er``, ``a_s``, ``extents``,
    ``kwargs``, ``penalty`` and ``Npenalty`` are not parameters; they are
    looked up in the surrounding scope.

    Returns:
        The penalised figure of merit, or ``1e4`` for the known error texts
        listed in ``penalised_errors``.

    Raises:
        Exception: any other failure propagates unchanged (after printing
            the extents when the message contains ``'vanish'``).
    """
    # All of these are caused by too much green; a large value is returned
    # to assign a penalty.
    penalised_errors = (
        'Bottom of the band has a negative slope',
        # chemical potential too close to the threshold, atoms accumulate on
        # the beams
        'Radial density profile along 111 has a positive slope',
        # chemical potential above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # chemical potential above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )
    try:
        pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

        # The lda within the optimization loop is told to ignore errors
        # of the density distribution going beyond the extents.
        # This will be checked after the optimization is done.
        lda0 = lda.lda(potential=pot, Temperature=T_Er,
                       a_s=a_s, globalMu='halfMott', extents=extents,
                       ignoreExtents=True, select='htse')

        etaFstar = penalty(lda0.etaF_star, 5)
        if 'Number' in kwargs:
            return etaFstar * Npenalty(lda0.Number)
        return etaFstar
    except Exception as e:
        # ``e.message`` exists only on Python 2; ``str(e)`` carries the same
        # text and works on both major versions.
        text = str(e)
        for known in penalised_errors:
            if known in text:
                return 1e4
        if 'vanish' in text:
            # this is caused by insufficient extents
            print("extents = %.1f" % extents)
        raise
def main():
    """Compare k-NN accuracy on raw, PCA-projected and LDA-projected data.

    Reads the CSV selected below, projects it with the project's ``pca.pca``
    and ``lda.lda`` helpers and prints, for k in 1, 3 and 5, the mean
    accuracy of ``knn`` over a 3-fold ``StratifiedKFold`` with and without
    each projection.
    """
    import os  # local import: only needed here to build portable paths

    # The original literals ('List03\Databases\KC1.csv') contained invalid
    # escape sequences and only resolved on Windows.
    data_base1 = os.path.join('List03', 'Databases', 'KC1.csv')
    data_base2 = os.path.join('List03', 'Databases', 'CM1.csv')
    columns_names = "loc,v(g),ev(g),iv(g),n,v,l,d,i,e,b,t,lOCode,lOComment,lOBlank,locCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount,defects".split(
        ',')

    df = pd.read_csv(data_base1, names=columns_names)  # change to data_base2 for the other dataset
    data = df.iloc[:, :-1].copy()  # data without target
    target = df['defects']  # target
    class_values = df['defects'].unique()  # distinct classes
    k_components = 3  # components for PCA; alternatives tried: [1,3,5,9,15,20]

    # PCA, LDA instances
    pca_instance = pca.pca(data, target)
    # NOTE(review): LDA receives the full frame ``df`` (target column
    # included) while PCA receives ``data`` - confirm this is intended.
    lda_instance = lda.lda(df, target, class_values)

    # PCA ---------------------------------------------------------------
    cov_matriz = pca_instance.cov_matriz()
    eigenvalues, eigenvectors = pca_instance.get_eigen_value_vector(cov_matriz)
    eigen_vec = pca_instance.get_eigenvecs(eigenvalues, eigenvectors, k_components)
    pca_instance.normalize()
    new_dataset = pca_instance.change_base(eigen_vec, pca_instance.normalize_data)

    # LDA ---------------------------------------------------------------
    mean_vectors = lda_instance.calc_mean_vect()
    data_class = lda_instance.get_data_per_class()
    s_w = lda_instance.calc_sw(mean_vectors, data_class)
    s_b = lda_instance.calc_sb(mean_vectors)
    eig_pairs = lda_instance.get_eigs(s_w, s_b)
    lda_components = lda_instance.get_k_eigenvcs(eig_pairs, len(class_values) - 1)
    new_space = pd.DataFrame(lda_instance.transform(lda_components))

    skf = StratifiedKFold(n_splits=3)  # number of folds
    knns = [1, 3, 5]

    print("Components PCA :%.1d" % k_components)
    for j in knns:
        print("KNN = %.1d" % j)

        print("PCA")
        accuracy_pca = pca_instance.knn(new_dataset, j, skf)
        accuracy_without_pca = pca_instance.knn(data, j, skf)
        print("Acurracy with PCA:%.3f " % np.mean(accuracy_pca))
        print("Acurracy without PCA:%.3f\n" % np.mean(accuracy_without_pca))

        print("LDA")
        accuracy_lda = lda_instance.knn(new_space, j, skf)
        accuracy_without_lda = lda_instance.knn(data, j, skf)
        print("Acurracy with LDA:%.3f " % np.mean(accuracy_lda))
        print("Acurracy without LDA:%.3f\n" % np.mean(accuracy_without_lda))
def val_uci(train_x, train_y, test_x, test_y, fs, dim, cls, f):
    """Train ``cls`` on the (optionally reduced) data and report its accuracy.

    When ``fs`` is ``'pca'`` or ``'lda'`` the training data is reduced to
    ``dim`` dimensions with the matching helper and the test data is
    projected with the transposed matrix that helper returns.  For any other
    ``fs`` the data is used unchanged.

    ``cls(train_x, train_y)`` must return a callable that maps ``test_x`` to
    predictions.  The accuracy in percent is formatted with ``'%-8.2f'``,
    printed, and written to ``f`` (no newline is added).
    """
    use_pca = fs == 'pca'
    use_lda = (not use_pca) and fs == 'lda'
    if use_pca or use_lda:
        if use_pca:
            train_x, w = pca(train_x, dim)
        else:
            train_x, w = lda(train_x, train_y, dim)
        test_x = test_x.dot(w.T)

    classifier = cls(train_x, train_y)
    predicted = classifier(test_x)
    n_correct = (predicted == test_y).sum()
    acc = '%-8.2f' % (n_correct / predicted.shape[0] * 100)
    print(acc)
    f.write(acc)
def get_trap_results(**kwargs):
    """
    Return the trap results directly when the trap parameters are known.

    Keyword arguments (with defaults): ``s0`` (7.), ``wL`` (47.), ``wC``
    (40.), ``a_s`` (650.), ``T_Er`` (0.2), ``extents`` (40.) and ``g0``
    (4.304).

    Returns the list ``[g0, EtaEvap, Number, Entropy/Number, radius,
    radius/wL, DeltaEvap]`` read from the ``lda.lda`` result.
    """
    option = kwargs.get
    s0, wL, wC = option('s0', 7.), option('wL', 47.), option('wC', 40.)
    alpha = wL / wC  # computed as in the original; not used further below
    a_s = option('a_s', 650.)
    T_Er = option('T_Er', 0.2)
    extents = option('extents', 40.)
    gOptimal = option('g0', 4.304)

    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)

    results = [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number]
    results.append(ldaOpt.Entropy / ldaOpt.Number)
    results.append(ldaOpt.getRadius())
    results.append(ldaOpt.getRadius() / wL)
    results.append(ldaOpt.DeltaEvap)
    return results
def get_trap_results(**kwargs):
    """
    Compute the trap results for known trap parameters.

    Every parameter is optional and read from ``kwargs``: ``s0``, ``wL``,
    ``wC``, ``a_s``, ``T_Er``, ``extents`` and ``g0``; the defaults are
    listed in ``defaults`` below.

    Returns a list with, in order: the green compensation, ``EtaEvap``,
    ``Number``, ``Entropy/Number``, ``getRadius()``, ``getRadius()/wL`` and
    ``DeltaEvap`` of the ``lda.lda`` result.
    """
    defaults = (
        ('s0', 7.), ('wL', 47.), ('wC', 40.), ('a_s', 650.),
        ('T_Er', 0.2), ('extents', 40.), ('g0', 4.304),
    )
    params = {key: kwargs.get(key, default) for key, default in defaults}
    wL = params['wL']
    gOptimal = params['g0']
    alpha = wL / params['wC']  # kept from the original; unused afterwards

    potOpt = scubic.sc(allIR=params['s0'], allGR=gOptimal,
                       allIRw=wL, allGRw=params['wC'])
    ldaOpt = lda.lda(potential=potOpt, Temperature=params['T_Er'],
                     a_s=params['a_s'], globalMu='halfMott',
                     extents=params['extents'])

    eta_evap = ldaOpt.EtaEvap
    number = ldaOpt.Number
    entropy_per_particle = ldaOpt.Entropy / ldaOpt.Number
    radius = ldaOpt.getRadius()
    radius_fraction = ldaOpt.getRadius() / wL
    return [gOptimal, eta_evap, number, entropy_per_particle,
            radius, radius_fraction, ldaOpt.DeltaEvap]
def build_model(lines_num=-1):
    """Run the text pipeline on ``rawdata.csv`` and pickle the LDA result.

    The file is tokenized, stop words are removed, the result is stemmed and
    turned into a corpus/dictionary pair, and ``lda`` is run on it.  Whatever
    ``lda`` returns is pickled to the file ``outfile``.

    Args:
        lines_num: Forwarded unchanged to ``tokenize``, ``stop_words``,
            ``stem`` and ``lda``.  Presumably a line limit with -1 meaning
            "all lines" - confirm against those helpers.
    """
    # The input handle used to be opened and never closed.  The whole
    # pipeline stays inside the ``with`` block in case a helper reads the
    # file lazily.
    with open('rawdata.csv', 'r') as dataset:
        _bid_list, rawList = tokenize(dataset, lines_num)
        stopped_result = stop_words(rawList, lines_num)
        # it seems like without stem the words make more sense, still
        # working on it
        stem_result = stem(stopped_result, lines_num)
        corpus, dictionary = doc_term_matrix(stem_result)

        # LDA
        ldaList = lda(corpus, dictionary, lines_num)

    print("load data...")
    with open('outfile', 'wb') as fp:
        pickle.dump(ldaList, fp)
def _write_first_two(path, features, labels, nsamples):
    """Write ``feature0,feature1,label`` lines for the first ``nsamples`` rows."""
    # ``with`` closes the file even if a row fails to format.
    with open(path, 'wt') as out:
        for n in range(nsamples):
            out.write(str(features[n, 0]) + ',' + str(features[n, 1]) + ',' + str(labels[n]) + '\n')


def extract_first_two_dims(d):
    """Dump the first two feature columns of dataset ``d`` in three variants.

    The dataset is loaded by calling ``dataset.__dict__[d]()``, which must
    return ``(x, y)`` with a two-dimensional ``x``.  Three files are written,
    each with one ``f0,f1,label`` line per sample:

    * ``<d>_ori.feature`` - the original features,
    * ``<d>_pca.feature`` - the output of ``pca.pca(x, 2)``,
    * ``<d>_lda.feature`` - the output of ``lda.lda(x, y, 2)``.
    """
    x, y = dataset.__dict__[d]()
    nsamples, _ = x.shape

    # original features
    _write_first_two(d + '_ori.feature', x, y, nsamples)

    # pca features (the projection matrix is not needed here)
    pca_x, _ = pca.pca(x, 2)
    _write_first_two(d + '_pca.feature', pca_x, y, nsamples)

    # lda features
    lda_x, _ = lda.lda(x, y, 2)
    _write_first_two(d + '_lda.feature', lda_x, y, nsamples)
if __name__ == '__main__':
    # define training set
    # Every file under ./data is read as tab-separated lines.  ``open`` in a
    # ``with`` block replaces the Python 2-only ``file()`` builtin, whose
    # handles were never closed.
    data = []
    for infile in os.listdir('./data'):
        with open('./data/' + infile) as handle:
            data.extend(line.rstrip().split('\t') for line in handle)

    tdata = dataformat.trimmed_data(data)
    timeline = dataformat.getTimeline(tdata)
    fdata = dataformat.formatted_dataset(tdata)
    print("\n data formatted")

    neurons = 2
    # create network, using one per class coding
    ffn = FeedForwardNetwork(neurons, 3, 2)

    # using lda: the first field of every formatted row is the document
    d = [line[0] for line in fdata]
    obj = lda.lda()
    obj.generateCorpusAndDic(d)
    obj.generateLDAModel(2)
    inputs = obj.getDocTopicProb()
    print("\n lda done")

    # first 80% of the documents are used for training
    size = int(len(inputs) * .8)
    train_inputs = inputs[:size]
    train_outputs = [fdata[index][1] for index in range(size)]
    # ``counter`` ended at ``size`` after the original while-loop; it is kept
    # because code after this fragment may continue from it.
    counter = size
    test_inputs = inputs[size:]
    test_outputs = []
# Load the E. coli data: the first 7 columns are features, the rest labels.
ecoli = np.loadtxt('../9 Unsupervised/shortecoli.data')
labels = ecoli[:,7:]
data = ecoli[:,:7]
# Centre each feature and scale it by its maximum.
data -= np.mean(data,axis=0)
data /= data.max(axis=0)

# Shuffle the rows.  ``list(range(...))`` is required because
# ``np.random.shuffle`` needs a mutable sequence and ``range`` is immutable
# on Python 3.
order = list(range(np.shape(data)[0]))
np.random.shuffle(order)
data = data[order]
# The labels must follow the same permutation; they were previously left in
# file order, so every label below referred to the wrong row.
labels = labels[order]

w0 = np.where(labels==1)
w1 = np.where(labels==2)
w2 = np.where(labels==3)

import lda
newData,w = lda.lda(data,labels,2)

# Figure 1: first two original features, one marker per class.
pl.plot(data[w0,0],data[w0,1],'ok')
pl.plot(data[w1,0],data[w1,1],'^k')
pl.plot(data[w2,0],data[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')

# Figure 2: the two LDA components.
pl.figure(2)
pl.plot(newData[w0,0],newData[w0,1],'ok')
pl.plot(newData[w1,0],newData[w1,1],'^k')
pl.plot(newData[w2,0],newData[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')

import pca
x,y,evals,evecs = pca.pca(data,2)
def single_spi(**kwargs):
    """
    Run the LDA for a list of atom numbers and return one selected result.

    For every ``(numlist[i], mulist[i])`` pair a ``scubic.sc`` potential and
    an ``lda.lda`` object are built, ``getBulkSpi`` (and, when ``finegrid``
    is true, ``getSpiFineGrid``) is evaluated, and the results are collected
    in a dictionary.  All dictionaries are passed to ``plot_spis`` and the
    one at index ``bestForce`` is returned.

    Recognised keyword arguments (each is popped from ``kwargs``):
    ``savedir``, ``numlist``, ``mulist``, ``bestForce``, ``params_s``,
    ``params_g``, ``params_wIR``, ``params_wGR``, ``params_aS``,
    ``params_Tdens``, ``params_Tspi``, ``extents``, ``spiextents``,
    ``sthextents``, ``entextents``, ``finegrid`` and ``params_figfname``.
    Whatever remains in ``kwargs`` is forwarded to ``plot_spis``.

    NOTE(review): SOURCE lost its indentation; the nesting below is
    reconstructed from the data flow - confirm against the original file.
    """
    savedir = kwargs.pop('savedir', None)
    numlist = kwargs.pop('numlist', [1.2e5, 1.3e5, 1.4e5, 1.5e5, 1.6e5])
    mulist = kwargs.pop('mulist', [-0.15, -0.075, 0., 0.10, 0.18])
    bestForce = kwargs.pop('bestForce', -1)

    s = kwargs.pop('params_s', 7.)
    g = kwargs.pop('params_g', 3.666)
    wIR = kwargs.pop('params_wIR', 47.)
    wGR = kwargs.pop('params_wGR', 47./1.175)
    direc = '111'  # not used below
    mu0 = 'halfMott'  # only referenced by the commented-out argument below
    aS = kwargs.pop('params_aS', 300.)
    Tdens = kwargs.pop('params_Tdens', 0.6)
    Tspi = kwargs.pop('params_Tspi', 0.6)
    extents = kwargs.pop('extents', 30.)
    spiextents = kwargs.pop('spiextents', 25.)
    sthextents = kwargs.pop('sthextents', 30.)
    entextents = kwargs.pop('entextents', 25.)
    finegrid = kwargs.pop('finegrid', False)

    sarr = np.array([[s], [s], [s]])
    bands = scubic.bands3dvec(sarr, NBand=0)
    t0 = np.mean((bands[1] - bands[0])/12.)  # tunneling 0 in recoils

    # Temperatures are given in units of t0; convert them to recoils.
    Tdens_Er = Tdens*t0
    Tspi_Er = Tspi*t0  # not used below
    print "========================================"
    print " Single Spi"
    print " gr={:0.3f}, aS={:03d}".format(g, int(aS))
    print " Tdens={:0.2f}, Tspi={:0.2f}".format(Tdens, Tspi)

    select = 'qmc'
    spis = []
    for tag, muPlus in enumerate(mulist):
        # numlist is indexed in step with mulist, so it must be at least as
        # long as mulist.
        numgoal = numlist[tag]
        print
        print "num = %.3g, muPlus = %.3f" % (numgoal, muPlus)
        pot = scubic.sc(allIR=s, allGR=g, allIRw=wIR, allGRw=wGR)

        lda0 = lda.lda(potential=pot, Temperature=Tdens_Er, a_s=aS,
                       extents=extents,
                       Natoms=numgoal, halfMottPlus=muPlus,\
                       # globalMu=mu0, halfMottPlus=muPlus,\
                       verbose=True, \
                       select=select,\
                       ignoreExtents=False, ignoreSlopeErrors=True, \
                       ignoreMuThreshold=True)

        spibulk, spi, sthbulk, sth, r111, n111, U111, t111, mut111, \
            entrbulk, entr111,\
            lda_num, density111, k111, k111htse_list = \
            lda0.getBulkSpi(Tspi=Tspi, inhomog=True,
                            spiextents=spiextents, sthextents=sthextents,
                            entextents=entextents, do_k111=False)

        if finegrid:
            r111_fine, spi111_fine, n111_fine, k111_fine, mu111_Er = \
                lda0.getSpiFineGrid(Tspi=Tspi, numpoints=320,
                                    inhomog=True, spiextents=spiextents,
                                    entextents=entextents)
        else:
            r111_fine, spi111_fine, n111_fine, k111_fine, mu111_Er = \
                None, None, None, None, None

        spis.append({
            'gr': g,
            'muPlus': muPlus,
            'SpiBulk': spibulk,
            'spi111': spi,
            'SthBulk': sthbulk,
            'sth111': sth,
            'r111': r111,
            'n111': n111,
            'U111': U111,
            'mut111': mut111,
            't111': t111,
            'entrbulk': entrbulk,
            'entr111': entr111,
            'k111': k111,
            'k111htse_list': k111htse_list,
            'Number': lda0.Number,
            'ldanum': lda_num,\
            # dens111 is the one obtained from QMC
            'dens111': density111,\
            'Tdens': Tdens,\
            'Tspi': Tspi,\
            'aS': aS,\
            'savedir': savedir,\
            'r111_fine': r111_fine,\
            'spi111_fine': spi111_fine,\
            'n111_fine': n111_fine,\
            'k111_fine': k111_fine,\
            'mu111_Er': mu111_Er,\
            'v0111': lda0.pot.S0(lda0.X111, lda0.Y111, lda0.Z111)[0]
        })

        # Figure to check inhomogeneity only run if temperature is high
        if Tspi > 0.85 and Tdens > 0.85:
            fig111, binresult, peak_dens, radius1e, peak_t, output = \
                lda.CheckInhomog(lda0, closefig=True, n_ylim=(-0.1, 2.0))

            # NOTE(review): ``savedir`` defaults to None, in which case this
            # concatenation raises TypeError - confirm callers always pass it.
            figfname = savedir + 'Inhomog/{:0.3f}gr_{:03d}_{}_T{:0.4f}Er.png'.\
                format(g, tag, select, Tspi)

            # 'params_figfname' is popped, so an override only applies to the
            # first iteration that reaches this point.
            figfname = kwargs.pop('params_figfname', figfname)

            fig111.savefig(figfname, dpi=300)

    print
    print "Atom number = {:5.3g}".format(spis[0]['Number'])
    print "Entropy = {:0.2f}".format(spis[0]['entrbulk'])

    plot_spis( spis, bestForce=bestForce, \
               # kwargs
               **kwargs)
    return spis[bestForce]
f = open('olhwdb.kmeans.pca.res', 'wt') for k in range(1, 11): for c in range(1, 4): train_x, w = pca.pca(train.x, k * 10) test_x = test.x.dot(w.T) cls = classifiers.KMeans(train_x, train.y, c) out = cls(test_x) acc = '%-6.2f'%((out == test.y).sum() / out.shape[0] * 100) print(acc) f.write(acc) f.write('\n') f.close() ''' f = open('olhwdb.kmeans.lda.res', 'wt') for k in range(1, 11): for c in range(1, 4): train_x, w = lda.lda(train.x, train.y, k * 10) test_x = test.x.dot(w.T) cls = classifiers.KMeans(train_x, train.y, c) out = cls(test_x) acc = '%-6.2f' % ((out == test.y).sum() / out.shape[0] * 100) print(acc) f.write(acc) f.write('\n') f.close()
def dmu_dr( rpoints, **kwargs ):
    """
    Derivative of the local chemical potential along 111, per temperature.

    For every temperature in ``Tlist`` an ``lda.lda`` object is built on a
    fixed ``scubic.sc`` potential.  The local chemical potential returned by
    ``get_localMu_t`` is interpolated over ``r111`` and differentiated with
    ``deriv`` at ``rpoints`` and at the ``r111`` grid itself.

    Recognised keyword arguments (each is popped from ``kwargs``):
    ``params_s``, ``params_g``, ``params_wIR``, ``params_wGR``,
    ``params_extents``, ``params_muBrent``, ``params_muBrentShift``,
    ``params_aS``, ``params_muPlus``, ``params_Natoms`` and ``Tlist``.

    Returns a dict that maps each temperature to a dict with the keys
    ``r111``, ``n111``, ``Ut111``, ``localMu_t``, ``dmu_dr``, ``dmu_dr111``,
    ``num``, ``T/t0`` and ``S/N``.

    NOTE(review): SOURCE lost its indentation; the nesting below is
    reconstructed from the data flow - confirm against the original file.
    """
    s = kwargs.pop('params_s', 7.)
    g = kwargs.pop('params_g', 3.666)
    wIR = kwargs.pop('params_wIR', 47.)
    wGR = kwargs.pop('params_wGR', 47./1.175)
    extents = kwargs.pop('params_extents', 31.)
    direc = '111'  # not used below
    mu0 = 'halfMott'
    muBrent = kwargs.pop('params_muBrent',(-0.2,0.3))
    muBrentShift = kwargs.pop('params_muBrentShift', 0.)
    aS = kwargs.pop('params_aS', 300.)
    muPlus = kwargs.pop('params_muPlus', 0.00 )
    Natoms = kwargs.pop('params_Natoms', None)

    select = 'nlce'

    #print
    #print "muPlus = ", muPlus
    pot = scubic.sc(allIR=s, allGR=g, allIRw=wIR, allGRw=wGR)

    Tlist = kwargs.pop('Tlist', [0.036])
    outdict = {}
    for TT, Tval in enumerate(Tlist):
        # Progress indicator: index printed without a newline, then flushed.
        print TT,
        sys.stdout.flush()
        logger.warning('working on Tval = {:0.4f}'.format(Tval) )
        # Without an atom number the chemical potential is set through
        # globalMu/halfMottPlus; with one, Natoms and the muBrent settings
        # are passed instead.
        if Natoms is None:
            lda0 = lda.lda(potential = pot, Temperature=Tval, a_s=aS, \
                           override_npoints = 240,\
                           extents=extents, \
                           globalMu=mu0, halfMottPlus=muPlus,\
                           verbose=False, \
                           select = select,\
                           ignoreExtents=False, ignoreSlopeErrors=True, \
                           ignoreMuThreshold=True)
        else:
            lda0 = lda.lda(potential = pot, Temperature=Tval, a_s=aS, \
                           override_npoints = 240,\
                           extents=extents, \
                           Natoms = Natoms,\
                           muBrent=muBrent, muBrentShift=muBrentShift,\
                           verbose=False, \
                           select = select,\
                           ignoreExtents=False, ignoreSlopeErrors=True, \
                           ignoreMuThreshold=True)

        r111, n111 = lda0.getDensity( lda0.globalMu, lda0.T)
        localMu_t = lda0.get_localMu_t( lda0.globalMu )
        localMu_t_f = extrap1d( interp1d( r111, localMu_t ) )
        # This local name shadows the function's own name inside the body.
        dmu_dr = deriv( rpoints, localMu_t_f )
        dmu_dr111 = deriv( r111, localMu_t_f )

        t0 = lda0.tunneling_111.min()

        # Need to also get the value of T/t0 and the overall S/N
        # (only ``_entrbulk`` of this tuple is used below).
        _spibulk, _spi, _r111, _n111, _U111, _t111, _entrbulk, _entr111,\
            _lda_num, _density111, _k111, _k111htse_list = \
            lda0.getBulkSpi(Tspi=Tval/t0, inhomog=True, \
                            spiextents=extents, entextents=extents, do_k111=False)

        Tdict = {
            'r111':r111 ,\
            'n111':n111 ,\
            'Ut111':lda0.onsite_111 / lda0.tunneling_111 ,\
            'localMu_t':localMu_t ,\
            'dmu_dr': dmu_dr ,\
            'dmu_dr111': dmu_dr111 ,\
            'num':lda0.Number,\
            'T/t0': Tval/t0 ,\
            'S/N':_entrbulk ,\
            }
        outdict[ Tval ] = Tdict
    return outdict
import numpy as np
import os
import matplotlib.pyplot as plt
from util import imread, show_eigenface, show_reconstruction, performance
from pca import pca
from lda import lda

if __name__ == '__main__':
    filepath = os.path.join('Yale_Face_Database', 'Training')
    H, W = 231, 195
    X, y = imread(filepath, H, W)

    # PCA to 31 dimensions first, then LDA on the centred PCA coordinates.
    eigenvalues_pca, eigenvectors_pca, X_mean = pca(X, num_dim=31)
    X_pca = eigenvectors_pca.T @ (X - X_mean)
    eigenvalues_lda, eigenvectors_lda = lda(X_pca, y)

    # Transform matrix
    U = eigenvectors_pca @ eigenvectors_lda
    print('U shape: {}'.format(U.shape))

    # show top 25 eigenface
    show_eigenface(U, 25, H, W)

    # reduce dim (projection)
    # The data is centred with X_mean before projecting, as for X_pca above.
    # The reconstruction below adds X_mean back, so projecting the uncentred
    # X would shift every recovered image by U @ U.T @ X_mean.
    Z = U.T @ (X - X_mean)

    # recover
    X_recover = U @ Z + X_mean
    show_reconstruction(X, X_recover, 10, H, W)

    # accuracy
def optimal_FixedRadius(**kwargs):
    """
    This function takes fixed values of s0, wL, wC and finds the value of
    green that would be required to make a sample with a radius that is a
    fixed fraction of the lattice beam waist.

    The value of the fraction is hardcoded in the function (0.32).

    Keyword arguments (with defaults): ``s0`` (7.), ``wL`` (47.), ``wC``
    (40.), ``a_s`` (650.), ``T_Er`` (0.2) and ``extents`` (40.).

    Returns the list ``[gOptimal, EtaEvap, Number, Entropy/Number, radius,
    radius/wL]`` evaluated at the optimal green compensation.

    Raises:
        Exception: errors raised while evaluating the merit function are
            re-raised, except the known "too much green" ones, which are
            turned into a penalty value.
    """
    fraction = 0.32

    s0 = kwargs.get('s0', 7.)
    wL = kwargs.get('wL', 47.)
    wC = kwargs.get('wC', 40.)
    alpha = wL / wC
    a_s = kwargs.get('a_s', 650.)
    T_Er = kwargs.get('T_Er', 0.2)
    extents = kwargs.get('extents', 40.)

    # Error texts that are all caused by too much green; each is answered
    # with a large merit value that acts as a penalty.
    too_much_green = (
        'Bottom of the band has a negative slope',
        # the chemical potential comes too close to the threshold and atoms
        # accumulate on the beams
        'Radial density profile along 111 has a positive slope',
        # the chemical potential is above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # the chemical potential is above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )

    def merit(g0):
        """Squared distance between the sample radius and ``fraction*wL``."""
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # The lda within the optimization loop is told to ignore errors
            # of the density distribution going beyond the extents.
            # This will be checked after the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er,
                           a_s=a_s, globalMu='halfMott', extents=extents,
                           ignoreExtents=True)

            return (fraction * wL - lda0.getRadius())**2.
        except Exception as e:
            # ``str(e)`` rather than ``e.message``: the attribute only
            # exists on Python 2, and on Python 3 the lookup itself would
            # raise and hide the real error.
            message = str(e)
            if any(text in message for text in too_much_green):
                return 1e6
            if 'vanish' in message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    g0bounds = (1., min(s0, (4. * s0 - 2. * np.sqrt(s0)) / (4. * (alpha**2.))))

    res = minimize_scalar(merit, bounds=g0bounds, tol=1e-4,
                          method='bounded')
    gOptimal = res.x

    # Final evaluation without ``ignoreExtents``, so extent problems surface
    # here.
    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)
    return [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number,
            ldaOpt.Entropy / ldaOpt.Number, ldaOpt.getRadius(),
            ldaOpt.getRadius() / wL]
def optimal(**kwargs):
    """
    This function takes fixed values of s0, wL, wC and optimizes the
    evaporation figure of merit, eta_F, by using the green compensation as
    a variable.

    Keyword arguments (with defaults): ``s0`` (7.), ``wL`` (47.), ``wC``
    (40.), ``a_s`` (650.), ``T_Er`` (0.2) and ``extents`` (40.).  When
    ``Number`` is given, atom numbers above it are penalised.

    Returns the list ``[gOptimal, EtaEvap, Number, Entropy/Number, radius,
    radius/wL, DeltaEvap]`` evaluated at the optimal green compensation.

    Raises:
        Exception: errors raised while evaluating the merit function are
            re-raised, except the known "too much green" ones, which are
            turned into a penalty value.
    """
    s0 = kwargs.get('s0', 7.)
    wL = kwargs.get('wL', 47.)
    wC = kwargs.get('wC', 40.)
    alpha = wL / wC
    a_s = kwargs.get('a_s', 650.)
    T_Er = kwargs.get('T_Er', 0.2)
    extents = kwargs.get('extents', 40.)

    if 'Number' in kwargs:
        N0 = kwargs['Number']

        def Npenalty(Num):
            """Return ``exp((Num - N0)/1e5)**4`` above ``N0``, else 1."""
            p = 4.
            if Num > N0:
                return np.exp((Num - N0) / 1e5)**p
            return 1.

    def penalty(x, p):
        """
        This function is used to penalize EtaF < 1, which amounts to
        spilling out along the lattice beams.
        """
        if x < 1.:
            return np.exp(-(x - 1.))**p
        return x

    # Error texts that are all caused by too much green; each is answered
    # with a large merit value that acts as a penalty.
    too_much_green = (
        'Bottom of the band has a negative slope',
        # the chemical potential comes too close to the threshold and atoms
        # accumulate on the beams
        'Radial density profile along 111 has a positive slope',
        # the chemical potential is above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # the chemical potential is above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )

    def merit(g0):
        """Penalised eta_F for the green compensation ``g0``."""
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # The lda within the optimization loop is told to ignore errors
            # of the density distribution going beyond the extents.
            # This will be checked after the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er,
                           a_s=a_s, globalMu='halfMott', extents=extents,
                           ignoreExtents=True, select='htse')

            etaFstar = penalty(lda0.etaF_star, 5)
            if 'Number' in kwargs:
                return etaFstar * Npenalty(lda0.Number)
            return etaFstar
        except Exception as e:
            # ``str(e)`` rather than ``e.message``: the attribute only
            # exists on Python 2, and on Python 3 the lookup itself would
            # raise and hide the real error.
            message = str(e)
            if any(text in message for text in too_much_green):
                return 1e4
            if 'vanish' in message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    g0bounds = (0., min(s0, s0 / (alpha**2.)))

    res = minimize_scalar(merit, bounds=g0bounds, tol=4e-2,
                          method='bounded')
    gOptimal = res.x

    # Final evaluation without ``ignoreExtents``, so extent problems surface
    # here.
    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)
    return [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number,
            ldaOpt.Entropy / ldaOpt.Number, ldaOpt.getRadius(),
            ldaOpt.getRadius() / wL, ldaOpt.DeltaEvap]
# standardize # X_cancer = standardize(X_cancer) # normalize X_cancer = normalize(X_cancer) # y_cancer = y_cancer.reshape(-1,1) #%% # repeated kfold test # wine dataset import warnings warnings.filterwarnings("ignore") # classifier options ld = lda() clf = LinearDiscriminantAnalysis() # ld = LogReg(learning_rate=0.001) # clf = LogisticRegression(solver='liblinear',C=1000) score = [] scorec = [] for i in range(100): for train_index, test_index in kfold_index(5, X_wine): # print("TRAIN:", train_index, "TEST:", test_index) X_train, X_test = X_wine[train_index], X_wine[test_index] y_train, y_test = y_wine[train_index], y_wine[test_index] # project ld.fit(X_train, y_train) y_pred = ld.predict(X_test) score.append(evaluate_acc(y_test, y_pred))
def resultBtn_click():
    """Compute the article features and show them in ``resultText``.

    The title and content are read from ``textTitle`` and ``textContent``,
    split into words, and turned into the feature values printed below.
    The same values, joined by single spaces, are inserted at the end of
    ``resultText``.

    Several features are fixed constants rather than measurements: the
    link/video/image counts, ``rateNonStopWords``, the workday/weekend flags
    and the three positive-polarity values.  The ``comboImages``,
    ``comboVideos``, ``comboLinks`` and ``comboDay`` selections were read by
    the previous version but never used, so they are no longer read.
    """
    def _words(raw):
        """Split *raw* on whitespace, strip punctuation, drop empty tokens."""
        stripped = (word.strip(string.punctuation) for word in raw.split())
        # One filtering pass replaces the repeated ``list.remove("")`` loop.
        return [word for word in stripped if word != ""]

    content = _words(str(textContent.get("1.0", 'end-1c')))
    title = _words(str(textTitle.get("1.0", 'end-1c')))

    numLinks = 5  # Number of Links
    numVideos = 0  # Number of Videos
    numImages = 2  # Number of Images

    countwordsT = countWords(title)  # Number of Words Title
    countwordsC = countWords(content)  # Number of Words Content
    # NOTE(review): in SOURCE the next assignment and the one to
    # ``rateUniqueNonStopWords`` appear to be commented out, yet both names
    # are printed and written below, which would raise NameError.  They are
    # restored with the expressions given in SOURCE - confirm that
    # ``countUnique`` and ``uniqueWords`` exist.
    countunique = countUnique(content)  # Number of Unique Words
    nonstopCount = nonStopCount(content)  # Number of non-Stop Words
    rateNonStopWords = 0.999999995192  # Rate of non-Stop Words
    # Rate of Unique non-Stop Words
    rateUniqueNonStopWords = nonStopCount(uniqueWords(content)) / nonstopCount
    average_token_length = averageWordLength(content)  # Average Words Length

    content_blob = TextBlob(' '.join(content))
    title_blob = TextBlob(' '.join(title))
    global_subjectivity = content_blob.subjectivity
    title_subjectivity = title_blob.subjectivity
    global_sentiment_polarity = content_blob.polarity
    title_sentiment_polarity = title_blob.polarity

    # The topic model is run on the file 'content.txt', not on the text read
    # from the widget above.
    LDA = lda('content.txt')

    print('n_tokens_title =',countwordsT)
    print('n_tokens_content =',countwordsC)
    print('n_unique_tokens =',countunique)
    print('n_non_stop_words =',rateNonStopWords)
    print('n_non_stop_unique_tokens =',rateUniqueNonStopWords)
    print('num_href =',numLinks)
    print('num_imgs =',numImages)
    print('num_videos =',numVideos)
    print('average_token_length =',average_token_length)
    num_keywords = num_keyword(" ".join(title))
    print('num_keywords =', num_keywords)

    is_workday = 1
    is_weekend = 0
    print('is_workday=', is_workday)
    print('is_weekend=', is_weekend)

    print('LDA00 =',LDA[0][1])
    print('LDA01 =',LDA[1][1])
    print('LDA02 =',LDA[2][1])
    print('LDA03 =',LDA[3][1])
    print('LDA04 =',LDA[4][1])

    print('global_subjectivity =',global_subjectivity)
    print('global_sentiment_polarity=', global_sentiment_polarity)

    avg_positive_polarity = 0.35
    min_positive_polarity = 0.1
    max_positive_polarity = 0.75
    abs_title_sub = abs_title_subjectivity(title_subjectivity)
    abs_title_sentiment_polarity = abs(title_sentiment_polarity)

    print('avg_positive_polarity=',avg_positive_polarity)
    print('min_positive_polarity=',min_positive_polarity)
    print('max_positive_polarity=',max_positive_polarity)
    print('title_subjectivity=',title_subjectivity)
    print('title_sentiment_polarity=',title_sentiment_polarity)
    print('abs_title_subjectivity=',abs_title_sub)
    print('abs_title_sentiment_polarity=',abs_title_sentiment_polarity)

    # Same order and single-space separator as the original concatenation.
    values = [
        countwordsT, countwordsC, countunique, rateNonStopWords,
        rateUniqueNonStopWords, numLinks, numVideos, numImages,
        average_token_length, num_keywords, is_workday, is_weekend,
        LDA[0][1], LDA[1][1], LDA[2][1], LDA[3][1], LDA[4][1],
        global_subjectivity, global_sentiment_polarity,
        avg_positive_polarity, min_positive_polarity, max_positive_polarity,
        title_subjectivity, title_sentiment_polarity,
        abs_title_sub, abs_title_sentiment_polarity,
    ]
    result = ' '.join(str(value) for value in values)
    resultText.insert(END, result)
# Scale every feature by the largest magnitude found in its column.
imax = np.concatenate((iris.max(axis=0)*np.ones((1,5)),iris.min(axis=0)*np.ones((1,5))),axis=0).max(axis=0)
iris[:,:4] = iris[:,:4]/imax[:4]
labels = iris[:,4:]
iris = iris[:,:4]

# Shuffle rows and labels with the same permutation.  ``list(range(...))``
# is required because ``np.random.shuffle`` needs a mutable sequence and
# ``range`` is immutable on Python 3.
order = list(range(np.shape(iris)[0]))
np.random.shuffle(order)
iris = iris[order,:]
labels = labels[order,0]

w0 = np.where(labels==0)
w1 = np.where(labels==1)
w2 = np.where(labels==2)

import lda
newData,w = lda.lda(iris,labels,2)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(np.shape(newData))

# Figure 1: first two original features, one marker per class.
pl.plot(iris[w0,0],iris[w0,1],'ok')
pl.plot(iris[w1,0],iris[w1,1],'^k')
pl.plot(iris[w2,0],iris[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')

# Figure 2: the two LDA components.
pl.figure(2)
pl.plot(newData[w0,0],newData[w0,1],'ok')
pl.plot(newData[w1,0],newData[w1,1],'^k')
pl.plot(newData[w2,0],newData[w2,1],'vk')
pl.axis([-1.5,1.8,-1.5,1.8])
pl.axis('off')

import pca
x,y,evals,evecs = pca.pca(iris,2)
import itertools
import random
import sklearn
import time
from lda import lda
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

# Topic-model settings; alpha is derived from the number of topics.
nr_topics = 20
alpha = 50/float(nr_topics)
beta = 0.1
nr_runs = 1
top_words = 50
top_topics = 5

# NOTE: this rebinds the imported name ``lda`` to the model instance, so the
# imported ``lda`` callable is no longer reachable below this line.
lda = lda(alpha, beta, nr_topics)

# Do all 5 folds
for i in range(0, 5):
    # Set fold number
    lda.reset_to_next_fold(i)
    #Get info about documents
    dataset = lda.dataset
    #Get word counts per document
    word_counts = lda.doc_word
    # THESE ARE THE SETS NEEDED FOR THIS FOLD
    print "Building test and training set..."
    training_set = lda.dataset
    training_labels = lda.labels_dataset
    test_set = lda.testset
def optimal(**kwargs):
    """
    This function takes fixed values of s0, wL, wC and optimizes the
    evaporation figure of merit, eta_F, by using the green compensation as
    a variable.

    Optional keyword arguments and their defaults: ``s0`` = 7., ``wL`` = 47.,
    ``wC`` = 40., ``a_s`` = 650., ``T_Er`` = 0.2, ``extents`` = 40.  If
    ``Number`` is supplied, atom numbers above it are penalised.

    Returns:
        list: ``[gOptimal, EtaEvap, Number, Entropy/Number, radius,
        radius/wL, DeltaEvap]`` for the optimal green compensation.

    Raises:
        Exception: failures inside the merit function are re-raised unless
            their text is one of the known "too much green" messages, which
            are converted into a penalty value instead.
    """
    s0 = kwargs.get('s0', 7.)
    wL = kwargs.get('wL', 47.)
    wC = kwargs.get('wC', 40.)
    alpha = wL / wC
    a_s = kwargs.get('a_s', 650.)
    T_Er = kwargs.get('T_Er', 0.2)
    extents = kwargs.get('extents', 40.)

    has_number_goal = 'Number' in kwargs
    if has_number_goal:
        N0 = kwargs['Number']

        def Npenalty(Num):
            """Penalty factor for atom numbers above ``N0`` (1 otherwise)."""
            p = 4.
            if Num > N0:
                return np.exp((Num - N0) / 1e5)**p
            return 1.

    def penalty(x, p):
        """
        This function is used to penalize EtaF < 1, which amounts to
        spilling out along the lattice beams.
        """
        if x < 1.:
            return np.exp(-(x - 1.))**p
        return x

    # All of these are caused by too much green; a large value is returned
    # to assign a penalty.
    penalised_errors = (
        'Bottom of the band has a negative slope',
        # chemical potential too close to the threshold, atoms accumulate on
        # the beams
        'Radial density profile along 111 has a positive slope',
        # chemical potential above the evaporation threshold
        'Chemical potential exceeds the evaporation threshold',
        # chemical potential above the bottom of the band along 100
        'Chemical potential exceeds the bottom of the band along 100',
    )

    def merit(g0):
        """Penalised eta_F for the green compensation ``g0``."""
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)

            # The lda within the optimization loop is told to ignore errors
            # of the density distribution going beyond the extents.
            # This will be checked after the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er,
                           a_s=a_s, globalMu='halfMott', extents=extents,
                           ignoreExtents=True, select='htse')

            etaFstar = penalty(lda0.etaF_star, 5)
            if has_number_goal:
                return etaFstar * Npenalty(lda0.Number)
            return etaFstar
        except Exception as e:
            # ``e.message`` exists only on Python 2; ``str(e)`` carries the
            # same text and works on both major versions.
            text = str(e)
            for known in penalised_errors:
                if known in text:
                    return 1e4
            if 'vanish' in text:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    g0bounds = (0., min(s0, s0 / (alpha**2.)))

    res = minimize_scalar(merit, bounds=g0bounds, tol=4e-2,
                          method='bounded')
    gOptimal = res.x

    # Final evaluation without ``ignoreExtents``, so extent problems surface
    # here.
    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)
    return [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number,
            ldaOpt.Entropy / ldaOpt.Number, ldaOpt.getRadius(),
            ldaOpt.getRadius() / wL, ldaOpt.DeltaEvap]
#!/usr/bin/env python3 # -*- coding: utf-8 -*- from lda import lda # model intialization ldaModel = lda(K = 10) # load training corpus ldaModel.preprocessing(corpus = './data/corpus.txt') # variational EM algorithm ldaModel.EM() # topic words topicWords = ldaModel.topicWords(maxTopicWordsNum = 10) # topic inference given documents inference = ldaModel.topicInference(documents = './data/held_out_sentences.txt')
def Spi_vs_N(aS=200., Spi_inhomog=False, Tspi=0.9,
             savedir='dataplots/VaryN_Spi',
             mulist=[-0.15, -0.075, 0., 0.10, 0.18],
             bestForce=-1,
             spiextents=25., entextents=25., finegrid=False):
    """Run one LDA calculation per entry of mulist and collect the results.

    For every value in ``mulist`` a ``scubic.sc`` potential and an ``lda.lda``
    object are built with fixed, hard-coded settings (see the constants at
    the top of the body), ``getBulkSpi`` is evaluated, an inhomogeneity check
    figure is saved, and a record dict is appended to a list. After the loop
    the list is handed to ``plot_spis``.

    Parameters:
        aS          -- forwarded to lda.lda as ``a_s``; also stored in each
                       record under 'aS'.
        Spi_inhomog -- forwarded as ``inhomog`` to getBulkSpi,
                       getSpiFineGrid and plot_spis.
        Tspi        -- forwarded as ``Tspi`` to getBulkSpi / getSpiFineGrid;
                       stored in each record under 'Tspi'.
        savedir     -- prefix for the saved figures and stored in each
                       record. The figure path is built by plain string
                       concatenation (savedir + 'Inhomog/...'), so no path
                       separator is inserted after savedir.
        mulist      -- iterable of values passed one at a time to lda.lda
                       as ``halfMottPlus``. It is only read, never mutated.
        bestForce   -- index into the list of records selecting the one that
                       is returned; also forwarded to plot_spis.
        spiextents, entextents
                    -- forwarded unchanged to getBulkSpi / getSpiFineGrid.
        finegrid    -- when true, getSpiFineGrid is also evaluated (with
                       numpoints=320); otherwise the three '*_fine' record
                       entries are None.

    Returns:
        The record dict ``spis[bestForce]``.
    """
    # Fixed settings used for every point of the scan.
    s = 7.
    g = 3.666
    wIR = 47.
    wGR = 47. / 1.175
    T = 0.035
    extents = 30.
    mu0 = 'halfMott'
    select = 'nlce'

    spis = []
    for tag, muPlus in enumerate(mulist):
        # Written so that the output is the same on Python 2 and Python 3.
        # The two spaces reproduce what the former statement
        # `print "muPlus = ", muPlus` emitted.
        print("")
        print("muPlus =  %s" % (muPlus,))

        pot = scubic.sc(allIR=s, allGR=g, allIRw=wIR, allGRw=wGR)

        lda0 = lda.lda(potential=pot, Temperature=T, a_s=aS,
                       extents=extents,
                       globalMu=mu0, halfMottPlus=muPlus,
                       verbose=True,
                       select=select,
                       ignoreExtents=False, ignoreSlopeErrors=True,
                       ignoreMuThreshold=True)

        (spibulk, spi, r111, n111, U111, t111, entrbulk, entr111,
         lda_num, density111) = lda0.getBulkSpi(
            Tspi=Tspi, inhomog=Spi_inhomog,
            spiextents=spiextents, entextents=entextents)

        if finegrid:
            r111_fine, spi111_fine, n111_fine = lda0.getSpiFineGrid(
                Tspi=Tspi, numpoints=320,
                inhomog=Spi_inhomog, spiextents=spiextents,
                entextents=entextents)
        else:
            r111_fine, spi111_fine, n111_fine = None, None, None

        spis.append({
            'SpiBulk': spibulk,
            'spi111': spi,
            'r111': r111,
            'n111': n111,
            'U111': U111,
            't111': t111,
            'entrbulk': entrbulk,
            'entr111': entr111,
            'Number': lda0.Number,
            'ldanum': lda_num,
            'dens111': density111,
            'Tn': T,
            'Tspi': Tspi,
            'aS': aS,
            'savedir': savedir,
            'r111_fine': r111_fine,
            'spi111_fine': spi111_fine,
            'n111_fine': n111_fine,
        })

        # Figure to check inhomogeneity. Only the figure is used here; the
        # full unpacking is kept so a change in the number of values
        # returned by CheckInhomog is still noticed.
        fig111, binresult, peak_dens, radius1e, peak_t, output = \
            lda.CheckInhomog(lda0, closefig=True, n_ylim=(-0.1, 2.0))

        figfname = savedir + 'Inhomog/{:0.3f}gr_{:03d}_{}_T{:0.4f}Er.png'.\
            format(g, tag, select, T)
        fig111.savefig(figfname, dpi=300)

    plot_spis(spis, inhomog=Spi_inhomog, bestForce=bestForce)
    return spis[bestForce]
import sys
import numpy as np
from matplotlib import pyplot as plt
import lda

# Demo script: read two data sets from the file named on the command line,
# plot them, run lda.lda on them and draw the resulting vector w.
if __name__ == '__main__':
    # Give a usage line instead of a bare IndexError when no file is named.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <data-file>' % sys.argv[0])
    filename = sys.argv[1]

    pos_mat, neg_mat = lda.read_data(filename)
    lda.plot_data(pos_mat, neg_mat)

    # print(...) with a single argument produces the same output on
    # Python 2 and Python 3.
    print(pos_mat)
    print(neg_mat)

    # NOTE(review): the negative set is passed first here; confirm this is
    # the argument order lda.lda expects.
    w, project_points = lda.lda(neg_mat, pos_mat)
    # Flip w so that its second component is non-negative.
    if w[1] < 0:
        w = [-x for x in w]

    # The double space matches what `print 'w = ', w` used to emit.
    print('w =  %s' % (w,))
    print('project_points =  %s' % (project_points,))

    # draw vector w (a segment from the origin to w)
    plt.plot(*zip((0, 0), w), color='blue')
    plt.axis([0, 1, 0, 1])
    plt.show()
def optimal_FixedRadius(**kwargs):
    """Find the value of green that gives a sample of fixed relative radius.

    Takes fixed values of s0, wL and wC and searches for the value of green
    (g0) for which the radius reported by ``lda.lda(...).getRadius()`` is a
    fixed fraction of the lattice beam waist wL. The fraction is hard-coded
    in the function (0.32). The search is a bounded scalar minimization of
    the squared difference between ``fraction * wL`` and the radius.

    Keyword arguments (all optional):
        s0      -- passed to scubic.sc as allIR (default 7.)
        wL      -- passed to scubic.sc as allIRw (default 47.)
        wC      -- passed to scubic.sc as allGRw (default 40.)
        a_s     -- passed to lda.lda (default 650.)
        T_Er    -- passed to lda.lda as Temperature (default 0.2)
        extents -- passed to lda.lda (default 40.)

    Returns:
        A list ``[gOptimal, EtaEvap, Number, Entropy/Number, radius,
        radius/wL]`` evaluated at the optimum.

    Raises:
        Any exception from the merit evaluation that is not one of the
        known "too much green" failures is re-raised unchanged. The final
        evaluation at the optimum runs without ignoreExtents, so errors
        suppressed during the search can surface there.
    """
    fraction = 0.32
    s0 = kwargs.get('s0', 7.)
    wL = kwargs.get('wL', 47.)
    wC = kwargs.get('wC', 40.)
    alpha = wL / wC
    a_s = kwargs.get('a_s', 650.)
    T_Er = kwargs.get('T_Er', 0.2)
    extents = kwargs.get('extents', 40.)

    # Failures that are all caused by too much green: the band bottom gets a
    # negative slope, atoms accumulate on the beams as the chemical potential
    # comes too close to the threshold, or the chemical potential is above
    # the evaporation threshold / the bottom of the band along 100.
    # Each of them is turned into a large penalty instead of an error.
    too_much_green = (
        'Bottom of the band has a negative slope',
        'Radial density profile along 111 has a positive slope',
        'Chemical potential exceeds the evaporation threshold',
        'Chemical potential exceeds the bottom of the band along 100',
    )

    def merit(g0):
        """Squared distance between the target and the actual radius."""
        try:
            pot = scubic.sc(allIR=s0, allGR=g0, allIRw=wL, allGRw=wC)
            # The lda within the optimization loop is told to ignore errors
            # of the density distribution going beyond the extents.
            # This will be checked after the optimization is done.
            lda0 = lda.lda(potential=pot, Temperature=T_Er,
                           a_s=a_s, globalMu='halfMott', extents=extents,
                           ignoreExtents=True)
            # (An earlier signed variant, fraction - radius/wL, was left
            # commented out in the original alongside a brentq call.)
            return (fraction * wL - lda0.getRadius())**2.
        except Exception as e:
            # `e.message` only exists on Python 2; fall back to str(e) so
            # the handler does not itself fail with AttributeError and hide
            # the real error on Python 3.
            message = getattr(e, 'message', None) or str(e)
            if any(text in message for text in too_much_green):
                # return large value to assign penalty
                return 1e6
            if 'vanish' in message:
                # this is caused by insufficient extents
                print("extents = %.1f" % extents)
            raise

    g0bounds = (1., min(s0, (4. * s0 - 2. * np.sqrt(s0)) / (4. * (alpha**2.))))

    res = minimize_scalar(merit, bounds=g0bounds, tol=1e-4,
                          method='bounded')
    gOptimal = res.x

    # Re-evaluate at the optimum, this time without ignoreExtents.
    potOpt = scubic.sc(allIR=s0, allGR=gOptimal, allIRw=wL, allGRw=wC)
    ldaOpt = lda.lda(potential=potOpt, Temperature=T_Er,
                     a_s=a_s, globalMu='halfMott', extents=extents)
    return [gOptimal, ldaOpt.EtaEvap, ldaOpt.Number,
            ldaOpt.Entropy / ldaOpt.Number, ldaOpt.getRadius(),
            ldaOpt.getRadius() / wL]