from numpy.lib.scimath import logn


def entropy(num_list, base=2):
    """Shannon entropy of a probability distribution in the given log base."""
    assert base > 0
    ent = 0.0
    for p in num_list:
        if p > 0.0:
            ent += -p * logn(base, p)
    return ent
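A quick sanity check of the function above, assuming it is in scope: a uniform distribution over four outcomes carries exactly 2 bits, and zero-probability entries are skipped.

print(entropy([0.25, 0.25, 0.25, 0.25]))  # 2.0 bits for a uniform 4-way split
print(entropy([0.5, 0.5, 0.0]))           # 1.0 bit; the zero probability is skipped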
import operator
import pickle

from math import e
from numpy.lib.scimath import logn


def TF_IDF_Replies(screen_name):
    # pickle.dump(users, open("repliesMentioning_" + str(screen_name) + ".p", "wb"))
    users = pickle.load(open("repliesMentioning_" + str(screen_name) + ".p", "rb"))
    # TF(t)  = (number of times term t appears in a document) / (total number of terms in the document)
    # IDF(t) = log_e(total number of documents / number of documents containing term t)
    # Each user's replies are treated as one document.
    idf = {}
    for u in users:
        print(u)
        tweets = users[u]
        tf = {}
        totalWords = 0
        for t in tweets:
            print(t)
            words = t.lower().split()
            totalWords += len(words)
            for w in words:
                idf.setdefault(w, {})
                idf[w][u] = 0
                tf.setdefault(w, 0)
                tf[w] += 1
        tfFinal = {}
        idfFinal = {}
        for w in tf:
            tfFinal[w] = float(tf[w]) / float(totalWords)
        for w in idf:
            totalNumDocs = len(users)
            numDocWithW = len(idf[w])
            idfFinal[w] = logn(e, float(totalNumDocs) / float(numDocWithW))
        tfIDF = {}
        for w in idfFinal:
            wTF = tfFinal[w] if w in tfFinal else 1
            tfIDF[w] = wTF * idfFinal[w]
        sorted_tfIDF = sorted(tfIDF.items(), key=operator.itemgetter(1), reverse=True)
        for w, v in sorted_tfIDF[:15]:  # 15 highest-scoring terms for this user
            print(w)
        print()
import numpy as np
import scipy as sp
import scipy.stats
from numpy.lib.scimath import logn


def scale_dof_obj(self, scale, dof):
    base = np.exp(1)  # fitted params are invariant to this logarithm base (e.g. 10, or e)
    nfalse = len(self.alteqnull) - np.sum(self.alteqnull)
    imax = int(np.ceil(self.qmax * nfalse))  # count only the non-zero dof components
    p = sp.stats.chi2.sf(self.lrtsort[0:imax] / scale, dof)
    logp = logn(base, p)
    r = logn(base, self.qnulllrtsort[0:imax]) - logp
    if self.abserr:
        err = np.absolute(r).sum()
    else:  # mean squared error
        err = (r * r).mean()
    return err, imax
def variable_size(self):
    """
    Get the number of symbols needed to encode one phenotype variable.

    :return: Required size.
    """
    arity = Configuration.Configuration.alphabet.size()
    # Number of distinct values the variable can take at the given precision.
    a = 1 + (self.max_val - self.min_val) * pow(10, self.decimals)
    size = int(logn(arity, a))
    return size
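A worked example under hypothetical parameters: with a decimal alphabet (arity 10), min_val = 0, max_val = 1 and decimals = 3, there are a = 1 + 1 * 10^3 = 1001 representable values and log10(1001) ~= 3.0004, so the variable is allocated 3 symbols. Note that int() truncates rather than rounds up, so a value count just above an exact power of the arity gets one symbol fewer than ceil(log_arity(a)) would give.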
from bitarray import bitarray
from math import e
from numpy.lib.scimath import logn


def __init__(self, size=0):
    # size:     the number of items to store
    # bit_size: the length of the bitarray
    # hashnum:  how many hash functions are used
    self.size = size
    # --------------------------------------------------
    # bitarray size should use this formula:
    #   bit_size = -size * ln(p) / (ln 2)^2
    # where p == 0.01 is the target false-positive rate
    # --------------------------------------------------
    self.bit_size = int(-self.size * logn(e, 0.01) / (logn(e, 2) ** 2))
    self.bitarray = bitarray(self.bit_size)
    # --------------------------------------------------
    # hash function count should use this formula:
    #   hashnum = ln2 * bitarray size / item count
    # --------------------------------------------------
    self.hashnum = int(logn(e, 2) * self.bit_size / self.size)
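A standalone check of the two Bloom-filter sizing formulas above, using math.log directly; the numbers (1000 items, 1% target false-positive rate) are hypothetical.

from math import log

n, p = 1000, 0.01
m = int(-n * log(p) / log(2) ** 2)  # bits needed: 9585
k = int(log(2) * m / n)             # hash functions: 6
print(m, k)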
from math import e
from numpy.lib.scimath import logn


def calLikelihood(self):
    # Total log-likelihood of the training data under the Gaussian mixture.
    sumlikehood = 0.0
    for n in range(0, len(self.traindata)):
        likehood = 0.0
        for k in range(0, self.Mnum):
            likehood += self.piList[k] * self.guassList[k].N(self.traindata[n])
        sumlikehood += logn(e, likehood)
    self.likehoods = sumlikehood
    if sumlikehood == 0:
        self.likehoods = 0.00001
def fit2(fix_ori, xaxis):
    # Normalize the decay curve and fit a double exponential to it.
    fix = np.zeros(len(fix_ori))
    l1 = len(fix_ori)
    l2 = 3 * l1 // 4
    l3 = l1 // 4
    for i in range(len(fix_ori)):
        fix[i] = fix_ori[i] / fix_ori[0]
    # v0 = [(logn(e, fix[5]) - logn(e, fix[l3])) / (xaxis[l3] - xaxis[5]), fix[0]]
    # Initial guess: split the single decay rate estimated on points 30..l2
    # into a fast and a slow component, each carrying half the amplitude.
    decay = (logn(e, fix[30]) - logn(e, fix[l2])) / (xaxis[l2] - xaxis[30])
    v0 = [decay * 2.0, fix[0] / 2.0, decay / 2.0, fix[0] / 2.0]
    print('Initial value: computed 30-%d [%7.3f %3.1f ][ %7.3f %3.1f]'
          % (l2, v0[0], v0[1], v0[2], v0[3]))
    vv = fmin(double_expdecay, v0, args=(fix, xaxis),
              maxiter=1000, maxfun=1000, full_output=True, disp=True)
    v = vv[0]
    chi_bi = vv[1]
    fitparam = [v[0], v[1], v[2], v[3], chi_bi]
    print('[%7.3f %3.1f ][%7.3f %3.1f]' % (v[0], v[1], v[2], v[3]),
          ' chi_bi %6.2e' % chi_bi)
    return fitparam
from math import e
from numpy.lib.scimath import logn


def HPrime(arr):
    # Shannon diversity components: b_i are species proportions, and the
    # first returned element is sum(b_i * ln(b_i)), the negative of H'.
    bi = []
    lnOfbi = []
    bi_x_lnOfbi = []
    speciesTotal = sum(arr, 0)
    for i in range(len(arr)):
        bi.append(arr[i] / speciesTotal)
    for i in range(len(bi)):
        lnOfbi.append(logn(e, bi[i]))
    for i in range(len(bi)):
        bi_x_lnOfbi.append(bi[i] * lnOfbi[i])
    return [sum(bi_x_lnOfbi), speciesTotal / len(arr), len(arr)]
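A minimal usage sketch with hypothetical species counts, assuming the function and imports above are in scope; the Shannon index H' is the negative of the first returned value.

counts = [10.0, 20.0, 30.0, 40.0]           # float counts avoid integer division
total_term, mean_count, richness = HPrime(counts)
print(-total_term)                           # H' ~= 1.28 for these proportions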
def DU1por():
    '''
    In [1]: from sympy import *
    In [2]: import numpy as np
    In [3]: x = Symbol('x')
    In [4]: y = x**2 + 1
    In [5]: yprime = y.diff(x)
    In [6]: yprime
    Out[6]: 2⋅x
    In [7]: f = lambdify(x, yprime, 'numpy')
    In [8]: f(np.ones(5))
    Out[8]: [ 2.  2.  2.  2.  2.]
    '''
    from numpy.lib.scimath import logn
    from math import log
    from math import e

    # x*y' - y^2*ln(x) + y = 0  =>  y' = (y^2*ln(x) - y)/x
    def f(x, y):
        return (y * y * logn(e, x) - y) / x

    def fmath(x, y):
        return (y * y * log(x, e) - y) / x

    y = [1.0] * 10
    x = [1.0] * 10
    h = 0.1
    # Explicit Euler with numpy's logn; print the error against the
    # exact solution y(x) = 1/(1 + ln(x)).
    for i in range(1, 10):
        x[i] = 1 + i * h
        y[i] = y[i - 1] + h * f(x[i - 1], y[i - 1])
        print(y[i] - (1 / (1 + logn(e, x[i]))))
    print('-----------------------------------')
    # Same scheme with math.log for comparison.
    y2 = [1.0] * 10
    x2 = [1.0] * 10
    for i in range(1, 10):
        x2[i] = 1 + i * h
        y2[i] = y2[i - 1] + h * fmath(x2[i - 1], y2[i - 1])
        print(y2[i] - (1 / (1 + log(x2[i], e))))
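The printed differences compare each step against the exact solution y(x) = 1/(1 + ln x). Substituting it into the ODE confirms this: y' = -y^2/x, so x*y' - y^2*ln(x) + y = -y^2*(1 + ln x) + y = -y + y = 0, and y(1) = 1 matches the initial condition used by all three solvers.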
def DU1por3():
    from numpy.lib.scimath import logn
    from math import e

    # x*y' - y^2*ln(x) + y = 0  =>  y' = (y^2*ln(x) - y)/x
    def f(x, y):
        return (y * y * logn(e, x) - y) / x

    y = [1.0] * 10
    x = [1.0] * 10
    h = 0.1
    # Bootstrap the first three steps with classical RK4 ...
    for i in range(1, 4):
        x[i] = 1 + i * h
        k1 = h * f(x[i - 1], y[i - 1])
        k2 = h * f(x[i - 1] + h / 2, y[i - 1] + k1 / 2)
        k3 = h * f(x[i - 1] + h / 2, y[i - 1] + k2 / 2)
        k4 = h * f(x[i - 1] + h, y[i - 1] + k3)
        y[i] = y[i - 1] + (k1 + 2 * k2 + 2 * k3 + k4) / 6
        print(y[i] - (1 / (1 + logn(e, x[i]))))
    # ... then continue with the 4-step Adams-Bashforth method.
    i = 4
    while i < 10:
        x[i] = 1 + i * h
        y[i] = y[i - 1] + (h / 24) * (55 * f(x[i - 1], y[i - 1])
                                      - 59 * f(x[i - 2], y[i - 2])
                                      + 37 * f(x[i - 3], y[i - 3])
                                      - 9 * f(x[i - 4], y[i - 4]))
        print(y[i] - (1 / (1 + logn(e, x[i]))))
        i += 1
import numpy as np
from numpy.lib.scimath import logn


def evaluate_ppl(y_t_batch, y_h_batch):
    # Average negative log2 likelihood per token; y_t_batch holds predicted
    # distributions, y_h_batch the one-hot targets (all-zero rows mark padding).
    y_t_batch = np.clip(y_t_batch, 1e-20, 1 - 1e-20)
    total_loss = []
    for n in range(len(y_t_batch)):
        H = 0.
        seq_len = len(y_t_batch[n])
        for t in range(seq_len):
            if 1 in y_h_batch[n][t]:
                index = np.where(y_h_batch[n][t] == 1)[0][0]
                H += logn(2, y_t_batch[n][t][index])
                # H += np.sum(np.multiply(y_h_batch[n][t], logn(2, y_t_batch[n][t])))
            else:
                seq_len = t  # padding starts here; average over the real tokens
                break
        total_loss.append(H / seq_len)
    loss = -sum(total_loss) / len(total_loss)
    return loss
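Perplexity is 2 raised to this loss. A minimal sketch with hypothetical arrays (one sequence, two timesteps, a vocabulary of three), assuming the function and imports above:

y_t = np.array([[[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]])  # predicted distributions
y_h = np.array([[[1, 0, 0], [0, 1, 0]]])              # one-hot targets
loss = evaluate_ppl(y_t, y_h)
print(2 ** loss)                                      # perplexity, ~1.34 here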
def query_tfidf(doc_content, doc_count):
    # Build the tf-idf weighted vector for a query's terms. (Every key of tf
    # comes from doc_content, so no extra membership scan is needed.)
    tf = {}
    tfidf = {}
    list_of_term1 = []
    for term1 in doc_content:
        tf[term1] = tf.get(term1, 0) + 1
        list_of_term1.append(term1)
    c1 = list(set(list_of_term1))
    for token, freq in tf.items():
        df = get_df_inverted(token)
        if df != 0:
            idf = logn(10, float(doc_count) / df)
            tfidf[token] = freq * idf
    tfidf = normalize(tfidf)
    query = {"tokens": c1, "tfidf": tfidf}
    return query
def DU1por2():
    from numpy.lib.scimath import logn
    from math import e

    # x*y' - y^2*ln(x) + y = 0  =>  y' = (y^2*ln(x) - y)/x
    def f(x, y):
        return (y * y * logn(e, x) - y) / x

    y = [1.0] * 10
    x = [1.0] * 10
    h = 0.1
    # Classical RK4; print the error against the exact solution y(x) = 1/(1 + ln(x)).
    for i in range(1, 10):
        x[i] = 1 + i * h
        k1 = h * f(x[i - 1], y[i - 1])
        k2 = h * f(x[i - 1] + h / 2, y[i - 1] + k1 / 2)
        k3 = h * f(x[i - 1] + h / 2, y[i - 1] + k2 / 2)
        k4 = h * f(x[i - 1] + h, y[i - 1] + k3)
        y[i] = y[i - 1] + (k1 + 2 * k2 + 2 * k3 + k4) / 6
        print(y[i] - (1 / (1 + logn(e, x[i]))))
def gen_time():
    # Next event time: current time plus an exponential interarrival with
    # mean `ave` (relies on module-level `time` and `ave`).
    return time - ave * logn(e, 1 - random.random())
def documents_index():
    vector_space = []
    tokens = {}
    docFileNames = getFileNames("f:/ir/data")
    docs_dict = {}
    docFileName = docFileNames[0]
    docFile_list = gen_documents(docFileName)
    # Tokenize each page: drop stopwords, remove diacritics and punctuation.
    for pagedict in docFile_list:
        lines = '\n'.join((pagedict['TITLE'], pagedict['TEXT']))
        pageseq = int(pagedict['DOCSEQ'])
        word_list = [w for w in lines.split() if w not in arabicStopWords]
        text = ' '.join(word_list)
        word_list1 = process_text(text)  # remove diacritics/punctuation, tokenize
        terms = []
        for wordx in word_list1:
            stemAr = isri_light(wordx)  # light stemmer (result currently unused)
            # stemAr = isri_heavy(wordx)  # heavy stemmer
            terms.append(wordx)
        docs_dict[pageseq] = terms
    # Term frequencies per document, plus the postings lists used for idf.
    for doc_id, doc_content in docs_dict.items():
        tf = {}
        for term1 in doc_content:
            tf[term1] = tf.get(term1, 0) + 1
        c1 = list(set(doc_content))
        for token, freq in tf.items():
            tokens.setdefault(token, []).append((doc_id, freq))
        vector_space.append({"doc_id": doc_id, "tokens": c1, "tfidf": {}})
    doc_count = len(vector_space)
    dinv = {}
    for token, docs in tokens.items():
        dinv[token] = [len(docs), docs]
        idf = logn(10, float(doc_count) / float(len(docs)))
        for doc_id, tf in docs:
            tfidf = tf * idf
            for i in vector_space:
                if (i['doc_id'] == doc_id) and (token in i['tokens']):
                    i['tfidf'][token] = tfidf
    with open('f:/ir/inverted_index.json', 'w') as f:
        json.dump(dinv, f)
    for doc in vector_space:
        doc["tfidf"] = normalize(doc["tfidf"])
    return vector_space
import random

from math import e
from numpy.lib.scimath import logn


def exp(ave):
    # Inverse-CDF sampling: if U ~ Uniform(0, 1), then -ave * ln(1 - U) is an
    # exponential variate with mean `ave`. (Note: the name shadows math.exp.)
    return -ave * logn(e, 1 - random.random())
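A quick empirical check of the sampler above, assuming the function and imports are in scope; the mean of 5.0 is a hypothetical choice, and the sample mean should land close to it.

samples = [exp(5.0) for _ in range(100000)]
print(sum(samples) / len(samples))  # close to 5.0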
# coding:utf-8
import matplotlib.pyplot as plt
import numpy as np
from numpy.lib.scimath import logn
from math import e
import matplotlib as mpl

# Avoid garbled Chinese characters in plot text
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

x = np.linspace(0, 6, 120)
plt.plot(x, np.log(x) / np.log(0.5), 'y-', linewidth=2, label=u'log0.5(x)')
plt.plot(x, logn(e, x), 'b-', linewidth=2, label=u'loge(x)')
# plt.plot(x, np.log(x) / np.log(5))
plt.plot(x, logn(5, x), 'g-', linewidth=2, label=u'log5(x)')
plt.plot(x, np.log10(x), 'r-', linewidth=2, label=u'log10(x)')
plt.plot([1, 1, 1, 1], [-3, 0, 1, 5], '--', color='darkgray')
plt.axis([0, 2.5, -3.5, 5.5])
plt.legend(loc='lower right')  # legend position
plt.grid(True)
plt.show()
#!/usr/bin/env python
import pandas as pd
import matplotlib.pyplot as plt
from numpy.lib.scimath import logn
from math import e

origin = "~/qbb2015/stringtie/SRR072893/t_data.ctab"
df893 = pd.read_table(origin)

# Natural log of the positive FPKM values.
df = []
for item in df893["FPKM"]:
    if item > 0:
        df.append(logn(e, item))

plt.figure()
plt.hist(df, color='blue', bins=100)
plt.title("ln(FPKM) of SRR072893")
plt.xlabel("ln(FPKM)")
plt.ylabel("Frequency")
plt.savefig("plot3.png")
# Calibrate an AR(1)/Ornstein-Uhlenbeck model from the residual series
# (x, y, epsilon, and Xt are defined earlier in the script).
from scipy import stats
from numpy.lib.scimath import logn
from math import e
import numpy as np
from numpy import sqrt

for j in range(0, 59):
    x.append(epsilon[j])
    y.append(epsilon[j + 1])

gradient, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print("Gradient and intercept", gradient, intercept)

a = intercept
b = gradient
k = logn(e, b) * -252  # mean-reversion speed, annualized over 252 trading days
m = a / (1 - b)        # long-run mean
lam = []
for j in range(0, 59):
    lam.append(Xt[j + 1] - a - b * Xt[j])
lamvar = np.var(lam)
print(lamvar)
sigmaeq = sqrt(lamvar / (1 - b ** 2))  # equilibrium standard deviation
def _calc_zoom_level(self):
    d = self.options
    self.zoom_level = 1 + int(
        logn(self.zoom_step,
             d['rsphere'] / (abs(d['llcrnrx']) + abs(d['urcrnrx']))))
def chooseAtRandom(thelist):
    # Pick a uniformly random element of the list.
    idx = randrange(0, len(thelist))
    return thelist[idx]


def createRandomMapping():
    # Greedily assign increasing labels to unassigned neighbors; return the
    # last label placed before running out of candidates. Relies on the
    # module-level n, leights, F, neighbors, and neighborsNotAssigned.
    for i in range(1, 1 << n):
        for l in leights:
            keys = F.keys()
            neighs = neighbors(l)
            already = [F[j] for j in keys if j in neighs]
            if i in already:
                continue
            possibles = neighborsNotAssigned(l)
            if len(possibles) == 0:
                return i - 1
            h = chooseAtRandom(possibles)
            F[h] = i


if __name__ == '__main__':
    M_2 = createRandomMapping()
    print(M_2, F)
    print("sum-rate:", (logn(2, len(leights)) + logn(2, M_2)) / n)
def _max_zoom_level(self):
    self.max_zoom_level = logn(self.zoom_step, max(self.current_values.shape)) - 6
def sum_rate(H, n, r, tests):
    return (logn(2, 2 * Vc_size(H, n, r, tests)) + r) / (n + 1)