Example No. 1
from numpy.lib.scimath import logn  # logn(n, x) = log base n of x

def entropy(num_list, base=2):
    assert base > 0
    ent = 0.0
    for p in num_list:
        if p > 0.0:
            ent += -p * logn(base, p)
    return ent
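A quick usage check (my addition, not from the source repo): with the default base 2, a fair coin should come out to exactly one bit, and a uniform four-way choice to two bits.

print(entropy([0.5, 0.5]))   # -> 1.0
print(entropy([0.25] * 4))   # -> 2.0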
Example No. 2
from numpy.lib.scimath import logn

def entropy(num_list, base=2):
    assert base > 0
    ent = 0.0
    for p in num_list:
        if p > 0.0:
            ent += -p * logn(base, p)
    return ent
Example No. 3
import pickle
import operator
from numpy.lib.scimath import logn
from math import e

def TF_IDF_Replies(screen_name):
    # pickle.dump(users, open("repliesMentioning_"+str(screen_name)+".p", "wb"))
    users = pickle.load(open("repliesMentioning_" + str(screen_name) + ".p", "rb"))
    # TF(t) = (number of times term t appears in a document) / (total number of terms in the document)
    # IDF(t) = log_e(total number of documents / number of documents containing term t)

    idf = {}
    for u in users:
        print(u)

        tweets = users[u]

        tf = {}
        totalWords = 0
        for t in tweets:
            print(t)
            t = t.lower()
            words = t.split()
            totalWords += len(words)
            for w in words:
                idf.setdefault(w, {})
                idf[w][u] = 0  # mark that user u's document contains w
                tf.setdefault(w, 0)
                tf[w] += 1
        tfFinal = {}
        idfFinal = {}
        for w in tf:
            tfFinal[w] = float(tf[w]) / float(totalWords)

        for w in idf:
            totalNumDocs = len(users)
            numDocWithW = len(idf[w])
            idfFinal[w] = logn(e, float(totalNumDocs) / float(numDocWithW))
        tfIDF = {}
        for w in idfFinal:
            if w in tfFinal:
                wTF = tfFinal[w]
            else:
                wTF = 1
            tfIDF[w] = wTF * idfFinal[w]
        # for w in tfIDF:
        #     print(w + "," + str(tfIDF[w]))
        # print the 15 highest-scoring terms for this user
        sorted_tfIDF = sorted(tfIDF.items(), key=operator.itemgetter(1), reverse=True)
        t = 0
        for w, v in sorted_tfIDF:
            if t < 15:
                print(w)
            t += 1

        print()
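The TF and IDF formulas in the comments are easy to sanity-check by hand; a minimal sketch of mine with made-up counts, using the same natural-log idiom as the function above:

from numpy.lib.scimath import logn
from math import e

# a term appearing in 1 of 2 user documents -> idf = ln(2/1)
idf_term = logn(e, 2.0 / 1.0)     # ~0.693
# the term makes up 2 of a user's 4 words -> tf = 0.5
tf_term = 2.0 / 4.0
print(tf_term * idf_term)         # ~0.347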
Example No. 4
    def scale_dof_obj(self, scale, dof):
        base = np.exp(
            1
        )  # fitted params are invariant to the logarithm base (e.g. 10, or e)

        nfalse = (len(self.alteqnull) - np.sum(self.alteqnull))

        imax = int(np.ceil(self.qmax *
                           nfalse))  # only the non-zero dof components
        p = sp.stats.chi2.sf(self.lrtsort[0:imax] / scale, dof)
        logp = logn(base, p)
        r = logn(base, self.qnulllrtsort[0:imax]) - logp
        if self.abserr:
            err = np.absolute(r).sum()
        else:  # mean squared error
            err = (r * r).mean()
        return err, imax
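For reference, `sp.stats.chi2.sf` is the chi-squared survival function (1 - CDF), so `p` above holds upper-tail p-values of the scaled test statistics. A standalone sketch with invented statistics:

import numpy as np
from scipy import stats

lrt = np.array([6.0, 3.5, 1.2])      # hypothetical LRT statistics
p = stats.chi2.sf(lrt / 1.0, 1)      # scale=1.0, dof=1
print(p)                             # ~[0.0143, 0.0614, 0.2733]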
Example No. 5
 def variable_size(self):
     """
     Get the size needed to encode one variable of the phenotype.
     :return: Required size.
     """
     arity = Configuration.Configuration.alphabet.size()
     a = 1 + (self.max_val - self.min_val) * pow(10, self.decimals)
     size = int(logn(arity, a))
     return size
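A standalone check of the sizing arithmetic (my own, with made-up bounds and a binary alphabet): enumerating values from 0 to 10 at two decimal places needs 1001 distinct codes.

from numpy.lib.scimath import logn

arity = 2                              # hypothetical alphabet size
a = 1 + (10.0 - 0.0) * pow(10, 2)      # 1001 distinct values
print(int(logn(arity, a)))             # 9, since log2(1001) ~ 9.97

Note that `int()` truncates, so 9 binary symbols only distinguish 512 values; `math.ceil` would guarantee full coverage.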
Example No. 6
 def __init__(self, size=0):
     # size: the number of items to be stored
     # bit_size: the length of the bitarray
     # hashnum: how many hash functions are used
     self.size = size
     # --------------------------------------
     # the bitarray size should use this formula:
     # bit_size = -size * ln(p) / (ln 2)^2
     # where p == 0.01 is the target false-positive rate
     # --------------------------------------
     self.bit_size = int(-self.size * logn(e, 0.01) / (logn(e, 2)**2))
     self.bitarray = bitarray(self.bit_size)
     # --------------------------------------
     # the number of hash functions should use this formula:
     # hashnum = ln(2) * bit_size / size
     # --------------------------------------
     self.hashnum = int(logn(e, 2) * self.bit_size / self.size)
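These are the standard optimal Bloom-filter sizing formulas. A dependency-free check of the arithmetic for 1000 items at a 1% false-positive rate:

from math import log

n, p = 1000, 0.01
m = int(-n * log(p) / (log(2) ** 2))   # 9585 bits
k = int(log(2) * m / n)                # 6 hash functions
print(m, k)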
Example No. 7
 def calLikelihood(self):
     sumlikehood = 0.0
     for n in range(0, len(self.traindata)):
         likehood = 0.0
         for k in range(0, self.Mnum):
             likehood += self.piList[k] * self.guassList[k].N(
                 self.traindata[n])
         sumlikehood += logn(e, likehood)
     self.likehoods = sumlikehood
     if sumlikehood == 0:
         self.likehoods = 0.00001
Example No. 8
import numpy as np
from numpy.lib.scimath import logn
from math import e
from scipy.optimize import fmin

def fit2(fix_ori, xaxis):
    # double_expdecay is the residual function, defined elsewhere in the source
    fix = np.zeros(len(fix_ori))
    l1 = len(fix_ori)
    l2 = 3 * l1 // 4   # integer indices into fix/xaxis
    l3 = l1 // 4
    for i in range(len(fix_ori)):
        fix[i] = fix_ori[i] / fix_ori[0]   # normalize to the first point
    v0 = [1.4, fix[0], 1.4, fix[0]]
#   print('Initial value: used      ', '[%7.3f %3.1f]' % (v0[0], v0[1]))
#   v0 = [(logn(e, fix[5]) - logn(e, fix[l3])) / (xaxis[l3] - xaxis[5]), fix[0]]
#   print('Initial value: computed   5-', l3, '[%7.3f %3.1f]' % (v0[0], v0[1]))
    # estimate the decay rate from two points on the log-scaled curve
    decay = (logn(e, fix[30]) - logn(e, fix[l2])) / (xaxis[l2] - xaxis[30])
    v0 = [decay * 2.0, fix[0] / 2.0, decay / 2.0, fix[0] / 2.0]
    print('Initial value: computed  30-', l2, '[%7.3f %3.1f ][ %7.3f %3.1f]' % (v0[0], v0[1], v0[2], v0[3]))

    vv = fmin(double_expdecay, v0, args=(fix, xaxis), maxiter=1000, maxfun=1000, full_output=True, disp=True)
    v = vv[0]
    chi_mono = vv[1]
    fitparam = [v[0], v[1], v[2], v[3], vv[1]]
    print('[%7.3f %3.1f ][%7.3f %3.1f]' % (v[0], v[1], v[2], v[3]), ' chi_bi %6.2e' % chi_mono)
    return fitparam
Example No. 9
from numpy.lib.scimath import logn
from math import e

def HPrime(arr):
    bi = []
    lnOfbi = []
    bi_x_lnOfbi = []
    speciesTotal = sum(arr, 0)

    for i in range(len(arr)):
        bi.append(arr[i] / speciesTotal)
    for i in range(len(bi)):
        lnOfbi.append(logn(e, bi[i]))
    for i in range(len(bi)):
        bi_x_lnOfbi.append(bi[i] * lnOfbi[i])

    return [sum(bi_x_lnOfbi), speciesTotal / len(arr), len(arr)]
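`HPrime` returns sum(b_i * ln(b_i)), i.e. the negative of the Shannon diversity index H', along with the mean count and the species count. A quick check with made-up abundances:

counts = [10.0, 20.0, 30.0]
neg_H, mean_count, n_species = HPrime(counts)
print(-neg_H)   # H' ~= 1.011 for proportions 1/6, 1/3, 1/2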
Example No. 10
def DU1por():

    '''
    In [1]: from sympy import *
    In [2]: import numpy as np
    In [3]: x = Symbol('x')
    In [4]: y = x**2 + 1
    In [5]: yprime = y.diff(x)
    In [6]: yprime
    Out[6]: 2⋅x

    In [7]: f = lambdify(x, yprime, 'numpy')
    In [8]: f(np.ones(5))
    Out[8]: [ 2.  2.  2.  2.  2.]
    '''

    from numpy.lib.scimath import logn
    from math import log
    from math import e

    # x*y' - y^2*ln(x) + y = 0
    # y' = (y^2*ln(x) - y) / x

    def f(x, y):
        return (y * y * logn(e, x) - y) / x

    def fmath(x, y):
        return (y * y * log(x, e) - y) / x

    y = [1.0] * 10
    x = [1.0] * 10
    h = 0.1

    # explicit Euler with numpy's logn; start at i=1 so the initial
    # condition y[0] = 1 is kept
    for i in range(1, 10):
        x[i] = 1 + i * h
        y[i] = y[i - 1] + h * f(x[i - 1], y[i - 1])
        # error against the exact solution y = 1/(1 + ln x)
        print(y[i] - (1 / (1 + logn(e, x[i]))))

    print('-----------------------------------')

    # same scheme with math.log for comparison
    y2 = [1.0] * 10
    x2 = [1.0] * 10
    for i in range(1, 10):
        x2[i] = 1 + i * h
        y2[i] = y2[i - 1] + h * fmath(x2[i - 1], y2[i - 1])
        print(y2[i] - (1 / (1 + log(x2[i], e))))
Example No. 11
def DU1por3():

    from numpy.lib.scimath import logn
    from math import e

    # x*y' - y^2*ln(x) + y = 0
    # y' = (y^2*ln(x) - y) / x

    def f(x, y):
        return (y * y * logn(e, x) - y) / x

    y = [1.0] * 10
    x = [1.0] * 10
    h = 0.1

    # classical RK4 for the first three steps (the multistep scheme below
    # needs four starting values); start at i=1 to keep the initial condition
    for i in range(1, 4):
        x[i] = 1 + i * h
        k1 = h * f(x[i - 1], y[i - 1])
        k2 = h * f(x[i - 1] + h / 2, y[i - 1] + k1 / 2)
        k3 = h * f(x[i - 1] + h / 2, y[i - 1] + k2 / 2)
        k4 = h * f(x[i - 1] + h, y[i - 1] + k3)
        y[i] = y[i - 1] + (k1 + 2 * k2 + 2 * k3 + k4) / 6
        print(y[i] - (1 / (1 + logn(e, x[i]))))

    # continue with the 4-step Adams-Bashforth method:
    # y[i] = y[i-1] + (h/24)*(55*f[i-1] - 59*f[i-2] + 37*f[i-3] - 9*f[i-4])
    i = 4
    while i < 10:
        x[i] = 1 + i * h
        y[i] = y[i - 1] + (h / 24) * (55 * f(x[i - 1], y[i - 1]) - 59 * f(x[i - 2], y[i - 2]) + 37 * f(x[i - 3], y[i - 3]) - 9 * f(x[i - 4], y[i - 4]))
        print(y[i] - (1 / (1 + logn(e, x[i]))))
        i += 1
Example No. 12
import numpy as np
from numpy.lib.scimath import logn

def evaluate_ppl(y_t_batch, y_h_batch):
    y_t_batch = np.clip(y_t_batch, 1e-20, 1 - 1e-20)
    total_loss = []
    for n in range(len(y_t_batch)):
        H = 0.
        for t in range(len(y_t_batch[n])):
            if 1 in y_h_batch[n][t]:
                index = np.where(y_h_batch[n][t] == 1)[0][0]
                H += logn(2, y_t_batch[n][t][index])
                #H += np.sum(np.multiply(y_h_batch[n][t], logn(2, y_t_batch[n][t])))
            else:
                # the first all-zero reference row marks the end of the sequence
                loss = H / t
                break
        else:
            # no padding row found: average over the full sequence length
            loss = H / len(y_t_batch[n])
        total_loss.append(loss)
    loss = -sum(total_loss) / len(total_loss)
    return loss
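Since `H` accumulates base-2 log-probabilities, the returned loss is the average bits per token, and perplexity follows as 2**loss. A toy call with invented one-hot data (exercising the no-padding fallback added above):

import numpy as np

y_t = np.array([[[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]]])   # predicted distributions
y_h = np.array([[[1, 0, 0], [0, 1, 0]]])               # one-hot references
loss = evaluate_ppl(y_t, y_h)
print(loss, 2 ** loss)   # ~0.418 bits/token, perplexity ~1.34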
Example No. 13
from numpy.lib.scimath import logn

def query_tfidf(doc_content, doc_count):
    # get_df_inverted() and normalize() are defined elsewhere in the source
    tf = {}
    tfidf = {}
    list_of_term1 = []
    for term1 in doc_content:
        tf[term1] = tf.get(term1, 0) + 1
        list_of_term1.append(term1)
    c1 = list(set(list_of_term1))
    for token, freq in tf.items():
        df = get_df_inverted(token)
        if df != 0:
            idf = logn(10, float(doc_count) / df)
            tfidf[token] = freq * idf
    tfidf = normalize(tfidf)
    query = {"tokens": c1, "tfidf": tfidf}
    return query
Example No. 14
def DU1por2():

    '''
    In [1]: from sympy import *
    In [2]: import numpy as np
    In [3]: x = Symbol('x')
    In [4]: y = x**2 + 1
    In [5]: yprime = y.diff(x)
    In [6]: yprime
    Out[6]: 2⋅x

    In [7]: f = lambdify(x, yprime, 'numpy')
    In [8]: f(np.ones(5))
    Out[8]: [ 2.  2.  2.  2.  2.]
    '''

    from numpy.lib.scimath import logn
    from math import e

    # x*y' - y^2*ln(x) + y = 0
    # y' = (y^2*ln(x) - y) / x

    def f(x, y):
        return (y * y * logn(e, x) - y) / x

    y = [1.0] * 10
    x = [1.0] * 10
    h = 0.1

    # classical RK4; start at i=1 so the initial condition y[0] = 1 is kept
    for i in range(1, 10):
        x[i] = 1 + i * h
        k1 = h * f(x[i - 1], y[i - 1])
        k2 = h * f(x[i - 1] + h / 2, y[i - 1] + k1 / 2)
        k3 = h * f(x[i - 1] + h / 2, y[i - 1] + k2 / 2)
        k4 = h * f(x[i - 1] + h, y[i - 1] + k3)
        y[i] = y[i - 1] + (k1 + 2 * k2 + 2 * k3 + k4) / 6
        # error against the exact solution y = 1/(1 + ln x)
        print(y[i] - (1 / (1 + logn(e, x[i]))))
Example No. 15
import random
from numpy.lib.scimath import logn
from math import e

def gen_time():
    # next event time: global `time` plus an exponential draw with mean `ave`
    # (inverse-transform sampling; `time` and `ave` are globals in the source)
    return time - ave * logn(e, 1 - random.random())
Example No. 16
import json
from numpy.lib.scimath import logn

def documents_index():
    # getFileNames, gen_documents, process_text, isri_light, isri_heavy,
    # arabicStopWords, and normalize are defined elsewhere in the source
    vector_space = []
    tokens = {}
    docFileNames = getFileNames("f:/ir/data")
    docs_dict = {}
    docFileName = docFileNames[0]
    docFile_list = gen_documents(docFileName)
    for pagedict in docFile_list:
        lines = '\n'.join((pagedict['TITLE'], pagedict['TEXT']))
        pageseq = int(pagedict['DOCSEQ'])
        text = lines.split()
        word_list = []
        for i in text:
            if i not in arabicStopWords:
                word_list.append(i)
        text = ' '.join(word_list)
        # remove diacritics, punctuation, and stopwords, then tokenize
        word_list1 = process_text(text)

        terms = []
        for wordx in word_list1:
            stemAr = isri_light(wordx)  # using the light stemmer
            #stemAr = isri_heavy(wordx)  # using the heavy stemmer
            terms.append(wordx)
        docs_dict[pageseq] = terms

    for doc_id, doc_content in docs_dict.items():
        tf = {}
        list_of_term1 = []
        for term1 in doc_content:
            tf[term1] = tf.get(term1, 0) + 1
            list_of_term1.append(term1)
        c1 = list(set(list_of_term1))
        for token, freq in tf.items():
            tokens.setdefault(token, []).append((doc_id, freq))
        document = {"doc_id": doc_id, "tokens": c1, "tfidf": {}}
        vector_space.append(document)
    doc_count = len(vector_space)
    dinv = {}
    for token, docs in tokens.items():
        dinv[token] = [len(docs), docs]
        l_docs = len(docs)
        idf = logn(10, float(doc_count) / float(l_docs))
        for doc_id, tf in docs:
            tfidf = tf * idf
            for i in vector_space:
                if (i['doc_id'] == doc_id) and (token in i['tokens']):
                    i['tfidf'][token] = tfidf
    with open('f:/ir/inverted_index.json', 'w') as f:
        json.dump(dinv, f)
    '''with open('f:/inverted_index.json') as f:
        inv = json.load(f)'''

    for doc in vector_space:
        doc["tfidf"] = normalize(doc["tfidf"])
    return vector_space
Example No. 17
import random
from numpy.lib.scimath import logn
from math import e

def exp(ave):
    # exponential random draw with mean `ave` via inverse-transform sampling
    # (note: this shadows math.exp)
    return -ave * logn(e, 1 - random.random())
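Both `gen_time` and `exp` rely on inverse-transform sampling: if U ~ Uniform(0, 1), then -ave * ln(1 - U) is exponentially distributed with mean ave. A quick empirical check of the draw defined above:

samples = [exp(5.0) for _ in range(100000)]
print(sum(samples) / len(samples))   # ~= 5.0, the requested mean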
Example No. 18
#coding:utf-8

import matplotlib.pyplot as plt
import numpy as np
from numpy.lib.scimath import logn
from math import e
import matplotlib as mpl
# avoid garbled Chinese glyphs in figure labels
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False

x = np.linspace(0, 6, 120)

plt.plot(x, np.log(x) / np.log(0.5), 'y-', linewidth=2, label=u'log0.5(x)')
plt.plot(x, logn(e, x), 'b-', linewidth=2, label=u'loge(x)')
#plt.plot(x, np.log(x)/np.log(5))
plt.plot(x, logn(5, x), 'g-', linewidth=2, label=u'log5(x)')
plt.plot(x, np.log10(x), 'r-', linewidth=2, label=u'log10(x)')
plt.plot([1, 1, 1, 1], [-3, 0, 1, 5], '--', color='darkgray')

plt.axis([0, 2.5, -3.5, 5.5])
plt.legend(loc='lower right')  # legend position
plt.grid(True)
plt.show()
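All of these curves come from the change-of-base identity hinted at in the commented-out line: log_b(x) = ln(x) / ln(b), which is exactly what `logn` computes. A quick check (in the same session as the script above):

xs = np.linspace(0.5, 6, 12)
print(np.allclose(logn(5, xs), np.log(xs) / np.log(5)))   # True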
Example No. 19
#!/usr/bin/env python

import pandas as pd
import matplotlib.pyplot as plt
from numpy.lib.scimath import logn
from math import e

origin = "~/qbb2015/stringtie/SRR072893/t_data.ctab"

df893 = pd.read_table(origin)

# log-transform the nonzero FPKM values
logged = []
for item in df893["FPKM"]:
    if item > 0:
        logged.append(logn(e, item))

plt.figure()
plt.hist(logged, color='blue', bins=100)
plt.title("ln(FPKM) of SRR072893")
plt.xlabel("ln(FPKM)")
plt.ylabel("Frequency")
plt.savefig("plot3.png")
Example No. 20
# epsilon and Xt (and the x, y lists) are defined earlier in the source
for j in range(0, 59):
    x.append(epsilon[j])
    y.append(epsilon[j + 1])

#import rpy2
from scipy import stats
#def loadData():
gradient, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print("Gradient and intercept", gradient, intercept)

from numpy.lib.scimath import logn
from math import e

a = intercept
b = gradient
k = logn(e, b) * -252   # mean-reversion speed; the 252 suggests annualization over trading days
m = a / (1 - b)         # long-run mean level

lam = []
for j in range(0, 59):
    lam.append(Xt[j + 1] - a - b * Xt[j])

import numpy as np

lamvar = np.var(lam)

print(lamvar)

from numpy import array, zeros, sqrt, shape

sigmaeq = sqrt(lamvar / (1 - b**2))   # equilibrium standard deviation
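This is the standard AR(1)-to-Ornstein-Uhlenbeck calibration: fitting X_{t+1} = a + b*X_t + eps gives a mean-reversion speed k = -ln(b)/dt (here dt = 1/252), a long-run mean m = a/(1-b), and an equilibrium standard deviation sqrt(Var(eps)/(1-b^2)). A self-contained sketch of mine with invented regression output:

from math import log, sqrt

a, b, resid_var = 0.02, 0.95, 1e-4          # hypothetical fit results
k = -log(b) * 252                           # ~12.93 per year
m = a / (1 - b)                             # 0.4
sigma_eq = sqrt(resid_var / (1 - b ** 2))   # ~0.032
print(k, m, sigma_eq)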
Example No. 21
 def _calc_zoom_level(self):
     d = self.options
     self.zoom_level = 1 + int(
         logn(self.zoom_step,
              d['rsphere'] / (abs(d['llcrnrx']) + abs(d['urcrnrx']))))
Example No. 22
    x.append(epsilon[j])
    y.append(epsilon[j + 1])


#import rpy2
from scipy import stats
#def loadData():
gradient, intercept, r_value, p_value, std_err = stats.linregress(x, y)
print("Gradient and intercept", gradient, intercept)

from numpy.lib.scimath import logn
from math import e

a = intercept
b = gradient
k = logn(e, b)*-252
m = a/(1-b)

lam = []
for j in range(0,59):
    lam.append(Xt[j+1] - a - b * Xt[j])


import numpy as np

lamvar = np.var(lam)

print(lamvar)

from numpy import array, zeros, sqrt, shape
Example No. 23
from random import randrange
from numpy.lib.scimath import logn

def chooseAtRandom(thelist):
    idx = randrange(0, len(thelist))
    return thelist[idx]

def createRandomMapping():
    # n, leights, F, neighbors(), and neighborsNotAssigned() are module
    # globals defined elsewhere in the source
    for i in range(1, 1 << n):
        for l in leights:
            keys = F.keys()
            neighs = neighbors(l)
            already = [F[j] for j in keys if (j in neighs)]
            if i in already:
                continue
            possibles = neighborsNotAssigned(l)
            if len(possibles) == 0:
                return i - 1
            h = chooseAtRandom(possibles)
            F[h] = i

if __name__ == '__main__':
    M_2 = createRandomMapping()
    print(M_2, F)
    print("sum-rate:", (logn(2, len(leights)) + logn(2, M_2)) / n)

#     print("$S_L$:", leights, "\\\\")
#     print("$S_R$:", heavies, "\\\\")
#     print("\\\\")
#     for l in leights:
#         print("neighbors of", l, "are:", neighbors(l), "\\\\")

    pass
Example No. 24
 def _max_zoom_level(self):
     self.max_zoom_level = logn(self.zoom_step,
          max(self.current_values.shape)) - 6
Example No. 25
import pickle
import operator
from numpy.lib.scimath import logn
from math import e

def TF_IDF_Replies(screen_name):
    #pickle.dump(users, open("repliesMentioning_"+str(screen_name)+".p", "wb"))
    users = pickle.load(
        open("repliesMentioning_" + str(screen_name) + ".p", "rb"))
    #TF(t) = (number of times term t appears in a document) / (total number of terms in the document)
    #IDF(t) = log_e(total number of documents / number of documents containing term t)

    idf = {}
    for u in users:
        print(u)

        tweets = users[u]

        tf = {}
        totalWords = 0
        for t in tweets:
            print(t)
            t = t.lower()
            words = t.split()
            totalWords += len(words)
            for w in words:
                idf.setdefault(w, {})
                idf[w][u] = 0
                tf.setdefault(w, 0)
                tf[w] += 1
        tfFinal = {}
        idfFinal = {}
        for w in tf:
            value = tf[w]
            value = float(value) / float(totalWords)
            tfFinal[w] = value

        for w in idf:
            totalNumDocs = len(users)
            numDocWithW = len(idf[w])
            w1 = float(totalNumDocs) / float(numDocWithW)
            value = logn(e, w1)
            idfFinal[w] = value
        tfIDF = {}
        for w in idfFinal:
            if w in tfFinal:
                wTF = tfFinal[w]
            else:
                wTF = 1
            wIDF = idfFinal[w]
            final = wTF * wIDF
            tfIDF[w] = final
        #for w in tfIDF:
        #    print(w + "," + str(tfIDF[w]))
        sorted_tfIDF = sorted(tfIDF.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
        t = 0
        for w, v in sorted_tfIDF:
            if t < 15:
                print(w)
            t += 1

        print()
Example No. 26
def sum_rate(H, n, r, tests):
    return (logn(2, 2 * Vc_size(H, n, r, tests)) + r) / (n + 1)
Example No. 27
 from numpy.lib.scimath import logn
 from math import e

 def f(x, y):
     return (y * y * logn(e, x) - y) / x