Esempio n. 1
0
def naive_bayes_guess(lines, hockey, baseball):
    """Guess which of two newsgroups a piece of text belongs to.

    Every token after the first contributes an add-one smoothed log score
    to each class; the class with the larger accumulated score wins.

    Args:
        lines: Text to classify. It is split on whitespace and the first
            token is skipped (presumably a label/identifier - TODO confirm).
        hockey: Mapping from word to count for the hockey class. The entry
            under key ``0`` is read as the class weight used for the prior.
        baseball: Same structure as ``hockey``, for the baseball class.

    Returns:
        ``"rec.sport.hockey"`` when the hockey score is strictly greater,
        otherwise ``"rec.sport.baseball"``.
    """
    line_words = lines.split()
    total_count_H = count_all(hockey)
    total_count_B = count_all(baseball)

    # Class priors derived from the entries stored under key 0.
    PH = float(hockey[0]) / float(baseball[0] + hockey[0])
    PB = 1 - PH

    h = 0
    b = 0
    for word in line_words[1:]:
        # Only a missing word means "count 0"; any other error should
        # surface instead of being silently swallowed.
        try:
            wh = hockey[word]
        except KeyError:
            wh = 0
        try:
            wb = baseball[word]
        except KeyError:
            wb = 0

        # The evidence term is subtracted from both scores, so it does not
        # influence the comparison; it is kept so the scores stay unchanged.
        p_word = l(float(wh + wb + 1) / float(total_count_B + total_count_H + 2))
        h += l(float(wh + 1) / float(2 + total_count_H)) + l(PH) - p_word
        b += l(float(wb + 1) / float(2 + total_count_B)) + l(PB) - p_word

    if h > b:
        return "rec.sport.hockey"
    return "rec.sport.baseball"
Esempio n. 2
0
def isBeaut():
    """Check the global list ``A`` for a zero or a repeated transformed value.

    Reads the module-level names ``A`` (list of numbers), ``N`` (number of
    entries to transform) and ``l`` (logarithm function).

    Side effects:
        Overwrites the first ``N`` entries of ``A`` in place with their
        signed base-2 logarithm and prints debugging output.

    Returns:
        True if ``A`` contains 0, or if two entries end up with the same
        (magnitude, is-positive) pair after the transform; False otherwise.
    """
    if 0 in A:
        return True
    h = set()

    # range()/print() are used so the function runs on Python 3 as well as
    # Python 2; the iteration and the printed output are unchanged on 2.
    for i in range(N):
        t = l(abs(A[i])) / l(2)
        A[i] = t if A[i] > 0 else -t
    # log tables prepared
    print(A)
    for i in A:
        # NOTE(review): inputs 1 and -1 both become (0.0, False) here and
        # are therefore reported as a repeat - confirm this is intended.
        key = (abs(i), i > 0)
        if key in h:
            return True
        h.add(key)
        print(h)

    return False
Esempio n. 3
0
def compute_document_density(data):
    """Return a log-scaled weight based on how many category files hold ``data``.

    Reads the module-level containers ``politics_file``,
    ``entertainment_file``, ``sport_file``, ``tech_file`` and
    ``business_file``, plus the logarithm function ``l``.

    Args:
        data: Item whose membership is tested with ``in`` against each of
            the five category containers.

    Returns:
        0 when ``data`` occurs in none of the containers, otherwise
        ``l(5 / count)`` where ``count`` is the number of containers that
        contain it.
    """
    # Bug fix: the original tested politics_file twice and never looked at
    # business_file, so items found only in the business data scored 0.
    corpora = (
        politics_file,
        entertainment_file,
        sport_file,
        tech_file,
        business_file,
    )
    count = sum(1 for corpus in corpora if data in corpus)

    if count == 0:
        return 0
    # Same expression as before: number of categories divided by the hits.
    return l(len(corpora) / count)
Esempio n. 4
0
def getMayor(n):
    """Rebuild a number from the digits of ``n`` after sorting them.

    The decimal digits of ``n`` are extracted least-significant first,
    handed to ``metodo_burbuja`` and then recombined with the digit at
    index ``i`` weighted by ``10 ** i``.

    NOTE(review): the digit count ``int(l(n)) + 1`` is only the number of
    decimal digits if ``l`` is a base-10 logarithm - confirm which ``l``
    is in scope. ``metodo_burbuja`` is presumably an in-place ascending
    bubble sort, which would make the result the largest number that can
    be formed from the digits - TODO confirm.

    Args:
        n: Positive number whose digits are rearranged.

    Returns:
        The number assembled from the processed digit list.
    """
    digitos = int(l(n)) + 1
    arr_digitos = []
    aux = n
    for _ in range(digitos):
        arr_digitos.append(int(aux % 10))
        # Floor division keeps aux integral; with "/" Python 3 switches to
        # float arithmetic here and large inputs lose digits.
        aux = aux // 10

    metodo_burbuja(arr_digitos)

    numero = 0
    var = 1  # positional weight: 1, 10, 100, ...
    for i in range(digitos):
        numero += arr_digitos[i] * var
        var *= 10
    return numero
        maxa = max(tbu, tent, tpol, tspo, ttec)

        # if maxa != 0.0:
        #         if tbu == maxa:
        #                 print (w, document_density, maxa, "business")
        #         elif tent == maxa:
        #                 print (w, document_density, maxa, "entertainment")
        #         elif tpol == maxa:
        #                 print (w, document_density, maxa, "politics")
        #         elif tspo == maxa:
        #                 print (w, document_density, maxa, "sports")
        #         else:
        #                 print (w, document_density, maxa, "tech")

        if document_density == l(5 / 1):
            t1 += maxa
        elif document_density == l(5 / 2):
            t2 += maxa
        elif document_density == l(5 / 3):
            t3 += maxa
        elif document_density == l(5 / 4):
            t4 += maxa
        else:
            t5 += maxa

        bu += tbu
        ent += tent
        pol += tpol
        spo += tspo
        tec += ttec
Esempio n. 6
0
#-----------------------------------
# Importing packages
#-----------------------------------

# Calling a function directly, without importing it first, fails:
# print(sqrt(5))
# NameError: name 'sqrt' is not defined

# Import the whole package; members are reached through the package name
import math
print(math.sqrt(2))

# Import a single function from the package
from math import log10
print(log10(100))

# Import the package under a different name
import math as m
print(m.log(5, 2))  # logarithm of 5 to base 2

# Import a function from the package under a different name
from math import log as l
print(l(2, 5))  # logarithm of 2 to base 5
Esempio n. 7
0
from math import log as l
# Count the pairs (j, i) with 1 <= j <= 9 for which j**i has exactly i
# decimal digits. The exponent i grows until no base qualifies any more.
i = 1
c = 0
while True:
    # Bases whose i-th power is i digits long, in ascending order.
    hits = [j for j in range(1, 10) if int(l(j**i, 10)) + 1 == i]
    for j in hits:
        print(j, i)
    c += len(hits)
    # d stays 1 only when this exponent produced no match at all.
    d = 0 if hits else 1
    if d == 1:
        break
    i += 1
print(c)
Esempio n. 8
0
def categorizeApi(text):
    """Classify ``text`` into one of five news categories.

    The text is pre-processed with ``proc``, tokenized, stripped of stop
    words and stemmed. Each stem adds ``freq(stem) * document_density`` to
    the score of every category whose frequency distribution contains it,
    and a tiny floor value (1e-19) to the others.

    NOTE(review): ``getFreqDist`` is assumed to return an object that
    supports ``in`` and ``.freq(word)`` (e.g. an NLTK FreqDist) - confirm.

    Args:
        text: The document to classify; a sliceable value accepted by
            ``proc``.

    Returns:
        One of "business", "entertainment", "politics", "sports", "tech".
        Ties go to the earliest category in that order, so "business" is
        also the answer when no category scores above the others.
    """
    # Bug fix: the original wrote ['said' + 'v'], which concatenates to the
    # single bogus stop word 'saidv' instead of adding 'said' and 'v'.
    stop_words = set(stopwords.words('english') + ['said', 'v'])
    ps = PorterStemmer()

    business = getFreqDist("business")
    print("Classifing data")
    entertainment = getFreqDist("entertainment")
    politics = getFreqDist("politics")
    sport = getFreqDist("sport")
    tech = getFreqDist("tech")

    # (returned label, frequency distribution) in tie-breaking order.
    categories = [
        ("business", business),
        ("entertainment", entertainment),
        ("politics", politics),
        ("sports", sport),
        ("tech", tech),
    ]

    tokens = [w for w in word_tokenize(proc(text[:])) if w not in stop_words]

    floor = 1e-19  # keeps every score strictly positive
    scores = [floor] * len(categories)

    for w in tokens:
        w = ps.stem(w)
        document_density = compute_document_density(w)
        for idx, (_, dist) in enumerate(categories):
            if w in dist:
                scores[idx] += dist.freq(w) * document_density
            else:
                scores[idx] += floor

    print()

    best = max(scores)
    for (label, _), score in zip(categories, scores):
        if score == best:
            return label
    return "business"
Esempio n. 9
0
from math import log as l
# Golfed script: reads n and prints the first n entries that survive a
# repeated "drop the multiples" filter over 2..N-1 (intended to be the
# first n primes).
# NOTE(review): `print a[:n]` is Python 2 statement syntax, so this script
# presumably also relies on Python 2's input() returning an evaluated
# number and on range()/filter() returning indexable lists - confirm
# before porting to Python 3.
n=input()
# Size of the candidate range; presumably an upper bound for the n-th
# prime - TODO confirm. l(l(n)) raises for n == 1 because l(1) is 0.
N=n*int(l(n)+l(l(n)))
a=range(2,N)
# Each pass keeps a[i] itself and every x that a[i] does not divide.
for i in range(int(n**.5)+1):
 a=filter(lambda x:x%a[i] or x==a[i],a)
print a[:n]
Esempio n. 10
0
# Notebook export: each "# In[..]" marker starts a new cell. The bare
# expressions below only display a value inside a notebook; run as a
# script their results are discarded.

# NOTE(review): `m` is not defined in the visible part of this export;
# presumably it was bound in an earlier cell - confirm.
m(6)

# In[39]:

# math.fmod imported under the alias f
from math import fmod as f
f(9, 3)

# In[45]:

# Despite the alias name, sq is math.log (natural logarithm when called
# with a single argument), not a square root.
from math import log as sq
sq(4)

# In[46]:

# Binds the module-level name l to math.log10 from here on.
from math import log10 as l
l(4)

# In[48]:

import random

# Print one element picked from the list, followed by a space instead of
# a newline.
print(random.choice([1, 2, 3, 4]), end=" ")

# In[52]:

import random
# Print a value from range(20, 40, 2), i.e. an even number from 20 to 38.
print(random.randrange(20, 40, 2), end=" ")

# In[55]:

import random
Esempio n. 11
0
def ln(b):
    """Return ``l(b)`` for a positive ``b``.

    Args:
        b: Value to take the logarithm of; must be greater than 0.

    Returns:
        The logarithm of ``b``, or None (after printing 'Error!') when
        ``b`` is not a positive number.
    """
    # Explicit check instead of ``assert`` (asserts vanish under -O), and
    # only the errors bad input can cause are caught, so things like
    # KeyboardInterrupt are no longer swallowed.
    try:
        result = l(b) if b > 0 else None
    except (TypeError, ValueError):
        result = None
    if result is None:
        print('Error!')
    return result
Esempio n. 12
0
def log(a, b):
    """Return the logarithm of ``b`` to base ``a``.

    Args:
        a: Base; must be greater than 0 and different from 1.
        b: Argument; must be greater than 0.

    Returns:
        ``l(b, a)``, or None (after printing 'Error!') when the arguments
        are outside the domain above or are not numbers.
    """
    # Explicit check instead of ``assert`` (asserts vanish under -O), and
    # only the errors bad input can cause are caught.
    try:
        valid = (a > 0) and (a != 1) and (b > 0)
        result = l(b, a) if valid else None
    except (TypeError, ValueError):
        result = None
    if result is None:
        print('Error!')
    return result
Esempio n. 13
0
def lg(b):
    """Return the base-10 logarithm of a positive ``b``.

    Args:
        b: Value to take the logarithm of; must be greater than 0.

    Returns:
        ``l(b, 10)``, or None (after printing 'Error!') when ``b`` is not
        a positive number.
    """
    # Explicit check instead of ``assert`` (asserts vanish under -O), and
    # only the errors bad input can cause are caught.
    try:
        result = l(b, 10) if b > 0 else None
    except (TypeError, ValueError):
        result = None
    if result is None:
        print('Error!')
    return result
Esempio n. 14
0
def super_root(n):
    """Return x such that x ** x is approximately ``n``.

    With t = l(n), the loop refines w by Newton steps on w * e(w) - t,
    starting from w = t, and stops at the first step that is no larger
    than 10e-15. The result is e(w).

    NOTE(review): assumes ``e`` is the exponential function and ``l`` the
    natural logarithm - confirm against the file's imports.
    """
    target = l(n)
    w = target
    while True:
        step = (e(w) * w - target) / (w * e(w) + e(w))
        if step <= 10e-15:
            return e(w)
        w = w - step
Esempio n. 15
0
from math import log as l

# Walk the fractions n/d produced by (n, d) -> (n + 2*d, n + d), starting
# at 1/1, and count how often the integer part of log10(n) exceeds that of
# log10(d) over 1000 iterations.
d = 1
n = 1
c = 0
for i in range(1000):
    # The comparison yields a bool, which adds as 0 or 1.
    c += int(l(n, 10)) > int(l(d, 10))
    tmp = d
    d, n = d + n, n + 2 * tmp
print(c)