Example #1
def classify_news():
    s = session()
    # Train on the news items that already have a label
    labeled = s.query(News).filter(News.label != None).all()
    classifier = NaiveBayesClassifier()
    titles = [n.title for n in labeled]
    labels = [n.label for n in labeled]
    classifier.fit(titles, labels)

    # Predict on a fresh list of titles, so the predictions
    # line up index-for-index with the unlabeled rows
    rows = s.query(News).filter(News.label == None).all()
    predictions = classifier.predict([n.title for n in rows])
    for row, label in zip(rows, predictions):
        row.label = label
    return template('classify_template', rows=rows)
Example #2
def classify_news():
    s = session()
    labeled = s.query(News).filter(News.label != None).all()
    X, y = [], []
    for i in labeled:
        X.append(i.title)
        y.append(i.label)
    X = [clean(x).lower() for x in X]

    # Optional hold-out split for checking accuracy:
    # d = len(X) // 4
    # X_train, y_train, X_test, y_test = X[:3*d], y[:3*d], X[3*d:], y[3*d:]
    # model.fit(X_train, y_train)
    # print(model.score(X_test, y_test))

    model = NaiveBayesClassifier()
    model.fit(X, y)

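    # Predict labels for the news items that are still unlabeled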
    no_label = s.query(News).filter(News.label == None).all()
    X_p = []
    for i in no_label:
        X_p.append(i.title)
    X_p = [clean(x).lower() for x in X_p]
    y_predict = model.predict(X_p)

    for j in range(len(no_label)):
        no_label[j].label = y_predict[j]

    # Order the recommendations: 'good' first, then 'maybe', then 'never'
    classified_news = []
    for grade in ('good', 'maybe', 'never'):
        classified_news.extend(
            news for news, label in zip(no_label, y_predict) if label == grade
        )

    return template('news_recommendations', rows=classified_news)
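Both classify_news examples above rely on a NaiveBayesClassifier with a fit/predict interface whose implementation is not shown. As a minimal sketch of what such a class might look like, assuming a bag-of-words model with whitespace tokenization and Laplace smoothing (the alpha parameter and the log-space scoring are assumptions, not the original project's code):

from collections import Counter, defaultdict
import math

class NaiveBayesClassifier:
    def __init__(self, alpha=1.0):
        self.alpha = alpha  # assumed Laplace smoothing constant

    def fit(self, X, y):
        # Count documents per class and word occurrences per class
        self.class_counts = Counter(y)
        self.word_counts = defaultdict(Counter)
        for text, label in zip(X, y):
            self.word_counts[label].update(text.split())
        self.vocab = {w for c in self.word_counts.values() for w in c}

    def predict(self, X):
        total_docs = sum(self.class_counts.values())
        labels = []
        for text in X:
            scores = {}
            for label, doc_count in self.class_counts.items():
                # log prior plus smoothed log likelihood of each token
                score = math.log(doc_count / total_docs)
                denom = (sum(self.word_counts[label].values())
                         + self.alpha * len(self.vocab))
                for word in text.split():
                    num = self.word_counts[label][word] + self.alpha
                    score += math.log(num / denom)
                scores[label] = score
            labels.append(max(scores, key=scores.get))
        return labels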
Example #3
import random

def parse_file(filename):
    '''Parse the samples in the file.
    (ENCODING is a module-level constant in the source project.)
    '''
    vocabulary, word_vects, classes = [], [], []
    with open(filename, 'r', encoding=ENCODING) as f:
        for line in f:
            if line:
                word_vect, cls = parse_line(line)
                vocabulary.extend(word_vect)
                word_vects.append(word_vect)
                classes.append(cls)
    vocabulary = list(set(vocabulary))

    return vocabulary, word_vects, classes

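The parse_line helper comes from the same project and is not shown. A plausible sketch, assuming each line of english_big.txt holds a text and a class label separated by a tab (the field layout and the lower-casing are assumptions):

def parse_line(line):
    '''Hypothetical helper: split one line into (word list, class label),
    assuming the format "text<TAB>label".'''
    text, cls = line.rstrip('\n').rsplit('\t', 1)
    return text.lower().split(), cls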
if __name__ == '__main__':
    clf = NaiveBayesClassifier()
    vocabulary, word_vects, classes = parse_file('english_big.txt')

    # Split into training & test data
    ntest = int(len(classes) * (1 - TRAIN_PERCENTAGE))

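    # Move a random sample of the data into the held-out test set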
    test_word_vects = []
    test_classes = []
    for i in range(ntest):
        idx = random.randint(0, len(word_vects)-1)
        test_word_vects.append(word_vects.pop(idx))
        test_classes.append(classes.pop(idx))

    train_word_vects = word_vects
    train_classes = classes
Example #4
SPAM = (
    "offer is secret",
    "click secret link",
    "secret sports link",
)
HAM = (
    "play sports today",
    "went play sports",
    "secret sports event",
    "sports is today",
    "sports costs money",
)

print "=== Naive Bayes CLassifier ==="
c = NaiveBayesClassifier(SPAM, HAM)
print "Size of vocabulary: %d" % c.different_words
result("SPAM", c.spam.p, 0.3750)
result("secret|SPAM", c.spam.p_word("secret"), 0.3333)
result("secret|HAM",  c.ham.p_word("secret"), 0.0667)
result("SPAM|sports", c.p_spam_given_word("sports"), 0.1667)
result("SPAM|secret is secret)", c.p_spam_given_phrase("secret is secret"), 0.9615)
result("SPAM|today is secret)", c.p_spam_given_phrase("today is secret"), 0)

print "\n=== Naive Bayes CLassifier with Laplace Smoothing ==="
c = NaiveBayesClassifier(SPAM, HAM, 1)
result("SPAM", c.spam.p, 0.4)
result("HAM", c.ham.p, 0.6)
result("today|SPAM", c.spam.p_word("today"), 0.0476)
result("today|HAM",  c.ham.p_word("today"), 0.1111)
result("SPAM|today is secret)", c.p_spam_given_phrase("today is secret"), 0.4858)
n = BayesNetwork(TEST_NET)
P(n, {"B":True}, {"C":True})
P(n, {"C":True}, {"B":True})

print "\n=== Problem 8 ==="
from bayes import NaiveBayesClassifier, result
SPAM = (
    "Top Gun",
    "Shy People",
    "Top Hat",
)
HAM = (
    "Top Gear",
    "Gun Shy",
)
c = NaiveBayesClassifier(SPAM, HAM, 1)
result("OLD", c.spam.p)
result("Top|OLD", c.spam.p_word("Top"))
result("OLD|Top", c.p_spam_given_word("Top"))


print "\n=== Problem 10 ==="
from linear_regression import linear_regression, gaussian
x = [1.0, 3.0, 4.0, 5.0,  9.0]
y = [2.0, 5.2, 6.8, 8.4, 14.8]
(w0, w1), err = linear_regression(x, y)
print "(w0=%.1f, w1=%.1f) err=%.2f" % (w0, w1, err)


print "\n=== Problem 12 ==="
from logic import Proposition, implies
from bayes import NaiveBayesClassifier, result

MOVIE = (
    "a perfect world",
    "my perfect woman",
    "pretty woman"
)
SONG = (
    "a perfect day",
    "electric storm",
    "another rainy day"
)
c = NaiveBayesClassifier(MOVIE, SONG, 1)
print "Size of vocabulary: %d" % c.different_words

print "\n=== Homework 3.1 ==="
result("MOVIE", c.spam.p)
result("SONG", c.ham.p)
result("perfect|MOVIE", c.spam.p_word("perfect"))
result("perfect|SONG",  c.ham.p_word("perfect"))
result("storm|MOVIE", c.spam.p_word("storm"))
result("storm|SONG",  c.ham.p_word("storm"))

print "\n=== Homework 3.2 ==="
result("MOVIE|perfect storm)", c.p_spam_given_phrase("perfect storm"))

print "\n=== Homework 3.3 ==="
c = NaiveBayesClassifier(MOVIE, SONG)
result("MOVIE|perfect storm)", c.p_spam_given_phrase("perfect storm"))

print "\n=== Homework 3.4 ==="