def classify_news():
    """Rank unlabeled news by predicted label and render the result.

    Trains a ``NaiveBayesClassifier`` (``alpha=1``) on the titles of all
    labeled ``News`` rows, predicts a label for every unlabeled row, assigns
    that label to the row object in memory (this function does not commit)
    and passes the rows, sorted by label, to the template as ``rows``.

    Returns:
        Whatever ``template`` returns for the recommendations template.
    """
    s = session()
    # SQLAlchemy renders ``== None`` / ``!= None`` as IS NULL / IS NOT NULL.
    rows = s.query(News).filter(News.label == None).all()
    training_rows = s.query(News).filter(News.label != None).all()
    print('Received info from database')

    # Fit the classifier
    X = [clean(news.title).lower() for news in training_rows]
    y = [news.label for news in training_rows]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X, y)
    print('Fitted the classifier')

    # Titles to classify get the same normalisation as the training titles;
    # otherwise their tokens do not line up with what the model was fitted on.
    unclassified_news = [clean(news.title).lower() for news in rows]
    predicted_labels = model.predict(unclassified_news)
    print('labels predicted')

    for news, label in zip(rows, predicted_labels):
        news.label = label
    classified_news = sorted(rows, key=lambda news: news.label)
    print('news sorted')
    # Raw string: the same path as before, minus the invalid "\c" / "\h"
    # escape sequences that newer Python versions warn about.
    return template(r'C:\cs102\homework06\news_recommendations.tpl',
                    rows=classified_news)
def classify_news():
    """Split unlabeled news into good / maybe / never buckets and render them.

    The classifier is fitted on labeled rows whose ``id`` is below 1001, using
    titles passed through ``clean`` and ``del_stops``. Each unlabeled row is
    then classified individually; anything not predicted ``['good']`` or
    ``['maybe']`` lands in ``never``.

    Returns:
        The rendered ``news_recs`` template with ``good``, ``maybe`` and
        ``never`` lists of ``News`` rows.
    """
    s = session()
    labeled_news = (s.query(News)
                    .filter(News.label != None)
                    .filter(News.id < 1001)
                    .all())
    x = [del_stops(clean(news.title)) for news in labeled_news]
    y = [news.label for news in labeled_news]
    classifier = NaiveBayesClassifier(1)
    classifier.fit(x, y)

    rows = s.query(News).filter(News.label == None).all()
    good, maybe, never = [], [], []
    for row in rows:
        # Normalise into a local instead of overwriting ``row.title``: the row
        # is a session-attached object, so rewriting its title would mark it
        # dirty and hand the stripped-down text to the template.
        text = del_stops(clean(row.title))
        prediction = classifier.predict([text])
        print(prediction)
        if prediction == ['good']:
            good.append(row)
        elif prediction == ['maybe']:
            maybe.append(row)
        else:
            never.append(row)
    return template('news_recs', good=good, maybe=maybe, never=never)
def recommendations():
    """Classify unlabeled news and render them with their predictions.

    Fits a ``NaiveBayesClassifier`` on the cleaned, lower-cased titles of all
    labeled rows, then pairs every unlabeled row with the classifier output
    for its title.

    Returns:
        The rendered ``news_ranked`` template; ``rows`` is a list of
        ``(prediction, news_row)`` tuples.
    """
    # Keep '' at the front of the template search path, but do not add another
    # copy on every call: the list is module-level state and would otherwise
    # grow by one entry per request.
    if not TEMPLATE_PATH or TEMPLATE_PATH[0] != '':
        TEMPLATE_PATH.insert(0, '')
    s = session()

    # 1. Fit the classifier on labeled news
    rows = s.query(News).filter(News.label != None).all()
    X = [clean(row.title).lower() for row in rows]
    y = [row.label for row in rows]
    model = NaiveBayesClassifier()
    model.fit(X, y)

    # 2. Get unlabeled news
    new_rows = s.query(News).filter(News.label == None).all()

    # 3. Get predictions
    # NOTE(review): predict() receives the raw title split into words, while
    # fit() saw whole cleaned, lower-cased titles -- confirm this is the input
    # shape the classifier and the template expect.
    marked = [(model.predict(row.title.split()), row) for row in new_rows]

    # 4. Print ranked table
    return template('news_ranked', rows=marked)
def classify_news():
    """Render the unlabeled news that the classifier predicts as 'good'.

    Steps:
        1. Fetch the unlabeled news from the database.
        2. Predict a label for each of them.
        3. Render the table of recommended news.

    Every unlabeled row gets its predicted label assigned in memory; only the
    rows predicted ``'good'`` are passed to the template.
    """
    db = session()

    train_rows = db.query(News).filter(News.label != None).all()
    train_titles = [clean(row.title).lower() for row in train_rows]
    train_labels = [row.label for row in train_rows]
    model = NaiveBayesClassifier()
    model.fit(train_titles, train_labels)

    pending = db.query(News).filter(News.label == None).all()
    pending_titles = [clean(row.title).lower() for row in pending]
    predicted = model.predict(pending_titles)

    classified_news = []
    for idx, row in enumerate(pending):
        row.label = predicted[idx]
        if predicted[idx] == 'good':
            classified_news.append(row)
    return template('news_recommendations', rows=classified_news)
def classify_news():
    """Classify news with id above 1000 using the first 1000 as training data.

    Rows with ``id < 1001`` are the training set; rows with ``id > 1000`` are
    classified. Titles are lower-cased and stripped of ASCII punctuation
    before fitting and predicting.

    Fixes over the previous version:
      * rows are loaded with two filtered queries instead of one
        ``Query.get`` call per id (``get`` expects a primary-key value, not a
        filter expression, and its result is not iterable);
      * each output entry carries the *predicted* label for that title rather
        than a label taken from the training set.

    Returns:
        The rendered template; ``rows`` is a list of
        ``[predicted_label, original_title, News(author, points, comments,
        url)]`` entries sorted by predicted label.
    """
    s = session()
    train_rows = s.query(News).filter(News.id < 1001).order_by(News.id).all()
    test_rows = s.query(News).filter(News.id > 1000).order_by(News.id).all()

    # Build the translation table once; it is identical for every title.
    strip_punct = str.maketrans("", "", string.punctuation)

    X = [row.title.translate(strip_punct).lower() for row in train_rows]
    y = [row.label for row in train_rows]

    X_test = [row.title for row in test_rows]
    X_cleared = [title.translate(strip_punct).lower() for title in X_test]
    # Detached copies that carry only the fields passed to the template.
    info = [
        News(author=row.author, points=row.points,
             comments=row.comments, url=row.url)
        for row in test_rows
    ]

    model = NaiveBayesClassifier(alpha=0.01)
    model.fit(X, y)
    predicted_news = model.predict(X_cleared)

    classified_news = [
        [label, title, details]
        for label, title, details in zip(predicted_news, X_test, info)
    ]
    classified_news.sort(key=lambda item: item[0])
    return template('homework06/news_recommendations', rows=classified_news)
def recommendations():
    """Label the unlabeled news, store the labels and render the 'good' ones.

    Steps:
        1. Fetch the unlabeled news from the database.
        2. Predict a label for each of them.
        3. Render the table of recommended news.

    Uses the module-level session ``s``.

    NOTE(review): the predicted labels are committed, so on the next call
    those rows match ``label IS NOT NULL`` and become training data, and the
    final query returns every row labeled 'good', not only the newly
    predicted ones. Confirm this is intended.
    """
    # The title lists are built as separate objects on purpose: a chained
    # ``a = b = []`` would make both names refer to the same list.
    pending = s.query(News).filter(News.label.is_(None)).all()
    pending_titles = [item.title for item in pending]

    labeled = s.query(News).filter(News.label.isnot(None)).all()
    known_titles = [item.title for item in labeled]
    known_labels = [item.label for item in labeled]

    data = NaiveBayesClassifier(alpha=1)
    data.fit(known_titles, known_labels)
    predicted = data.predict(pending_titles)

    for idx, item in enumerate(pending):
        item.label = predicted[idx]
    s.commit()

    classified_news = s.query(News).filter(News.label == 'good').all()
    return template('recommendations_template', rows=classified_news)
def recomendations():
    """Score the classifier on labeled news and predict labels for the rest.

    A first model is fitted on the leading 70% of the labeled news and scored
    on the remaining 30%. A second model (``alpha=0.01``) is fitted on all
    labeled news and used to predict the unlabeled titles.

    Fixes over the previous version:
      * only labeled rows are used for training and scoring (previously every
        post was used, including those whose label is ``None``);
      * the first post is no longer skipped;
      * rows are loaded with one query instead of one ``get`` per id, which
        also failed when an id was missing;
      * titles to predict get the same ``clean(...).lower()`` treatment as the
        training titles.

    Returns:
        The rendered ``news_recomendations`` template with ``rows`` (the
        predicted labels) and ``score`` (hold-out score of the first model).
    """
    s = session()
    # Ordered by id so the 70/30 split is the same on every call.
    labeled = (s.query(News)
               .filter(News.label != None)
               .order_by(News.id)
               .all())
    X = [clean(post.title).lower() for post in labeled]
    y = [post.label for post in labeled]

    part = len(X) * 7 // 10
    model = NaiveBayesClassifier()
    model.fit(X[:part], y[:part])
    score = model.score(X[part:], y[part:])

    rec = NaiveBayesClassifier(alpha=0.01)
    rec.fit(X, y)

    rows = s.query(News).filter(News.label == None).all()
    X_new = [clean(row.title).lower() for row in rows]
    classified_news = rec.predict(X_new)
    return template('news_recomendations', rows=classified_news, score=score)
def classify_news():
    """Render unlabeled news ordered good -> maybe -> never.

    Fits a ``NaiveBayesClassifier`` (``alpha=1``) on the titles of labeled
    news and predicts a label for every unlabeled row. Rows keep their query
    order inside each label group.

    The already-loaded row objects are grouped directly; the previous version
    stored ids and issued one extra query per row to fetch it again.

    Raises:
        KeyError: if the classifier predicts a label other than ``'good'``,
            ``'maybe'`` or ``'never'``.
    """
    s = session()
    marked_news = s.query(News).filter(News.label != None).all()
    X_train = [item.title for item in marked_news]
    y_train = [item.label for item in marked_news]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X_train, y_train)

    unlabeled = s.query(News).filter(News.label == None).all()
    predicts = model.predict([item.title for item in unlabeled])

    classified_news = {'good': [], 'maybe': [], 'never': []}
    for item, predict in zip(unlabeled, predicts):
        classified_news[predict].append(item)

    rows = (classified_news['good']
            + classified_news['maybe']
            + classified_news['never'])
    return template('classification_template', rows=rows)
def classify_news():
    """Group unlabeled news by predicted label and render them.

    The classifier is fitted on the ``title`` / ``label`` columns of labeled
    news (only those two columns are loaded for the training rows). Unlabeled
    rows are then split into ``good``, ``maybe`` and ``never`` lists; rows
    with any other predicted label are left out.
    """
    s = session()
    classifier = NaiveBayesClassifier()

    labeled_query = s.query(News).filter(News.label != None)
    train_news = labeled_query.options(load_only("title", "label")).all()
    x_train = [row.title for row in train_news]
    y_train = [row.label for row in train_news]
    classifier.fit(x_train, y_train)

    test_news = s.query(News).filter(News.label == None).all()
    labels = classifier.predict([row.title for row in test_news])

    good, maybe, never = [], [], []
    for idx, row in enumerate(test_news):
        label = labels[idx]
        if label == 'good':
            good.append(row)
        elif label == 'maybe':
            maybe.append(row)
        elif label == 'never':
            never.append(row)

    context = {'good': good, 'never': never, 'maybe': maybe}
    return template('recommendations_template', context)
def classify_news():
    """Rank unlabeled news (good, maybe, everything else) and render them.

    A ``NaiveBayesClassifier`` (``alpha=0.05``) is fitted on the cleaned
    titles of labeled news. Each unlabeled row receives a ``result`` attribute
    (0 for "good", 1 for "maybe", 2 otherwise) and the rows are sorted by it.
    Title and rank of every row are printed before rendering.
    """
    s = session()
    rows_teach = s.query(News).filter(News.label != None).all()
    rows_test = s.query(News).filter(News.label == None).all()

    train_titles = [clean(el.title) for el in rows_teach]
    train_labels = [el.label for el in rows_teach]
    X = [clean(el.title) for el in rows_test]

    model = NaiveBayesClassifier(alpha=0.05)
    model.fit(train_titles, train_labels)
    result = model.predict(X)

    for idx, row in enumerate(rows_test):
        # NOTE(review): element [1] of each prediction is compared with the
        # label text -- presumably predict() returns pairs; confirm.
        verdict = result[idx][1]
        if verdict == "good":
            rank = 0
        elif verdict == "maybe":
            rank = 1
        else:
            rank = 2
        row.result = rank

    rows_test.sort(key=lambda x: x.result)
    for row in rows_test:
        print(row.title)
        print(row.result)
    return template('news_template', rows=rows_test)
def classify_news(train_titles, train_labels, test_titles):
    """Fit a fresh classifier on the training data and classify ``test_titles``.

    Args:
        train_titles: Titles passed to ``fit`` as samples.
        train_labels: Labels passed to ``fit`` as targets.
        test_titles: Titles passed to ``predict``.

    Returns:
        Whatever ``NaiveBayesClassifier.predict`` returns for ``test_titles``.
    """
    model = NaiveBayesClassifier()
    model.fit(train_titles, train_labels)
    return model.predict(test_titles)
def classify_news():
    """Render unlabeled news, each paired with the colour of its prediction.

    The classifier is fitted on cleaned, lower-cased titles of labeled news.
    Every unlabeled title is now normalised the same way before prediction;
    previously the raw title was passed, so its tokens did not match the
    training data.

    Returns:
        The rendered ``color_template``; ``rows`` is a list of
        ``(news_row, colors[predicted_label])`` tuples.
    """
    s = get_session(engine)
    model = NaiveBayesClassifier()

    train_set = s.query(News).filter(News.label != None).all()
    model.fit([clean(news.title).lower() for news in train_set],
              [news.label for news in train_set])

    test = s.query(News).filter(News.label == None).all()
    rows = []
    for news in test:
        # predict() is called with one title string per row, as before.
        label = model.predict(clean(news.title).lower())
        rows.append((news, colors[label]))
    return template("color_template", rows=rows)
def classify_news():
    """Render unlabeled news sorted by the rank of their predicted label.

    The classifier is fitted on cleaned, lower-cased titles of labeled news.
    Each unlabeled row is ordered by ``get_l`` applied to the prediction for
    its cleaned, lower-cased title.
    """
    s = session()
    model = NaiveBayesClassifier()

    train = s.query(News).filter(News.label != None).all()
    train_titles = [clean(news.title).lower() for news in train]
    train_labels = [news.label for news in train]
    model.fit(train_titles, train_labels)

    test = s.query(News).filter(News.label == None).all()

    def rank(news):
        # Sort key: the value get_l() gives for this row's prediction.
        return get_l(model.predict(clean(news.title).lower()))

    ranked = sorted(test, key=rank)
    return template("news_template", rows=ranked)
def classify_news():
    """Pair every unlabeled news row with its predicted label and render them.

    Returns:
        The rendered ``classify_template``. ``rows`` is a ``zip`` iterator of
        ``(news_row, label)`` pairs, so the template can walk it only once.
    """
    s = session()
    fitnews = s.query(News).filter(News.label != None).all()
    testnews = s.query(News).filter(News.label == None).all()

    train_titles = [one.title for one in fitnews]
    train_labels = [one.label for one in fitnews]
    pending_titles = [two.title for two in testnews]

    model = NaiveBayesClassifier()
    model.fit(train_titles, train_labels)
    labels = model.predict(pending_titles)

    return template('classify_template', rows=zip(testnews, labels))
def classify_news():
    """Predict a label for every unlabeled news row and render the rows.

    Uses the module-level session ``s``. Labels are assigned to the row
    objects in memory; this function does not commit.

    Fix: titles to classify are collected in their own list. Previously they
    were appended to the list of training titles, so ``predict`` also ran over
    the training data and ``result[num]`` held the prediction for a training
    title instead of the row it was assigned to.
    """
    labeled_rows = s.query(News).filter(News.label != None).all()
    train_titles = [n.title for n in labeled_rows]
    train_labels = [n.label for n in labeled_rows]
    a = NaiveBayesClassifier()
    a.fit(train_titles, train_labels)

    rows = s.query(News).filter(News.label == None).all()
    result = a.predict([n.title for n in rows])
    for n, label in zip(rows, result):
        n.label = label
    return template('classify_template', rows=rows)
def classify_news():
    """Pair every unlabeled news row with its predicted label and render them.

    Fix: the training *titles* are now normalised with
    ``clean(...).lower()``, matching the titles that are classified.
    Previously the normalisation was applied to the training labels while the
    training titles were passed to ``fit`` untouched.

    Returns:
        The rendered ``b_template``. ``rows`` is a ``zip`` iterator of
        ``(news_row, label)`` pairs, so the template can walk it only once.
    """
    s = session()
    rows_test = s.query(News).filter(News.label == None).all()
    rows_train = s.query(News).filter(News.label != None).all()

    X_train = [clean(el.title).lower() for el in rows_train]
    # The label normalisation is kept from the previous version so the label
    # values the classifier emits stay exactly as they were.
    y_train = [clean(el.label).lower() for el in rows_train]
    X_test = [clean(el.title).lower() for el in rows_test]

    model = NaiveBayesClassifier()
    model.fit(X_train, y_train)
    metki = model.predict(X_test)

    sol = zip(rows_test, metki)
    return template('b_template', rows=sol)
def create_model():
    """Fit the classifier on labeled news and render what ``fit`` returned.

    A ``NaiveBayesClassifier(0.05)`` is fitted on the cleaned titles of all
    labeled news. The two values returned by ``fit`` are passed to the
    ``news_model`` template as ``labels`` and ``model``.
    """
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()

    titles = [clean(news.title) for news in labeled_news]
    targets = [news.label for news in labeled_news]

    classifier = NaiveBayesClassifier(0.05)
    labels, model = classifier.fit(titles, targets)
    return template("news_model", labels=labels, model=model)
def classify_news():
    """Render unlabeled news sorted by predicted label.

    Uses the module-level session ``s``.

    Fix: the unlabeled rows are fetched once with ``.all()``. Previously the
    ``Query`` object itself was iterated twice (once for the titles, once
    when pairing labels with rows), running the SELECT twice with nothing to
    guarantee both runs return rows in the same order.

    Returns:
        The rendered ``news_template`` with ``rows`` (news sorted by
        predicted label) and ``labels`` (the matching labels, same order).
    """
    train_news = s.query(News).filter(News.label != None).all()
    X_train = [new.title for new in train_news]
    y_train = [new.label for new in train_news]
    clf = NaiveBayesClassifier()
    clf.fit(X_train, y_train)

    unlabeled = s.query(News).filter(News.label == None).all()
    predicted = clf.predict([new.title for new in unlabeled])

    # Sort on the label only, so rows with equal labels keep query order.
    ranked = sorted(zip(predicted, unlabeled), key=lambda pair: pair[0])
    rows = [new for _, new in ranked]
    labels = [label for label, _ in ranked]
    return template('news_template', rows=rows, labels=labels)
def recommendations():
    """Render unlabeled news ranked by the classifier output.

    Labeled news are split with ``train_test_split`` (seed and train size
    from ``param``); the classifier (``alpha=param.ALPHA``) is fitted on the
    training part only. Each prediction record is flattened to
    ``(record[0], int(record[1][0]), record[1][1])`` and the records are
    sorted in descending order of their last two fields.

    NOTE(review): ``train_test_split`` is unpacked as
    ``(X_train, y_train, X_test, y_test)`` -- presumably a project helper;
    this order differs from scikit-learn's function of the same name.
    """
    s = session()
    labeled = s.query(News).filter(News.label != None).all()
    X = labeled
    y = [news.label for news in labeled]

    # The held-out part is not used by this view.
    X_train, y_train, _X_test, _y_test = train_test_split(
        X, y, param.SEED, train_size=param.TRAIN_SIZE)

    classifier = NaiveBayesClassifier(alpha=param.ALPHA)
    classifier.fit(X_train, y_train)

    unlabeled = s.query(News).filter(News.label == None).all()
    data = [
        (record[0], int(record[1][0]), record[1][1])
        for record in classifier.predict(unlabeled)
    ]
    data.sort(key=lambda x: (x[1], x[2]), reverse=True)
    return template('news_recommendations', rows=data)
def classify_news():
    """Split unlabeled news into good / maybe / never buckets and render them.

    A ``NaiveBayesClassifier(0.05)`` is fitted on the cleaned titles of
    labeled news. Each unlabeled row is classified on its own; anything not
    predicted ``"good"`` or ``"maybe"`` goes to ``never``.

    NOTE(review): ``predict`` receives a single cleaned title string here --
    confirm the classifier accepts a string rather than a list of titles.
    """
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()
    train_titles = [clean(news.title) for news in labeled_news]
    train_labels = [news.label for news in labeled_news]

    classifier = NaiveBayesClassifier(0.05)
    classifier.fit(train_titles, train_labels)

    rows = s.query(News).filter(News.label == None).all()
    good, maybe, never = [], [], []
    for row in rows:
        prediction = classifier.predict(clean(row.title))
        if prediction == "good":
            bucket = good
        elif prediction == "maybe":
            bucket = maybe
        else:
            bucket = never
        bucket.append(row)
    return template("news_recommendations",
                    good=good, maybe=maybe, never=never)
def classify_news():
    """Render unlabeled news sorted by predicted label.

    The classifier is fitted on the raw titles of labeled news. Each
    unlabeled row gets its predicted label assigned in memory (this function
    does not commit) and the rows are passed to the template sorted by label.
    """
    s = session()
    rows = s.query(News).filter(News.label == None).all()
    training_rows = s.query(News).filter(News.label != None).all()

    train_titles = [item.title for item in training_rows]
    train_labels = [item.label for item in training_rows]
    model = NaiveBayesClassifier()
    model.fit(train_titles, train_labels)

    predicted_labels = model.predict([item.title for item in rows])
    for item, label in zip(rows, predicted_labels):
        item.label = label

    classified_news = sorted(rows, key=lambda news: news.label)
    return template('predicted.tpl', rows=classified_news)
def recommendations():
    """Render unlabeled news ranked good -> maybe -> never.

    Steps:
        1. Fetch the unlabeled news from the database.
        2. Predict a label for each of them.
        3. Render the ranked table of news.

    Predictions "good", "maybe" and "never" are mapped to 0, 1 and 2; any
    other prediction is kept as-is as the sort key. The first ten entries are
    printed before and after sorting.
    """
    s = session()
    unmarked_rows = s.query(News).filter(News.label == None).all()
    marked_rows = s.query(News).filter(News.label != None).all()

    X = [row.title for row in marked_rows]
    y = [row.label for row in marked_rows]
    model = NaiveBayesClassifier()
    model.fit(X, y)

    classified_news = []
    for row in unmarked_rows:
        score = model.predict(row.title)
        # Replace a known label with its rank; the first match wins.
        for rank, name in enumerate(("good", "maybe", "never")):
            if score == name:
                score = rank
                break
        classified_news.append([score, row])

    print("Before sort")
    print(classified_news[:10])
    classified_news.sort(key=lambda x: x[0])
    print("After sort")
    print(classified_news[:10])

    classified_news = [entry[1] for entry in classified_news]
    return template('news_template', rows=classified_news)
def main():
    """Fit the classifier on ``data/play.csv`` and print one prediction.

    The CSV next to this file is read with ``Dia`` as index; every column
    except ``Jogar`` is a feature and ``Jogar`` is the target. The fixed test
    case is classified, the verdict is printed, then a blank line and the
    string returned by ``get_probs_str`` for the same case.
    """
    csv_file = os.path.join(os.path.dirname(__file__), 'data/play.csv')
    df = pd.read_csv(csv_file, index_col='Dia')

    feature_mask = df.columns != 'Jogar'
    X = df.loc[:, feature_mask]
    y = df['Jogar']

    clf = NaiveBayesClassifier()
    clf.fit(X, y)

    test_case = {
        'Tempo': 'Chuva',
        'Temperatura': 'Quente',
        'Humidade': 'Normal',
        'Vento': 'Forte',
    }
    if clf.predict(test_case):
        verdict = 'Jogar'
    else:
        verdict = 'Não Jogar'
    print('resultado: ', verdict)
    print()
    print(clf.get_probs_str(test_case))
def classify_news():
    """Render unlabeled news in descending order of predicted label.

    Both the unlabeled and the labeled rows go through ``processing`` before
    they reach the classifier. Predicted labels are assigned to the unlabeled
    rows in memory. The rows are sorted ascending by label and then reversed,
    so rows with equal labels end up in the reverse of their query order.
    """
    bs = NaiveBayesClassifier(1)
    s = session()

    nolable = s.query(News).filter(News.label == None).all()
    X = processing(nolable)

    labeled = s.query(News).filter(News.label != None).all()
    y = [item.label for item in labeled]
    X_train = processing(labeled)

    bs.fit(X_train, y)
    predictions = bs.predict(X)
    for position, item in enumerate(nolable):
        item.label = predictions[position]

    # Ascending sort followed by a full reversal (not ``reverse=True``, which
    # would order ties differently).
    ordered = sorted(nolable, key=lambda x: x.label)[::-1]
    return template('news_template', rows=ordered)
def classify_news():
    """Render unlabeled news sorted by predicted label.

    A ``NaiveBayesClassifier`` (``alpha=1``) is fitted on cleaned, lower-cased
    titles of labeled news. Titles to classify are now normalised the same
    way; previously they were passed raw, so their tokens did not match the
    training data. Predicted labels are assigned to the rows in memory (this
    function does not commit).
    """
    s = Session()
    rows = s.query(News).filter(News.label == None).all()
    learn_news = s.query(News).filter(News.label != None).all()

    X = [clean(news.title).lower() for news in learn_news]
    y = [news.label for news in learn_news]
    model = NaiveBayesClassifier(alpha=1)
    model.fit(X, y)

    none_news = [clean(news.title).lower() for news in rows]
    predict_labels = model.predict(none_news)
    for news, label in zip(rows, predict_labels):
        news.label = label

    classified_news = sorted(rows, key=lambda news: news.label)
    return template('./classify.tpl', rows=classified_news)
def classify_news():
    """Render unlabeled news ordered good -> maybe -> never.

    The classifier is fitted on cleaned, lower-cased titles of all labeled
    news and applied to the equally normalised titles of unlabeled news.
    Predicted labels are assigned to the rows in memory. Rows keep their
    query order within each label group; rows with any other predicted label
    are left out.

    Compared with the previous version the unused quarter-size local and the
    commented-out accuracy check are gone, and the rows are grouped in one
    pass instead of three.
    """
    s = session()
    labeled = s.query(News).filter(News.label != None).all()
    X = [clean(item.title).lower() for item in labeled]
    y = [item.label for item in labeled]
    model = NaiveBayesClassifier()
    model.fit(X, y)

    no_label = s.query(News).filter(News.label == None).all()
    X_p = [clean(item.title).lower() for item in no_label]
    y_predict = model.predict(X_p)

    good, maybe, never = [], [], []
    for item, label in zip(no_label, y_predict):
        item.label = label
        if label == 'good':
            good.append(item)
        elif label == 'maybe':
            maybe.append(item)
        elif label == 'never':
            never.append(item)

    classified_news = good + maybe + never
    return template('news_recommendations', rows=classified_news)
def classify_news():
    """Split unlabeled news into good / maybe / never buckets and render them.

    Fixes over the previous version:
      * labeled rows are fetched once, so titles and labels are guaranteed to
        come from the same rows in the same order (they used to come from two
        separate executions of the query);
      * unlabeled rows are materialised with ``.all()``. Indexing the
        ``Query`` object (``no_label[i]``) ran an extra query per row and
        relied on the database returning rows in the same order each time.

    Rows with a predicted label other than the three known ones are left out.
    """
    s = session()
    labeled = s.query(News).filter(News.label != None).all()
    X_train = [news.title for news in labeled]
    y_train = [news.label for news in labeled]
    model = NaiveBayesClassifier()
    model.fit(X_train, y_train)

    no_label = s.query(News).filter(News.label == None).all()
    X = [news.title for news in no_label]
    y = model.predict(X)

    good, maybe, never = [], [], []
    for news, label in zip(no_label, y):
        if label == 'good':
            good.append(news)
        elif label == 'maybe':
            maybe.append(news)
        elif label == 'never':
            never.append(news)
    return template('news_recommendations',
                    good=good, maybe=maybe, never=never)
def classify_news():
    """Split unlabeled news into good / maybe / never buckets and render them.

    The classifier is fitted on the cleaned titles of labeled news and
    applied to the cleaned titles of unlabeled news. Anything not predicted
    ``'good'`` or ``'maybe'`` goes to ``never``.
    """
    s = session()
    labeled_news = s.query(News).filter(News.label != None).all()
    train_titles = [clean(item.title) for item in labeled_news]
    train_labels = [item.label for item in labeled_news]
    model = NaiveBayesClassifier()
    model.fit(train_titles, train_labels)

    unlabeled_news = s.query(News).filter(News.label == None).all()
    good, maybe, never = [], [], []
    titles = [clean(item.title) for item in unlabeled_news]
    predictions = model.predict(titles)

    for position, item in enumerate(unlabeled_news):
        verdict = predictions[position]
        if verdict == 'good':
            target = good
        elif verdict == 'maybe':
            target = maybe
        else:
            target = never
        target.append(item)
    return template('recommendations', good=good, maybe=maybe, never=never)
def classify_news():
    """Train the module-level ``classifier`` and redirect to the results page.

    The global ``classifier`` is replaced with a new
    ``NaiveBayesClassifier(alpha=1)`` fitted on the first 800 labeled titles
    (after ``normalize``). Its score on the remaining labeled titles and its
    ``y_frequency`` attribute are printed.

    Fix: titles and labels are read with a single query, so each title is
    guaranteed to be paired with its own label. Previously they came from two
    separate queries with no ordering guarantee between them.

    Returns:
        The result of ``redirect('/recommendations')``.
    """
    global classifier
    s = session()

    # Fetch only the columns we need, as (title, label) pairs.
    pairs = s.query(News.title, News.label).filter(News.label != None).all()
    titles = [str(title) for title, _ in pairs]
    labels = [str(label) for _, label in pairs]

    # Normalisation
    normalized_titles = [normalize(title) for title in titles]

    train_size = 800  # the remaining labeled rows are the hold-out set
    classifier = NaiveBayesClassifier(alpha=1)
    classifier.fit(normalized_titles[:train_size], labels[:train_size])
    print('score:', classifier.score(normalized_titles[train_size:],
                                     labels[train_size:]))
    print('label freq:', classifier.y_frequency)
    return redirect('/recommendations')
def recommendations():
    """Render unlabeled news ranked by the classifier output.

    Steps:
        1. Fetch the unlabeled news from the database.
        2. Predict a label for each of them.
        3. Render the ranked table of news.

    The classifier is fitted on the ``cleaned`` attribute of labeled rows.
    Unlabeled rows are ordered by the first element of their prediction and
    then by the absolute value of the second.
    """
    s = session()
    labeled = s.query(News).filter(News.label != None).all()
    labels = [row.label for row in labeled]
    titles = [row.cleaned for row in labeled]
    model = NaiveBayesClassifier()
    model.fit(titles, labels)

    rows = s.query(News).filter(News.label == None).all()
    predictions = model.predict([row.cleaned for row in rows])

    def rank(pair):
        # pair is (news_row, prediction); only the prediction drives order.
        prediction = pair[1]
        return (prediction[0], abs(prediction[1]))

    ordered = sorted(zip(rows, predictions), key=rank)
    classified_news = [row for row, _ in ordered]
    return template('news_recommendations', rows=classified_news)