예제 #1
0
파일: bdconfig.py 프로젝트: corwinnn/myBot
def make_topic(ref, title, desc):
    """
    Добавляет новую тему в таблицу
    :param ref: ссылка
    :param title: название
    :param desc: описание
    """
    print('new topic')
    all_topic_text = ''
    topic_words_len = defaultdict(int)
    topic_words_freq = defaultdict(int)
    articles = Parser(ref)
    times_articles = articles.get_time()
    a_titles, a_description, a_refs = articles.get_titles()
    for j in range(len(a_titles)):
        print('new article')
        article_words_len = defaultdict(int)
        article_words_freq = defaultdict(int)
        article = Parser(a_refs[j])
        all_article_text = article.get_paragraphs()
        all_topic_text += ' ' + all_article_text
        fill_words(all_article_text.split(), article_words_freq, article_words_len)
        new_article = Article(topic=title, name=a_titles[j],
                              href=a_refs[j],
                              text=article.get_paragraphs(),
                              upd=dateparser.parse(times_articles[j].text),
                              stat_words_len=json.dumps(article_words_len),
                              stat_words_freq=json.dumps(article_words_freq))
        new_article.save()
        make_tags(article.get_tags(), a_titles[j])
    fill_words(all_topic_text.split(), topic_words_freq, topic_words_len)
    new_topic = Topic(name=title, description=desc, href=ref,
                      upd=dateparser.parse(times_articles[0].text),
                      stat_words_len=json.dumps(topic_words_len),
                      stat_words_freq=json.dumps(topic_words_freq))
    new_topic.save()
예제 #2
0
파일: bdupdate.py 프로젝트: corwinnn/myBot
while True:
    try:
        db.close()
        db.connect()
        for index in range(len(titles)):
            if len(Topic.select().where(Topic.name == titles[index])) == 0:
                make_topic(refs[index], titles[index], description[index])
            else:
                cur_topic = Topic.get(Topic.name == titles[index])
                last_upd = cur_topic.upd
                articles = Parser(refs[index])
                times_articles = articles.get_time()
                cur_topic.upd = dateparser.parse(times_articles[0].text)
                cur_topic.save()
                a_titles, a_description, a_refs = articles.get_titles()
                have_new = False
                for j in range(len(times_articles)):
                    if dateparser.parse(times_articles[j].text) > last_upd:
                        have_new = True
                        print('new article')
                        article = Parser(a_refs[j])
                        article_words_len = defaultdict(int)
                        article_words_freq = defaultdict(int)
                        all_article_text = article.get_paragraphs()
                        fill_words(all_article_text.split(), article_words_freq, article_words_len)
                        new_article = Article(topic=titles[index], name=a_titles[j],
                                              href=a_refs[j],
                                              text=article.get_paragraphs(),
                                              upd=dateparser.parse(times_articles[j].text),
                                              stat_words_len=json.dumps(article_words_len),
예제 #3
0
파일: bdconfig.py 프로젝트: corwinnn/myBot
from collections import defaultdict
import json
import requests
from myParser import Parser
from bd import Topic, Article, Tag, db
import dateparser
import config
session = requests.Session()
session.max_redirects = config.MAX_REDIRECTS
my_site = Parser(config.MY_SITE)
titles, description, refs = my_site.get_titles()
all_titles = set(titles)
db.connect()

def make_tags(tags, title):
    """
    Заполняет таблицу с тегами.
    :param tags: лист тегов
    :param title: статья, откуда мы взяли теги
    """
    for tag in tags:
        new_tag = Tag(name=tag.text, article=title, href=tag['href'])
        new_tag.save()


def fill_words(text, words_freq, words_len):
    """
    Заполняет данные словари для статистики словами
    :param text: слова
    :param words_freq: словарь для сохранения частот
    :param words_len: словарь для сохранения длин