Example No. 1
def simple(_date):
    """Sample set
    :return: List<News>
    """
    session = Session(**database).session
    ret = session.query(
        News.title, News.abstract).filter(News.savedate >= _date).all()
    ret = [x[0] + x[1] for x in ret]
    session.close()
    return ret
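A minimal usage sketch, not from the source: the seven-day window and the call site are illustrative, and `simple` is assumed to be importable from a module that already holds the `database` config shown in Example No. 6.

from datetime import date, timedelta

# Hypothetical caller: build a corpus from news saved over the last week.
corpus = simple(date.today() - timedelta(days=7))
print(f"loaded {len(corpus)} documents")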
Example No. 2
def schedule_special_hibor():
    session = Session(**database)
    news = api.special_hibor()
    for n in news:
        session.insert_one(n)
    logs.info("Finished importing Hibor news into the database")
    print("Finished importing Hibor news into the database")
    session.close()
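How this job is triggered is not shown in the excerpt; the sketch below assumes APScheduler, which is only one plausible choice of scheduler.

from apscheduler.schedulers.blocking import BlockingScheduler

# Hypothetical wiring: run the Hibor import once per hour.
scheduler = BlockingScheduler()
scheduler.add_job(schedule_special_hibor, "interval", hours=1)
scheduler.start()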
Example No. 3
def simple(_date):
    """Sample set
    :return: List<News>
    """
    session = Session(**database).session
    ret = session.query(News.title, News.abstract).filter(
        News.savedate >= _date - timedelta(days=5)).all()
    if not ret:
        ret = session.query(News.title, News.abstract).filter(
            News.savedate >= _date - timedelta(days=1)).all()
    session.close()
    ret = [Similarity.reduce(x[0] + x[1]) for x in ret]
    return ret
Example No. 4
def schedule(website_name):
    session = Session(**database)
    web = website[website_name]
    for k, v in web.items():
        for section in v:
            logs.info(
                f"{datetime.now().strftime('%Y-%m-%d %H:%M')}  Running task <{website_name} {section['section']}>"
            )
            for i in range(1, 3):
                section["page"] = i
                try:
                    news = getattr(api, website_name)(**section)
                except Exception as e:
                    logs.error(e)
                    break
                for n in news:
                    n = api.revise(n)
                    if n:
                        session.insert_one(n)

    session.close()
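The loop above assumes a `website` mapping whose values group lists of section dicts, with each dict's keys matching the keyword arguments of the corresponding `api` crawler function. An illustrative shape follows; the names are hypothetical, since the real config lives in `app.config`.

# Hypothetical config shape, for illustration only.
website = {
    "eastmoney": {
        "news": [
            {"section": "finance", "page": 1},
            {"section": "stock", "page": 1},
        ],
    },
}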
Example No. 5
def schedule_special_search_api():
    session = Session(**database)
    news = api.special_eastmoney_search_api()
    for n in news:
        session.insert_one(n)
    session.close()
Example No. 6
from jieba import lcut
from jieba import posseg
from gensim.similarities import SparseMatrixSimilarity
from gensim.corpora import Dictionary
from gensim.models import TfidfModel

from app.config import database_remote as database
from app.database.insert import Session
from app.database.model import News

session = Session(**database).session
ret = session.query(News).filter(News.savedate >= "2019-10-28").all()
session.close()


def similar(aim):
    # Concatenate title and abstract for the target article and for all but
    # the last ten query results.
    aim_text = aim.title + aim.abstract
    simple = [x.title + x.abstract for x in ret[0:-10]]
    # Tokenise the stored articles with POS tagging, deduplicate the tokens,
    # then build a dictionary and a TF-IDF index over the articles.
    text = [set(posseg.lcut(x)) for x in simple]
    text = list({y for x in text for y in x})
    dictionary = Dictionary(text)
    length = len(dictionary.token2id)
    corpus = [dictionary.doc2bow(lcut(src)) for src in simple]
    tfidf = TfidfModel(corpus)
    tf_texts = tfidf[corpus]
    sparse_matrix = SparseMatrixSimilarity(tf_texts, length)

    # Score the target article against every stored article.
    vector = dictionary.doc2bow(lcut(aim_text))
    tf_kw = tfidf[vector]
    similarities = sparse_matrix.get_similarities(tf_kw)
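    # The excerpt ends here. A hypothetical continuation, not from the
    # source: flag `aim` as a near-duplicate when any stored article
    # exceeds an assumed cosine-similarity threshold of 0.8.
    return bool((similarities > 0.8).any())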