# Example 1
def post_article():
    """
    Analyze and persist newly posted articles.

    Expects a JSON request body with keys ``media_id``, ``items`` and
    ``update_time``; each item carries the article fields. Every article is
    topic-tagged via ``tagging_utils.passage_second_level_classify`` and
    stored through ``DAO_utils.mongo_insert_article``.

    :return: Flask response — ``{'code': 0}`` on success (HTTP 200), or
             ``{'code': 103}`` when a required key is missing (HTTP 500).
    """
    try:
        jdata = json.loads(request.data)
        media_id = jdata['media_id']
        items = jdata['items']
        update_time = jdata['update_time']
        for item in items:
            article_title = item['article_title']
            article_content = item['article_content']
            article_thumb_id = item['article_thumb_id']
            # Deterministic id derived from media id + thumbnail id; encode
            # explicitly because hashlib.md5 requires bytes on Python 3.
            article_id = hashlib.md5(
                (media_id + article_thumb_id).encode('utf-8')).hexdigest()
            article_url = item['article_url']
            article_post_user = item['article_post_user']
            article_post_date = update_time
            a_topiclist = tagging_utils.passage_second_level_classify(article_content)
            # Map topic tag -> probability for storage alongside the article.
            atags = {}
            for topic in a_topiclist:
                atags[topic['topic_tag']] = topic['topic_prob']
            article = Article(a_id=article_id, a_title=article_title, post_user=article_post_user,
                              post_date=article_post_date, a_tags=atags, a_url=article_url,
                              a_content=article_content)
            DAO_utils.mongo_insert_article(article)
        resp = make_response(json.dumps({'code': 0, 'msg': 'success'}), 200)
    except KeyError as ke:  # 'except KeyError, ke' is Python-2-only syntax
        print(ke)
        resp = make_response(
            json.dumps({'code': 103,
                        'msg': 'request key error, details=%s' % str(ke)}), 500)
    # BUG FIX: the original built `resp` but never returned it, so the view
    # implicitly returned None, which Flask rejects at request time.
    return resp
def tag_article_and_save(root_path):
    """
    Walk one level of subdirectories under *root_path*, topic-tag every JSON
    article file found and store it in MongoDB.

    Each file is expected to be JSON with keys ``post_content``,
    ``post_date`` (format ``%Y-%m-%d``), ``post_title`` and ``post_user``.

    :param root_path: directory whose immediate subdirectories hold the
                      per-article JSON files
    """
    for d in os.listdir(root_path):
        subdir = os.path.join(root_path, d)
        if not os.path.isdir(subdir):
            continue
        for f in os.listdir(subdir):
            fpath = os.path.join(subdir, f)
            if not os.path.isfile(fpath):
                continue
            print('processing %s ...' % f)
            # 'with' guarantees the handle is closed; the original
            # open(...).read() leaked the file descriptor.
            with open(fpath) as fh:
                fjson = json.loads(fh.read())
            content = fjson['post_content']
            post_date = datetime.datetime.strptime(fjson['post_date'], '%Y-%m-%d')
            post_title = fjson['post_title']
            post_user = fjson['post_user']
            # Map topic tag -> probability for the stored article.
            a_tags = {}
            for topic in passage_second_level_classify(content):
                a_tags[topic['topic_tag']] = topic['topic_prob']
            # Stable id from title + date string; encode explicitly because
            # hashlib.md5 requires bytes on Python 3.
            a_id = hashlib.md5(
                (post_title + fjson['post_date']).encode('utf-8')).hexdigest()
            article = Article.Article(a_id, post_title, post_user, a_tags, content, post_date)
            wechat_analyzer.DAO_utils.mongo_insert_article(article)
# Example 3
def classify_passage_boson_url():
    """
    Classify and keyword-tag the article behind a submitted URL.

    Reads ``url`` from the POSTed form, fetches the page content, then
    combines BosonNLP category/keyword results with jieba textrank / TF-IDF
    keywords and the local second-level topic classifier.

    :return: Flask JSON response with the class name, keyword lists and the
             topic list (``ensure_ascii=False`` so CJK text stays readable).
    """
    url = request.form['url']
    content = get_content(url)
    classify_result = requests.post('http://bosonnlp.com/analysis/category', {'data': content}).content
    keyword_result = json.loads(
        requests.post('http://bosonnlp.com/analysis/key?top_k=%s' % 100, {'data': content}).content)
    # BosonNLP returns a numeric class index; map it to a readable name.
    # FIX: added the missing u'' prefix on '互联网' — a bytes str mixed into
    # an otherwise-unicode dict breaks json.dumps(..., ensure_ascii=False)
    # on Python 2 for non-ASCII data.
    class_dict = {0: u'体育', 1: u'教育', 2: u'财经', 3: u'社会',
                  4: u'娱乐', 5: u'军事', 6: u'国内',
                  7: u'科技', 8: u'互联网', 9: u'房产', 10: u'国际',
                  11: u'女人', 12: u'汽车', 13: u'游戏'}
    print(classify_result)
    # Raw string: '\d' in a plain literal is an invalid escape on Python 3.
    classify_result = int(re.compile(r'\d+').findall(classify_result)[0])
    jieba_textrank = jieba.analyse.textrank(content, topK=15)
    jieba_keywords = jieba.analyse.extract_tags(content, allowPOS=['n', 'vn', 'ns', 'v'], topK=15)
    topic_list = tagging_utils.passage_second_level_classify(content)
    resp = make_response(
        json.dumps({'code': 0, 'class': class_dict[classify_result], 'keyword': keyword_result,
                    'jieba_textrank': jieba_textrank, 'jieba_keywords': jieba_keywords,
                    'topic_list': topic_list},
                   ensure_ascii=False),
        200)
    return resp