Beispiel #1
0
def subeventpie():
    """Return each sub-opinion cluster's share of a topic's clustered comments.

    The topic name is read from the ``query`` request argument (default
    ``default_topic_name``).  All comments of the topic are tallied per
    ``clusterid``; comments without that field are ignored.

    Returns:
        A JSON object mapping the first three feature words of a cluster
        (comma-joined) to that cluster's fraction of the tally.  Clusters
        without feature words are left out.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    event_comments = EventComments(em.getEventIDByName(topic_name))

    # Count comments per cluster id.
    counts = {}
    for item in event_comments.getAllNewsComments():
        if 'clusterid' not in item:
            continue
        cid = item['clusterid']
        counts[cid] = counts.get(cid, 0) + 1

    denominator = float(sum(counts.values()))
    shares = {}
    for cid in counts:
        words = event_comments.get_feature_words(cid)
        if not words:
            continue
        shares[','.join(words[:3])] = float(counts[cid]) / denominator

    return json.dumps(shares)
Beispiel #2
0
def ratio():
    """Return each sub-opinion cluster's share of one news item's comments.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).  Comments of that news
    item are tallied per ``clusterid``; comments without it are ignored.

    Returns:
        A JSON object mapping the first three feature words of a cluster
        (comma-joined) to that cluster's fraction of the tally.  Clusters
        without feature words are left out.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    article_id = request.args.get('news_id', default_news_id)
    event_comments = EventComments(em.getEventIDByName(topic_name))

    # Count comments per cluster id.
    counts = {}
    for item in event_comments.getNewsComments(article_id):
        if 'clusterid' not in item:
            continue
        cid = item['clusterid']
        counts[cid] = counts.get(cid, 0) + 1

    denominator = float(sum(counts.values()))
    shares = {}
    for cid in counts:
        words = event_comments.get_feature_words(cid)
        if not words:
            continue
        shares[','.join(words[:3])] = float(counts[cid]) / denominator

    return json.dumps(shares)
Beispiel #3
0
def sentimentpie():
    """Return the share of each sentiment label among a topic's comments.

    The topic name is read from the ``query`` request argument (default
    ``default_topic_name``).  Every comment of the topic that carries a
    recognised ``sentiment`` code (0-3) is tallied.

    Returns:
        A JSON object mapping the sentiment label to its fraction of the
        tallied comments.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)

    eventcomment = EventComments(topicid)
    comments = eventcomment.getAllNewsComments()

    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}
    senti_ratio = dict()
    for comment in comments:
        # Skip comments without a sentiment and codes that have no label,
        # so a single unexpected code cannot fail the whole request with a
        # KeyError during the label lookup below.
        if 'sentiment' not in comment or comment['sentiment'] not in senti_dict:
            continue
        sentiment = comment['sentiment']
        senti_ratio[sentiment] = senti_ratio.get(sentiment, 0) + 1

    results = dict()
    total_count = float(sum(senti_ratio.values()))
    for sentiment, count in senti_ratio.items():
        results[senti_dict[sentiment]] = float(count) / total_count

    return json.dumps(results)
Beispiel #4
0
def sentiratio():
    """Return the share of each sentiment label among one news item's comments.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).  Every comment of the
    news item that carries a recognised ``sentiment`` code (0-3) is tallied.

    Returns:
        A JSON object mapping the sentiment label to its fraction of the
        tallied comments.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)

    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)

    senti_dict = {
        0: '中性',
        1: '积极',
        2: '愤怒',
        3: '悲伤',
    }
    senti_ratio = dict()
    for comment in comments:
        # Skip comments without a sentiment and codes that have no label,
        # so a single unexpected code cannot fail the whole request with a
        # KeyError during the label lookup below.
        if 'sentiment' not in comment or comment['sentiment'] not in senti_dict:
            continue
        sentiment = comment['sentiment']
        senti_ratio[sentiment] = senti_ratio.get(sentiment, 0) + 1

    results = dict()
    total_count = float(sum(senti_ratio.values()))
    for sentiment, count in senti_ratio.items():
        results[senti_dict[sentiment]] = float(count) / total_count

    return json.dumps(results)
Beispiel #5
0
def commments():
    """Report whether a news item has any comments.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).

    Returns:
        The JSON string ``{"status": "success"}`` when the comment lookup
        yields a truthy result, otherwise ``{"status": "fail"}``.
    """
    topic_name = request.args.get('query', default_topic_name)
    article_id = request.args.get('news_id', default_news_id)
    event_comments = EventComments(em.getEventIDByName(topic_name))

    found = event_comments.getNewsComments(article_id)
    outcome = "success" if found else "fail"
    return json.dumps({"status": outcome})
Beispiel #6
0
def commments():
    """Report whether a news item has any comments.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).

    Returns:
        The JSON string ``{"status": "success"}`` when the comment lookup
        yields a truthy result, otherwise ``{"status": "fail"}``.
    """
    topic_name = request.args.get('query', default_topic_name)
    article_id = request.args.get('news_id', default_news_id)
    event_comments = EventComments(em.getEventIDByName(topic_name))

    found = event_comments.getNewsComments(article_id)
    outcome = "success" if found else "fail"
    return json.dumps({"status": outcome})
Beispiel #7
0
def keywords():
    """Return the feature words of every comment cluster of a news item.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).

    Returns:
        A JSON object mapping each cluster id to a two-element list: the
        first five feature words, followed by the full feature-word list.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)

    eventcomment = EventComments(topicid)
    clusterids = eventcomment.get_cluster_ids(news_id)

    results = dict()
    for clusterid in clusterids:
        # Guard against a falsy result (e.g. None) so that slicing cannot
        # raise; such a cluster is reported with empty word lists.
        fwords = eventcomment.get_feature_words(clusterid) or []
        results[clusterid] = [fwords[:5], fwords]

    return json.dumps(results)
Beispiel #8
0
def keywords():
    """Return the feature words of every comment cluster of a news item.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).

    Returns:
        A JSON object mapping each cluster id to a two-element list: the
        first five feature words, followed by the full feature-word list.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)

    eventcomment = EventComments(topicid)
    clusterids = eventcomment.get_cluster_ids(news_id)

    results = dict()
    for clusterid in clusterids:
        # Guard against a falsy result (e.g. None) so that slicing cannot
        # raise; such a cluster is reported with empty word lists.
        fwords = eventcomment.get_feature_words(clusterid) or []
        results[clusterid] = [fwords[:5], fwords]

    return json.dumps(results)
Beispiel #9
0
def index():
    """Render the comment page for one news item.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).

    Returns:
        The plain string ``'no comments'`` when the news item has no
        comments, otherwise the rendered ``index/comment.html`` template.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    event_comments = EventComments(topicid)

    # Collect the template variables; the news lookups happen before the
    # comment check.
    article = News(news_id, topicid)
    context = {
        'topic': topic_name,
        'topic_id': topicid,
        'news_id': news_id,
        'news_subeventid': article.get_news_subeventid(),
        'news_url': article.get_news_url(),
    }

    if not event_comments.getNewsComments(news_id):
        return 'no comments'

    return render_template('index/comment.html', **context)
Beispiel #10
0
def index():
    """Render the comment page for one news item.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``news_id`` (default ``default_news_id``).

    Returns:
        The plain string ``'no comments'`` when the news item has no
        comments, otherwise the rendered ``index/comment.html`` template.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    news_id = request.args.get('news_id', default_news_id)
    topicid = em.getEventIDByName(topic_name)
    event_comments = EventComments(topicid)

    # Collect the template variables; the news lookups happen before the
    # comment check.
    article = News(news_id, topicid)
    context = {
        'topic': topic_name,
        'topic_id': topicid,
        'news_id': news_id,
        'news_subeventid': article.get_news_subeventid(),
        'news_url': article.get_news_url(),
    }

    if not event_comments.getNewsComments(news_id):
        return 'no comments'

    return render_template('index/comment.html', **context)
Beispiel #11
0
def cluster():
    """Return a news item's comments grouped by cluster.

    Request arguments: ``query`` (topic name, default ``default_topic_name``),
    ``news_id`` (default ``default_news_id``) and ``sort`` (comment field to
    order by, default ``'weight'``).

    Returns:
        A JSON object mapping each cluster id to a two-element list: the
        first five feature words (comma-joined) and the cluster's comments
        sorted by the ``sort`` field in descending order.  Comments without
        a ``clusterid`` and clusters without feature words are left out.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    article_id = request.args.get('news_id', default_news_id)
    order_field = request.args.get('sort', 'weight')
    event_comments = EventComments(em.getEventIDByName(topic_name))

    # Bucket the comments by their cluster id.
    grouped = {}
    for item in event_comments.getNewsComments(article_id):
        if 'clusterid' in item:
            grouped.setdefault(item['clusterid'], []).append(item)

    payload = {}
    for cid in grouped:
        words = event_comments.get_feature_words(cid)
        if not words:
            continue
        members = grouped[cid]
        members.sort(key=lambda entry: entry[order_field], reverse=True)
        payload[cid] = [','.join(words[:5]), members]

    return json.dumps(payload)
Beispiel #12
0
def urlsearch():
    """Look up the id of a news item by its URL.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``url`` (news URL, default ``default_news_url``).

    Returns:
        The JSON string ``{"news_id": <id>}`` when the URL resolves to a
        news item that has comments, otherwise ``{"news_id": null}``.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)

    # Use the URL supplied by the caller; a hard-coded debugging URL used to
    # overwrite it here, which made the ``url`` argument ineffective.
    news_url = request.args.get('url', default_news_url)

    event = Event(topicid)
    news_id = event.get_news_id_by_url(news_url)
    if not news_id:
        return json.dumps({"news_id": None})

    eventcomment = EventComments(topicid)
    if not eventcomment.getNewsComments(news_id):
        return json.dumps({"news_id": None})

    return json.dumps({"news_id": news_id})
Beispiel #13
0
def urlsearch():
    """Look up the id of a news item by its URL.

    Request arguments: ``query`` (topic name, default ``default_topic_name``)
    and ``url`` (news URL, default ``default_news_url``).

    Returns:
        The JSON string ``{"news_id": <id>}`` when the URL resolves to a
        news item that has comments, otherwise ``{"news_id": null}``.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    topicid = em.getEventIDByName(topic_name)

    # Use the URL supplied by the caller; a hard-coded debugging URL used to
    # overwrite it here, which made the ``url`` argument ineffective.
    news_url = request.args.get('url', default_news_url)

    event = Event(topicid)
    news_id = event.get_news_id_by_url(news_url)
    if not news_id:
        return json.dumps({"news_id": None})

    eventcomment = EventComments(topicid)
    if not eventcomment.getNewsComments(news_id):
        return json.dumps({"news_id": None})

    return json.dumps({"news_id": news_id})
Beispiel #14
0
def sentiment():
    """Return a topic's comments grouped by sentiment code.

    The topic name is read from the ``query`` request argument (default
    ``default_topic_name``).  Comments without a ``sentiment`` field are
    left out.

    Returns:
        A JSON object mapping each sentiment code to the list of comments
        carrying it.
    """
    topic_name = request.args.get('query', default_topic_name)
    event_comments = EventComments(em.getEventIDByName(topic_name))

    by_sentiment = {}
    for item in event_comments.getAllNewsComments():
        if 'sentiment' not in item:
            continue
        by_sentiment.setdefault(item['sentiment'], []).append(item)

    return json.dumps(by_sentiment)
Beispiel #15
0
def sentiment():
    """Return a topic's comments grouped by sentiment code.

    The topic name is read from the ``query`` request argument (default
    ``default_topic_name``).  Comments without a ``sentiment`` field are
    left out.

    Returns:
        A JSON object mapping each sentiment code to the list of comments
        carrying it.
    """
    topic_name = request.args.get('query', default_topic_name)
    event_comments = EventComments(em.getEventIDByName(topic_name))

    by_sentiment = {}
    for item in event_comments.getAllNewsComments():
        if 'sentiment' not in item:
            continue
        by_sentiment.setdefault(item['sentiment'], []).append(item)

    return json.dumps(by_sentiment)
Beispiel #16
0
def cluster():
    """Return a news item's comments grouped by cluster.

    Request arguments: ``query`` (topic name, default ``default_topic_name``),
    ``news_id`` (default ``default_news_id``) and ``sort`` (comment field to
    order by, default ``'weight'``).

    Returns:
        A JSON object mapping each cluster id to a two-element list: the
        first five feature words (comma-joined) and the cluster's comments
        sorted by the ``sort`` field in descending order.  Comments without
        a ``clusterid`` and clusters without feature words are left out.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    article_id = request.args.get('news_id', default_news_id)
    order_field = request.args.get('sort', 'weight')
    event_comments = EventComments(em.getEventIDByName(topic_name))

    # Bucket the comments by their cluster id.
    grouped = {}
    for item in event_comments.getNewsComments(article_id):
        if 'clusterid' in item:
            grouped.setdefault(item['clusterid'], []).append(item)

    payload = {}
    for cid in grouped:
        words = event_comments.get_feature_words(cid)
        if not words:
            continue
        members = grouped[cid]
        members.sort(key=lambda entry: entry[order_field], reverse=True)
        payload[cid] = [','.join(words[:5]), members]

    return json.dumps(payload)
Beispiel #17
0
def index():
    """Render the weibo analysis page.

    Request arguments (each with a module-level default): ``query`` (topic
    name), ``news_id``, ``cluster_num``, ``cluster_eva_min_size``, ``vsm``
    and ``calcu``.  ``calcu`` defaults to 1 (recompute); 0 means load the
    data from an existing result file.

    Returns:
        The rendered ``index/weibo.html`` template.
    """
    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)

    context = {
        'topic': topic_name,
        'topic_id': topicid,
        'news_id': news_id,
        'cluster_num': request.args.get('cluster_num', default_cluster_num),
        'cluster_eva_min_size': request.args.get(
            'cluster_eva_min_size', default_cluster_eva_min_size),
        'vsm': request.args.get('vsm', default_vsm),
        'calc_label': int(request.args.get('calcu', 1)),
    }

    # The template receives the literal string 'None' when the news item has
    # no sub-event id.
    context['news_subeventid'] = (
        News(news_id, topicid).get_news_subeventid() or 'None')

    # NOTE(review): the comments are fetched but the result is not used in
    # this handler — confirm whether the lookup is still needed.
    EventComments(topicid).getNewsComments(news_id)

    return render_template('index/weibo.html', **context)
Beispiel #18
0
def index():
    """Render the weibo analysis page.

    Request arguments (each with a module-level default): ``query`` (topic
    name), ``news_id``, ``min_cluster_num``, ``max_cluster_num``,
    ``cluster_eva_min_size`` and ``vsm``.

    Returns:
        The rendered ``index/weibo.html`` template.
    """
    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)

    context = {
        'topic': topic_name,
        'topic_id': topicid,
        'news_id': news_id,
        'min_cluster_num': request.args.get(
            'min_cluster_num', default_min_cluster_num),
        'max_cluster_num': request.args.get(
            'max_cluster_num', default_max_cluster_num),
        'cluster_eva_min_size': request.args.get(
            'cluster_eva_min_size', default_cluster_eva_min_size),
        'vsm': request.args.get('vsm', default_vsm),
    }

    # The template receives the literal string 'None' when the news item has
    # no sub-event id.
    context['news_subeventid'] = (
        News(news_id, topicid).get_news_subeventid() or 'None')

    # NOTE(review): the comments are fetched but the result is not used in
    # this handler — confirm whether the lookup is still needed.
    EventComments(topicid).getNewsComments(news_id)

    return render_template('index/weibo.html', **context)
Beispiel #19
0
def sentiment():
    """Return a news item's comments grouped by sentiment code.

    Request arguments: ``query`` (topic name, default ``default_topic_name``),
    ``news_id`` (default ``default_news_id``) and ``sort`` (comment field to
    order by, default ``'weight'``).  Comments without a ``sentiment``
    field are left out.

    Returns:
        A JSON object mapping each sentiment code to its comments, sorted
        by the ``sort`` field in descending order.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    article_id = request.args.get('news_id', default_news_id)
    order_field = request.args.get('sort', 'weight')
    event_comments = EventComments(em.getEventIDByName(topic_name))

    by_sentiment = {}
    for item in event_comments.getNewsComments(article_id):
        if 'sentiment' not in item:
            continue
        by_sentiment.setdefault(item['sentiment'], []).append(item)

    # Order every bucket in place, highest value of the sort field first.
    for bucket in by_sentiment.values():
        bucket.sort(key=lambda entry: entry[order_field], reverse=True)

    return json.dumps(by_sentiment)
Beispiel #20
0
def sentiment():
    """Return a news item's comments grouped by sentiment code.

    Request arguments: ``query`` (topic name, default ``default_topic_name``),
    ``news_id`` (default ``default_news_id``) and ``sort`` (comment field to
    order by, default ``'weight'``).  Comments without a ``sentiment``
    field are left out.

    Returns:
        A JSON object mapping each sentiment code to its comments, sorted
        by the ``sort`` field in descending order.
    """
    topic_name = request.args.get('query', default_topic_name)  # topic name
    article_id = request.args.get('news_id', default_news_id)
    order_field = request.args.get('sort', 'weight')
    event_comments = EventComments(em.getEventIDByName(topic_name))

    by_sentiment = {}
    for item in event_comments.getNewsComments(article_id):
        if 'sentiment' not in item:
            continue
        by_sentiment.setdefault(item['sentiment'], []).append(item)

    # Order every bucket in place, highest value of the sort field first.
    for bucket in by_sentiment.values():
        bucket.sort(key=lambda entry: entry[order_field], reverse=True)

    return json.dumps(by_sentiment)
Beispiel #21
0
def comments_list():
    """Cluster a topic's comments and return the pie-chart ratios.

    Request arguments (each with a module-level default): ``topicid``,
    ``subeventid`` (``'global'`` selects all comments of the topic),
    ``min_cluster_num``, ``max_cluster_num``, ``cluster_eva_min_size`` and
    ``vsm``.

    Side effects: any existing ``temp_file`` is removed first; on success it
    is rewritten with the feature words and the de-duplication groupings
    (by sentiment and by cluster) as JSON.

    Returns:
        The JSON string ``{"ratio": ..., "sentiratio": ...}``, or
        ``{"status": "fail"}`` when no comments were found.
    """
    if os.path.exists(temp_file):
        os.remove(temp_file)
    AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../../public/')
    # Register the directory only once; appending on every request would
    # make sys.path grow without bound.
    if AB_PATH not in sys.path:
        sys.path.append(AB_PATH)
    from comment_module import comments_calculation_v2

    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    min_cluster_num = request.args.get('min_cluster_num', default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num', default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size', default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    ec = EventComments(topicid)
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)

    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(comments, int(min_cluster_num), int(max_cluster_num),
                                          int(cluster_eva_min_size), vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    senti_dict = {
        0: '中性',
        1: '积极',
        2: '愤怒',
        3: '悲伤',
    }

    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Items without a cluster, or whose cluster id starts with
        # 'nonsense', take part in neither grouping.
        if 'clusterid' not in comment or comment['clusterid'][:8] == 'nonsense':
            continue
        clusterid = comment['clusterid']
        cluster_ratio[clusterid] = cluster_ratio.get(clusterid, 0) + 1
        cluster_results.setdefault(clusterid, []).append(comment)

        if 'sentiment' in comment and comment['sentiment'] in senti_dict:
            sentiment = comment['sentiment']
            senti_ratio[sentiment] = senti_ratio.get(sentiment, 0) + 1
            sentiment_results.setdefault(sentiment, []).append(comment)

    # Share of each cluster, keyed by its first three feature words.
    ratio_results = dict()
    ratio_total_count = float(sum(cluster_ratio.values()))
    for clusterid, count in cluster_ratio.items():
        if clusterid in features:
            feature = features[clusterid]
            if feature:
                ratio_results[','.join(feature[:3])] = float(count) / ratio_total_count

    # Share of each sentiment, keyed by its label.
    sentiratio_results = dict()
    sentiratio_total_count = float(sum(senti_ratio.values()))
    for sentiment, count in senti_ratio.items():
        sentiratio_results[senti_dict[sentiment]] = float(count) / sentiratio_total_count

    # De-duplicate per sentiment: group comments by "same_from_sentiment".
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.items():
        dump_dict = dict()
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"], []).append(comment)
        sentiment_dump_dict[sentiment] = dump_dict

    # De-duplicate per sub-opinion cluster: group comments by "same_from".
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.items():
        if clusterid in features and features[clusterid]:
            dump_dict = dict()
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[clusterid] = dump_dict

    # Serialise before opening so a serialisation error cannot leave an
    # empty file behind; the with-block closes the handle on any error.
    dump_text = json.dumps({"features": features, "senti_dump_dict": sentiment_dump_dict,
                            "cluster_dump_dict": cluster_dump_dict})
    with open(temp_file, 'w') as dump_file:
        dump_file.write(dump_text)

    return json.dumps({"ratio": ratio_results, "sentiratio": sentiratio_results})
Beispiel #22
0
def comments_list():
    """Cluster a task's comments and return the pie-chart ratios.

    Request arguments (each with a module-level default): ``taskid``,
    ``cluster_num``, ``cluster_eva_min_size`` and ``vsm``.

    Side effects: the file ``taskid + temp_file_post`` is removed if it
    exists; on success it is rewritten with the feature words, the
    de-duplication groupings and both ratio tables as JSON.

    Returns:
        The JSON string ``{"ratio": ..., "sentiratio": ...}``, or
        ``{"status": "fail"}`` when no comments were found or ``taskid`` is
        not a plain file-name component.
    """
    taskid = request.args.get('taskid', default_task_id)
    # NOTE(review): cluster_num is resolved here but never handed to
    # comments_calculation_v2 below — confirm whether it should be.
    cluster_num = request.args.get('cluster_num', '')
    if cluster_num == '':
        cluster_num = default_cluster_num
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    # taskid comes straight from the query string and becomes part of a path
    # that is deleted and rewritten below, so refuse anything that carries a
    # directory component.
    if os.path.basename(taskid) != taskid:
        return json.dumps({"status": "fail"})

    temp_file = taskid + temp_file_post
    if os.path.exists(temp_file):
        os.remove(temp_file)

    ec = EventComments(taskid)
    comments = ec.getAllNewsComments()

    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(
        comments, cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}

    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Items without a cluster, or whose cluster id starts with
        # 'nonsense', take part in neither grouping.
        if 'clusterid' not in comment or comment['clusterid'][:8] == 'nonsense':
            continue
        clusterid = comment['clusterid']
        cluster_ratio[clusterid] = cluster_ratio.get(clusterid, 0) + 1
        cluster_results.setdefault(clusterid, []).append(comment)

        if 'sentiment' in comment and comment['sentiment'] in senti_dict:
            sentiment = comment['sentiment']
            senti_ratio[sentiment] = senti_ratio.get(sentiment, 0) + 1
            sentiment_results.setdefault(sentiment, []).append(comment)

    # Share of each cluster, keyed by its first three feature words.
    ratio_results = dict()
    ratio_total_count = float(sum(cluster_ratio.values()))
    for clusterid, count in cluster_ratio.items():
        if clusterid in features:
            feature = features[clusterid]
            if feature:
                ratio_results[','.join(feature[:3])] = float(count) / ratio_total_count

    # Share of each sentiment, keyed by its label.
    sentiratio_results = dict()
    sentiratio_total_count = float(sum(senti_ratio.values()))
    for sentiment, count in senti_ratio.items():
        sentiratio_results[senti_dict[sentiment]] = float(count) / sentiratio_total_count

    # De-duplicate per sentiment: group comments by "same_from_sentiment".
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.items():
        dump_dict = dict()
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"], []).append(comment)
        sentiment_dump_dict[sentiment] = dump_dict

    # De-duplicate per sub-opinion cluster: group comments by "same_from".
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.items():
        if clusterid in features and features[clusterid]:
            dump_dict = dict()
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[clusterid] = dump_dict

    # Serialise before opening so a serialisation error cannot leave an
    # empty file behind; the with-block closes the handle on any error.
    dump_text = json.dumps({"features": features, "senti_dump_dict": sentiment_dump_dict,
                            "cluster_dump_dict": cluster_dump_dict, "ratio": ratio_results,
                            "sentiratio": sentiratio_results})
    with open(temp_file, 'w') as dump_file:
        dump_file.write(dump_text)

    return json.dumps({
        "ratio": ratio_results,
        "sentiratio": sentiratio_results
    })
Beispiel #23
0
def comments_list():
    """Cluster a topic's comments and return the pie-chart ratios.

    Request arguments (each with a module-level default): ``topicid``,
    ``subeventid`` (``'global'`` selects all comments of the topic),
    ``min_cluster_num``, ``max_cluster_num``, ``cluster_eva_min_size`` and
    ``vsm``.

    Side effects: any existing ``temp_file`` is removed first; on success it
    is rewritten with the feature words and the de-duplication groupings
    (by sentiment and by cluster) as JSON.

    Returns:
        The JSON string ``{"ratio": ..., "sentiratio": ...}``, or
        ``{"status": "fail"}`` when no comments were found.
    """
    if os.path.exists(temp_file):
        os.remove(temp_file)
    AB_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           '../../public/')
    # Register the directory only once; appending on every request would
    # make sys.path grow without bound.
    if AB_PATH not in sys.path:
        sys.path.append(AB_PATH)
    from comment_module import comments_calculation_v2

    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    min_cluster_num = request.args.get('min_cluster_num',
                                       default_min_cluster_num)
    max_cluster_num = request.args.get('max_cluster_num',
                                       default_max_cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size',
                                            default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    ec = EventComments(topicid)
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)

    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(comments, int(min_cluster_num),
                                          int(max_cluster_num),
                                          int(cluster_eva_min_size), vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    senti_dict = {0: '中性', 1: '积极', 2: '愤怒', 3: '悲伤'}

    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Items without a cluster, or whose cluster id starts with
        # 'nonsense', take part in neither grouping.
        if 'clusterid' not in comment or comment['clusterid'][:8] == 'nonsense':
            continue
        clusterid = comment['clusterid']
        cluster_ratio[clusterid] = cluster_ratio.get(clusterid, 0) + 1
        cluster_results.setdefault(clusterid, []).append(comment)

        if 'sentiment' in comment and comment['sentiment'] in senti_dict:
            sentiment = comment['sentiment']
            senti_ratio[sentiment] = senti_ratio.get(sentiment, 0) + 1
            sentiment_results.setdefault(sentiment, []).append(comment)

    # Share of each cluster, keyed by its first three feature words.
    ratio_results = dict()
    ratio_total_count = float(sum(cluster_ratio.values()))
    for clusterid, count in cluster_ratio.items():
        if clusterid in features:
            feature = features[clusterid]
            if feature:
                ratio_results[','.join(feature[:3])] = float(count) / ratio_total_count

    # Share of each sentiment, keyed by its label.
    sentiratio_results = dict()
    sentiratio_total_count = float(sum(senti_ratio.values()))
    for sentiment, count in senti_ratio.items():
        sentiratio_results[senti_dict[sentiment]] = float(count) / sentiratio_total_count

    # De-duplicate per sentiment: group comments by "same_from_sentiment".
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.items():
        dump_dict = dict()
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"], []).append(comment)
        sentiment_dump_dict[sentiment] = dump_dict

    # De-duplicate per sub-opinion cluster: group comments by "same_from".
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.items():
        if clusterid in features and features[clusterid]:
            dump_dict = dict()
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[clusterid] = dump_dict

    # Serialise before opening so a serialisation error cannot leave an
    # empty file behind; the with-block closes the handle on any error.
    dump_text = json.dumps({"features": features, "senti_dump_dict": sentiment_dump_dict,
                            "cluster_dump_dict": cluster_dump_dict})
    with open(temp_file, 'w') as dump_file:
        dump_file.write(dump_text)

    return json.dumps({
        "ratio": ratio_results,
        "sentiratio": sentiratio_results,
    })
Beispiel #24
0
def ratio():
    """Cluster one news item's comments and return the pie-chart ratios.

    Request arguments (each with a module-level default): ``query`` (topic
    name), ``news_id``, ``cluster_num``, ``cluster_eva_min_size`` and
    ``vsm``.  When ``cluster_num`` is left at its default, ``-1`` is passed
    to the clustering routine.

    Side effects: any existing ``temp_file`` is removed first; on success it
    is rewritten with the feature words and the de-duplication groupings
    (by sentiment and by cluster) as JSON.

    Returns:
        The JSON string ``{"ratio": ..., "sentiratio": ...}``, or
        ``{"status": "fail"}`` when the news item has no comments.
    """
    if os.path.exists(temp_file):
        os.remove(temp_file)

    topic_name = request.args.get('query', default_weibo_topic_name)  # topic name
    news_id = request.args.get('news_id', default_weibo_news_id)
    topicid = em.getEventIDByName(topic_name)
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num:
        cluster_num = -1
    # A value taken from the query string is text; convert it so the
    # clustering routine always receives an integer.
    cluster_num = int(cluster_num)
    cluster_eva_min_size = request.args.get('cluster_eva_min_size', default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    eventcomment = EventComments(topicid)
    comments = eventcomment.getNewsComments(news_id)
    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(comments, cluster_num=cluster_num,
                                          cluster_eva_min_size=int(cluster_eva_min_size),
                                          version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    cluster_ratio = dict()
    senti_ratio = dict()
    sentiment_results = dict()
    cluster_results = dict()
    for comment in item_infos:
        # Items without a cluster, or whose cluster id starts with
        # 'nonsense', take part in neither grouping.
        if 'clusterid' not in comment or comment['clusterid'][:8] == 'nonsense':
            continue
        clusterid = comment['clusterid']
        cluster_ratio[clusterid] = cluster_ratio.get(clusterid, 0) + 1
        cluster_results.setdefault(clusterid, []).append(comment)

        if 'sentiment' in comment and comment['sentiment'] in emotions_vk_v1:
            sentiment = comment['sentiment']
            senti_ratio[sentiment] = senti_ratio.get(sentiment, 0) + 1
            sentiment_results.setdefault(sentiment, []).append(comment)

    # Share of each cluster, keyed by its first three feature words.
    ratio_results = dict()
    ratio_total_count = float(sum(cluster_ratio.values()))
    for clusterid, count in cluster_ratio.items():
        if clusterid in features:
            feature = features[clusterid]
            if feature:
                ratio_results[','.join(feature[:3])] = float(count) / ratio_total_count

    # Share of each sentiment, keyed by its label; empty labels are skipped.
    sentiratio_results = dict()
    sentiratio_total_count = float(sum(senti_ratio.values()))
    for sentiment, count in senti_ratio.items():
        label = emotions_vk_v1[sentiment]
        if label:
            sentiratio_results[label] = float(count) / sentiratio_total_count

    # De-duplicate per sentiment: group comments by "same_from_sentiment".
    sentiment_dump_dict = dict()
    for sentiment, contents in sentiment_results.items():
        dump_dict = dict()
        for comment in contents:
            dump_dict.setdefault(comment["same_from_sentiment"], []).append(comment)
        sentiment_dump_dict[sentiment] = dump_dict

    # De-duplicate per sub-opinion cluster: group comments by "same_from".
    cluster_dump_dict = dict()
    for clusterid, contents in cluster_results.items():
        if clusterid in features and features[clusterid]:
            dump_dict = dict()
            for comment in contents:
                dump_dict.setdefault(comment["same_from"], []).append(comment)
            cluster_dump_dict[clusterid] = dump_dict

    # Serialise before opening so a serialisation error cannot leave an
    # empty file behind; the with-block closes the handle on any error.
    dump_text = json.dumps({"features": features, "senti_dump_dict": sentiment_dump_dict,
                            "cluster_dump_dict": cluster_dump_dict})
    with open(temp_file, 'w') as dump_file:
        dump_file.write(dump_text)

    return json.dumps({"ratio": ratio_results, "sentiratio": sentiratio_results})
Beispiel #25
0
def comments_list():
    """Compute pie-chart ratios and dump de-duplicated comment groups to a file.

    Reads ``topicid``, ``subeventid``, ``cluster_num``,
    ``cluster_eva_min_size`` and ``vsm`` from the request query arguments,
    loads the matching comments, and runs ``comments_calculation_v2`` on them.

    Comments without a ``clusterid``, or whose ``clusterid`` starts with
    ``'nonsense'``, are ignored everywhere below.

    Side effect: writes a JSON document with the keys ``features``,
    ``senti_dump_dict`` and ``cluster_dump_dict`` to ``temp_file``.

    Returns:
        A JSON string. ``{"status": "fail"}`` when no comments were found;
        otherwise an object with ``ratio`` (share of comments per cluster,
        keyed by the cluster's first three feature words joined with ``,``)
        and ``sentiratio`` (share of comments per sentiment label).
    """
    topicid = request.args.get('topicid', default_topic_id)
    subeventid = request.args.get('subeventid', 'global')
    cluster_num = request.args.get('cluster_num', default_cluster_num)
    if cluster_num == default_cluster_num:
        # The default value is forwarded to the calculation as -1.
        cluster_num = -1
    cluster_eva_min_size = request.args.get(
        'cluster_eva_min_size', default_cluster_eva_min_size)
    vsm = request.args.get('vsm', default_vsm)

    ec = EventComments(topicid)
    if subeventid == 'global':
        comments = ec.getAllNewsComments()
    else:
        comments = ec.getCommentsBySubeventid(subeventid)

    if not comments:
        return json.dumps({"status": "fail"})

    cal_results = comments_calculation_v2(
        comments, cluster_num=int(cluster_num),
        cluster_eva_min_size=int(cluster_eva_min_size), version=vsm)
    features = cal_results['cluster_infos']['features']
    item_infos = cal_results['item_infos']

    # Bucket the usable comments by cluster and by sentiment. The per-bucket
    # counts used for the ratios are simply the bucket lengths.
    cluster_results = dict()
    sentiment_results = dict()
    for comment in item_infos:
        if 'clusterid' not in comment or comment['clusterid'][:8] == 'nonsense':
            continue
        cluster_results.setdefault(comment['clusterid'], []).append(comment)
        if 'sentiment' in comment and comment['sentiment'] in emotions_vk_v1:
            sentiment_results.setdefault(comment['sentiment'], []).append(comment)

    # Sub-opinion (cluster) ratios and de-duplication groups. Clusters with
    # no feature words are left out of both results, but still count toward
    # the ratio denominator.
    ratio_results = dict()
    cluster_dump_dict = dict()
    ratio_total_count = sum(len(group) for group in cluster_results.values())
    for clusterid, contents in cluster_results.items():
        feature = features.get(clusterid)
        if not feature:
            continue
        ratio_results[','.join(feature[:3])] = \
            float(len(contents)) / float(ratio_total_count)
        cluster_dump_dict[clusterid] = _group_by_field(contents, "same_from")

    # Sentiment ratios and de-duplication groups.
    sentiratio_results = dict()
    sentiment_dump_dict = dict()
    sentiratio_total_count = sum(
        len(group) for group in sentiment_results.values())
    for sentiment, contents in sentiment_results.items():
        label = emotions_vk_v1[sentiment]
        if label:
            sentiratio_results[label] = \
                float(len(contents)) / float(sentiratio_total_count)
        sentiment_dump_dict[sentiment] = _group_by_field(
            contents, "same_from_sentiment")

    # ``with`` guarantees the file is closed even if serialisation fails.
    with open(temp_file, 'w') as dump_file:
        dump_file.write(json.dumps({
            "features": features,
            "senti_dump_dict": sentiment_dump_dict,
            "cluster_dump_dict": cluster_dump_dict,
        }))

    return json.dumps({"ratio": ratio_results, "sentiratio": sentiratio_results})


def _group_by_field(comments, field):
    """Group comment dicts by the value stored under ``field``.

    Raises ``KeyError`` if a comment lacks ``field``.
    """
    groups = dict()
    for comment in comments:
        groups.setdefault(comment[field], []).append(comment)
    return groups