Example #1
0
def main(topic, start_time, end_time):
    """Collect a topic's weibos inside a time window and run opinion mining.

    topic: topic name; it is resolved to an id through getTopicByName.
    start_time, end_time: values accepted by datetime2ts. 24 * 3600 is added
        to the converted end value, and both bounds are applied as strict
        comparisons ('$gt' / '$lt') on the 'timestamp' field.

    Nothing is returned; the collected rows are handed to ad_main.
    """
    one_day = 24 * 3600
    start_ts = datetime2ts(start_time)
    end_ts = datetime2ts(end_time) + one_day

    time_window = {'$gt': start_ts, '$lt': end_ts}
    query_dict = {'timestamp': time_window}

    # Time the topic lookup together with the search call itself.
    began = time.time()
    topic_id = getTopicByName(topic)['_id']
    searcher = getXapianWeiboByTopic(topic_id)
    _count, fetch_results = searcher.search(query=query_dict,
                                            fields=fields_list)
    elapsed = time.time() - began

    print('search takes %s s' % elapsed)

    # One row per weibo: id, user, utf-8 encoded text, timestamp and counters.
    weibo = [
        [
            row['_id'], row['user'], row['text'].encode('utf-8'),
            row['timestamp'], row['reposts_count'], row['comments_count']
        ]
        for row in fetch_results()
    ]

    # Start opinion mining on the collected weibo data.
    ad_main(topic, weibo, '0914', 10)
Example #2
0
                mtype_count[v] = [end_ts, count]
                mtype_kcount[v] = [end_ts, kcount]
                mtype_weibo[v] = [end_ts, top_ws]

            save_pc_results(topic, mtype_count, during)
            save_kc_results(topic, mtype_kcount, during, k_limit)
            save_ws_results(topic, mtype_weibo, during, w_limit)


if __name__ == '__main__':
    topic = sys.argv[
        1]  # u'香港自由行' u'张灵甫遗骨疑似被埋羊圈' u'高校思想宣传' u'高校宣传思想工作' u'外滩踩踏' 'APEC' u'全军政治工作会议'
    start_date = sys.argv[2]  # '2015-02-23'
    end_date = sys.argv[3]  # '2015-03-02'

    topic = topic.decode('utf-8')
    topic_id = getTopicByName(topic)['_id']
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)

    duration = Fifteenminutes
    xapian_search_weibo = getXapianWeiboByTopic(topic_id)

    print 'topic: ', topic.encode('utf8'), 'from %s to %s' % (start_ts, end_ts)
    propagateCronTopic(topic,
                       xapian_search_weibo,
                       start_ts,
                       end_ts,
                       during=duration)
Example #3
0
                mset = xapian_search_weibo.search(query=query_dict, sort_by=[sort_field], \
                                                  max_offset=w_limit, mset_direct=True)

                kcount = top_keywords(gen_mset_iter(xapian_search_weibo, mset, fields=['terms']), top=k_limit)
                top_ws = top_weibos(results, top=w_limit)

                mtype_count[v] = [end_ts, count]
                mtype_kcount[v] = [end_ts, kcount]
                mtype_weibo[v] = [end_ts, top_ws]

            save_pc_results(topic, mtype_count, during)
            save_kc_results(topic, mtype_kcount, during, k_limit)
            save_ws_results(topic, mtype_weibo, during, w_limit)


if __name__ == '__main__':
    topic = sys.argv[1] # u'香港自由行' u'张灵甫遗骨疑似被埋羊圈' u'高校思想宣传' u'高校宣传思想工作' u'外滩踩踏' 'APEC' u'全军政治工作会议'
    start_date = sys.argv[2] # '2015-02-23'
    end_date = sys.argv[3] # '2015-03-02'

    topic = topic.decode('utf-8')
    topic_id = getTopicByName(topic)['_id']
    start_ts = datetime2ts(start_date)
    end_ts = datetime2ts(end_date)

    duration = Fifteenminutes
    xapian_search_weibo = getXapianWeiboByTopic(topic_id)

    print 'topic: ', topic.encode('utf8'), 'from %s to %s' % (start_ts, end_ts)
    propagateCronTopic(topic, xapian_search_weibo, start_ts, end_ts, during=duration)
Example #4
0
            location_dict["mid"] = r["_id"]
            location_dict["topic"] = topic
            location_dict["ts"] = r["timestamp"]
            location_dict["origin_location"] = origin_location.split("\t")[1]
            location_dict["repost_location"] = None
            return location_dict

    return None


if __name__ == "__main__":
    # Ad-hoc run over a hard-coded topic and date window (no command-line
    # arguments are read here).
    START_TS = datetime2ts("2015-03-02")
    END_TS = datetime2ts("2015-03-15")

    topic = u"两会2015"
    topic_id = getTopicByName(topic)["_id"]
    # Echo the run parameters before starting.
    print "topic: ", topic.encode("utf8")
    print topic_id, START_TS, END_TS

    # NOTE(review): xapian_search is not passed to repost_search below;
    # presumably repost_search reads it as a module-level global -- confirm
    # before renaming or moving this assignment.
    xapian_search = getXapianWeiboByTopic(topic_id)
    repost_search(topic, START_TS, END_TS)
    # The string literal below holds disabled cleanup code (deleting the
    # CityRepost rows for this topic). As a bare string expression it has no
    # effect when the script runs.
    """
    item_exist = db.session.query(CityRepost).filter(CityRepost.topic == topic).all()

    if item_exist:
        for item in item_exist:
            db.session.delete(item)
    db.session.commit()
    print 'commited'
    """