Beispiel #1
0
def cal_propage_work(item):
    """Record the location, activity and mention attributes of one item.

    Reads ``uid``, ``timestamp``, ``text`` and ``send_ip`` from *item* and
    forwards them to the ``save_*`` helpers together with ``sensitive``, the
    number of entries ``searchWord`` returns for the UTF-8 encoded text.

    Args:
        item: mapping providing the keys ``uid``, ``timestamp``, ``text``
            and ``send_ip``; a missing key raises ``KeyError``.

    Returns:
        None.
    """
    import logging  # local import: the file's import block is not visible here

    uid = item['uid']
    timestamp = item['timestamp']
    text = item['text']
    sensitive = len(searchWord(text.encode('utf-8')))

    # attribute location -- skipped when the item carries no sender IP
    ip = item['send_ip']
    if ip:
        save_city(uid, ip, timestamp, sensitive)

    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)  # presumably the timestamp at the start of that date -- confirm
    # Floor division keeps the segment index integral even when true division
    # is in effect; for integer timestamps the result is unchanged.
    time_segment = (timestamp - ts) // Fifteenminutes
    save_activity(uid, date.replace('-', ''), time_segment, sensitive)

    # attribute mention -- only the first extracted user name is recorded
    at_uname_list = extract_uname(text)
    if at_uname_list:
        try:
            save_at(uid, at_uname_list[0], timestamp, sensitive)
        except Exception:
            # Saving the mention stays best-effort as before, but the failure
            # is logged instead of vanishing, and KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            logging.getLogger(__name__).exception(
                'save_at failed for uid %s', uid)
def cal_propage_work(item):
    """Save per-user location, activity and mention data for a single item.

    ``sensitive`` is the length of whatever ``searchWord`` returns for the
    UTF-8 encoded text; it is passed along to every ``save_*`` call.

    Args:
        item: mapping with the keys ``uid``, ``timestamp``, ``text`` and
            ``send_ip``. ``KeyError`` is raised if one of them is absent.

    Returns:
        None.
    """
    import logging  # local import: the file's import block is not visible here

    uid = item['uid']
    timestamp = item['timestamp']
    text = item['text']
    sw_list = searchWord(text.encode('utf-8'))
    sensitive = len(sw_list)

    # attribute location: nothing is saved for a falsy send_ip
    ip = item['send_ip']
    if ip:
        save_city(uid, ip, timestamp, sensitive)

    # attribute activity
    date = ts2datetime(timestamp)
    day_start = datetime2ts(date)  # assumed to be the start of `date` -- TODO confirm
    compact_date = date.replace('-', '')
    # `//` rather than `/`: a segment index must be a whole number, and plain
    # `/` yields a fraction for float timestamps or under true division.
    time_segment = (timestamp - day_start) // Fifteenminutes
    save_activity(uid, compact_date, time_segment, sensitive)

    # attribute mention: the first extracted name only, if there is one
    at_uname_list = extract_uname(text)
    if not at_uname_list:
        return
    try:
        save_at(uid, at_uname_list[0], timestamp, sensitive)
    except Exception:
        # Keep the original best-effort semantics for the mention save, but
        # leave a trace of the failure and let KeyboardInterrupt/SystemExit
        # propagate (a bare `except:` caught those too).
        logging.getLogger(__name__).exception(
            'save_at failed for uid %s', uid)
def cal_propage_work(item):
    """Store the location attribute and the hashtag data of one item.

    Reads ``uid``, ``timestamp``, ``text`` and ``send_ip`` from *item*.
    ``sensitive`` is the length of the ``searchWord`` result for the encoded
    text and is handed to ``save_city`` and ``cal_hashtag_work``.

    Returns:
        None.
    """
    uid, timestamp, text = item['uid'], item['timestamp'], item['text']
    matches = searchWord(text.encode('utf-8', 'ignore'), DFA)
    sensitive = len(matches)

    # Disabled: add the uid to the set of users with sensitive weibo posts.
    #if sensitive:
    #    r.sadd('sensitive_user', uid)

    # attribute location (taken from 'send_ip'; the 'geo' field is not used)
    sender_ip = item['send_ip']
    if sender_ip:
        save_city(uid, sender_ip, timestamp, sensitive)
    # The string literal below is never executed: it only holds the
    # switched-off activity and mention handling.
    """
    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    time_segment = (timestamp - ts) / Fifteenminutes
    save_activity(uid, timestamp, time_segment, sensitive)

    # attribute mention
    at_uname_list = extract_uname(text)
    try:
        at_uname = at_uname_list[0]
        save_at(uid, at_uname, timestamp, sensitive)
    except:
        pass
    """

    # hashtag: only processed when at least one was extracted
    tags = extract_hashtag(text)
    if tags:
        cal_hashtag_work(uid, tags, timestamp, sensitive)
Beispiel #4
0
def accumulate_ip(item2dict):
    """Pass the item's ``geo`` value to ``save_city`` when it is set.

    All three keys (``geo``, ``user``, ``timestamp``) are read up front, so
    a missing key raises ``KeyError`` even when ``geo`` is empty.

    Args:
        item2dict: mapping with the keys ``geo``, ``user`` and ``timestamp``.

    Returns:
        None.
    """
    geo = item2dict['geo']
    user_id = item2dict['user']
    posted_at = item2dict['timestamp']
    if not geo:
        # Nothing to record without a location value.
        return
    save_city(user_id, geo, posted_at)
def cal_propage_work(item):
    """Save the sender location of one item and process its hashtags.

    The number of entries ``searchWord`` returns for the encoded text is
    computed once (``sensitive``) and forwarded to both ``save_city`` and
    ``cal_hashtag_work``.

    Args:
        item: mapping with the keys ``uid``, ``timestamp``, ``text`` and
            ``send_ip``.

    Returns:
        None.
    """
    uid = item['uid']
    timestamp = item['timestamp']
    text = item['text']
    encoded_text = text.encode('utf-8', 'ignore')
    sensitive = len(searchWord(encoded_text, DFA))

    # Disabled: register the uid in the set of users with sensitive weibo posts.
    #if sensitive:
    #    r.sadd('sensitive_user', uid)

    # attribute location -- uses 'send_ip'; the 'geo' lookup is switched off
    #ip = item['geo']
    source_ip = item['send_ip']
    if source_ip:
        save_city(uid, source_ip, timestamp, sensitive)

    # No-op string literal: the activity and mention handling is parked here
    # and does not run.
    """
    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)
    time_segment = (timestamp - ts) / Fifteenminutes
    save_activity(uid, timestamp, time_segment, sensitive)

    # attribute mention
    at_uname_list = extract_uname(text)
    try:
        at_uname = at_uname_list[0]
        save_at(uid, at_uname, timestamp, sensitive)
    except:
        pass
    """

    # hashtag
    hashtags = extract_hashtag(text)
    if not hashtags:
        return
    cal_hashtag_work(uid, hashtags, timestamp, sensitive)
def cal_propage_work(item):
    """Record the location, activity and mention attributes of one item.

    Unlike a sensitivity-aware variant, this one passes no sensitive-word
    count to the ``save_*`` helpers, and ``save_activity`` receives the value
    returned by ``datetime2ts`` rather than a date string.

    Args:
        item: mapping with the keys ``uid``, ``timestamp``, ``send_ip`` and
            ``text``; a missing key raises ``KeyError``.

    Returns:
        None.
    """
    import logging  # local import: the file's import block is not visible here

    uid = item['uid']
    # One pre-formatted string prints the same text whether `print` is the
    # Python 2 statement or the print function.
    print('uid: %s' % (uid,))

    timestamp = item['timestamp']
    #ip = item['geo']
    ip = item['send_ip']
    # attribute location -- skipped when the item carries no sender IP
    if ip:
        save_city(uid, ip, timestamp)

    # attribute activity
    date = ts2datetime(timestamp)
    ts = datetime2ts(date)  # presumably the timestamp at the start of that date -- confirm
    # Floor division keeps the segment index integral; for integer timestamps
    # the value is the same as before.
    time_segment = (timestamp - ts) // Fifteenminutes
    save_activity(uid, ts, time_segment)

    # attribute mention -- only the first extracted user name is recorded
    text = item['text']
    at_uname_list = extract_uname(text)
    if at_uname_list:
        try:
            save_at(uid, at_uname_list[0], timestamp)
        except Exception:
            # Still best-effort, but the failure is logged instead of being
            # silently dropped, and KeyboardInterrupt/SystemExit propagate.
            logging.getLogger(__name__).exception(
                'save_at failed for uid %s', uid)