Example #1
0
def test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict, online_pattern_dict, character_start_ts,filter_keywords_dict):
    status = False
    print 'start cron_text_attribute'
    uid_list = user_keywords_dict.keys()
    
    #get user flow information: hashtag, activity_geo, keywords
    print 'get flow result'
    flow_result = get_flow_information_v2(uid_list, user_keywords_dict)
    print 'flow result len:', len(flow_result)
    
    #get user profile information
    print 'get register result'
    register_result = get_profile_information(uid_list)
    print 'register result len:', len(register_result)
    #print user_keywords_dict
    
    #get user topic and domain by bulk action
    print 'get topic and domain'
    topic_results_dict, topic_results_label = topic_classfiy(uid_list, user_keywords_dict)
    print topic_results_dict,topic_results_label

    domain_results = domain_classfiy(uid_list, user_keywords_dict)
    domain_results_dict = domain_results[0]
    domain_results_label = domain_results[1]
    print 'topic result len:', len(topic_results_dict)
    print 'domain result len:', len(domain_results_dict)
    
    #get user character attribute
    print 'get character result'
    #type_mark = 0/1 for identify the task input status---just sentiment or text
    character_start_time = ts2datetime(character_start_ts)
    character_end_time = ts2datetime(character_start_ts + DAY * CHARACTER_TIME_GAP - DAY)
    print 'character_start_time:', character_start_time
    print 'character_end_time:', character_end_time
    character_sentiment_result_dict = classify_sentiment(uid_list, user_weibo_dict, character_start_time, character_end_time, WEIBO_API_INPUT_TYPE)
    character_text_result_dict = classify_topic(uid_list, user_keywords_dict)
    print 'character result len:', len(character_sentiment_result_dict), len(character_text_result_dict)
    
    #get user fansnum max
    fansnum_max = get_fansnum_max()
    #get user activeness by bulk_action
    print 'get activeness results'
    activeness_results = get_activity_time(uid_list)
    print 'activeness result len:', len(activeness_results)
    #get user inlfuence by bulk action
    print 'get influence'
    influence_results = get_influence(uid_list)
    print 'influence results len:', len(influence_results)
    #get user sensitive by bulk action
    print 'get sensitive'
    sensitive_results, sensitive_string_results, sensitive_dict_results = get_sensitive(uid_list)
    print 'sensitive results len:', len(sensitive_results)
    # compute text attribute
    bulk_action = []
    count = 0
    for user in uid_list:
        count += 1
        results = {}       
        #get user text attribute: online_pattern
        results['online_pattern'] = json.dumps(online_pattern_dict[user])
        try:
            results['online_pattern_aggs'] = '&'.join(online_pattern_dict[user].keys())
        except:
            results['online_pattern_aggs'] = ''
        results['uid'] = str(user)
        #add user flow information: hashtag, activity_geo, keywords
        flow_dict = flow_result[str(user)]
        results = dict(results, **flow_dict)
        #jln filter keyword
        results['filter_keywords'] = json.dumps(filter_keywords_dict[user])

        #add user topic attribute
        user_topic_dict = topic_results_dict[user]
        user_label_dict = topic_results_label[user]
        results['topic'] = json.dumps(user_topic_dict)         # {'topic1_en':pro1, 'topic2_en':pro2...}
        results['topic_string'] = topic_en2ch(user_label_dict) # 'topic1_ch&topic2_ch&topic3_ch'
        
        #add user domain attribute
        user_domain_dict = domain_results_dict[user]
        user_label_dict = domain_results_label[user]
        results['domain_v3'] = json.dumps(user_domain_dict) # [label1_en, label2_en, label3_en]
        results['domain'] = domain_en2ch(user_label_dict)      # label_ch
        
        #add user character_sentiment attribute
        character_sentiment = character_sentiment_result_dict[user]
        results['character_sentiment'] = character_sentiment
        #add user character_text attribtue
        character_text = character_text_result_dict[user]
        results['character_text'] = character_text
        
        #add user profile attribute
        register_dict = register_result[str(user)]
        results = dict(results, **register_dict)
        #add user_evaluate attribute---importance
        results['importance'] = get_importance(results['domain'], results['topic_string'], results['fansnum'], fansnum_max)
        #add user_evaluate attribute---activeness
        user_activeness_time = activeness_results[user]
        user_activeness_geo = json.loads(results['activity_geo_dict'])[-1]
        results['activeness'] = get_activeness(user_activeness_geo, user_activeness_time)
        #add user_evaluate attribute---influence
        results['influence'] = influence_results[user]
        #add user sensitive attribute
        results['sensitive'] = sensitive_results[user]
        results['sensitive_dict'] = sensitive_dict_results[user]
        results['sensitive_string'] = sensitive_string_results[user]
        #bulk_action
        action = {'index':{'_id': str(user)}}
        bulk_action.extend([action, results])
        
    status = save_user_results(bulk_action)
    
    return status
Example #2
0
def test_cron_text_attribute_v2(user_keywords_dict, user_weibo_dict,
                                online_pattern_dict, character_start_ts,
                                relation_mark_dict, task_mark,
                                submit_user_dict, submit_ts_dict):
    #mark index or update
    if submit_user_dict and submit_ts_dict:
        save_type = 'index'
    else:
        save_type = 'update'
    status = False
    print 'start cron_text_attribute'
    uid_list = user_keywords_dict.keys()

    #get user flow information: hashtag, activity_geo, keywords, ip
    print 'get flow result'
    flow_result = get_flow_information_v2(uid_list, user_keywords_dict)
    print 'flow result len:', len(flow_result)

    #get user profile information
    print 'get register result'
    register_result = get_profile_information(uid_list)
    print 'register result len:', len(register_result)

    #get user topic and domain by bulk action
    print 'get topic and domain'
    topic_results_dict, topic_results_label = topic_classfiy(
        uid_list, user_keywords_dict)
    domain_results = domain_classfiy(uid_list, user_keywords_dict)
    domain_results_dict = domain_results[0]
    domain_results_label = domain_results[1]
    print 'topic result len:', len(topic_results_dict)
    print 'domain result len:', len(domain_results_dict)

    #get user fansnum max
    fansnum_max, user_fansnum_dict = get_fansnum_max(uid_list)
    print 'fansnum len:', len(user_fansnum_dict)
    #get user activeness by bulk_action
    print 'get activeness results'
    activeness_results = get_activity_time(uid_list)
    print 'activeness result len:', len(activeness_results)
    #get user inlfuence by bulk action
    print 'get influence'
    influence_results = get_influence(uid_list)
    print 'influence results len:', len(influence_results)

    # compute text attribute
    bulk_action = []
    count = 0
    for user in uid_list:
        count += 1
        results = {}
        #add submit_user and submit_ts
        if save_type == 'index':
            results['submit_user'] = submit_user_dict[user]
            results['submit_ts'] = submit_ts_dict[user]
        #get user text attribute: online_pattern
        results['online_pattern'] = json.dumps(online_pattern_dict[user])
        try:
            results['online_pattern_aggs'] = '&'.join(
                online_pattern_dict[user].keys())
        except:
            results['online_pattern_aggs'] = ''
        results['uid'] = str(user)
        #add user flow information: hashtag, activity_geo, keywords, ip
        flow_dict = flow_result[str(user)]
        results = dict(results, **flow_dict)

        #add user topic attribute
        user_topic_dict = topic_results_dict[user]
        user_label_dict = topic_results_label[user]
        results['topic'] = json.dumps(
            user_topic_dict)  # {'topic1_en':pro1, 'topic2_en':pro2...}
        results['topic_string'] = topic_en2ch(
            user_label_dict)  # 'topic1_ch&topic2_ch&topic3_ch'

        #add user domain attribute
        user_domain_dict = domain_results_dict[user]
        user_label_dict = domain_results_label[user]
        results['domain_v3'] = json.dumps(
            user_domain_dict)  # [label1_en, label2_en, label3_en]
        results['domain'] = domain_en2ch(user_label_dict)  # label_ch

        #add user profile attribute
        register_dict = register_result[str(user)]
        results = dict(results, **register_dict)
        #add user_evaluate attribute---importance
        results['importance'] = get_importance(results['domain'],
                                               results['topic_string'],
                                               user_fansnum_dict[user],
                                               fansnum_max)
        #add user_evaluate attribute---activeness
        user_activeness_time = activeness_results[user]
        user_activeness_geo = json.loads(results['activity_geo_dict'])[-1]
        results['activeness'] = get_activeness(user_activeness_geo,
                                               user_activeness_time)
        #add user_evaluate attribute---influence
        results['influence'] = influence_results[user]

        #bulk_action
        if save_type == 'index':
            action = {'index': {'_id': str(user)}}
            bulk_action.extend([action, results])
        else:
            action = {'update': {'_id': str(user)}}
            bulk_action.extend([action, {'doc': results}])

    status = save_user_results(bulk_action)
    print 'save es_user_portrait:', status
    #compute relation

    if task_mark == 'user':
        save_status = person_organization(uid_list, relation_mark_dict)
        print 'save_status:', save_status
        if status and save_status:
            status = True
        else:
            status = False
    #print 'save neo4j:', save_status

    return status