Exemple #1
0
def create_date_warning(today_datetime):

    query_body = {
        'query': {
            'match_all': {}
        },
        'size': MAX_VALUE,
        'sort': {
            'date_time': {
                'order': 'asc'
            }
        }
    }
    #try:
    result = es_xnr.search(index=weibo_date_remind_index_name,
                           doc_type=weibo_date_remind_index_type,
                           body=query_body)['hits']['hits']
    date_result = []
    for item in result:
        #计算距离日期
        date_time = item['_source']['date_time']
        year = ts2yeartime(today_datetime)
        warming_date = year + '-' + date_time
        today_date = ts2datetime(today_datetime)
        countdown_num = (datetime2ts(warming_date) -
                         datetime2ts(today_date)) / DAY

        if abs(countdown_num) < WARMING_DAY:
            #根据给定的关键词查询预警微博
            keywords = item['_source']['keywords']
            date_warming = lookup_weibo_date_warming(keywords, today_datetime)
            item['_source']['weibo_date_warming_content'] = json.dumps(
                date_warming)
            item['_source']['validity'] = 0
            item['_source']['timestamp'] = today_datetime
            now_time = int(time.time())
            task_id = str(item['_source']['create_time']) + '_' + str(now_time)
            #print 'task_id',task_id
            #写入数据库

            weibo_timing_warning_index_name = weibo_timing_warning_index_name_pre + warming_date
            print weibo_timing_warning_index_name
            mark = False
            if date_warming:
                try:
                    es_xnr.index(index=weibo_timing_warning_index_name,
                                 doc_type=weibo_timing_warning_index_type,
                                 body=item['_source'],
                                 id=task_id)
                    mark = True
                except:
                    mark = False
            else:
                pass
            date_result.append(mark)
    else:
        pass

    #except:
    #    date_result=[]
    return date_result
Exemple #2
0
def create_personal_warning(xnr_user_no, today_datetime):
    """Rank followed users by daily sensitivity and persist per-user warnings.

    Aggregates the day's flow-text sensitivity per uid, keeps users whose
    total is positive (followers are weighted via judge_user_type), collects
    each such user's sensitive weibos and writes one warning document per
    user into the per-day user-warning index.

    Returns a list of booleans, one per sensitive user, marking whether the
    warning document was written successfully (False when the user had no
    sensitive weibos or the write failed).
    """
    # Follow list of the virtual user.
    lookup_type = 'followers_list'
    followers_list = lookup_xnr_fans_followers(xnr_user_no, lookup_type)

    # Virtual user's own uid.
    xnr_uid = lookup_xnr_uid(xnr_user_no)

    # Sum sensitivity per uid over the day's flow-text indices.
    query_body = {
        'aggs': {
            'followers_sensitive_num': {
                'terms': {
                    'field': 'uid'
                },
                'aggs': {
                    'sensitive_num': {
                        'sum': {
                            'field': 'sensitive'
                        }
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    flow_text_index_name = get_day_flow_text_index_list(today_datetime)

    try:
        first_sum_result = es_flow_text.search(
            index=flow_text_index_name,
            doc_type=flow_text_index_type,
            body=query_body
        )['aggregations']['followers_sensitive_num']['buckets']
    except Exception:
        first_sum_result = []

    top_userlist = []
    for bucket in first_sum_result:
        user_sensitive = bucket['sensitive_num']['value']
        if user_sensitive > 0:
            user_dict = dict()
            user_dict['uid'] = bucket['key']
            # Followers get their sensitivity boosted.
            followers_mark = judge_user_type(user_dict['uid'], followers_list)
            user_dict['sensitive'] = user_sensitive * followers_mark
            top_userlist.append(user_dict)

    # Collect the sensitive weibos of each flagged user.
    results = []
    for user in top_userlist:
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        user_detail['user_name'] = get_user_nickname(user['uid'])

        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {
                                    'uid': user['uid']
                                }
                            }, {
                                'range': {
                                    'sensitive': {
                                        'gte': 1
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {
                'sensitive': {
                    'order': 'desc'
                }
            }
        }

        try:
            second_result = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body=query_body)['hits']['hits']
        except Exception:
            second_result = []

        s_result = []
        for item in second_result:
            # Attach the author's nickname.
            item['_source']['nick_name'] = get_user_nickname(
                item['_source']['uid'])
            s_result.append(item['_source'])

        s_result.sort(key=lambda k: (k.get('sensitive', 0)), reverse=True)
        user_detail['content'] = json.dumps(s_result)

        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = today_datetime

        # Persist into the per-day user-warning index.
        today_date = ts2datetime(today_datetime)
        weibo_user_warning_index_name = weibo_user_warning_index_name_pre + today_date

        task_id = xnr_user_no + '_' + user_detail['uid']
        # BUGFIX: mark was previously unbound on the first iteration (or
        # stale from a prior iteration) when s_result was empty.
        mark = False
        if s_result:
            try:
                es_xnr.index(index=weibo_user_warning_index_name,
                             doc_type=weibo_user_warning_index_type,
                             body=user_detail,
                             id=task_id)
                mark = True
            except Exception:
                mark = False

        results.append(mark)

    return results
Exemple #3
0
def create_speech_warning(xnr_user_no, today_datetime):
    """Persist every sensitive weibo of the day as a speech warning.

    Each sensitive post is tagged with whether its author is followed by the
    virtual user ('follow'/'unfollow'), enriched with the author's nickname,
    and written to the per-day speech-warning index.

    Returns a list of booleans marking whether each write succeeded.
    """
    # Users this virtual user follows.
    followers_list = lookup_xnr_fans_followers(xnr_user_no, 'followers_list')

    sensitive_query = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': {
                            'range': {
                                'sensitive': {'gte': 1}
                            }
                        }
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {'sensitive': {'order': 'desc'}}
    }

    flow_text_index_name = get_day_flow_text_index_list(today_datetime)

    hits = es_flow_text.search(index=flow_text_index_name,
                               doc_type=flow_text_index_type,
                               body=sensitive_query)['hits']['hits']
    marks = []
    for hit in hits:
        source = hit['_source']
        source['nick_name'] = get_user_nickname(source['uid'])
        if source['uid'] in followers_list:
            source['content_type'] = 'follow'
        else:
            source['content_type'] = 'unfollow'

        source['validity'] = 0
        source['xnr_user_no'] = xnr_user_no

        task_id = xnr_user_no + '_' + source['mid']

        # Write into the per-day speech-warning index.
        today_date = ts2datetime(today_datetime)
        index_name = weibo_speech_warning_index_name_pre + today_date
        try:
            es_xnr.index(index=index_name,
                         doc_type=weibo_speech_warning_index_type,
                         body=source,
                         id=task_id)
            marks.append(True)
        except:
            marks.append(False)
    return marks
Exemple #4
0
            #print 'save_bot_info'
            wxxnr_data = {
                'wx_id': wx_id,
                'puid': self.self.puid,
                'user_no': wxbot_id2user_no(self.wxbot_id),
                'xnr_user_no': self.wxbot_id,
                'wxbot_port': wxbot_port,
                'create_ts': int(time.time()),
                'nickname': self.self.name,
                'remark': remark,
                'submitter': submitter,
                'mail': mail,
                'access_id': access_id
            }
            es_xnr.index(index=wx_xnr_index_name,
                         doc_type=wx_xnr_index_type,
                         id=self.wxbot_id,
                         body=wxxnr_data)

    def set_default_groups(self):
        try:
            d = r.get(self.wxbot_id)
            if d:
                data = eval(d)
                create_flag = data['create_flag']
                if create_flag:
                    group_list = []
                    groups = self.groups(update=True)
                    for group in groups:
                        #load members details
                        group.update_group(members_details=True)
                        group_list.append(group.puid)
Exemple #5
0
    def proc_msg(self, msg):
        """Handle one incoming WeChat group message.

        Text/Picture/Recording messages from groups in ``self.groups_list``
        are archived into the per-day ES group-message index; pictures and
        recordings are first saved under WX_IMAGE_ABS_PATH / WX_VOICE_ABS_PATH
        in a per-day directory.  When the bot is @-mentioned it auto-replies
        after a short random delay and records the sent reply.
        """
        group_puid = msg.sender.puid
        # Only process messages from monitored groups.
        if group_puid in self.groups_list:
            msg_type = msg.type
            save_flag = 0
            data = {}
            if msg_type in ['Text', 'Picture', 'Recording']:
                save_flag = 1
                data = {
                    'xnr_id': self.self.puid,
                    'xnr_name': self.self.name,
                    'group_id': group_puid,
                    'group_name': msg.sender.name,
                    'timestamp': msg.raw['CreateTime'],
                    # 'speaker_id': msg.member.puid,
                    'speaker_id': self.load_member_id(msg.member),
                    'speaker_name': msg.member.name,
                    'msg_type': msg_type
                }

                # One ES index per calendar day.
                nowDate = datetime.datetime.now().strftime('%Y-%m-%d')
                index_name = wx_group_message_index_name_pre + str(nowDate)
            if msg_type == 'Text':
                text = msg.text
                data['text'] = text
                try:
                    sen_value, sen_words = sensitive_check(text.encode('utf8'))
                    if sen_value != 0:
                        sen_flag = 1  # the message is sensitive
                    else:
                        sen_flag = 0
                    if msg.is_at:
                        at_flag = 1  # the bot was @-mentioned
                    else:
                        at_flag = 0
                    data['at_flag'] = at_flag
                    data['sensitive_flag'] = sen_flag
                    data['sensitive_value'] = sen_value
                    data['sensitive_words_string'] = sen_words[
                        'sensitive_words_string']
                except Exception, e:
                    print e
            elif msg_type == 'Picture':
                '''
                #保存到七牛(已弃用,2018-1-2,hanmc)
                try:
                    #save picture
                    filename = str(msg.id) + '.png'
                    filepath = os.path.join(self.data_path, filename)
                    msg.get_file(filepath)
                    #upload picture to qiniu.com
                    token = self.qiniu.upload_token(qiniu_bucket_name, filename, 3600)
                    ret, info = put_file(token, filename, filepath,)
                    data['text'] = qiniu_bucket_domain + '/' + filename 
                    os.remove(filepath)
                except Exception,e:
                    print e
                '''
                # Save to local disk instead of the (deprecated) qiniu upload.
                # filename = str(msg.id) + '.png'
                filename = str(msg.id) + str(
                    msg.file_name)  # keep the original extension (.png/.jpg/.gif)
                filepath = os.path.join(WX_IMAGE_ABS_PATH,
                                        ts2datetime(time.time()))
                if not os.path.isdir(filepath):
                    os.mkdir(filepath)
                    # periodic cleanup lives in timed_python_files/wx_regular_cleaning.py
                    # remove_wx_media_old_files(WX_IMAGE_ABS_PATH, period=30)
                save_path = os.path.join(filepath, filename)
                msg.get_file(save_path)
                # Compress the image in place:
                # .gif left untouched, .png via optipng, .jpg via jpegoptim
                image_type = filename.split('.')[-1]
                if image_type == 'gif':
                    pass
                elif image_type == 'png':
                    os.popen("optipng " + save_path + " -snip")
                elif image_type == 'jpg':
                    os.popen("jpegoptim " + save_path)
                ####
                data['text'] = os.path.join(filepath, filename)
            elif msg_type == 'Recording':
                filename = str(msg.id) + '.mp3'
                filepath = os.path.join(WX_VOICE_ABS_PATH,
                                        ts2datetime(time.time()))
                if not os.path.isdir(filepath):
                    os.mkdir(filepath)
                    # periodic cleanup lives in timed_python_files/wx_regular_cleaning.py
                    # remove_wx_media_old_files(WX_VOICE_ABS_PATH, period=30)
                msg.get_file(save_path=os.path.join(filepath, filename))
                data['text'] = os.path.join(filepath, filename)
            # Store the message document into ES.
            if save_flag:
                if not es_xnr.indices.exists(index=index_name):
                    # Create the per-day index with its mapping on first use.
                    wx_group_message_mappings(index_name)
                es_xnr.index(index=index_name,
                             doc_type=wx_group_message_index_type,
                             body=data)
            # Auto-reply to @-mentions in monitored groups.
            if msg.is_at:
                time.sleep(random.random())
                m = msg.reply(u'知道啦~')
                self.save_sent_msg(m=m,
                                   to_puid=msg.sender.puid,
                                   to_name=msg.sender.name)
Exemple #6
0
def _merge_uid_list(source, key, uid):
    """Return source[key] with *uid* added (deduplicated); [uid] when the
    field is missing or not iterable."""
    try:
        uids = set(source[key])
    except Exception:
        return [uid]
    uids.add(uid)
    return list(uids)


def save_to_fans_follow_ES(xnr_user_no,
                           uid,
                           save_type,
                           follow_type,
                           trace_type='ordinary_follow'):
    """Record a fans/followers relation change for a virtual user in ES.

    save_type 'followers': follow_type 'follow' adds *uid* to followers_list
    (and, when trace_type == 'trace_follow', to trace_follow_list as well);
    follow_type 'unfollow' removes *uid* from followers_list.
    save_type 'fans': adds *uid* to fans_list.
    When the per-user document does not exist yet (or updating it fails), a
    fresh document containing only the new uid is indexed.

    Returns True, except when an unfollow update fails (then False).
    """
    if save_type == 'followers':
        try:
            results = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                 doc_type=weibo_xnr_fans_followers_index_type,
                                 id=xnr_user_no)["_source"]

            if follow_type == 'follow':
                if trace_type == 'trace_follow':
                    # A trace-follow also counts as an ordinary follow.
                    results['trace_follow_list'] = _merge_uid_list(
                        results, 'trace_follow_list', uid)
                results['followers_list'] = _merge_uid_list(
                    results, 'followers_list', uid)
                es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                              doc_type=weibo_xnr_fans_followers_index_type,
                              id=xnr_user_no,
                              body={'doc': results})

            elif follow_type == 'unfollow':
                try:
                    followers_uids = results['followers_list']
                    results['followers_list'] = list(
                        set(followers_uids).difference(set([uid])))
                    es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                                  doc_type=weibo_xnr_fans_followers_index_type,
                                  id=xnr_user_no,
                                  body={'doc': results})
                except Exception:
                    return False

        except Exception:
            # No document yet (or the update failed): create a fresh one.
            body_info = {
                'followers_list': [uid],
                'xnr_user_no': xnr_user_no
            }
            es_xnr.index(index=weibo_xnr_fans_followers_index_name,
                         doc_type=weibo_xnr_fans_followers_index_type,
                         id=xnr_user_no,
                         body=body_info)

    elif save_type == 'fans':
        try:
            results = es_xnr.get(index=weibo_xnr_fans_followers_index_name,
                                 doc_type=weibo_xnr_fans_followers_index_type,
                                 id=xnr_user_no)["_source"]

            results['fans_list'] = _merge_uid_list(results, 'fans_list', uid)

            es_xnr.update(index=weibo_xnr_fans_followers_index_name,
                          doc_type=weibo_xnr_fans_followers_index_type,
                          id=xnr_user_no,
                          body={'doc': results})

        except Exception:
            # No document yet (or the update failed): create a fresh one.
            body_info = {
                'fans_list': [uid],
                'xnr_user_no': xnr_user_no
            }
            es_xnr.index(index=weibo_xnr_fans_followers_index_name,
                         doc_type=weibo_xnr_fans_followers_index_type,
                         id=xnr_user_no,
                         body=body_info)

    return True
def cron_compute_mark_qq(current_time):

    current_date = ts2datetime(current_time)
    current_time_new = datetime2ts(current_date)

    xnr_results = es.search(index=qq_xnr_index_name,doc_type=qq_xnr_index_type,\
                body={'query':{'match_all':{}},'size':MAX_SEARCH_SIZE})['hits']['hits']

    if S_TYPE == 'test':
        xnr_results = [{
            '_source': {
                'xnr_user_no': 'QXNR0007',
                'qq_number': '1039598173'
            }
        }]

    for result in xnr_results:
        print 'result....', result
        xnr_user_no = result['_source']['xnr_user_no']
        qq_number = result['_source']['qq_number']
        #xnr_user_no = 'WXNR0004'
        influence_dict = get_influence_at_num(xnr_user_no, qq_number,
                                              current_time)
        penetration_dict = get_penetration_num(xnr_user_no, qq_number,
                                               current_time)
        safe_dict = qq_history_count(xnr_user_no, qq_number, current_time)

        #_id = xnr_user_no + '_' + current_date
        _id = xnr_user_no

        xnr_user_detail = {}
        xnr_user_detail['influence'] = influence_dict['mark']
        xnr_user_detail['penetration'] = penetration_dict['mark']
        xnr_user_detail['safe'] = safe_dict['mark']

        xnr_user_detail['daily_be_at_num'] = influence_dict['daily_be_at_num']
        xnr_user_detail['total_be_at_num'] = influence_dict['total_be_at_num']

        xnr_user_detail['daily_sensitive_num'] = penetration_dict[
            'sensitive_info']
        #xnr_user_detail['daily_sensitive_num'] = penetration_dict['daily_sensitive_num']

        xnr_user_detail['total_post_num'] = safe_dict['total_post_num']
        xnr_user_detail['daily_post_num'] = safe_dict['daily_post_num']

        xnr_user_detail['date_time'] = current_date
        xnr_user_detail['timestamp'] = current_time_new
        xnr_user_detail['xnr_user_no'] = xnr_user_no
        xnr_user_detail['qq_number'] = qq_number

        qq_xnr_history_count_index_name = qq_xnr_history_count_index_name_pre + current_date

        try:
            #print 'xnr_user_detail...',xnr_user_detail
            print 'qq_xnr_history_count_index_name...', qq_xnr_history_count_index_name
            qq_xnr_history_count_mappings(qq_xnr_history_count_index_name)
            es.index(index=qq_xnr_history_count_index_name,doc_type=qq_xnr_history_count_index_type,\
                id=_id,body=xnr_user_detail)

            mark = True

        except:
            mark = False

        return mark
Exemple #8
0
def create_personal_warning(xnr_user_no, today_datetime):
    #查询好友列表
    friends_list = lookup_xnr_friends(xnr_user_no)

    #查询虚拟人uid
    xnr_uid = lookup_xnr_uid(xnr_user_no)

    #计算敏感度排名靠前的用户
    query_body = {
        # 'query':{
        #     'filtered':{
        #         'filter':{
        #             'terms':{'uid':friends_list}
        #         }
        #     }
        # },
        'aggs': {
            'friends_sensitive_num': {
                'terms': {
                    'field': 'uid'
                },
                'aggs': {
                    'sensitive_num': {
                        'sum': {
                            'field': 'sensitive'
                        }
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE
    }

    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)

    try:
        first_sum_result=es_xnr.search(index=facebook_flow_text_index_name,doc_type=facebook_flow_text_index_type,\
        body=query_body)['aggregations']['friends_sensitive_num']['buckets']
    except:
        first_sum_result = []

    #print 'first_sum_result',first_sum_result
    top_userlist = []
    for i in xrange(0, len(first_sum_result)):
        user_sensitive = first_sum_result[i]['sensitive_num']['value']
        if user_sensitive > 0:
            user_dict = dict()
            user_dict['uid'] = first_sum_result[i]['key']
            friends_mark = judge_user_type(user_dict['uid'], friends_list)
            user_dict['sensitive'] = user_sensitive * friends_mark
            top_userlist.append(user_dict)
        else:
            pass
    #####################
    #如果是好友,则用户敏感度计算值增加1.5倍
    #####################
    #查询敏感用户的敏感内容
    results = []
    for user in top_userlist:
        #print user
        user_detail = dict()
        user_detail['uid'] = user['uid']
        user_detail['user_sensitive'] = user['sensitive']
        user_lookup_id = user['uid']
        print user_lookup_id
        # try:
        #     #user_result=es_xnr.get(index=facebook_feedback_friends_index_name,doc_type=facebook_feedback_friends_index_type,id=user_lookup_id)['_source']
        #     user_result=es_xnr.get(index=facebook_user_index_name,doc_type=facebook_user_index_type,id=user['uid'])['_source']
        #     user_detail['user_name']=user_result['nick_name']
        # except:
        #     user_detail['user_name']=''
        user_detail['user_name'] = get_user_nickname(user['uid'])

        query_body = {
            'query': {
                'filtered': {
                    'filter': {
                        'bool': {
                            'must': [{
                                'term': {
                                    'uid': user['uid']
                                }
                            }, {
                                'range': {
                                    'sensitive': {
                                        'gte': 1
                                    }
                                }
                            }]
                        }
                    }
                }
            },
            'size': MAX_WARMING_SIZE,
            'sort': {
                'sensitive': {
                    'order': 'desc'
                }
            }
        }

        try:
            second_result = es_xnr.search(
                index=facebook_flow_text_index_name,
                doc_type=facebook_flow_text_index_type,
                body=query_body)['hits']['hits']
        except:
            second_result = []

        s_result = []
        for item in second_result:
            #查询三个指标字段
            fid_result = lookup_fid_attend_index(item['_source']['fid'],
                                                 today_datetime)
            if fid_result:
                item['_source']['comment'] = fid_result['comment']
                item['_source']['share'] = fid_result['share']
                item['_source']['favorite'] = fid_result['favorite']
            else:
                item['_source']['comment'] = 0
                item['_source']['share'] = 0
                item['_source']['favorite'] = 0
            #查询用户昵称
            item['_source']['nick_name'] = get_user_nickname(
                item['_source']['uid'])

            s_result.append(item['_source'])

        s_result.sort(key=lambda k: (k.get('sensitive', 0)), reverse=True)
        user_detail['content'] = json.dumps(s_result)

        user_detail['xnr_user_no'] = xnr_user_no
        user_detail['validity'] = 0
        user_detail['timestamp'] = today_datetime

        #写入数据库
        today_date = ts2datetime(today_datetime)
        facebook_user_warning_index_name = facebook_user_warning_index_name_pre + today_date

        task_id = xnr_user_no + '_' + user_detail['uid']
        if s_result:
            try:
                es_xnr.index(index=facebook_user_warning_index_name,
                             doc_type=facebook_user_warning_index_type,
                             body=user_detail,
                             id=task_id)
                mark = True
            except:
                mark = False
        else:
            pass

        results.append(mark)

    return results
Exemple #9
0
    u'451016634935094', u'359574464219603', u'100000353049421', u'717234834',
    u'100011204707611', u'100000065704494', u'780790723', u'100005960898332',
    u'206986566009728', u'100003481030289', u'100008144074564',
    u'135252119870284', u'100007426740391', u'1517589828', u'100014335805805',
    u'100018206347610', u'100018794590981', u'100012225906969',
    u'1466849490028320', u'100005004039054', u'1196435997092687',
    u'100010967027774', u'152100711485335', u'1768200884', u'100000960373995',
    u'100004783215425', u'100014321793964', u'100002433998672',
    u'100000042158598', u'1359383878', u'100006736002878', u'100001469904363',
    u'100011257748826', u'100021891726122', u'706676622729838',
    u'100003491408719', u'812623535531819', u'852067068172077',
    u'100012258524129', u'1140849537', u'100010739386824', u'100006590973401',
    u'100009377185598', u'1478123819', u'100010559224139', u'100000657330094',
    u'100006740970861', u'1640482902830291', u'100017177435135',
    u'767067873371162', u'100004017334041', u'366243453719070',
    u'100004666743754', u'115631625122669', u'317012365084676', u'1302631509',
    u'100011911669425', u'100001359256884'
]

user = "******"

task_detail = dict()
task_detail["task_name"] = fb_id_sensing
task_detail["remark"] = "感知热门事件"
task_detail["social_sensors"] = json.dumps(list(social_sensors))
task_detail["history_status"] = json.dumps([])
print es.index(index=fb_id_sensing,
               doc_type=fb_type_sensing,
               id=fb_id_sensing,
               body=task_detail)
Exemple #10
0
def save_event_warning(xnr_user_no,start_time,end_time):
    #判断数据库是否存在:
    today_date=ts2datetime(end_time)
    today_datetime = datetime2ts(today_date)
    weibo_event_warning_index_name = weibo_event_warning_index_name_pre+today_date
    if not es_xnr.indices.exists(index=weibo_event_warning_index_name):
        weibo_event_warning_mappings(weibo_event_warning_index_name)

    new_event_warning = create_event_warning(xnr_user_no,start_time,end_time)    

    today_history_event_warning,old_name_list = lookup_history_event_warming(xnr_user_no,today_datetime,end_time)
    print 'warning!!!',len(new_event_warning)
    results = [] 
    if new_event_warning:
        for item in new_event_warning:
            event_mark = set_intersection(item['event_name'],old_name_list)
            if event_mark == 1:
                task_id = xnr_user_no+'_'+ item['event_name']
                old_event = es_xnr.get(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,id=task_id)['_source']

                #用户合并
                old_event_main_info = json.loads(old_event['main_user_info'])
                old_event_uid_list = [user['uid'] for user in old_main_user_info]

                new_event_main_info = json.loads(item['main_user_info'])
                new_event_uid_list = [user['uid'] for user in new_event_main_info]

                add_uid_list = list(set(new_event_uid_list) - set(old_event_uid_list)&set(new_event_uid_list))

                new_main_user_info = []
                for uid in add_uid_list:
                    uid_info = [u for u in item['main_user_info'] if u['uid'] == uid]
                    if uid_info:
                        new_main_user_info.append(uid_info[0])
                    else:
                        pass
                old_event['main_user_info'].extend(new_main_user_info)


                old_event_weibo_info = json.loads(old_event['main_weibo_info'])
                old_event_mid_list = [content['mid'] for content in old_event_weibo_info]

                new_event_weibo_info = json.loads(item['main_weibo_info'])
                new_event_mid_list = [content['mid'] for content in new_event_weibo_info]

                add_weibo_list = list(set(new_event_mid_list) - set(new_event_mid_list)&set(old_event_mid_list))     

                new_main_weibo_info = []
                for mid in add_weibo_list:
                    mid_info = [t for t in item['main_weibo_info'] if t['mid'] == mid]
                    if mid_info:
                        new_main_weibo_info.append(mid_info[0])
                    else:
                        pass
                old_event['main_weibo_info'].extend(new_main_weibo_info)

                old_event['event_influence']=old_event['event_influence']+item['event_influence']
               
                try:
                    es_xnr.update(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,id=task_id)
                    mark=True
                except:
                    mark=False

            else:
                #直接存储
                task_id=xnr_user_no+'_'+ item['event_name']
                try:
                    es_xnr.index(index=weibo_event_warning_index_name,doc_type=weibo_event_warning_index_type,body=item,id=task_id)
                    mark=True
                except:
                    mark=False
            results.append(mark)
    else:
        pass
    print 'event_waring::',results
    return results
Exemple #11
0
def create_speech_warning(xnr_user_no, today_datetime):
    """Collect the day's sensitive Facebook posts and store them as speech warnings.

    Searches the day's facebook flow-text index for posts with sensitive >= 1
    (most sensitive first), enriches each hit with a friend/unfriend flag, the
    three attention metrics and the author's nickname, then writes it into the
    per-day speech-warning index.

    Returns a list of booleans, one per post, True when indexing succeeded.
    """
    # Friend uids of this virtual user, used to tag each post's author.
    friends_list = lookup_xnr_friends(xnr_user_no)

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': {
                            'range': {
                                'sensitive': {
                                    'gte': 1
                                }
                            }
                        }
                    }
                }
            }
        },
        'size': MAX_SEARCH_SIZE,
        'sort': {
            'sensitive': {
                'order': 'desc'
            }
        }
    }
    facebook_flow_text_index_name = get_timets_set_indexset_list(
        facebook_flow_text_index_name_pre, today_datetime, today_datetime)
    results = es_xnr.search(index=facebook_flow_text_index_name,
                            doc_type=facebook_flow_text_index_type,
                            body=query_body)['hits']['hits']
    result = []
    for item in results:
        if item['_source']['uid'] in friends_list:
            item['_source']['content_type'] = 'friends'
        else:
            item['_source']['content_type'] = 'unfriends'

        item['_source']['validity'] = 0
        item['_source']['xnr_user_no'] = xnr_user_no

        # Attach the three attention metrics for this post (zero when unknown).
        fid_result = lookup_fid_attend_index(item['_source']['fid'],
                                             today_datetime)
        if fid_result:
            item['_source']['comment'] = fid_result['comment']
            item['_source']['share'] = fid_result['share']
            item['_source']['favorite'] = fid_result['favorite']
        else:
            item['_source']['comment'] = 0
            item['_source']['share'] = 0
            item['_source']['favorite'] = 0

        # Author nickname for display.
        item['_source']['nick_name'] = get_user_nickname(
            item['_source']['uid'])

        task_id = xnr_user_no + '_' + item['_source']['fid']

        # Store into the per-day warning index. A single failed write must not
        # abort the remaining posts, so success is recorded per item (the
        # original had this try/except commented out, making `mark` always
        # True and letting one failure raise out of the loop).
        today_date = ts2datetime(today_datetime)
        facebook_speech_warning_index_name = facebook_speech_warning_index_name_pre + today_date
        try:
            es_xnr.index(index=facebook_speech_warning_index_name,
                         doc_type=facebook_speech_warning_index_type,
                         body=item['_source'],
                         id=task_id)
            mark = True
        except:
            mark = False

        result.append(mark)
    return result
def read_flow_text(flow_text_index_name,current_date):
	
	#flow_text_index_name = flow_text_index_name_pre + current_date

	i = 0
	
	label_count_dict = {}
	content_dict = {}

	while True:
		
		query_body = {
			'query':{
				'bool':{
					'must':[
						{'term':{'message_type':1}},
						{'term':{'sensitive':0}}
					]
				}
			},
			'size':1000,
			'from':i*1000,
			'sort':{'user_fansnum':{'order':'desc'}}
		}
		# 原创、sensitive为0
		search_results = es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,\
				body=query_body)['hits']['hits']

		print es_flow_text,flow_text_index_name
		weibo_list = []
		print 'len..',len(search_results)		
		for result in search_results:
			result = result['_source']
			weibo_list.append(result['text'].encode('utf-8'))

		label_list = triple_classifier_new(weibo_list)

		label_count = Counter(label_list)

		for j in range(len(search_results)):
			
			label = label_list[j]
			search_results[j]['_source']['label'] = label
			try:
				if label_count_dict[label] < 20:
					content_dict[label].append(search_results[j]['_source'])
					label_count_dict[label] += 1

			except:
				content_dict[label] = [search_results[j]['_source']]

				label_count_dict[label] = 1

		i += 1

		print 'i..',i

		# 循环终止条件
		min_label_count = min(label_count_dict, key=label_count_dict.get)
		if label_count_dict[min_label_count] >= 20:
			break
	print 'label_count_dict::',label_count_dict

	for content_label,content_weibo in content_dict.iteritems():
		#_id = content_label
		index_name = daily_interest_index_name_pre +'_'+ current_date
		daily_inerests_flow_text_mappings(index_name)
		#item_dict = {}
		#item_dict['timestamp'] = datetime2ts(current_date)
		#item_dict['content'] = json.dumps(content_weibo)
		for daily_weibo in content_weibo:
			mid = daily_weibo['mid']
			print es_xnr.index(index=index_name,doc_type=daily_interest_index_type,id=mid,body=daily_weibo)
		
		print content_label,'====',len(content_weibo)
Example #13
0
def read_flow_text(flow_text_index_name,current_date):
	
	#flow_text_index_name = facebook_flow_text_index_name_pre + current_date

	i = 0
	
	label_count_dict = {}
	content_dict = {}

	print '!!!'


	while True:
		
		query_body = {
			'query':{
				'bool':{
					'must':[
						
						{'term':{'sensitive':0}}
					]
				}
			},
			'size':1000,
			'from':i*1000
		}

		# 原创、sensitive为0
		#print '222'
		search_results = es_xnr.search(index=flow_text_index_name,doc_type=facebook_flow_text_index_type,\
				body=query_body)['hits']['hits']

		weibo_list = []
		
		for result in search_results:
			result = result['_source']
			weibo_list.append(result['text'].encode('utf-8'))

		label_list = triple_classifier_new(weibo_list)

		label_count = Counter(label_list)
		#print '333'
		for j in range(len(search_results)):
			
			label = label_list[j]

			try:
				if label_count_dict[label] < 20:
					content_dict[label].append(search_results[j]['_source'])
					label_count_dict[label] += 1

			except:
				content_dict[label] = [search_results[j]['_source']]

				label_count_dict[label] = 1

		i += 1

		if i % 1000 == 0:
			print 'i...',i
			print 'label_count_dict...',label_count_dict

		# 循环终止条件
		min_label_count = min(label_count_dict, key=label_count_dict.get)
		if label_count_dict[min_label_count] >= 20:
			break
	print 'label_count_dict::',label_count_dict

	for content_label,content_weibo in content_dict.iteritems():
		_id = content_label
		index_name = fb_daily_interest_index_name_pre +'_'+ current_date
		fb_daily_inerests_flow_text_mappings(index_name)
		item_dict = {}
		item_dict['timestamp'] = datetime2ts(current_date)
		item_dict['content'] = json.dumps(content_weibo)
		print es_xnr.index(index=index_name,doc_type=fb_daily_interest_index_type,id=_id,body=item_dict)
	
		print content_label,'====',len(content_weibo)
Example #14
0
from elasticsearch import Elasticsearch


# Seed list of "social sensor" weibo user ids used for hot-event sensing.
# NOTE(review): contains duplicate uids ("1231759973" and "1216431741" each
# appear twice) -- harmless for ES but worth deduplicating; confirm intent.
social_sensors = ["1738004582", "1784473157", "2286908003", "1717833412", "1314608344", "1644114654",\
        "1686546714", "1656737654", "2028810631", "1677991972", "3881380517", "1847582585", "1651428902",\
        "1420157965", "1913382117", "1884334303", "1734530730", "1893278624", "1720962692", "1700648435",\
        "3288875501", "1672519561", "2034347300", "1688864597", "2615417307", "1191965271", "1643971635", \
        "1778758223", "1216431741", "1698823241", "1977460817", "1644729004", "1231759973", "1231759973",\
        "1315591982", "1656831930", "1926909715", "1699432410", "1660452532", "1722628512", "1267454277",\
        "1640601392", "2443459455", "3921730119", "1867571077", "1718493627", "1653460650", "1737737970",\
        "2616293707", "3271121353", "1642591402", "1326410461", "1645705403", "1985593262", "1654164742",\
        "1638781994", "2993049293", "1653944045", "5977555696", "1992613670", "1726393244", "1216431741",\
        "1724367710", "1880087643", "2827102952", "1974808274", "1700720163", "3164957712", "3266943013",\
        "2127460165", "2083844833", "5305757517", "2803301701", "2656274875", "1618051664", "1974576991", \
        "1642512402", "1649173367", "1658388624", "1697601814", "1703371307", "1638782947", "1402977920", \
        "1893801487", "2108053230", "1649469284", "1975995305", "2810373291", "1749990115", "1663937380", \
        "1497087080", "1652484947", "2162541102", "2462605080", "1650111241", "1265998927", "1698857957", \
        "1887790981", "1698233740", "3712035812", "5044281310", "1701401324", "1571497285", "1635764393"]

# Username redacted in this snippet.
user = "******"

# Register (or overwrite) the event-sensing task document in ES.
# NOTE(review): `id_sensing`, `index_sensing`, `type_sensing`, `es` and `json`
# are not defined/imported in this snippet -- presumably supplied by the
# surrounding module; verify before running this standalone.
task_detail = dict()
task_detail["task_name"] = id_sensing
task_detail["remark"] = "感知热门事件"
task_detail["social_sensors"] = json.dumps(list(social_sensors))
task_detail["history_status"] = json.dumps([])
print es.index(index=index_sensing,
               doc_type=type_sensing,
               id=id_sensing,
               body=task_detail)
Example #15
0
def read_tracing_followers_tweet():

    if S_TYPE == 'test':
        query_body = {
            'query': {
                'term': {
                    'xnr_user_no': 'WXNR0004'
                }
            },
            'size': MAX_SEARCH_SIZE
        }

    else:
        query_body = {'query': {'match_all': {}}, 'size': MAX_SEARCH_SIZE}


    results = es_xnr.search(index=weibo_xnr_fans_followers_index_name,doc_type=weibo_xnr_fans_followers_index_type,\
                body=query_body)['hits']['hits']
    if results:
        for result in results:
            result = result['_source']

            try:
                xnr_user_no = result['xnr_user_no']
                print xnr_user_no
                trace_follow_list = result['trace_follow_list']

            except:
                continue
            print 'trace_follow_list...', trace_follow_list
            if S_TYPE == 'test':
                current_time = datetime2ts(S_DATE)
                #trace_follow_list = TRACE_FOLLOW_LIST
            else:
                current_time = int(time.time())

            current_date = ts2datetime(current_time)

            flow_text_index_name = flow_text_index_name_pre + current_date
            print flow_text_index_name

            query_body_flow = {
                'query': {
                    'filtered': {
                        'filter': {
                            'terms': {
                                'uid': trace_follow_list
                            }
                        }
                    }
                },
                'size': MAX_SEARCH_SIZE
            }

            results_flow = es_flow_text.search(index=flow_text_index_name,doc_type=flow_text_index_type,\
                            body=query_body_flow)['hits']['hits']
            print 'results_flow..', results_flow
            if results_flow:
                for result_flow in results_flow:

                    result_flow = result_flow['_source']
                    mid = result_flow['mid']

                    #先判断 之前是否已经存过该mid

                    task_id = xnr_user_no + '_' + mid
                    try:
                        # 如果已添加则跳过
                        es_xnr.get(index=weibo_xnr_retweet_timing_list_index_name,doc_type=\
                            weibo_xnr_retweet_timing_list_index_type,id=task_id)['_source']
                        continue

                    except:
                        # 如果未添加过则加入列表
                        task_detail = {}
                        task_detail['xnr_user_no'] = xnr_user_no
                        task_detail['mid'] = mid
                        task_detail['text'] = result_flow['text']
                        task_detail['uid'] = result_flow['uid']
                        task_detail['nick_name'], task_detail[
                            'photo_url'] = uid2nick_name_photo(
                                result_flow['uid'])
                        task_detail['timestamp'] = result_flow['timestamp']
                        task_detail['timestamp_set'] = result_flow[
                            'timestamp'] + random.randint(
                                RETWEET_START_TS, RETWEET_END_TS)
                        task_detail['compute_status'] = 0
                        print 'insert new!!!!'
                        print 'es_xnr...', es_xnr
                        print es_xnr.index(index=weibo_xnr_retweet_timing_list_index_name,doc_type=\
                            weibo_xnr_retweet_timing_list_index_type,body=task_detail,id=task_id)
Example #16
0
def lookup_timestamp_posts(start_time, end_time):
    start_date = ts2datetime(start_time)
    end_date = ts2datetime(end_time)

    flow_text_index_name_list = []
    if start_date == end_date:
        print '11'
        index_name = flow_text_index_name_pre + end_date
        flow_text_index_name_list.append(index_name)
        sensitive_index_name = weibo_sensitive_post_index_name_pre + end_date
        if es_xnr.indices.exists(index=sensitive_index_name):
            pass
        else:
            weibo_sensitive_post_mappings(sensitive_index_name)
            print '111'
    else:
        start_index_name = flow_text_index_name_pre + start_date
        end_index_name = flow_text_index_name_pre + end_date
        flow_text_index_name_list.append(start_index_name)
        flow_text_index_name_list.append(end_index_name)

        sensitive_start_index = weibo_sensitive_post_index_name_pre + start_date
        sensitive_end_index = weibo_sensitive_post_index_name_pre + end_date
        if not es_xnr.indices.exists(index=sensitive_start_index):
            weibo_sensitive_post_mappings(sensitive_start_index)
        if not es_xnr.indices.exists(index=sensitive_end_index):
            weibo_sensitive_post_mappings(sensitive_end_index)

    query_body = {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'must': [{
                            'range': {
                                'timestamp': {
                                    'gte': start_time,
                                    'lte': end_time
                                }
                            }
                        }]
                    }
                }
            }
        },
        'sort': {
            'timestamp': {
                'order': 'desc'
            }
        },
        'size': 50
    }
    print 'start search!!!'
    print flow_text_index_name_list
    try:
        es_result=es_flow_text.search(index=flow_text_index_name_list,doc_type=flow_text_index_type,\
            body=query_body)['hits']['hits']

        warning_type = 'user'
        print 'repeat!!!'
        hot_result = remove_repeat(es_result, warning_type)
        print 'save!!!'
        for item in hot_result:
            task_id = item['mid']
            # item['order_type']='timestamp'
            post_index_name = weibo_sensitive_post_index_name_pre + ts2datetime(
                item['timestamp'])
            es_xnr.index(index=post_index_name,
                         doc_type=weibo_sensitive_post_index_type,
                         body=item,
                         id=task_id)
        # hot_result=[]
        # for item in es_result:
        #     item['_source']['nick_name']=get_user_nickname(item['_source']['uid'])
        #     hot_result.append(item['_source'])
        mark_result = True
        print 'finish!'
    except:
        mark_result = False

    return mark_result
Example #17
0
def _build_role_item(role_results, role_label, domain, role_id):
    # Flatten the analysis result into the document stored in the role index;
    # complex fields are JSON-encoded strings.
    item = dict()
    item['role_pinyin'] = role_id
    item['role_name'] = role_label
    item['domains'] = domain
    item['personality'] = json.dumps(role_results['personality'])
    item['political_side'] = json.dumps(role_results['political_side'])
    item['geo'] = json.dumps(role_results['geo'])
    item['active_time'] = json.dumps(list(role_results['active_time']))
    item['day_post_num'] = json.dumps(list(role_results['day_post_num']))
    item['psy_feature'] = json.dumps(role_results['psy_feature'])
    item['member_uids'] = json.dumps(role_results['member_uids'])
    return item


def save_role_feature_analysis(role_results, role_label, domain, role_id,
                               task_id):
    """Persist role feature analysis results, then mark the domain task done.

    Updates the existing role document when one exists, otherwise creates it
    (upsert), and finally sets the owning domain task's compute_status to 3.
    The field-assignment block that the original duplicated in both the try
    and except branches is factored into _build_role_item.
    """
    item = _build_role_item(role_results, role_label, domain, role_id)
    try:
        # Probe for an existing role document; es.get raises when absent.
        es_xnr.get(index=weibo_role_index_name,
                   doc_type=weibo_role_index_type,
                   id=role_id)
        # Partial update: ES merges `doc` into the stored document, so sending
        # only the recomputed fields is equivalent to re-sending the merged doc.
        es_xnr.update(index=weibo_role_index_name,
                      doc_type=weibo_role_index_type,
                      id=role_id,
                      body={'doc': item})
    except Exception:
        es_xnr.index(index=weibo_role_index_name,
                     doc_type=weibo_role_index_type,
                     id=role_id,
                     body=item)

    # Mark the domain task: role analysis results stored (compute_status 3).
    # Hoisted out of both branches -- the original ran it identically in each.
    item_domain = dict()
    item_domain['compute_status'] = 3
    es_xnr.update(index=weibo_domain_index_name,
                  doc_type=weibo_domain_index_type,
                  id=task_id,
                  body={'doc': item_domain})
def onQQMessage(bot, contact, member, content):
    """Handle an incoming QQ message.

    For group messages, run a sensitivity check on the text and index the
    message into the day's ES group-message index (creating the index mapping
    on first use).
    """
    # Called whenever a QQ message is received.
    # bot     : QQBot object -- provides List/SendTo/GroupXXX/Stop/Restart etc.
    # contact : QContact object, the sender of this message
    # member  : QContact object, only valid for group/discussion messages;
    #           the member who actually sent the message
    # content : str object, the message text
    INFO('test groups %s', bot.List('group'))
    INFO('bot.conf %s', bot.conf)
    print 'contact.============.',contact
    if contact.ctype == 'group':
        INFO('群的 QQ.. %s', contact.qq)  # group QQ number, may be #NULL
        INFO('群的昵称.. %s', contact.nick) # group nickname, e.g. 嘿哼哈
        INFO('成员的 QQ.. %s', member.qq)   # member QQ number, may be #NULL
        INFO('成员的昵称.. %s', member.nick) # member nickname, e.g. /石沫沫
        INFO('最后发言时间.. %s', member.last_speak_time) # may be -1 when unknown
        INFO('消息.. %s', content) # the message text

        # Use the receive time instead of member.last_speak_time (often -1).
        last_speak_time = int(time.time())
        print 'last_speak_time..',last_speak_time
        if content == '':
            # Empty content: an image or a bogus message -- nothing to index.
            INFO('您发了一张图片或假消息... %s', content)
        else:
            # sen_words contains sensitive_words_string (e.g. "北京&达赖")
            # and sensitive_words_dict.
            sen_value,sen_words = sensitive_check(content)
            if sen_value !=0:
                sen_flag = 1    # this message is sensitive
            else:
                sen_flag = 0
            # Earlier variant that recorded the real member/group numbers:
            # qq_item = {
            #     'xnr_qq_number': bot.session.qq,
            #     'xnr_nickname': bot.session.nick,
            #     'timestamp': member.last_speak_time,
            #     'speaker_qq_number': member.qq,
            #     'text': content,
            #     'sensitive_flag':sen_flag,
            #     'sensitive_value': sen_value,
            #     'sensitive_words_string': sen_words['sensitive_words_string'],
            #     'speaker_nickname': member.nick,
            #     'qq_group_number': contact.qq,
            #     'qq_group_nickname': contact.nick
            # }
            # NOTE(review): speaker_qq_number / qq_group_number are stored as
            # empty strings here while the commented-out variant used
            # member.qq / contact.qq -- confirm this is intentional.
            qq_item = {
                'xnr_qq_number': bot.session.qq,
                'xnr_nickname': bot.session.nick,
                'timestamp': last_speak_time,
                'speaker_qq_number': '',
                'text': content,
                'sensitive_flag':sen_flag,
                'sensitive_value': sen_value,
                'sensitive_words_string': sen_words['sensitive_words_string'],
                'speaker_nickname': member.nick,
                'qq_group_number': '',
                'qq_group_nickname': contact.nick
            }
            qq_json = json.dumps(qq_item)
            print 'qq_json=====:',qq_json
            # Check whether this QQ group is in the redis group set
            # (currently disabled).
            #qq_number  = qq_item['xnr_qq_number']
            #qq_group_number = qq_item['qq_group_number']

            # r_qq_group_set = r_qq_group_set_pre + qq_number
            # qq_group_set = r.smembers(r_qq_group_set)
            #test
            #qq_group_set = set(['531811289'])

            #if qq_group_number in qq_group_set:

            # Content digest; currently unused (see commented index_id below).
            conMD5 = string_md5(content)

            nowDate = datetime.datetime.now().strftime('%Y-%m-%d')
            index_name = group_message_index_name_pre+ str(nowDate)
            #index_id = bot.conf.qq + '_' + contact.qq + '_' + str(member.last_speak_time) + '_' + conMD5
            # Let ES assign the _id automatically.
            if not es.indices.exists(index=index_name):
                print 'get mapping'
                print group_message_mappings(bot.session.qq,nowDate)
            print 'qq_item.....',qq_item
            print es.index(index=index_name, doc_type=group_message_index_type,body=qq_item)