result = es_cluster.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits'] sensitive_uid = [] for item in result: sensitive_uid.append(item['_source']['uid']) return sensitive_uid if __name__ == '__main__': ''' f = open('sensitive_uid_list.txt', 'wb') uid_list = search_sensitive_weibo('20130904') for uid in uid_list: f.write(str(uid) + '\n') f.close() ''' f = open('sensitive_uid_list.txt', 'rb') for line in f: uid = line.strip() try: result = es_cluster.get(index='sensitive_user_portrait', doc_type='user', id=uid)['_source'] except: print uid continue if result['sensitive_words_string']: es.update(index='sensitive_user_portrait', doc_type='user', id=uid, body={"doc":{"type":1}}) else: es.update(index='sensitive_user_portrait', doc_type='user', id=uid, body={"doc":{"type":0}})
result = dict() results = dict() try: for i in range(1,13): item = line[i].split('*') number = item[0].split('E') result[item[1]] = int(number[1][1:]) except: print number, count, uid continue sort_list = sorted(result.items(), key=lambda x:x[1], reverse=False) results['domain'] = domain_dict[sort_list[0][0]] action = {"update": {'_id': uid}} bulk_action.extend([action, {'doc': results}]) if count % 100 == 0: es.bulk(bulk_action, index='sensitive_user_portrait', doc_type='user', timeout=60) bulk_action = [] print count if bulk_action: es.bulk(bulk_action, index='sensitive_user_portrait', doc_type='user', timeout=60) ''' ''' es.delete(index= 'custom_attribute', doc_type='attribute', id='AVBa32QMGk0Kt7GIxCQW') es.delete(index= 'custom_attribute', doc_type='attribute', id='AVBa32rwGk0Kt7GIxCQX') es.delete(index= 'custom_attribute', doc_type='attribute', id='AVBa4Qx7y3nG3t1ED6gr') ''' #es.update(index="sensitive_user_portrait", doc_type="user", id=1408848023, body={"doc": {"domain": "公知分子"}}) es.update(index="sensitive_user_portrait", doc_type="user", id=1892680725, body={"doc":{"domain": "公职人员"}})