예제 #1
0
    result = es_cluster.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
    sensitive_uid = []
    for item in result:
        sensitive_uid.append(item['_source']['uid'])

    return sensitive_uid


if __name__ == '__main__':
    # Disabled step that wrote sensitive_uid_list.txt (apparently how the
    # input file read below was produced); kept for reference:
    #
    #     f = open('sensitive_uid_list.txt', 'wb')
    #     uid_list = search_sensitive_weibo('20130904')
    #     for uid in uid_list:
    #         f.write(str(uid) + '\n')
    #     f.close()

    # For every uid listed in sensitive_uid_list.txt (one per line), read the
    # user's document from the `sensitive_user_portrait` index and set its
    # `type` field to 1 when `sensitive_words_string` is non-empty, else 0.
    #
    # The `with` block guarantees the file handle is closed even if an
    # update raises part-way through the list.
    with open('sensitive_uid_list.txt', 'rb') as uid_file:
        for line in uid_file:
            uid = line.strip()
            try:
                result = es_cluster.get(index='sensitive_user_portrait', doc_type='user', id=uid)['_source']
            except Exception:
                # Best effort: report the uid whose document could not be
                # fetched and move on. `Exception` (rather than a bare
                # `except:`) lets Ctrl-C / SystemExit still stop the script.
                print(uid)
                continue
            # Both original branches issued the same update and differed only
            # in the flag value, so compute the flag once.
            user_type = 1 if result['sensitive_words_string'] else 0
            es.update(index='sensitive_user_portrait', doc_type='user', id=uid,
                      body={"doc": {"type": user_type}})

    result = dict()
    results = dict()
    try:
        for i in range(1,13):
            item = line[i].split('*')
            number = item[0].split('E')
            result[item[1]] = int(number[1][1:])
    except:
        print number, count, uid
        continue
    sort_list =  sorted(result.items(), key=lambda x:x[1], reverse=False)
    results['domain'] = domain_dict[sort_list[0][0]]
    action = {"update": {'_id': uid}}
    bulk_action.extend([action, {'doc': results}])
    if count % 100 == 0:
        es.bulk(bulk_action, index='sensitive_user_portrait', doc_type='user', timeout=60)
        bulk_action = []
        print count
if bulk_action:
    es.bulk(bulk_action, index='sensitive_user_portrait', doc_type='user', timeout=60)
'''

'''
es.delete(index= 'custom_attribute', doc_type='attribute', id='AVBa32QMGk0Kt7GIxCQW')
es.delete(index= 'custom_attribute', doc_type='attribute', id='AVBa32rwGk0Kt7GIxCQX')
es.delete(index= 'custom_attribute', doc_type='attribute', id='AVBa4Qx7y3nG3t1ED6gr')
'''

# Disabled call for a different user id, kept exactly as it was:
#es.update(index="sensitive_user_portrait", doc_type="user", id=1408848023, body={"doc": {"domain": "公知分子"}})

# Set the `domain` field of user document 1892680725 in the
# `sensitive_user_portrait` index via a partial-document update.
es.update(
    index="sensitive_user_portrait",
    doc_type="user",
    id=1892680725,
    body={"doc": {"domain": "公职人员"}},
)