コード例 #1
0
def get_view_profile_rowkeys(begin, end, mode='news'):
    # topic一个月大约30w,news一个大约80w,不需要快
    print 'begin', begin, timestamp2datetime(begin)
    print 'end', end, timestamp2datetime(end)

    # 确定solr表
    solr = solr_np if mode == 'news' else solr_tp

    # 调整时间戳为毫秒格式
    begin = ensure_m_timestamp(begin)
    end = ensure_m_timestamp(end)
    print begin
    print end

    # 估算rows
    days = (end - begin) / 86400 / 1000
    rows = days * every_day_topic_cnt if mode == 'topic' else days * every_day_news_cnt
    print 'rows', rows
    # 构建索引
    solr_query = SolrQuery()
    q = '*:*'
    solr_query.set('q', q)
    solr_query.add('fq', 'event_time:[%s TO %s]' % (begin, end))
    solr_query.set('rows', rows)
    solr_query.set('fl', ['id'])

    rowkey_list = [
        item['id'] for item in solr.search(**solr_query.get_query_dict())
    ]
    print len(rowkey_list)
    return rowkey_list
コード例 #2
0
def test21():
    '''
    尝试连接各种测试solr
    :return:
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery
    for table in [
            "biztest_hospital_search", "biztest_main_doctors",
            "biztest_personal_doctors", "biztest_robot_news",
            "biztest_problem", "biztest_dialog", "biztest_full_problem",
            "biztest_drug", "biztest_topics", "biztest_pedia"
    ]:

        print 'tablename', table, '=' * 30
        try:
            solr = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), table)
            solr_query = SolrQuery()
            solr_query.set('q', '*:*')
            solr_query.set('fl', ['*', 'score'])
            solr_query.set('rows', 10)
            for item in solr.search(**solr_query.get_query_dict()):
                print item.get('id')
        except Exception, e:
            print e
コード例 #3
0
def get_query_uids(begin, end, mode='search'):
    from general_utils.solr_utils import zk_md4, SolrQuery, pysolr
    from general_utils.time_utils import ensure_m_timestamp
    # 更改时间戳格式
    begin = ensure_m_timestamp(begin)
    end = ensure_m_timestamp(end)
    # 构建索引
    q = '*:*'
    solr_query = SolrQuery()
    solr_query.set('q', q)
    solr_query.add('fq', 'event_time:[%s TO %s]' % (begin, end))
    solr_query.set('fl', ['uid'])
    solr_query.set('rows', 1000000)

    if mode == 'search':
        solr = pysolr.SolrCloud(zk_md4, 'search_event', timeout=25)
    elif mode == 'news':
        solr = pysolr.SolrCloud(zk_md4, 'news_profile', timeout=25)
    elif mode == 'topic':
        solr = pysolr.SolrCloud(zk_md4, 'topic_profile', timeout=25)
    else:
        return set()

    uid_list = [
        int(item['uid']) for item in solr.search(**solr_query.get_query_dict())
    ]
    uid_set = set(uid_list)
    print 'len uids', len(uid_set)
    return uid_set
コード例 #4
0
def get_click_data():
    '''
    (2017.10.31-2018.02.27)120天
    分批从solr获取rowkey_list,每天一批(几十万)
    :return:
    '''
    begin = 1509379200
    end = 1519747200

    # output file
    row_key_list_file = open(solr_rowkeylist_filename, 'w')

    for i in range((end - begin) / 86400):
        begin_i = begin + i * 86400
        end_i = begin_i + 86400
        begin_i_m = int(begin_i * 1000)
        end_i_m = int(end_i * 1000)
        print 'begin_i_m', begin_i_m, 'end_i_m', end_i_m
        # 构造query
        solrquery = SolrQuery()
        q = '*:*'
        solrquery.set('q', q)
        solrquery.add('fq', 'event_time:[%s TO %s]' % (begin_i_m, end_i_m))
        solrquery.set('fl', ['id'])
        solrquery.set('rows', 100000)

        rowkey_list = [
            item['id'] for item in solr_np.search(**solrquery.get_query_dict())
        ]
        row_key_list_file.write('\n'.join(rowkey_list))
        row_key_list_file.write('\n')
        print '%s row keys are saved' % len(rowkey_list)
    row_key_list_file.close()

    # output file2
    connection = happybase.Connection('hbase_server',
                                      compat='0.90',
                                      port=19090,
                                      timeout=30000)
    table = connection.table("cy_event")

    click_action_file = open(click_actions_filename, 'w')
    row_key_list_file = open(solr_rowkeylist_filename, 'r')

    data_lines = row_key_list_file.readlines()
    data_lines = [item.strip('\n') for item in data_lines]
    row_key_list_file.close()
    cols = ['info:uid', 'info:news_id']
    for batch_data in yield_batch_data(data_lines, 1000):

        row = table.rows(batch_data, columns=cols)
        for key, value in row:
            uid = int(value['info:uid'])
            newsid = int(value['info:news_id'])
            a, ts, b = key.split('|')
            outputstr = '|'.join([str(uid), str(ts), str(newsid)]) + '\n'
            click_action_file.write(outputstr)
    click_action_file.close()
    connection.close()
コード例 #5
0
def ask_solr(sort_field_name, solr):
    '''
    Read ``sort_field_name`` from the first document of a descending sort.

    Runs a match-all query sorted by ``sort_field_name`` in descending
    order, limited to one row and returning only that field.

    :param sort_field_name: name of the field to sort on and to read back.
    :param solr: client object exposing ``search(**params)``.
    :return: dict of the form ``{'last_value': <field value>}``.
    :raises IndexError: when the query returns no documents.
    '''
    query = SolrQuery()
    settings = (
        ('q', '*:*'),
        ('sort', '%s desc' % sort_field_name),
        ('rows', 1),
        ('fl', [sort_field_name]),
    )
    for param, value in settings:
        query.set(param, value)

    docs = list(solr.search(**query.get_query_dict()))
    top_doc = docs[0]
    return {'last_value': top_doc[sort_field_name]}
コード例 #6
0
def test27():
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery
    solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "main_doctors")
    for id in ("clinic_web_98f32ad0d4461af8", "clinic_web_63da2e8135fabfb1",
               "clinic_zhongyike_zhouyuchun", "clinic_web_7ff76f1118d806e6",
               "4647c810af1ee0850bf2"):
        sq = SolrQuery()
        sq.set('q', '*:*')
        sq.add('fq', 'id:%s' % id)
        sq.set('rows', 1)
        sq.set('fl', ['*'])
        res = [item for item in solr.search(**sq.get_query_dict())][0]
        if 'name' not in res:
            continue
        print '=' * 30
        print id, res.get('name2', '')
        for key in res:
            if 'score' in key or 'rate' in key or 'star' in key:
                print key, res[key]
コード例 #7
0
 def get_ids():
     '''
     Return the ids of the documents of one hard-coded doctor.

     Runs a match-all query filtered on
     ``doctor_id:clinic_web_c383b3a7e6db1f1d`` against ``solr``, requesting
     only the ``id`` field and at most 200 rows.
     :return: list with the ``id`` of every returned document.
     '''
     query = SolrQuery()
     query.set("q", "*:*")
     query.add("fq", "doctor_id:clinic_web_c383b3a7e6db1f1d")
     query.set("fl", ["id"])
     query.set("rows", 200)

     params = query.get_query_dict()
     ids = []
     for doc in solr.search(**params):
         ids.append(doc["id"])
     return ids