예제 #1
0
    def __init__(self):
        """Read the Redis host/port from ``configs`` and build a connection pool.

        ``host`` falls back to ``'localhost'`` and ``port`` to ``6379`` when the
        ``redis`` section or the individual key is absent.  If constructing the
        pool raises, the error is logged and ``self.pool`` stays ``None``.
        """
        redis_conf = configs.get('redis', {})
        self.host = redis_conf.get('host', 'localhost')
        self.port = redis_conf.get('port', 6379)

        # Bind the attribute before the attempt so a failed construction leaves
        # a well-defined ``None`` instead of a missing attribute.
        self.pool = None
        try:
            self.pool = redis.ConnectionPool(host=self.host, port=self.port)
        except Exception as e:
            logging.error('redis pool init err:{}'.format(e))
예제 #2
0
    def __init__(self):
        """Load the bloom-filter settings from ``configs`` and create helpers.

        Defaults used when the ``bloom_filter`` section or a key is missing:
        ``capacity`` 3000, ``error_rate`` 0.01, ``rebuild_time`` 86400
        (presumably seconds, i.e. one day -- confirm against the consumer).
        """
        bloom_conf = configs.get('bloom_filter', {})

        self.capacity = bloom_conf.get('capacity', 3000)
        self.error_rate = bloom_conf.get('error_rate', 0.01)
        self.rebuild_time = bloom_conf.get('rebuild_time', 86400)

        # Project helpers defined outside this snippet.
        self.redis_obj = UserLog()
        self.redis_base_obj = Redis()
예제 #3
0
    def __init__(self, db_name, is_prod=1):
        """Remember the database name and pick the Mongo URIs for the environment.

        :param db_name: stored unchanged as ``self.db_name``
        :param is_prod: truthy selects the ``prod_*`` URIs from the ``mongo``
            config section, falsy selects the ``dev_*`` ones
        """
        self.db_name = db_name
        # Both handles start unset; nothing in this constructor opens them.
        self.read_db = self.write_db = None

        # Choose the pair of config keys once, then read both URIs.
        if is_prod:
            read_key, write_key = 'prod_read_uri', 'prod_write_uri'
        else:
            read_key, write_key = 'dev_read_uri', 'dev_write_uri'

        self.read_uri = configs.get('mongo').get(read_key)
        self.write_uri = configs.get('mongo').get(write_key)
예제 #4
0
def getRecServerPort():
    """Return the server port configured under ``configs['server']['port']``.

    Falls back to 9090 when the ``port`` key is absent.  If the value cannot
    be read or converted to ``int``, the failure is logged together with its
    cause and the process exits with status 1.

    :return: the port number as an ``int``
    """
    import sys

    try:
        return int(configs.get('server').get('port', 9090))
    except Exception as e:
        # Log the cause so a missing section or a non-numeric value can be
        # told apart when diagnosing a failed start-up.
        log.error("cannot read server port. {}".format(e))
        sys.exit(1)
예제 #5
0
 def getNumThread(self):
     """Return the worker thread count from ``configs['server']['thread']``.

     Falls back to 5 when the ``thread`` key is absent.  If the value cannot
     be read or converted to ``int``, the failure is logged together with its
     cause and the process exits with status 1.

     :return: the thread count as an ``int``
     """
     import sys

     log.info("Get the server thread num by config file")
     try:
         return int(configs.get('server').get('thread', 5))
     except Exception as e:
         # Log the cause so a missing section or a non-numeric value can be
         # told apart when diagnosing a failed start-up.
         log.error("cannot read server thread number. {}".format(e))
         sys.exit(1)
예제 #6
0
def main():
    """Assemble the recommender Thrift server and serve until it stops.

    Creates a ``RecommenderServerHandler``, reads ``port`` (default 9090) and
    ``thread`` (default 5) from the ``server`` config section, and wires a
    ``TThreadPoolServer`` from a ``Recommender.Processor``, a server socket, a
    buffered transport factory and a binary protocol factory.

    The process exits with status 0 when serving is interrupted from the
    keyboard and with status 1 when any other exception escapes the server.
    """
    import sys

    log.info("Initializing recamendation server")
    handler = RecommenderServerHandler()

    server_conf = configs.get('server')
    port = int(server_conf.get('port', 9090))
    number_thread = int(server_conf.get('thread', 5))

    processor = Recommender.Processor(handler)
    transport = TSocket.TServerSocket(port=port)
    tfactory = TTransport.TBufferedTransportFactory()
    pfactory = TBinaryProtocol.TBinaryProtocolFactory()
    server = TServer.TThreadPoolServer(processor, transport, tfactory,
                                       pfactory)
    log.info("Starting server on port : {}".format(str(port)))
    try:
        server.setNumThreads(number_thread)
        server.serve()
    except (Exception, KeyboardInterrupt) as e:
        # Single-argument parenthesised form prints the same text under
        # Python 2 and also parses under Python 3.
        print(e)
        log.error(e)
        # Only an operator interrupt counts as a clean stop; any other
        # exception must surface as a non-zero status to the supervisor.
        sys.exit(0 if isinstance(e, KeyboardInterrupt) else 1)
예제 #7
0
    def __init__(self):
        """Load the Kafka consumer settings from the ``kafka_online_tag`` section.

        Defaults used when the section or a key is missing: ``host``
        ``'bj2-kafka01:9092'``, ``group_id`` ``'read_traffic'``, ``timeout``
        1000, ``topic`` ``'eventlog'``, ``consume_num`` 1.  ``mysql_api`` has
        no default and is ``None`` when absent.  The black list is obtained
        from ``get_black_list()``.
        """
        self.consumer = []

        kafka_conf = configs.get('kafka_online_tag', {})
        self.host = kafka_conf.get("host", 'bj2-kafka01:9092')
        self.group_id = kafka_conf.get("group_id", 'read_traffic')
        self.timeout = kafka_conf.get("timeout", 1000)
        self.topic = kafka_conf.get("topic", 'eventlog')
        self.consume_num = kafka_conf.get("consume_num", 1)
        self.mysql_api = kafka_conf.get("mysql_api")

        self.black_list_set = get_black_list()
예제 #8
0
def fetch_batch_userrec(user_id,
                        first_category,
                        second_category,
                        city=None,
                        size=3):
    """
    Recommend posts for a user from the keywords in their tag profile.

    The offline and online tag documents are read through ``up.read_tag``.
    Candidate ``(tag, weight)`` pairs for the given category pair are merged,
    ``kw_size`` of them (fewer if not enough exist) are sampled at random and
    sent to ``fetchKwData`` with a 60-day window; if that yields fewer than
    ``size`` posts, a second query with a 270-day window is appended.

    :param user_id: value used as ``_id`` when reading the tag documents
    :param first_category: first-level key into the tag documents
    :param second_category: second-level key into the tag documents; also
        sent, UTF-8 encoded, as the ``category`` of the keyword query
    :param city: passed through as the ``city`` of the keyword query
    :param size: number of posts requested (``num`` of the keyword query);
        also passed as ``top`` to ``up.read_tag``
    :return: [{'rec_id': ad_id, 'sim': score, 'rec_name': 'user_profile'}, ...];
        an empty list when no tags are found or the keyword query fails
    """
    log.info('{},{},{},{},{}'.format(user_id, first_category, second_category,
                                     city, size))
    result_list = []

    # Number of keywords to sample comes from config (default 4).
    kw_size = int(configs.get('user_profile').get('kw_size', 4))
    log.info('{} key word will be user to fetch vad data.'.format(kw_size))
    half_quota = kw_size * 10
    full_quota = kw_size * 20

    # A failed read of either document degrades to "no tags from that source".
    try:
        off_tag_data = up.read_tag('RecommendationUserTagsOffline',
                                   {'_id': user_id},
                                   top=size)
    except Exception as e:
        log.error('获取离线用户标签失败,{}'.format(e))
        off_tag_data = {}

    try:
        on_tag_data = up.read_tag('RecommendationUserTagsOnline',
                                  {'_id': user_id},
                                  top=size)
    except Exception as e:
        log.error('获取在线用户标签失败,{}'.format(e))
        on_tag_data = {}

    # Each source contributes at most half_quota tags; a missing key simply
    # means that source has nothing for this category pair.
    contact_tags = []
    online_tags = []
    try:
        contact_tags = off_tag_data[first_category][second_category][
            'contact_meta'][:half_quota]
    except KeyError:
        pass
    try:
        online_tags = on_tag_data[first_category][second_category][
            'content'][:half_quota]
    except KeyError:
        pass
    log.info('contact tags: {}, online tags: {}, quota per source: {}'.format(
        len(contact_tags), len(online_tags), half_quota))

    if len(contact_tags) >= half_quota and len(online_tags) >= half_quota:
        # Both sources filled their quota (both are already truncated).
        total_tag = contact_tags + online_tags
    elif len(contact_tags) < half_quota:
        # NOTE(review): this branch caps the merged list at half_quota while
        # the other branches allow up to full_quota -- confirm that is intended.
        total_tag = contact_tags + online_tags[:(half_quota -
                                                 len(contact_tags))]
    else:
        # Contact tags are full but online tags are short: take every online
        # tag and let contact tags fill the rest of the full quota.
        online_size = len(online_tags)
        total_tag = online_tags + contact_tags[:(full_quota - online_size)]

    if len(total_tag) < full_quota:
        # Backfill from the offline 'meta' and then 'content' lists.
        try:
            # NOTE(review): the current size is capped at half_quota here, so
            # 'meta' may push the total past full_quota -- confirm intent.
            total_tag_size = min(len(total_tag), half_quota)
            total_tag += off_tag_data[first_category][second_category][
                'meta'][:(full_quota - total_tag_size)]
        except KeyError as e:
            log.error('离线用户标签meta数据为空,{}'.format(e))

        try:
            # Clamp at zero: a negative slice bound would not mean "none", it
            # would take everything except the last few entries.
            remaining = max(0, full_quota - len(total_tag))
            total_tag += off_tag_data[first_category][second_category][
                'content'][:remaining]
        except KeyError as e:
            log.error('离线用户标签content数据为空,{}'.format(e))

    # Nothing to query with.
    if not total_tag:
        log.warning("获取关键词为空!")
        return result_list

    # Fetch posts based on the user's tags.
    try:
        weighted_tags = [(k.encode('utf-8'), float(v)) for k, v in total_tag]
        # random.sample raises ValueError when asked for more items than the
        # population holds, so never ask for more tags than are available.
        sampled = random.sample(weighted_tags,
                                min(kw_size, len(weighted_tags)))
        second_category = second_category.encode('utf-8')

        def _kw_query(days):
            # Build a fresh query dict per call; only the look-back differs.
            return {
                "num": size,
                "city": city,
                "category": second_category,
                "tag": "_".join([x[0] for x in sampled]),
                "weight": [x[1] for x in sampled],
                "days": days,
                'cut': 1000
            }

        user_profile_ad = fetchKwData(_kw_query(60))
        if len(user_profile_ad) < size:
            # Too few posts in the short window: append a 270-day query.
            user_profile_ad.extend(fetchKwData(_kw_query(270)))
    except Exception as e:
        log.error("获取用户画像失败, {}".format(e))
        user_profile_ad = []

    for ad in user_profile_ad:
        result_list.append({
            "rec_id": ad['ad_id'],
            "sim": ad['score'],
            "rec_name": "user_profile"
        })
    return result_list