def __init__(self):
    """Read the Redis endpoint from ``configs`` and build a connection pool.

    ``host`` and ``port`` are taken from the ``redis`` config section and
    default to ``'localhost'`` and ``6379``.  A failure while creating the
    pool is logged and not re-raised; in that case ``self.pool`` is never
    assigned.
    """
    redis_conf = configs.get('redis', {})
    self.host = redis_conf.get('host', 'localhost')
    self.port = redis_conf.get('port', 6379)

    try:
        pool = redis.ConnectionPool(host=self.host, port=self.port)
        self.pool = pool
    except Exception as err:
        # Best effort: report the problem and let construction finish.
        logging.error('redis pool init err:{}'.format(err))
def __init__(self):
    """Load bloom-filter settings from ``configs`` and create Redis helpers.

    Settings come from the ``bloom_filter`` config section, each with a
    built-in default:

    * ``capacity``     -- default 3000
    * ``error_rate``   -- default 0.01
    * ``rebuild_time`` -- default 86400 (presumably seconds; confirm
      against the code that consumes it)

    Two helper objects are then instantiated: ``UserLog()`` as
    ``redis_obj`` and ``Redis()`` as ``redis_base_obj``.
    """
    bloom_conf = configs.get('bloom_filter', {})

    self.capacity = bloom_conf.get('capacity', 3000)
    self.error_rate = bloom_conf.get('error_rate', 0.01)
    self.rebuild_time = bloom_conf.get('rebuild_time', 86400)

    # Helpers are created after the settings, as in the original ordering.
    self.redis_obj = UserLog()
    self.redis_base_obj = Redis()
def __init__(self, db_name, is_prod=1):
    """Remember the database name and pick the Mongo URIs for the environment.

    :param db_name: name stored on the instance as ``self.db_name``
    :param is_prod: truthy selects the ``prod_*`` URIs from the ``mongo``
        config section, falsy selects the ``dev_*`` ones

    ``read_db`` and ``write_db`` start out as ``None``; nothing is
    connected here.
    """
    self.db_name = db_name
    self.read_db = None
    self.write_db = None

    # Choose the pair of config keys once instead of branching per attribute.
    if is_prod:
        read_key, write_key = 'prod_read_uri', 'prod_write_uri'
    else:
        read_key, write_key = 'dev_read_uri', 'dev_write_uri'

    mongo_conf = configs.get('mongo')
    self.read_uri = mongo_conf.get(read_key)
    self.write_uri = mongo_conf.get(write_key)
def getRecServerPort():
    """Return the recommender server port from the ``server`` config section.

    The ``port`` entry defaults to 9090 and is converted with ``int``.

    :return: the port number as an int
    :raises SystemExit: with status 1 when the value cannot be read or
        converted; the cause is logged first
    """
    # Function-scope import so this block does not depend on file-level imports.
    import sys

    try:
        return int(configs.get('server').get('port', 9090))
    except Exception as e:
        # Include the cause: the original message gave no hint why it failed.
        log.error("cannot read server port. {}".format(e))
        # sys.exit rather than the interactive-shell helper ``exit``.
        sys.exit(1)
def getNumThread(self):
    """Return the server thread count from the ``server`` config section.

    The ``thread`` entry defaults to 5 and is converted with ``int``.

    :return: the number of threads as an int
    :raises SystemExit: with status 1 when the value cannot be read or
        converted; the cause is logged first
    """
    # Function-scope import so this block does not depend on file-level imports.
    import sys

    log.info("Get the server thread num by config file")
    try:
        thread_num = int(configs.get('server').get('thread', 5))
        return thread_num
    except Exception as e:
        # Include the cause: the original message gave no hint why it failed.
        log.error("cannot read server thread number. {}".format(e))
        # sys.exit rather than the interactive-shell helper ``exit``.
        sys.exit(1)
def main():
    """Build the Thrift thread-pool server for the recommender and run it.

    The listening port (default 9090) and the worker thread count
    (default 5) come from the ``server`` config section.

    Exit behaviour:

    * ``KeyboardInterrupt`` while serving is treated as a requested
      shutdown and exits with status 0.
    * Any other exception is logged and exits with status 1, so a
      supervisor can tell a crash from a clean stop (the original exited
      with 0 in both cases).
    """
    # Function-scope import so this block does not depend on file-level imports.
    import sys

    log.info("Initializing recamendation server")
    handler = RecommenderServerHandler()

    server_conf = configs.get('server')
    port = int(server_conf.get('port', 9090))
    number_thread = int(server_conf.get('thread', 5))

    processor = Recommender.Processor(handler)
    transport = TSocket.TServerSocket(port=port)
    tfactory = TTransport.TBufferedTransportFactory()
    pfactory = TBinaryProtocol.TBinaryProtocolFactory()
    server = TServer.TThreadPoolServer(processor, transport, tfactory, pfactory)

    log.info("Starting server on port : {}".format(str(port)))
    try:
        server.setNumThreads(number_thread)
        server.serve()
    except KeyboardInterrupt:
        log.info("Keyboard interrupt received, stopping server")
        sys.exit(0)
    except Exception as e:
        # The logger already records the error; the stray debug print is gone.
        log.error(e)
        sys.exit(1)
def __init__(self):
    """Load Kafka consumer settings from the ``kafka_online_tag`` config section.

    Every setting except ``mysql_api`` has a built-in default:

    * ``host``        -- default ``'bj2-kafka01:9092'``
    * ``group_id``    -- default ``'read_traffic'``
    * ``timeout``     -- default 1000
    * ``topic``       -- default ``'eventlog'``
    * ``consume_num`` -- default 1
    * ``mysql_api``   -- ``None`` when not configured

    ``self.consumer`` starts as an empty list and ``self.black_list_set``
    is filled from ``get_black_list()``.
    """
    self.consumer = []

    kafka_conf = configs.get('kafka_online_tag', {})
    self.host = kafka_conf.get("host", 'bj2-kafka01:9092')
    self.group_id = kafka_conf.get("group_id", 'read_traffic')
    self.timeout = kafka_conf.get("timeout", 1000)
    self.topic = kafka_conf.get("topic", 'eventlog')
    self.consume_num = kafka_conf.get("consume_num", 1)
    self.mysql_api = kafka_conf.get("mysql_api")

    # Loaded last, matching the original statement order.
    self.black_list_set = get_black_list()
def _merge_profile_tags(contact_tags, online_tags, per_source):
    """Combine offline contact tags and online tags into one new list.

    Both inputs are expected to be already capped at ``per_source`` items.
    """
    if len(contact_tags) < per_source:
        # Few contact tags: take them all, then top up with online tags.
        return contact_tags + online_tags[:per_source - len(contact_tags)]
    if len(online_tags) < per_source:
        # Few online tags: take them all, then fill with contact tags.
        # (The original stored a list in the "size" variable here and used
        # it as a slice bound, which raised TypeError.)
        return online_tags + contact_tags[:2 * per_source - len(online_tags)]
    return contact_tags[:per_source] + online_tags[:per_source]


def fetch_batch_userrec(user_id, first_category, second_category, city=None, size=3):
    """Recommend posts for a user based on the user's profile tags.

    Offline and online tag documents are read through ``up.read_tag``; the
    tags under ``first_category``/``second_category`` are merged, topped up
    from the offline ``meta`` and ``content`` lists, and a random sample of
    ``kw_size`` of them (``user_profile.kw_size`` in the config, default 4)
    is sent to ``fetchKwData``.  A 60-day query is tried first; if it
    yields fewer than ``size`` posts, the results of a 270-day query are
    appended.

    :param user_id: ``_id`` used to look up the user's tag documents
    :param first_category: first-level key into the tag documents
    :param second_category: second-level key; also sent as the query category
    :param city: passed through to the query as ``city``
    :param size: number of posts requested from the query
    :return: [{'rec_id': rec_id, 'sim': sim, 'rec_name': 'user_profile'}, ...];
        empty when no tags are available or the lookup fails
    """
    log.info('{},{},{},{},{}'.format(user_id, first_category, second_category, city, size))
    result_list = []

    # Number of keywords to sample comes from the config.
    kw_size = int(configs.get('user_profile').get('kw_size', 4))
    log.info('{} key word will be user to fetch vad data.'.format(kw_size))
    per_source = kw_size * 10   # cap for each individual tag source
    max_tags = kw_size * 20     # cap for the merged candidate list

    try:
        off_tag_data = up.read_tag('RecommendationUserTagsOffline', {'_id': user_id}, top=size)
    except Exception as e:
        log.error('获取离线用户标签失败,{}'.format(e))
        off_tag_data = {}
    try:
        on_tag_data = up.read_tag('RecommendationUserTagsOnline', {'_id': user_id}, top=size)
    except Exception as e:
        log.error('获取在线用户标签失败,{}'.format(e))
        on_tag_data = {}

    # TypeError is caught as well so a None document or None field is
    # treated like a missing one instead of crashing the request.
    try:
        contact_tags = off_tag_data[first_category][second_category][
            'contact_meta'][:per_source]
    except (KeyError, TypeError):
        contact_tags = []
    try:
        online_tags = on_tag_data[first_category][second_category][
            'content'][:per_source]
    except (KeyError, TypeError):
        online_tags = []
    log.info('contact tags: {}, online tags: {}, per-source cap: {}'.format(
        len(contact_tags), len(online_tags), per_source))

    total_tag = _merge_profile_tags(contact_tags, online_tags, per_source)

    if len(total_tag) < max_tags:
        # Top up from the offline meta tags, then the offline content tags.
        # The bound is clamped at 0: a negative slice bound would select
        # "all but the last n" items rather than nothing.
        try:
            total_tag += off_tag_data[first_category][second_category][
                'meta'][:max(0, max_tags - len(total_tag))]
        except (KeyError, TypeError) as e:
            log.error('离线用户标签meta数据为空,{}'.format(e))
        try:
            total_tag += off_tag_data[first_category][second_category][
                'content'][:max(0, max_tags - len(total_tag))]
        except (KeyError, TypeError) as e:
            log.error('离线用户标签content数据为空,{}'.format(e))

    # Nothing to query with.
    if not total_tag:
        log.warning("获取关键词为空!")
        return result_list

    # Fetch posts for a random sample of the user's tags.
    try:
        weighted_tags = [(k.encode('utf-8'), float(v)) for k, v in total_tag]
        # Never ask for more items than exist: random.sample raises
        # ValueError in that case, which used to discard the whole request.
        sampled = random.sample(weighted_tags, min(kw_size, len(weighted_tags)))
        second_category = second_category.encode('utf-8')

        def build_query(days):
            # A fresh dict and fresh lists per query, as the original built
            # each query separately.
            return {
                "num": size,
                "city": city,
                "category": second_category,
                "tag": "_".join([x[0] for x in sampled]),
                "weight": [x[1] for x in sampled],
                "days": days,
                'cut': 1000
            }

        user_profile_ad = fetchKwData(build_query(60))
        if len(user_profile_ad) < size:
            # Too few recent posts: widen the window and append the results.
            user_profile_ad.extend(fetchKwData(build_query(270)))
    except Exception as e:
        log.error("获取用户画像失败, {}".format(e))
        user_profile_ad = []

    for ad in user_profile_ad:
        result_list.append({
            "rec_id": ad['ad_id'],
            "sim": ad['score'],
            "rec_name": "user_profile"
        })
    return result_list