class RedisCache:
    """Redis-backed cache for solver ``ScheduledResult`` objects.

    Connection parameters fall back to the ``REDIS_*`` environment
    variables (optionally loaded from a .env file) when not given
    explicitly.
    """

    def __init__(self, host=None, port=None, db=None, password=None, key_prefix=""):
        """Open a Redis connection.

        :param host: Redis host; defaults to $REDIS_HOST or "localhost".
        :param port: Redis port; defaults to $REDIS_PORT or 6379.
        :param db: Redis database index; defaults to $REDIS_DB or 0.
        :param password: Redis password; defaults to $REDIS_PASSWORD or "".
        :param key_prefix: prefix prepended to cache keys by callers.
        """
        dotenv_location = dotenv.find_dotenv()
        if dotenv_location:  # empty string means no .env file was found
            logging.info(f'Loading dotenv config from {dotenv_location}')
            dotenv.load_dotenv(dotenv_location)
        else:
            logging.warning("Failed to load dotenv config!")
        self.key_prefix = key_prefix
        self.host = host or os.environ.get("REDIS_HOST", "localhost")
        self.port = port or int(os.environ.get("REDIS_PORT", 6379))
        self.db = db or int(os.environ.get("REDIS_DB", 0))
        self.password = password or os.environ.get("REDIS_PASSWORD", "")
        self.redis_conn = StrictRedis(host=self.host, port=self.port,
                                      db=self.db, password=self.password)

    def read_result(self, cache_key: RedisCacheKey, ilp_time_limit: int = -1) -> Optional[ScheduledResult]:
        """Return the cached result for ``cache_key``, or None on a miss.

        For OPTIMAL_ILP_GC results the cached entry is only returned when
        its recorded ``ilp_time_limit`` is at least the requested one, so a
        weaker cached solve never masks a stronger requested one.
        """
        key = cache_key.key(ilp_time_limit)
        result_bytes = self.redis_conn.get(key)
        if result_bytes:
            res = ScheduledResult.loads(result_bytes)
            if res.solve_strategy == SolveStrategy.OPTIMAL_ILP_GC:
                if res.ilp_aux_data is not None and (res.ilp_aux_data.ilp_time_limit >= ilp_time_limit):
                    return res
            elif res.schedule_aux_data is not None:
                return res
        return None

    def write_result(self, key: RedisCacheKey, result: ScheduledResult, ilp_time_limit: int = -1):
        """Serialize ``result`` and store it under ``key``; returns the SET reply."""
        return self.redis_conn.set(key.key(ilp_time_limit), result.dumps())

    def __del__(self):
        # __init__ may have failed before redis_conn was assigned; avoid a
        # secondary AttributeError during garbage collection / teardown.
        conn = getattr(self, "redis_conn", None)
        if conn is not None:
            conn.close()
def expire_uniformly(expire_in, match="*"):
    """Spread expiry of all keys matching ``match`` over ``expire_in`` seconds.

    Each matching key gets a random TTL in [1, expire_in], avoiding a
    thundering herd of simultaneous expirations.

    :param expire_in: upper bound (seconds) for the randomized TTL.
    :param match: Redis key pattern to scan (default "*").
    :return: True on completion.
    """
    redis_client = StrictRedis(host=getenv("RedisURL"),
                               port=int(getenv("RedisPort")),
                               db=0,
                               password=getenv("RedisKey"),
                               ssl=True,
                               ssl_ca_certs=certifi.where())
    try:
        for key in redis_client.scan_iter(match):
            redis_client.expire(key, randint(1, expire_in))
    finally:
        # Close even if scan/expire raises (the original leaked here).
        redis_client.close()
    return True
class RedisDBManager(object):
    """Context manager around a Redis connection.

    Opens a ``StrictRedis`` connection on entry and closes it on exit::

        with RedisDBManager("127.0.0.1") as conn:
            conn.get(...)
    """

    def __init__(self, host):
        # Connection is created lazily in __enter__, not here.
        self._host = host
        self.db_connect = None

    def __enter__(self):
        self.db_connect = StrictRedis(host=self._host, port=6379, db=0)
        return self.db_connect

    def __exit__(self, exc_type, exc_val, exc_tb):
        connection = self.db_connect
        if connection:
            connection.close()
def __init__(self):
    """Load pending shop URLs from Redis and prepare DB/thread state.

    NOTE(review): this is a ``threading.Thread`` subclass initializer
    (``threading.Thread.__init__`` is called below) — confirm the enclosing
    class declaration.
    """
    # Read the whole set of previously-pushed shop URLs once, then close.
    rhost = settings.REDIS_HOST
    rport = settings.REDIS_PORT
    rpassword = settings.REDIS_PASSWORD
    pool = ConnectionPool(host=rhost, port=rport, password=rpassword)
    redis = StrictRedis(connection_pool=pool)
    self.urls = list(redis.smembers("lpushed_shop_url"))
    print("urls number is", len(self.urls))
    redis.close()
    # MySQL connection settings used later by the worker.
    self.host = settings.DB_HOST
    self.user = settings.DB_USER
    self.password = settings.DB_PASSWORD
    self.database = settings.DB_DATABASE
    # Running counter shared across the thread; guarded by self.lock.
    self.sum = 0
    threading.Thread.__init__(self)
    self.lock = threading.Lock()
def transactions_count_per_minute():
    """Return a JSON response with the values of every key in Redis db 0.

    Returns a Flask response (status 200) on success; on failure the error
    is printed and None is implicitly returned (original behavior kept).
    """
    # BUG FIX: if StrictRedis() itself raised, the original's ``finally``
    # referenced an unbound ``redis_db`` and raised NameError.
    redis_db = None
    try:
        redis_db = StrictRedis(host="localhost", port=6379, db=0,
                               decode_responses=True)
        keys = redis_db.keys()
        # NOTE(review): MGET with an empty key list is rejected by Redis —
        # confirm the db is never empty, or guard ``keys`` before mget.
        values = redis_db.mget(keys)
        print(values)
        resp = jsonify(values)
        resp.status_code = 200
        return resp
    except Exception as e:  # was a bare except; keep best-effort semantics
        print(e)
    finally:
        if redis_db is not None:
            redis_db.close()
def load_area_spu_to_redis(self):
    """Preload, per residential area, the sales-ranked SPU list into Redis.

    Reads area/SPU pairs from MySQL (ordered by monthly sales) and replaces
    each area's ``{area_code}_A`` Redis list wholesale via one pipeline.
    """
    self._logger.info('开始加载小区数据到redis...')
    redis_conf = config_dict['redis']
    redis_conn = StrictRedis(**redis_conf)
    pl = redis_conn.pipeline()
    flag_key = 'area_code_refresh_flag'
    # (disabled) skip preload when area data already exists:
    # if redis_conn.exists(flag_key):
    #     self._logger.info('已有小区数据无需预加载')
    #     return
    # Mark a refresh as in progress for one hour.
    redis_conn.setex(flag_key, 3600, 1)
    start_time = time.time()
    sqyn_mysql_conf = config_dict['mysql_sqyn']
    sqyn_conn = connect(**sqyn_mysql_conf)
    with sqyn_conn.cursor() as cursor:
        # All (area, spu) pairs for on-sale goods, ranked by monthly sales;
        # the REGEXP filters out "link"/"price-difference" placeholder items.
        select_sql = '''
            SELECT scope.area_code, scope.spu_code
            FROM cb_goods_spu_for_filter AS filter
            INNER JOIN cb_goods_scope AS scope
            ON filter.spu_code = scope.spu_code
            WHERE filter.goods_status = 1
            AND filter.store_status = 1
            AND filter.spu_name NOT REGEXP "链接|差价"
            ORDER BY filter.sale_month_count DESC
        '''
        cursor.execute(select_sql)
        # Group SPUs per area, preserving the sales-rank order.
        area_spu_dict = {}
        for area, spu in cursor.fetchall():
            area_key = area + '_A'
            spu_li = area_spu_dict.setdefault(area_key, [])
            spu_li.append(spu)
        for area_key, spu_li in area_spu_dict.items():
            self._logger.info(f'小区{area_key}下有{len(spu_li)}个商品')
            # Replace each area wholesale: delete then rpush.
            pl.delete(area_key)
            pl.rpush(area_key, *spu_li)
        pl.execute()
    self._logger.info('加载小区商品到redis完成,耗时:' + str(time.time() - start_time))
    pl.close()
    sqyn_conn.close()
    redis_conn.close()
def get_rss_link_db_key():
    """
    Return all rss labels (hash field names) stored under RSSSubscribeLink.
    :return: [rss_name...] on success, [] on any error
    """
    global redis
    redis = None
    try:
        redis = StrictRedis(host=host, port=port, db=db, password=password)
        return [
            bytes2string(item)
            for item in redis.hkeys(string2bytes(rss_link_db))
        ]
    except Exception:  # was a bare except; keep best-effort semantics
        print("get_rss_link_db_key throw exception, please check it.")
        return []
    finally:
        if redis:
            redis.close()
def get_rss_link(key):
    """
    Fetch and parse the rss_link object stored under RSSSubscribeLink.
    :param key: the rss author (hash field name)
    :return: parsed dict (e.g. {"author": ..., "url": ...}), or {} on error
    """
    global redis
    redis = None
    try:
        redis = StrictRedis(host=host, port=port, db=db, password=password)
        obj = bytes2string(redis.hget(rss_link_db, string2bytes(key)))
        return json.loads(obj)
    except Exception:  # was a bare except; keep best-effort semantics
        print("get_rss_link_db_key throw exception, please check it.")
        return {}
    finally:
        # BUG FIX: the original never closed the connection on success.
        if redis:
            redis.close()
def set_rss_content(author, content):
    """
    Store the parsed subscription object ``content`` under key ``author``.
    :param author: hash field name
    :param content: JSON-serializable subscription content
    :return: True on success, False on any error
    """
    global redis
    redis = None
    try:
        redis = StrictRedis(host=host, port=port, db=db, password=password)
        redis.hset(rss_content_db, author, json.dumps(content))
        return True
    except Exception:  # was a bare except; keep best-effort semantics
        print("set_rss_content %s %s throw exception, please check it." % (author, content))
        return False
    finally:
        if redis:
            redis.close()
def get_rss_content(author):
    """
    Fetch the stored subscription content for ``author``.
    :param author: hash field name
    :return: parsed content dict, or {} on any error
    """
    global redis
    redis = None
    try:
        redis = StrictRedis(host=host, port=port, db=db, password=password)
        # Renamed local (was ``str``, shadowing the builtin).
        raw = bytes2string(redis.hget(rss_content_db, string2bytes(author)))
        return json.loads(raw)
    except Exception:  # was a bare except; keep best-effort semantics
        print("get_rss_link_db_key throw exception, please check it.")
        return {}
    finally:
        # BUG FIX: the original never closed the connection on success.
        if redis:
            redis.close()
def insert_rss_link(name, url):
    """
    Insert an rss label and its link into RSSSubscribeLink.
    :param name: rss label (stored as the hash field and in the payload)
    :param url: rss link
    :return: True / False
    """
    global redis
    redis = None
    try:
        redis = StrictRedis(host=host, port=port, db=db, password=password)
        rss_link = {"author": name, "url": url}
        redis.hset(rss_link_db, rss_link["author"], json.dumps(rss_link))
        return True
    except Exception:  # was a bare except; keep best-effort semantics
        print("insert_rss_link %s %s throw exception, please check it." % (name, url))
        return False
    finally:
        if redis:
            redis.close()
def on_message(self, basic_deliver, body):
    """Handle an 'updateShopArea' MQ message: for each affected area, query
    the shop service for the visible shop list, then mirror it into MySQL
    (cb_area_shop_list) and into the ``{area_code}_shop`` Redis list.
    The message is acknowledged in all paths."""
    self.main_consumer_obj.logger.info(body)
    # Only handle updateShopArea messages; ack and return for anything else.
    if body['funcName'] != 'updateShopArea':
        return self.main_consumer_obj.acknowledge_message(basic_deliver.delivery_tag)
    redis_conf = config_dict['redis']
    redis_conf['db'] = int(redis_conf['db'])
    redis_conn = StrictRedis(**redis_conf)
    pl = redis_conn.pipeline()
    # Upsert: the area may not yet exist in mysql, so use INSERT ... ON
    # DUPLICATE KEY UPDATE rather than a plain UPDATE.
    insert_sql = '''
        INSERT INTO cb_area_shop_list(
            area_code,
            shop_code_list
        ) VALUES(%s, %s)
        ON DUPLICATE KEY UPDATE shop_code_list = VALUES(shop_code_list)
    '''
    insert_param_li = []
    for shop_code, area_code_li in body['data'].items():
        for area_code in area_code_li:
            url = (
                config_dict['spring_cloud']['url'] +
                '/xwj-commerce-shop/shopManage/getShopListByArea?areaCode={}&flag=4'
            )  # production URL
            # https: // testxwj.juhaolian.cn/xwj-commerce-shop/shopManage/getShopListByArea?areaCode={}
            # url = 'http://10.18.222.105:6200/shopManage/getShopListByArea?areaCode={}'  # integration testing
            res = requests.get(url.format(area_code))
            data = json.loads(res.text)['data']
            shop_code_list = [shop_info['shopCode'] for shop_info in data]
            shop_count = len(shop_code_list)
            self.main_consumer_obj.logger.info(
                '给小区{}更新,获取到{}家商铺,商铺列表:{}'.format(area_code, shop_count, shop_code_list))
            # Whatever the command, re-query the service and upsert mysql.
            insert_param_li.append((area_code, ','.join(shop_code_list)))
            # Redis side of the update.
            area_key = area_code + '_shop'
            # TODO: move these two branches out of the loop for efficiency
            # cmd == 0: remove this shop from the area's list
            if int(body['cmd']) == 0:
                pl.lrem(area_key, 0, shop_code)
            # otherwise: rebuild the area's list wholesale
            else:
                pl.delete(area_key)
                if shop_count:
                    pl.rpush(area_key, *shop_code_list)
    sqyn_conn = self.get_mysql_conn('mysql_sqyn')
    with sqyn_conn.cursor() as cur:
        try:
            cur.executemany(insert_sql, insert_param_li)
            sqyn_conn.commit()
        except:
            self.main_consumer_obj.logger.exception(
                '商铺可见范围mysql变更失败, sendTime: {}'.format(body['sendTime']))
            sqyn_conn.rollback()
        else:
            self.main_consumer_obj.logger.info(
                '商铺可见范围mysql变更成功, sendTime: {}'.format(body['sendTime']))
        finally:
            sqyn_conn.close()
    try:
        pl.execute()
    except:
        self.main_consumer_obj.logger.exception(
            '商铺可见范围redis变更失败, sendTime: {}'.format(body['sendTime']))
    else:
        self.main_consumer_obj.logger.info(
            '商铺可见范围redis变更成功, sendTime: {}'.format(body['sendTime']))
    finally:
        pl.close()
        redis_conn.close()
    self.main_consumer_obj.acknowledge_message(basic_deliver.delivery_tag)
class RedisCache(Cache):
    """Cache implementation backed by Redis.

    Values are pickled and stored under ``{key_prefix}_{key}`` with a TTL.
    The Redis client is created lazily on first use.
    """

    def __init__(self, maxsize, client=None, host=None, port=None, password=None,
                 db=None, ttl=15 * 60, clear_on_exit=False, key_prefix='RedisCache'):
        Cache.__init__(self, maxsize, None)
        self.client_ = client  # lazily created by the ``client`` property
        self.host = host or redis_config.HOST
        self.port = port or redis_config.PORT
        self.password = password or redis_config.PASSWORD
        self.db = db or redis_config.DB
        self.ttl = ttl
        self.key_prefix = key_prefix
        if clear_on_exit:
            # BUG FIX: the original did atexit.register(self.clear()),
            # which invoked clear() immediately and registered its None
            # return value (TypeError at interpreter exit). Register the
            # bound method instead.
            atexit.register(self.clear)

    @property
    def client(self):
        """Lazily-connected StrictRedis client."""
        if self.client_ is None:
            self.client_ = StrictRedis(host=self.host, port=self.port,
                                       password=self.password, db=self.db)
        return self.client_

    def close(self):
        """Best-effort close of the underlying connection; resets the client."""
        try:
            self.client_.close()
        except Exception:  # narrowed from bare except; still best-effort
            pass
        self.client_ = None

    def __setitem__(self, key, value):
        # Best-effort write; cache set failures are deliberately swallowed.
        try:
            self.set(f'{self.key_prefix}_{key}', value)
        except Exception:
            pass

    def set(self, key, value):
        """Pickle ``value`` and store it under ``key`` with the configured TTL."""
        ttl = self.ttl
        value = pickle.dumps(value)
        self.client.set(key, value, ex=ttl)

    def __getitem__(self, key):
        """Return the unpickled value for ``key``; raise KeyError on a miss.

        (The original's ``except Exception as e: raise e`` wrapper was a
        no-op and has been removed.)
        """
        if not self.client.exists(f'{self.key_prefix}_{key}'):
            raise KeyError()
        result = self.client.get(f'{self.key_prefix}_{key}')
        return pickle.loads(result)

    def delete_keys(self, items):
        """Delete ``items`` in a single pipeline round trip."""
        pipeline = self.client.pipeline()
        for item in items:
            pipeline.delete(item)
        pipeline.execute()

    def __delitem__(self, key):
        # Best-effort delete; failures are deliberately swallowed.
        try:
            self.delete_keys([key])
        except Exception:
            pass

    def clear_all_cache(self):
        """Delete every key carrying this cache's prefix, in batches of 100."""
        match = '{}*'.format(self.key_prefix)
        keys = []
        for key in self.client.scan_iter(match, count=100):
            keys.append(key)
            if len(keys) >= 100:
                self.delete_keys(keys)
                keys = []
                time.sleep(0.01)  # brief pause between batches
        if len(keys) > 0:
            self.delete_keys(keys)

    def clear(self):
        """Defer the actual flush until interpreter exit."""
        atexit.register(self.clear_all_cache)
# coding=utf-8
# Demo: create a Redis connection through a connection pool built from a URL.
from redis import ConnectionPool, StrictRedis, Redis

if __name__ == '__main__':
    # rc = Redis(host='127.0.0.1', port=6379)               # direct connection
    # pool = ConnectionPool(host='127.0.0.1', port=6379)    # connection pool
    # rc = StrictRedis(connection_pool=pool)
    pool = ConnectionPool.from_url('redis://127.0.0.1:6379')  # from URL
    rc = StrictRedis(connection_pool=pool)
    rc.flushdb()
    rc.set('hao', 123)
    # BUG FIX: ``print rc.get('hao')`` was Python 2 print-statement syntax
    # (a SyntaxError on Python 3); use the print() function.
    print(rc.get('hao'))
    rc.flushdb()
    rc.close()
def run(self):
    """Full sync job: for every known area, fetch its visible shop list
    from the shop service, then mirror it into MySQL (cb_area_shop_list)
    and into a Redis list named ``{area_code}_shop``."""
    redis_conf = self.__conf['redis']
    redis_conf['db'] = int(redis_conf['db'])
    redis_conn = StrictRedis(**redis_conf)
    # (disabled) store a flag before importing shop data into redis to mark
    # initialization in progress and prevent duplicate runs:
    # loading_shop_flag_key = 'loading_shop_flag'
    # redis_conn.set(loading_shop_flag_key, 1)
    pl = redis_conn.pipeline()
    self.__logger.info('开始同步小区下商铺数据')
    sqyn_sql_conn = self.__conf['mysql_sqyn']
    mysql_conn = connect(**sqyn_sql_conn)
    # Select every distinct area code.
    select_sql = '''
        SELECT DISTINCT area_code FROM cb_goods_scope
    '''
    with mysql_conn.cursor() as cur:
        cur.execute(select_sql)
        area_list = [row_tuple[0] for row_tuple in cur.fetchall()]
    # Query the shop service per areaCode for the visible shopCodes.
    url = self.__conf['spring_cloud'][
        'url'] + '/xwj-commerce-shop/shopManage/getShopListByArea?areaCode={}&flag=4'  # production
    # url = 'http://10.18.222.105:6200/shopManage/getShopListByArea?areaCode={}&flag=4'  # integration testing
    # Upsert: areas may not exist yet, so INSERT ... ON DUPLICATE KEY UPDATE.
    insert_sql = '''
        INSERT INTO cb_area_shop_list(
            area_code,
            shop_code_list
        ) VALUES(%s, %s)
        ON DUPLICATE KEY UPDATE shop_code_list = VALUES(shop_code_list)
    '''
    insert_param_li = []
    for area_code in area_list:
        res = requests.get(url.format(area_code))
        data = json.loads(res.text)['data']
        shop_code_list = [shop_info['shopCode'] for shop_info in data]
        shop_count = len(shop_code_list)
        self.__logger.info('小区{}获取到{}家商铺'.format(area_code, shop_count))
        insert_param_li.append((area_code, ','.join(shop_code_list)))
        # Replace the area's redis list wholesale: delete then rpush.
        pl.delete(area_code + '_shop')
        if shop_count:
            pl.rpush(area_code + '_shop', *shop_code_list)
    # (disabled) remove the in-progress flag key once the import finishes:
    # pl.delete(loading_shop_flag_key)
    with mysql_conn.cursor() as cur:
        try:
            cur.executemany(insert_sql, insert_param_li)
            mysql_conn.commit()
        except:
            self.__logger.exception('小区商铺数据同步到mysql异常')
            mysql_conn.rollback()
        else:
            self.__logger.info('小区商铺数据同步到mysql成功')
        finally:
            mysql_conn.close()
    try:
        pl.execute()
    except:
        self.__logger.exception('小区商铺数据同步到redis异常')
    else:
        self.__logger.info('小区商铺数据同步到redis成功')
    finally:
        pl.close()
        redis_conn.close()
class PFUI_Firewall(Service):
    """ Main PFUI Firewall Service Class. """

    def __init__(self, *args, **kwargs):
        """ Load Yaml configuration and Init logger """
        super(PFUI_Firewall, self).__init__(*args, **kwargs)
        self.threads = []  # background ScanSync threads
        self.soc = None    # listening socket (UDP or TCP)
        self.db = None     # StrictRedis handle
        # Load YAML Configuration: optional keys get defaults; the four
        # PF table/file keys are mandatory and abort the service if absent.
        try:
            self.cfg = safe_load(open(CONFIG_LOCATION))
            if "LOGGING" not in self.cfg:
                self.cfg['LOGGING'] = True
            if "LOG_LEVEL" not in self.cfg:
                self.cfg['LOG_LEVEL'] = "DEBUG"
            if "SOCKET_LISTEN" not in self.cfg:
                self.cfg['SOCKET_LISTEN'] = "0.0.0.0"
            if "SOCKET_PORT" not in self.cfg:
                self.cfg['SOCKET_PORT'] = 10001
            if "SOCKET_TIMEOUT" not in self.cfg:
                self.cfg['SOCKET_TIMEOUT'] = 2
            if "SOCKET_BUFFER" not in self.cfg:
                self.cfg['SOCKET_BUFFER'] = 1024
            if "SOCKET_BACKLOG" not in self.cfg:
                self.cfg['SOCKET_BACKLOG'] = 5
            if "REDIS_HOST" not in self.cfg:
                self.cfg['REDIS_HOST'] = "127.0.0.1"
            if "REDIS_PORT" not in self.cfg:
                self.cfg['REDIS_PORT'] = 6379
            if "REDIS_DB" not in self.cfg:
                self.cfg['REDIS_DB'] = 1024
            if "SCAN_PERIOD" not in self.cfg:
                self.cfg['SCAN_PERIOD'] = 60
            if "TTL_MULTIPLIER" not in self.cfg:
                self.cfg['TTL_MULTIPLIER'] = 1
            if "CTL" not in self.cfg:
                self.cfg['CTL'] = "IOCTL"
            if "DEVPF" not in self.cfg:
                self.cfg['DEVPF'] = "/dev/pf"
            # NOTE(review): SOCKET_PROTO gets no default here, yet run()
            # branches on it — confirm the config always supplies it.
            if "AF4_TABLE" not in self.cfg:
                print("AF4_TABLE not found in YAML Config File. Exiting.")
                sys.exit(2)
            if "AF4_FILE" not in self.cfg:
                print("AF4_FILE not found in YAML Config File. Exiting.")
                sys.exit(2)
            if "AF6_TABLE" not in self.cfg:
                print("AF6_TABLE not found in YAML Config File. Exiting.")
                sys.exit(2)
            if "AF6_FILE" not in self.cfg:
                print("AF6_FILE not found in YAML Config File. Exiting.")
                sys.exit(2)
        except Exception as e:
            print("YAML Config File not found or cannot load. {}".format(e))
            sys.exit(2)
        # Init Logging (syslog, daemon facility)
        self.logger.addHandler(
            SysLogHandler(address=find_syslog(), facility=SysLogHandler.LOG_DAEMON))
        if self.cfg['LOG_LEVEL'] == 'DEBUG' or self.cfg['LOG_LEVEL'] == 'INFO':
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.ERROR)

    def run(self):
        """ Connect to Redis, start sync threads, and watch socket
        (spawn Receiver each session) (PFUI_Unbound)). """
        # Connect to Redis DB
        try:
            redisdb = (str(self.cfg['REDIS_HOST']), int(self.cfg['REDIS_PORT']),
                       int(self.cfg['REDIS_DB']))
            self.db = StrictRedis(*redisdb)
        except Exception as e:
            self.logger.error("PFUIFW: Failed to connect to Redis DB. {}".format(e))
            sys.exit(3)
        # Start background scan and sync threads (one per address family)
        try:
            af4_thread = ScanSync(logger=self.logger, cfg=self.cfg, db=self.db,
                                  af=socket.AF_INET,
                                  table=self.cfg['AF4_TABLE'],
                                  file=self.cfg['AF4_FILE'])
            af4_thread.start()
            af6_thread = ScanSync(logger=self.logger, cfg=self.cfg, db=self.db,
                                  af=socket.AF_INET6,
                                  table=self.cfg['AF6_TABLE'],
                                  file=self.cfg['AF6_FILE'])
            af6_thread.start()
            self.threads.append(af4_thread)
            self.threads.append(af6_thread)
        except Exception as e:
            self.logger.error("PFUIFW: Scanning thread failed. {}".format(e))
            sys.exit(4)
        self.logger.info("PFUIFW: [+] PFUI_Firewall Service Started.")
        # Listen for connections
        if self.cfg['SOCKET_PROTO'] == "UDP":
            self.soc = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)  # UDP Datagram Socket
            self.soc.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)
            self.soc.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 36)  # 'ACK' = 36bytes
            self.soc.settimeout(self.cfg['SOCKET_TIMEOUT'])  # accept() & recv() blocking timeouts
            self.soc.bind((self.cfg['SOCKET_LISTEN'], self.cfg['SOCKET_PORT']))
            while not self.got_sigterm():  # Watch Socket until Signal
                try:
                    dgram, (ip, port) = self.soc.recvfrom(1400)
                    try:
                        # One receiver thread per datagram.
                        Thread(target=self.receiver_thread,
                               kwargs={
                                   "proto": "UDP",
                                   "dgram": dgram,
                                   "ip": ip,
                                   "port": port
                               }).start()
                    except Exception as e:
                        self.logger.error(
                            "PFUIFW: Error starting receiver thread: {}".format(e))
                except socket.timeout:
                    continue
                except Exception as e:
                    self.logger.error("PFUIFW: UDP socket exception {}".format(e))
                    continue
        elif self.cfg['SOCKET_PROTO'] == "TCP":
            self.soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # TCP Stream Socket
            self.soc.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, True)  # Disable Nagle
            self.soc.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, True)  # Fast Listen Socket reuse
            self.soc.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 0)  # Zero-size send Buffer (Send immediately)
            self.soc.settimeout(self.cfg['SOCKET_TIMEOUT'])  # accept() & recv() blocking timeouts
            self.soc.bind((self.cfg['SOCKET_LISTEN'], self.cfg['SOCKET_PORT']))
            self.soc.listen(self.cfg['SOCKET_BACKLOG'])
            while not self.got_sigterm():  # Watch Socket until Signal
                try:
                    conn, (ip, port) = self.soc.accept()
                    try:
                        # One receiver thread per accepted connection.
                        Thread(target=self.receiver_thread,
                               kwargs={
                                   "proto": "TCP",
                                   "conn": conn,
                                   "ip": ip,
                                   "port": port
                               }).start()
                    except Exception as e:
                        self.logger.error(
                            "PFUIFW: Error starting receiver thread: {}".format(e))
                except socket.timeout:
                    continue
        # Shut down
        for t in self.threads:
            t.join()
        self.db.close()
        self.logger.info("PFUIFW: [-] PFUI_Firewall Service Stopped.")

    def receiver_thread(self, proto, conn=None, dgram=None, ip=None, port=None):
        """ Receive all data, update PF Table, and update Redis DB entry.
        Data Structure: {'AF4': [{"ip": ipv4_addr, "ttl": ip_ttl }],
                         'AF6': [{"ip": ipv6_addr, "ttl": ip_ttl }]}
        For performance, we want entire message sent in a single segment,
        and a small socket buffer (packet size). Ensure SOCKET_BUFFER is
        small, but large enough for maximum expected record size. """

        def disconnect(proto, soc, conn):
            # ACK the DNS client and tear down the per-session transport.
            if proto == "UDP":
                try:
                    soc.sendto(b"ACK", (ip, port))
                except:
                    pass  # PFUI_Unbound may not be waiting (non-blocking)
            elif proto == "TCP":
                try:
                    conn.sendall(b"ACK")
                except:
                    pass  # PFUI_Unbound may have already disconnected (non-blocking)
                conn.close()

        if self.cfg['LOGGING']:
            stime = time()
        if proto == "UDP":
            try:
                data = loads(dgram)
            except Exception as e:
                self.logger.error(
                    "PFUIFW: Failed to decode datagram {}:{} {} {}".format(
                        ip, port, dgram, e))
                disconnect(proto, self.soc, conn)
                return
        elif proto == "TCP":
            chunks, stream = [], b""
            while True:  # Receive all TCP stream chunks and build data
                try:
                    payload = conn.recv(int(self.cfg['SOCKET_BUFFER']))
                    if payload:
                        chunks.append(payload)
                        stream = b''.join(chunks)
                        if stream[-3:] == b"EOT":  # End of Transmission
                            try:
                                data = loads(stream[:-3])
                                break
                            except Exception as e:
                                self.logger.error(
                                    "PFUIFW: Failed to decode stream {}:{} {} {}"
                                    .format(ip, port, stream, e))
                                disconnect(proto, self.soc, conn)
                                return
                    else:
                        self.logger.error("PFUIFW: None payload {}:{}".format(ip, port))
                        disconnect(proto, self.soc, conn)
                        return
                except socket.timeout:
                    self.logger.error(
                        "PFUIFW: Socket recv timeout {}:{}".format(ip, port))
                    # NOTE(review): this break can leave ``data`` unbound if
                    # no complete message arrived before the timeout, making
                    # the isinstance() below raise NameError — confirm.
                    break
        if isinstance(data, str):
            # Some senders double-encode; fall back to literal_eval.
            try:
                data = ast.literal_eval(data)
            except Exception as e:
                self.logger.error("PFUIFW: Failed to parse {} {} {}".format(
                    type(data), data, e))
                disconnect(proto, self.soc, conn)
                return
        if self.cfg['LOGGING']:
            ntime = time()
            self.logger.info("PFUIFW: Received {} from {}:{} ({})".format(
                data, ip, port, proto))
        # Guard Statements
        if isinstance(data, dict):
            try:
                af4_data = [(rr['ip'], rr['ttl']) for rr in data['AF4']
                            if is_ipv4(rr['ip']) and rr['ttl']]
                af6_data = [(rr['ip'].lower(), rr['ttl']) for rr in data['AF6']
                            if is_ipv6(rr['ip']) and rr['ttl']]
            except Exception as e:
                self.logger.error(
                    "PFUIFW: Cannot extract meta from data {} {} {}".format(
                        type(data), data, e))
                disconnect(proto, self.soc, conn)
                return
        else:
            self.logger.error("PFUIFW: Invalid datatype received {} {}".format(
                type(data), data))
            disconnect(proto, self.soc, conn)
            return
        if self.cfg['LOGGING']:
            vtime = time()
        # Update PF Tables
        if af4_data:
            table_push(logger=self.logger, log=self.cfg['LOGGING'], cfg=self.cfg,
                       af=socket.AF_INET, table=self.cfg['AF4_TABLE'],
                       ip_list=[ip for ip, _ in af4_data])
        if af6_data:
            table_push(logger=self.logger, log=self.cfg['LOGGING'], cfg=self.cfg,
                       af=socket.AF_INET6, table=self.cfg['AF6_TABLE'],
                       ip_list=[ip for ip, _ in af6_data])
        if self.cfg['LOGGING']:
            ttime = time()
        # Unblock DNS Client (ACK before the slower Redis/file writes)
        disconnect(proto, self.soc, conn)
        if self.cfg['LOGGING']:
            n1time = time()
        # Update Redis DB
        if af4_data:  # Always update Redis DB
            db_push(logger=self.logger, log=self.cfg['LOGGING'], db=self.db,
                    table=self.cfg['AF4_TABLE'], data=af4_data)
        if af6_data:
            db_push(logger=self.logger, log=self.cfg['LOGGING'], db=self.db,
                    table=self.cfg['AF6_TABLE'], data=af6_data)
        if self.cfg['LOGGING']:
            rtime = time()
        # Update PF Table Persist Files
        if af4_data:  # Update if new records
            file_push(logger=self.logger, log=self.cfg['LOGGING'],
                      file=self.cfg['AF4_FILE'],
                      ip_list=[ip for ip, _ in af4_data])
        if af6_data:
            file_push(logger=self.logger, log=self.cfg['LOGGING'],
                      file=self.cfg['AF6_FILE'],
                      ip_list=[ip for ip, _ in af6_data])
        # Print statistics
        if self.cfg['LOGGING']:
            etime = time()
            tntime = (ntime - stime) * (10**6)    # Network Receive Time
            tvtime = (vtime - ntime) * (10**6)    # Data Valid Time
            tptime = (ttime - vtime) * (10**6)    # PF Table Write Time
            tn1time = (n1time - ttime) * (10**6)  # Network ACK Time
            trtime = (rtime - n1time) * (10**6)   # Redis Write Time
            tftime = (etime - rtime) * (10**6)    # File Write Time
            ttime = (etime - stime) * (10**6)     # Total Time
            self.logger.info(
                "PFUIFW: Network Latency {0:.2f} microsecs".format(tntime))
            self.logger.info(
                "PFUIFW: Data Valid Latency {0:.2f} microsecs".format(tvtime))
            self.logger.info(
                "PFUIFW: PF Table Latency {0:.2f} microsecs".format(tptime))
            self.logger.info(
                "PFUIFW: ACK Latency {0:.2f} microsecs".format(tn1time))
            self.logger.info(
                "PFUIFW: Redis Latency {0:.2f} microsecs".format(trtime))
            self.logger.info(
                "PFUIFW: File Latency {0:.2f} microsecs".format(tftime))
            self.logger.info(
                "PFUIFW: Total Latency {0:.2f} microsecs".format(ttime))
class RankPipeline:
    """Scrapy pipeline that writes rank items to MongoDB and holds a Redis
    client for (currently disabled) danmaku-crawler seeding."""

    def __init__(self, db_host, db_post, db_name, db_doc, redis_host, redis_port):
        self.db_host = db_host
        # NOTE(review): "post" is presumably "port" (mirrors MONGODB_POST
        # in settings) — kept as-is.
        self.db_post = db_post
        self.db_name = db_name
        self.db_doc = db_doc
        self.redis_host = redis_host
        self.redis_port = redis_port

    @classmethod
    def from_crawler(cls, crawler):
        # Standard Scrapy factory: pull connection settings from the crawler.
        return cls(crawler.settings.get('MONGODB_HOST'),
                   crawler.settings.get('MONGODB_POST'),
                   crawler.settings.get('MONGODB_DBNAME'),
                   crawler.settings.get('MONGODB_DOCNAME'),
                   crawler.settings.get('REDIS_HOST'),
                   crawler.settings.get('REDIS_PORT'))

    def open_spider(self, spider):
        # Open the Mongo and Redis connections when the spider starts.
        self.db_client = MongoClient(host=self.db_host, port=self.db_post)
        self.db = self.db_client[self.db_name]
        self.b_post = self.db[self.db_doc]
        self.redis_client = StrictRedis(host=self.redis_host, port=self.redis_port)

    def close_spider(self, spider):
        self.db_client.close()
        self.redis_client.close()

    def process_item(self, item, spider):
        """
        Handle an item from the spider. A BilibiliItem or ListItem is
        inserted into MongoDB (the disabled code below would additionally
        build danmaku URLs and push them to Redis for the danmaku spider);
        a PageItem / EpItem is $push-ed into its parent document's
        pages / ep_list array.
        :param item: item instance from the spider
        :param spider: spider
        :return: item
        NOTE(review): the docstring promises ``item`` is returned, but no
        return statement is visible — confirm downstream pipelines.
        """
        if isinstance(item, (BilibiliItem, ListItem)):
            # logger.info(item)
            self.b_post.insert_one(ItemAdapter(item).asdict())
            # After inserting, build a danmaku URL and write it to redis so
            # the danmaku spider can start:
            # if isinstance(item, BilibiliItem):
            #     # A regular uploader's video: part info lives in item['pages']
            #     pages = item['pages']
            # else:
            #     # A site-uploaded video: part info lives in item['ep_list']
            #     pages = item['ep_list']
            # for page in pages:
            #     # Build each part's danmu_url and push it onto the redis list
            #     danmu_url = spider.settings['DANMU_URL'].format(oid=page['cid'], pid=page['aid'])
            #     self.redis_client.rpush(spider.settings.get('REDIS_DANMAKU_KEY'), danmu_url)
        elif isinstance(item, PageItem):
            # logger.info(item)
            rank_date = item.pop('rank_date')
            rank_type = item.pop('rank_type')
            self.b_post.update_one(
                {
                    'aid': item['aid'],
                    'rank_date': rank_date,
                    'rank_type': rank_type
                }, {'$push': {
                    'pages': ItemAdapter(item).asdict()
                }})
        elif isinstance(item, EpItem):
            rank_date = item.pop('rank_date')
            rank_type = item.pop('rank_type')
            self.b_post.update_one(
                {
                    'season_id': item['season_id'],
                    'rank_date': rank_date,
                    'rank_type': rank_type
                }, {'$push': {
                    'ep_list': ItemAdapter(item).asdict()
                }})

    def _insert(self, item, out):
        # Twisted-thread variant of process_item; fires ``out`` when done.
        # NOTE(review): uses deprecated pymongo ``insert`` (vs insert_one)
        # and update_many (vs update_one above) — confirm intent.
        if isinstance(item, (BilibiliItem, ListItem)):
            self.b_post.insert(ItemAdapter(item).asdict())
            # (disabled danmaku seeding — see process_item for the same block)
            # if isinstance(item, BilibiliItem):
            #     pages = item['pages']
            # else:
            #     pages = item['ep_list']
            # for page in pages:
            #     danmu_url = spider.settings['DANMU_URL'].format(oid=page['cid'], pid=page['aid'])
            #     self.redis_client.rpush(spider.settings.get('REDIS_DANMAKU_KEY'), danmu_url)
        elif isinstance(item, PageItem):
            # NOTE(review): '_insert ' + item raises TypeError unless item
            # coerces to str; also pops via ItemAdapter here but via item
            # directly elsewhere — confirm both.
            logger.info('_insert ' + item)
            rank_date = ItemAdapter(item).pop('rank_date')
            rank_type = item.pop('rank_type')
            self.b_post.update_many(
                {
                    'aid': item['aid'],
                    'rank_date': rank_date,
                    'rank_type': rank_type
                }, {'$push': {
                    'pages': ItemAdapter(item).asdict()
                }})
        elif isinstance(item, EpItem):
            rank_date = item.pop('rank_date')
            rank_type = item.pop('rank_type')
            self.b_post.update_many(
                {
                    'season_id': item['season_id'],
                    'rank_date': rank_date,
                    'rank_type': rank_type
                }, {'$push': {
                    'ep_list': ItemAdapter(item).asdict()
                }})
        reactor.callFromThread(out.callback, item)
# 使用默认方式连接到数据库 redis = StrictRedis(host='localhost', port=6379, db=0) with open("kind_city.json", mode="r", encoding="utf-8") as f: datas = json.loads(f.read()) url1 = "https://s.1688.com/company/company_search.htm?" for data in datas: province = data.get("province") city = data.get("city") keywords = data.get("kind") params = { "province": province.encode('GBK'), "city": city.encode('GBK'), "keywords": keywords.encode('GBK'), "n": "y", "filt": "y", } params = parse.urlencode(params) url = f"https://s.1688.com/company/company_search.htm?{params}" print(url) # 插入数据 redis.lpush("alibaba:start_urls", url) # break print(len(datas)) # 关闭连接 redis.close()
class RedisAPI:
    """ API base on redis.

    ``baseConfig`` supplies connection settings plus naming constants and
    counters (DeviceKeyName, TaskChannelHost, FailChannelHost,
    SaveBuiltinHost, DeviceNumb, TaskChannelMax, Device* status values,
    MonitorWait). Device states live in one hash; tasks live in per-channel
    lists.
    """

    def __init__(self, baseConfig):
        self.base = baseConfig
        # decode_responses=True: keys/values come back as str, not bytes.
        self.redis = StrictRedis(
            connection_pool=ConnectionPool(host=self.base.host,
                                           port=self.base.port,
                                           db=self.base.db,
                                           password=self.base.password,
                                           decode_responses=True))

    # Delete temporary keys and close the redis connection.
    def close(self):
        """ delete keys and close redis server.

        Keeps task/fail/builtin channel keys; deletes everything else.
        :return:
        """
        for item in self.redis.keys():
            for i in [
                    self.base.TaskChannelHost, self.base.FailChannelHost,
                    self.base.SaveBuiltinHost
            ]:
                if i in item:
                    break  # protected key — keep it
            else:
                self.redis.delete(item)
        else:
            # for/else on the outer loop: runs once the scan completes.
            self.redis.close()
        return 'success close redis.'

    # Initialization: create the DeviceTable key with every device set
    # to 0 (paused).
    def initial_device(self):
        """ Initial all device status. :return: """
        for item in range(1, self.base.DeviceNumb + 1):
            self.redis.hset(name=self.base.DeviceKeyName,
                            key=f'device{item}',
                            value=0)
        else:
            return 'success initial all device.'

    # Probe the state of every work device.
    def detect_all(self, *args, **kwargs):
        """ detect all device :return: state of all device """
        # Write the probe value, give workers MonitorWait to respond, then
        # read the table back.
        for item in self.redis.hgetall(name=self.base.DeviceKeyName):
            self.redis.hset(name=self.base.DeviceKeyName,
                            key=item,
                            value=self.base.DeviceTest)
        else:
            sleep(self.base.MonitorWait)
        return self.redis.hgetall(name=self.base.DeviceKeyName)

    # Probe the state of a single work device.
    def detect_one(self, device, *args, **kwargs):
        """ detect one device, return one device state
        :param device: node id
        :return: state of the device
        """
        if f'device{device}' not in self.redis.hgetall(
                name=self.base.DeviceKeyName):
            return f'error! not exist device{device}'
        self.redis.hset(name=self.base.DeviceKeyName,
                        key=f'device{device}',
                        value=self.base.DeviceTest)
        sleep(self.base.MonitorWait)
        return {
            f'device{device}':
            self.redis.hget(self.base.DeviceKeyName, f'device{device}')
        }

    # Add one work device.
    def add_device(self, *args, **kwargs):
        """ create a work device :return: """
        self.base.DeviceNumb += 1
        self.redis.hset(name=self.base.DeviceKeyName,
                        key=f'device{self.base.DeviceNumb}',
                        value=self.base.DevicePause)
        return 'success add a new device.'

    # Remove one work device.
    def del_device(self, *args, **kwargs):
        """ delete a work device :return: """
        if self.redis.hlen(name=self.base.DeviceKeyName) <= 0:
            return 'DeviceTable have no device.'
        # NOTE(review): hget only READS the field — the hash entry is never
        # removed (hdel would be needed); only the local counter is
        # decremented. Confirm whether this is intentional.
        self.redis.hget(name=self.base.DeviceKeyName,
                        key=f'device{self.base.DeviceNumb}')
        self.base.DeviceNumb -= 1
        return 'success delete a device.'

    # Pause every work device.
    def pause_all(self, *args, **kwargs):
        """ pause all work node :return: """
        for item in self.redis.hgetall(name=self.base.DeviceKeyName):
            self.redis.hset(name=self.base.DeviceKeyName,
                            key=item,
                            value=self.base.DevicePause)
        else:
            return 'success pause all device.'

    # Pause one work device.
    def pause_one(self, device, *args, **kwargs):
        """ pause one work node
        :param device: device ID
        :return:
        """
        if f'device{device}' not in self.redis.hgetall(
                name=self.base.DeviceKeyName):
            return f'error! not exist device{device}'
        self.redis.hset(name=self.base.DeviceKeyName,
                        key=f'device{device}',
                        value=self.base.DevicePause)
        return f'success pause device{device}.'

    # Start every work device.
    def start_all(self, *args, **kwargs):
        """ run all work node :return: """
        for item in self.redis.hgetall(name=self.base.DeviceKeyName):
            self.redis.hset(name=self.base.DeviceKeyName,
                            key=item,
                            value=self.base.DeviceStart)
        else:
            return 'success start all device.'

    # Start one work device.
    def start_one(self, device, *args, **kwargs):
        """ run one work node
        :param device: device ID
        :return:
        """
        if f'device{device}' not in self.redis.hgetall(
                name=self.base.DeviceKeyName):
            return f'error! not exist device{device}'
        self.redis.hset(name=self.base.DeviceKeyName,
                        key=f'device{device}',
                        value=self.base.DeviceStart)
        return f'success start device{device}.'

    # Take every work device off-line.
    def stop_all(self, *args, **kwargs):
        """ run all work node :return: """
        for item in self.redis.hgetall(name=self.base.DeviceKeyName):
            self.redis.hset(name=self.base.DeviceKeyName,
                            key=item,
                            value=self.base.DeviceOffLine)
        else:
            return 'success set all device off-line.'

    # Take one work device off-line.
    def stop_one(self, device, *args, **kwargs):
        """ run one work node
        :param device: device ID
        :return:
        """
        if f'device{device}' not in self.redis.hgetall(
                name=self.base.DeviceKeyName):
            return f'error! not exist device{device}'
        self.redis.hset(name=self.base.DeviceKeyName,
                        key=f'device{device}',
                        value=self.base.DeviceOffLine)
        return f'success set device{device} off-line.'

    # Reset the maximum queued-task count for all task channels.
    def reset_task_max(self, numb, *args, **kwargs):
        """ reset max tasks channel number
        :param numb: tasks number
        :return:
        """
        try:
            # max size can not negative
            if int(numb) > 0:
                self.base.TaskChannelMax = int(numb)
                return f'success reset task max: {numb}'
            else:
                return 0
        # size must be number, not word
        except ValueError:
            return 0

    # Count the tasks sitting in one fail channel.
    def detect_fail_task_channel(self, channel, *args, **kwargs):
        """ detect the number of fail task in fail channel :return: """
        return {
            f'{self.base.FailChannelHost}{channel}':
            self.redis.llen(f'{self.base.FailChannelHost}{channel}')
        }

    # Reload every fail channel's tasks back into its task channel.
    def reload_all_fail_channel(self, *arg, **kwargs):
        """ reload all task from all fail channel to their task channel.
        :return:
        """
        for item in self.redis.keys():
            if self.base.FailChannelHost in item:
                number = self.redis.llen(item)
                for i in range(number):
                    task = self.redis.lpop(item)
                    # NOTE(review): item[-1] takes only the LAST character as
                    # the channel id — breaks for ids >= 10. Confirm range.
                    self.redis.rpush(f'{self.base.TaskChannelHost}{item[-1]}',
                                     task)
        else:
            return 'has been reload all task from all fail channel'

    # Reload every task of one fail channel into its task channel.
    def reload_one_fail_channel_all_task(self, channel, *args, **kwargs):
        """ reload all task in the one fail channel
        :param channel:
        :return:
        """
        try:
            channel = int(channel)
            if channel < 0:
                return 0
        except ValueError:
            return 0
        number = self.redis.llen(f'{self.base.FailChannelHost}{channel}')
        for i in range(number):
            task = self.redis.lpop(f'{self.base.FailChannelHost}{channel}')
            self.redis.rpush(f'{self.base.TaskChannelHost}{channel}', task)
        else:
            return f'has been reload all task from fail channel{channel}'

    # Reload a single task of one fail channel into its task channel.
    def reload_one_fail_channel_one_task(self, channel, task_id, *args,
                                         **kwargs):
        """ reload one task in the one fail channel
        :param channel: fail channel id
        :param task_id: fail task id
        :return:
        """
        try:
            channel = int(channel)
            task_id = int(task_id)
            if channel < 0:
                return 0
        except ValueError:
            return 0
        task = self.redis.lindex(name=f'{self.base.FailChannelHost}{channel}',
                                 index=task_id)
        if task:
            # NOTE(review): the task is copied, not moved — it stays in the
            # fail channel. Returns None implicitly when task_id is missing.
            self.redis.rpush(f'{self.base.TaskChannelHost}{channel}', task)
            return f'has been reload task{task_id} from fail channel{channel}'

    # Delete one task from a fail channel.
    def del_one_task_by_fail_channel(self, channel, task_id, *args, **kwargs):
        """ delete a task from the channel.
        :param channel: fail channel id
        :param task_id: fail task id
        :return:
        """
        try:
            channel = int(channel)
            task_id = int(task_id)
            if channel < 0:
                return 0
        except ValueError:
            return 0
        # NOTE(review): lindex only READS the element — nothing is deleted
        # (an lset-to-sentinel + lrem, or lrem by value, would be needed).
        # Confirm whether this is intentional.
        self.redis.lindex(name=f'{self.base.FailChannelHost}{channel}',
                          index=task_id)
        return f'has been delete task{task_id} from fail channel{channel}'

    # Task-distribution entry point.
    def AllotMission(self, task, channel):
        """ Terminal of Allot mission.
        :param task: type of generator or iterator
        :param channel: task channel number
        :return:
        """
        if not isinstance(task, (Iterator, Generator)):
            self.base.StopTaskSystem()
            raise TypeError('tasks must be iterator or generator')
        # Only feed the channel while it is below its configured cap.
        if self.redis.llen(f'{self.base.TaskChannelHost}{channel}'
                           ) < self.base.TaskChannelMax:
            try:
                task = next(task)
                self.redis.rpush(f'{self.base.TaskChannelHost}{channel}', task)
            except StopIteration:
                # Source exhausted: stop allotting.
                self.base.StopTaskAllot()
                print(f'\nall task has put in task channel{channel}.')
class Manager:
    """Management node of the distributed task system.

    Worker-node states: -1 offline, 0 paused, 1 running.  A worker saves
    its previous state before being probed and, when it sees the probe
    value -1, writes the previous state back.  The manager starts by
    initialising every worker's state to 0 (paused).

    Attributes:
        WorksNumb: list of worker-node IDs
        InstructionChannelNumb: total number of task channels
        TasksChannelMax: maximum number of tasks queued per channel

    Console commands (see Control):
        detect_all / detect_one   probe all / one worker state
        pause_all / pause_one     pause all / one worker
        start_all / start_one     start all / one worker
        pause_allot / start_allot pause / resume task distribution
        reset_task_max            change the per-channel task capacity
        detect_fail_channel       report the fail-channel backlog
        reload_all_fail_tasks     reload a whole fail channel
        reload_one_fail_task      reload a single failed task
        exit / stop / quit        stop all workers and close the manager

    Example:
        import OracleSpider
        manager = OracleSpider.Manager(WorksNumb=10)
        tasks = (something)  # an iterator/generator of tasks
        manager.Run([tasks])
        # Unfinished tasks stay cached in Redis across restarts; adjust
        # the distribution rules accordingly on the next launch.
    """

    def __init__(self, WorksNumb, host='localhost', port=6379, db=0,
                 password=None, InstructionChannelNumb=1, TasksChannelMax=50):
        self.__MyRedis = StrictRedis(
            connection_pool=ConnectionPool(host=host, port=port, db=db,
                                           password=password,
                                           decode_responses=True))
        self.WorksNumb = list(range(WorksNumb))               # worker IDs
        self.InstructionChannelNumb = InstructionChannelNumb  # task channels
        self.TasksChannelMax = TasksChannelMax  # max queued tasks per channel
        self.__ControlStatue = True   # keeps the Control loop alive
        self.__TaskPause = False      # pauses TasksAllot when True
        self.__TaskStop = False       # stops TasksAllot when True

    def InitialMonitor(self, *args, **kwargs):
        """ initial work node state, work node original state is 0(pause)
        state
        :return:
        """
        if DeviceKey in self.__MyRedis.keys():
            self.__MyRedis.delete(DeviceKey)
        for item in self.WorksNumb:
            self.__MyRedis.hset(name=DeviceKey, key=f'device{item}', value=0)

    def MonitorAll(self, *args, **kwargs):
        """ detect all device
        :return: state of all device
        """
        for item in self.WorksNumb:
            self.__MyRedis.hset(name=DeviceKey, key=f'device{item}', value=-1)
        # Give the workers time to overwrite the probe value with their
        # real state before reading the hash back.
        sleep(MonitorWait)
        return self.__MyRedis.hgetall(DeviceKey)

    def MonitorOne(self, device, *args, **kwargs):
        """ detect one device, return one device state
        :param device: device ID
        :return: state of the device
        """
        if f'device{device}' not in self.__MyRedis.hgetall(DeviceKey):
            return f'error! not exist device{device}'
        self.__MyRedis.hset(name=DeviceKey, key=f'device{device}', value=-1)
        sleep(MonitorWait)
        return {
            f'device{device}':
            self.__MyRedis.hget(DeviceKey, f'device{device}')
        }

    def PauseAll(self, *args, **kwargs):
        """ pause all work node
        :return:
        """
        for item in self.WorksNumb:
            self.__MyRedis.hset(name=DeviceKey, key=f'device{item}', value=0)

    def PauseOne(self, device, *args, **kwargs):
        """ pause one work node
        :param device: device ID
        :return: 0 when the device does not exist
        """
        if f'device{device}' not in self.__MyRedis.hgetall(DeviceKey):
            return 0
        self.__MyRedis.hset(name=DeviceKey, key=f'device{device}', value=0)

    def StartAll(self, *args, **kwargs):
        """ run all work node
        :return:
        """
        for item in self.WorksNumb:
            self.__MyRedis.hset(name=DeviceKey, key=f'device{item}', value=1)

    def StartOne(self, device, *args, **kwargs):
        """ run one work node
        :param device: device ID
        :return: 0 when the device does not exist
        """
        if f'device{device}' not in self.__MyRedis.hgetall(DeviceKey):
            return 0
        self.__MyRedis.hset(name=DeviceKey, key=f'device{device}', value=1)

    def ResetTaskMax(self, numb, *args, **kwargs):
        """ reset max tasks channel number
        :param numb: tasks number
        :return:
        """
        self.TasksChannelMax = int(numb)

    def PauseTasks(self, *args, **kwargs):
        """ pause allot mission
        :return:
        """
        self.__TaskPause = True

    def StartTasks(self, *args, **kwargs):
        """ start allot mission
        :return:
        """
        self.__TaskPause = False

    def MonitorFailChannel(self, *args, **kwargs):
        """ detect the number of fail channel
        :return:

        NOTE(review): other methods key fail channels as
        f'{FailChannel}{id}'; LLEN on the bare FailChannel key may always
        report 0 — confirm the intended key scheme.
        """
        return {FailChannel: self.__MyRedis.llen(FailChannel)}

    def ReloadAllFailChannel(self, param, *args, **kwargs):
        """Reload every task of one fail channel into a task channel.

        :param param: 'fail_channel_id,task_channel_id'
        :return: status message
        """
        FChannel, TChannel = param.split(',')
        try:
            TChannel = int(TChannel)
            if TChannel < 0:
                return f'can not reload in {TasksChannel}{TChannel}'
        except ValueError:
            return f'can not reload in {TasksChannel}{TChannel}'
        try:
            FChannel = int(FChannel)
            if FChannel < 0:
                return f'can not reload in {FailChannel}{FChannel}'
        except ValueError:
            return f'can not reload in {FailChannel}{FChannel}'
        # Fix: the original measured LLEN of the bare FailChannel key, so a
        # populated keyed channel f'{FailChannel}{FChannel}' never drained.
        number = self.__MyRedis.llen(f'{FailChannel}{FChannel}')
        for i in range(number):
            task = self.__MyRedis.lpop(f'{FailChannel}{FChannel}')
            self.__MyRedis.rpush(f'{TasksChannel}{TChannel}', task)
        return 'reload fail task has been done'

    def ReloadOneFailChannel(self, param, *args, **kwargs):
        """Reload a single failed task into a task channel.

        :param param: 'fail_channel_id,task_channel_id,task_index'
        :return: error string on bad input or missing task, else None
        """
        FChannel, TChannel, No = param.split(',')
        try:
            TChannel = int(TChannel)
            if TChannel < 0:
                return f'can not reload in {TasksChannel}{TChannel}'
        except ValueError:
            return f'can not reload in {TasksChannel}{TChannel}'
        try:
            FChannel = int(FChannel)
            if FChannel < 0:
                return f'can not reload in {FailChannel}{FChannel}'
        except ValueError:
            return f'can not reload in {FailChannel}{FChannel}'
        try:
            No = int(No)
        except ValueError:
            return f'can not get data of index: {No}'
        # NOTE(review): the task is copied, not removed from the fail
        # channel — confirm whether that is intended.
        task = self.__MyRedis.lindex(f'{FailChannel}{FChannel}', No)
        if task:
            self.__MyRedis.rpush(f'{TasksChannel}{TChannel}', task)
        else:
            return f'can not get data of index: {No}'

    def TasksAllot(self, tasks, channel):
        """Allotter thread: drain *tasks* into a Redis list channel.

        Runs until stop is requested; while paused it idles.  The thread
        exits when the task iterator is exhausted.

        :param tasks: iterator or generator of tasks
        :param channel: full channel key (e.g. f'{TasksChannel}{i}')
        :return:
        """
        if not isinstance(tasks, (Iterator, Generator)):
            self.__ControlStatue = False
            raise TypeError('tasks must be iterator or generator')
        while not self.__TaskStop:
            while not self.__TaskPause:
                # Only refill while the channel is below its capacity.
                if self.__MyRedis.llen(channel) <= self.TasksChannelMax:
                    try:
                        task = next(tasks)
                        self.__MyRedis.rpush(channel, task)
                    except StopIteration:
                        print('\nallot mission done')
                        return
                sleep(0.1)
            sleep(0.5)

    def Control(self):
        """Console loop: read a command from stdin and dispatch it.

        :return:
        """
        CommandList = [
            'detect_all', 'detect_one', 'start_all', 'start_one', 'pause_all',
            'pause_one', 'pause_allot', 'start_allot', 'reset_task_max',
            'detect_fail_channel', 'reload_all_fail_tasks',
            'reload_one_fail_task'
        ]
        ApiList = [
            self.MonitorAll, self.MonitorOne, self.StartAll, self.StartOne,
            self.PauseAll, self.PauseOne, self.PauseTasks, self.StartTasks,
            self.ResetTaskMax, self.MonitorFailChannel,
            self.ReloadAllFailChannel, self.ReloadOneFailChannel
        ]
        Hash = dict(zip(CommandList, ApiList))
        while self.__ControlStatue:
            sleep(0.4)
            command = input(abspath('').replace('\\', '/') + '>').split(' ')
            command[0] = command[0].lower()
            if command[0] in ['stop', 'exit', 'quit']:
                self.__TaskStop = True
                self.__TaskPause = True
                print('Manager has been stopped')
                break
            if command[0] in Hash:
                try:
                    arg = command[1]
                except IndexError:
                    arg = ''
                ans = Hash[command[0]](arg)
                if ans:
                    print(ans)
                else:
                    print(f'success to {command[0]}')
            else:
                print('invalid command.')

    def Run(self, tasks: list, *args, **kwargs):
        """Start one allotter thread per task source plus the console
        thread, wait for them, then clean up the Redis state.

        :param tasks: a list of task iterators/generators
        :return:
        """
        self.InitialMonitor()
        if len(tasks) != self.InstructionChannelNumb:
            self.InstructionChannelNumb = len(tasks)
        WorkThread = [
            Thread(target=self.TasksAllot, args=(task, f'{TasksChannel}{i}'))
            for i, task in enumerate(tasks)
        ]
        CommandThread = Thread(target=self.Control)
        [i.start() for i in WorkThread]
        CommandThread.start()
        [i.join() for i in WorkThread]
        CommandThread.join()
        self.__MyRedis.delete(DeviceKey)
        self.__MyRedis.close()
class DbRedis(DbBase):
    """Redis-backed implementation of the DbBase storage interface.

    Each logical row is stored as a Redis hash whose key is
    '<tbl_name>_<key values joined by "_">'.
    """

    def __init__(self, ip, port):
        super(DbRedis, self).__init__(ip, port)

    @connect_wrapper
    def query(self, tbl_name):
        """Return (ok, rows) for every hash whose key matches '<tbl_name>_*'.

        :param tbl_name: logical table name
        :return: (True, list of str->str dicts) or (False, error string)
        """
        try:
            query_result = []
            # NOTE(review): KEYS blocks the server on large keyspaces;
            # consider scan_iter for production workloads.
            keys = self.__connect.keys("%s_*" % tbl_name)
            for key in keys:
                info_dict = self.__connect.hgetall(key)
                # trans bytes to str
                decode_info_dict = {}
                for info_key, info_val in info_dict.items():
                    decode_info_dict[info_key.decode(
                        'utf-8')] = info_val.decode('utf-8')
                query_result.append(decode_info_dict)
            return True, query_result
        except Exception as e:
            logger.error("query error, info is %s." % str(e))
            return False, str(e)

    @classmethod
    def _get_table_key(cls, tbl_name, key_dict):
        # Build the row key from the table name plus all key values.
        return tbl_name + "_" + "_".join(list(key_dict.values()))

    @connect_wrapper
    def _execute_add(self, tbl_name, key_dict, para_dict):
        """Insert a new row; fails if the key already exists."""
        try:
            tbl_key = self._get_table_key(tbl_name, key_dict)
            if self.__connect.exists(tbl_key) != 0:
                logger.info("add table %s already exist...." % tbl_name)
                return False, "add table %s already exist...." % tbl_name
            para_dict.update(key_dict)
            # Fix: hmset is deprecated and removed in redis-py 4.x; hset
            # with the mapping argument is the supported equivalent.
            self.__connect.hset(tbl_key, mapping=para_dict)
            return True, "add success...."
        except Exception as e:
            logger.error("_execute_add, info is %s." % str(e))
            return False, str(e)

    @connect_wrapper
    def _execute_mod(self, tbl_name, key_dict, para_dict):
        """Update fields of an existing row; fails if the key is absent."""
        try:
            tbl_key = self._get_table_key(tbl_name, key_dict)
            if self.__connect.exists(tbl_key) == 0:
                logger.info("mod table %s not exist...." % tbl_name)
                return False, "mod table %s not exist...." % tbl_name
            for key, val in para_dict.items():
                self.__connect.hset(tbl_key, key, val)
            return True, "mod success...."
        except Exception as e:
            logger.error("_execute_mod, info is %s." % str(e))
            return False, str(e)

    @connect_wrapper
    def _execute_rmv(self, tbl_name, key_dict):
        """Delete a row; fails if the key is absent."""
        try:
            tbl_key = self._get_table_key(tbl_name, key_dict)
            if self.__connect.exists(tbl_key) == 0:
                logger.info("rmv table %s not exist...." % tbl_name)
                return False, "rmv table %s not exist...." % tbl_name
            self.__connect.delete(tbl_key)
            return True, "rmv success...."
        except Exception as e:
            logger.error("_execute_rmv, info is %s." % str(e))
            return False, str(e)

    def execute(self, db_op_type, tbl_name, key_dict, para_dict):
        """Dispatch an ADD/MOD/RMV operation to its handler."""
        if db_op_type == DB_ACTION_ADD:
            return self._execute_add(tbl_name, key_dict, para_dict)
        elif db_op_type == DB_ACTION_MOD:
            return self._execute_mod(tbl_name, key_dict, para_dict)
        elif db_op_type == DB_ACTION_RMV:
            return self._execute_rmv(tbl_name, key_dict)
        else:
            return False, "Error op type...."

    def connect(self):
        # NOTE(review): a pre-existing connection is replaced without
        # being closed — confirm callers never reconnect mid-session.
        self.__connect = StrictRedis(host=self.get_db_ip(),
                                     port=self.get_db_port(),
                                     db=0,
                                     socket_connect_timeout=0.1)

    def close(self):
        self.__connect.close()
import socket import struct import nmap from redis import StrictRedis ip = '192.168.0.1' def traceroute(ip): for ttl in range(1, 30): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.setsockopt(socket.IPPROTO_IP, socket.IP_TTL, struct.pack('I', ttl)) s.settimeout(2) try: s.connect((ip, 80)) except (socket.error, socket.timeout) as err: print('ttl=%02d: %s' % (ttl, err)) continue finally: s.close() r = StrictRedis(host='localhost', port=6379, db=0) print(r.keys()) r.close()
class Worker:
    """
    Work step:
        work = Worker(device=0)  # make this device/thread be device 0
        work.InitialSolveTaskWay(lambda x: x**2)  # task-solving ability
        work.InitialSaveWay()
        work.Run(channel=0)
    Running prepare;
        1. assign this worker ID.
        2. define a function about solving the task.
        3. using default saving function, or define a new saving function.
           default saving function is putting data to channel named Result.
        4. send parameter what task channel ID do you want to solve.
    Define function requirement:
        1. A function about solving the task (Focus): Include how to solve
           and how to handle when meet a error. if return NoneType that put
           it to error channel, else save it.
        2. Saving function: It can success to save data.
    """

    def __init__(self, device, host='localhost', port=6379, db=0,
                 password=None):
        self.__MyRedis = StrictRedis(
            connection_pool=ConnectionPool(host=host, port=port, db=db,
                                           password=password,
                                           decode_responses=True))
        self.__Device = device               # this worker's ID
        self.__TaskApproach = None           # task-solving callable
        self.__SaveApproach = None           # result-saving callable
        self.__StateMaintenanceState = True  # keeps StateMaintenance alive
        self.__ExecuteTaskStop = False       # hard-stops ExecuteTask
        self.__ExecuteTaskPause = True       # soft-pauses ExecuteTask

    def __DefaultSave__(self, data):
        """ Default approach for saving data
        :param data:
        :return:
        """
        self.__MyRedis.rpush(DataTempKey, data)

    def InitialSolveTaskWay(self, task_func):
        """ Initial approach of solving task.
        :param task_func: way of solve the task
        :return:
        """
        # Fix: the FunctionType check rejected builtins, bound methods and
        # functools.partial objects; any callable is acceptable here.
        if not callable(task_func):
            raise Exception('It is not a function.')
        self.__TaskApproach = task_func

    def InitialSaveWay(self, save_func=None):
        """ Initial how to save the result after solving task,
        default save in redis which named Result
        :param save_func:
        :return:
        """
        if save_func:
            self.__SaveApproach = save_func
        else:
            self.__SaveApproach = self.__DefaultSave__

    def GetTask(self, channel):
        """ Getting task from channel
        :param channel: channel name
        :return: task
        """
        return self.__MyRedis.lpop(channel)

    def StateMaintenance(self):
        """ Detect current device state, adjust action if it changes
        :return:
        """
        LastStatue = self.__MyRedis.hget(DeviceKey, f'device{self.__Device}')
        while self.__StateMaintenanceState:
            state = self.__MyRedis.hget(DeviceKey, f'device{self.__Device}')
            try:
                LastStatue = int(LastStatue)
                state = int(state)
            except TypeError:
                # The state hash is gone: the manager shut down, so stop.
                self.__ExecuteTaskStop = True
                self.__ExecuteTaskPause = True
                return
            if LastStatue != state:
                # Fix: `is` compared int identity (CPython small-int
                # accident, SyntaxWarning on 3.8+); use == for values.
                if state == -1:
                    # Probe signal: write the previous state back so the
                    # manager can read it.
                    self.__MyRedis.hset(DeviceKey, f'device{self.__Device}',
                                        LastStatue)
                elif state == 0:
                    LastStatue = state
                    self.__ExecuteTaskPause = True
                elif state == 1:
                    self.__ExecuteTaskPause = False
                    LastStatue = state
            sleep(0.1)

    def ExecuteTask(self, channel):
        """ Getting task to execute from channel, if channel not exist
        that maybe tasks has been done or doesn't exist this channel.
        And if get fail mission, it would send to fail channel.
        :param channel: the channel id of getting task
        :return:
        """
        channel = f'{TaskChannel}{channel}'
        if channel not in self.__MyRedis.keys():
            print(f'not exist {channel} or tasks has been done')
            self.__StateMaintenanceState = False
            return None
        while not self.__ExecuteTaskStop:
            while not self.__ExecuteTaskPause:
                task = self.GetTask(channel)
                if task:
                    try:
                        result = self.__TaskApproach(task)
                        if result:
                            self.__SaveApproach(result)
                        else:
                            # Falsy result == failed task: park it in the
                            # fail channel for later reloading.
                            self.__MyRedis.rpush(f'{FailChannel}{channel}',
                                                 task)
                    except Exception as e:
                        # Fix: `assert e is None` always fired, replacing
                        # the real error with an AssertionError (and being
                        # stripped under -O); re-raise with the cause kept.
                        raise RuntimeError(
                            'The way for solving task is error.') from e
                sleep(0.1)
            sleep(0.4)

    def Run(self, ChannelID):
        """Start the state-maintenance and task-execution threads and
        wait for both, then close the Redis connection."""
        StateMaintenanceThread = Thread(target=self.StateMaintenance)
        ExecuteTaskThread = Thread(target=self.ExecuteTask,
                                   args=(ChannelID, ))
        StateMaintenanceThread.start()
        ExecuteTaskThread.start()
        StateMaintenanceThread.join()
        ExecuteTaskThread.join()
        self.__MyRedis.close()
class HotShopHandler(tornado.web.RequestHandler):
    """Personalised "hot shop" recommendation endpoint.

    Serves one page of shop codes per POST.  Rankings are cached per
    owner in Redis; on a cache miss (or page 1) they are rebuilt from
    MySQL, falling back to a cold-start ranking for unknown owners.
    """

    # Shared pool used by @run_on_executor for the blocking MySQL work.
    executor = ThreadPoolExecutor(20)

    def initialize(self, logger, config_result):
        self.__conf = config_result
        self._logger = logger
        redis_conf = config_result['redis']
        self.redis_conn = StrictRedis(host=redis_conf['ip'],
                                      port=redis_conf['port'],
                                      db=redis_conf['redis_num'],
                                      decode_responses=True)
        self.pl = self.redis_conn.pipeline()

    async def post(self):
        body_dict = json.loads(self.request.body)
        self._logger.info("input: {}".format(body_dict))
        top_shop_li = body_dict['topShopList']
        owner_code = body_dict['ownerCode']
        self.area_code = body_dict['areaCode']
        self.area_key = self.area_code + '_shop'
        self.personal_key = owner_code + self.area_key
        if int(body_dict['page']) == 1:
            # Page 1 == refresh: rebuild the cached per-owner ranking and
            # return the full refreshed list.
            shop_li = await self.refresh_rec_shop(owner_code, top_shop_li)
        else:
            # Paging: serve from the cached per-owner list.
            shop_li = self.redis_conn.lrange(self.personal_key, 0, -1)
            # Empty cache means the key expired — rebuild from MySQL.
            if not shop_li:
                shop_li = await self.refresh_rec_shop(owner_code, top_shop_li)
        await self.finish_response(body_dict, shop_li)
        self.pl.close()
        self.redis_conn.close()

    def finish_response(self, request_body, all_shop_list):
        """Build the paged JSON payload and finish the request.

        Returns the Future from RequestHandler.finish, so callers can
        await it.
        """
        page = int(request_body['page'])
        rows = int(request_body['rows'])
        start_index = rows * (page - 1)
        result_dict = {
            'resultCode': 0,
            'msg': '操作成功',
            'data': {
                "current": page,  # current page
                "pages": math.ceil(len(all_shop_list) / rows),  # page count
                "records": all_shop_list[start_index:start_index + rows],
                "size": rows,  # rows per page
                "total": len(all_shop_list)  # total records
            }
        }
        self._logger.info('output: {}'.format(result_dict))
        self.set_header("Content-Type", "application/json; charset=UTF-8")
        return self.finish(json.dumps(result_dict, ensure_ascii=False))

    # Rebuild this owner's recommended-shop ranking from MySQL.
    @run_on_executor
    def refresh_rec_shop(self, owner_code, top_shop_list):
        mysql_conn = self.get_mysql_conn()
        select_sql = '''
            SELECT shop_code_list
            FROM cb_hotshop_owner_rec_shops
            WHERE owner_code = %s
        '''
        try:
            with mysql_conn.cursor() as cur:
                # Params passed as a tuple for driver portability.
                count = cur.execute(select_sql, (owner_code,))
                if not count:
                    # No personalised ranking: use the cold-start result.
                    self._logger.info('没有查询到个性化推荐,使用冷启动用户推荐数据')
                    shop_list = self.get_strangers_rec(mysql_conn)
                else:
                    shop_list = cur.fetchone()[0].split(',')
        finally:
            # Fix: the original leaked the connection when a query raised.
            mysql_conn.close()
        if top_shop_list:
            shop_list = top_shop_list + shop_list
        shop_list = self.shop_filter(shop_list)
        # Cache the ranking in Redis for 20 minutes.
        if shop_list:
            try:
                self.pl.delete(self.personal_key)
                self.pl.rpush(self.personal_key, *shop_list)
                self.pl.expire(self.personal_key, 1200)
                self.pl.execute()
            except Exception:
                # Fix: bare except also caught SystemExit/KeyboardInterrupt.
                self._logger.exception('个性化商铺推荐数据写入redis失败')
        return shop_list

    # Unified shop ranking for cold-start (unknown) owners.
    def get_strangers_rec(self, mysql_conn):
        key_for_strangers = '000000'
        # Try Redis first for the cold-start ranking.
        shop_list = self.redis_conn.lrange(key_for_strangers, 0, -1)
        # Cache miss: load from MySQL and re-populate Redis.
        if not shop_list:
            select_sql = '''
                SELECT shop_code_list
                FROM cb_hotshop_owner_rec_shops
                WHERE owner_code = %s
            '''
            with mysql_conn.cursor() as cur:
                count = cur.execute(select_sql, (key_for_strangers,))
                if count:
                    shop_list = cur.fetchone()[0].split(',')
                    # Store in Redis with a 12-hour TTL.
                    try:
                        self.pl.rpush(key_for_strangers, *shop_list)
                        self.pl.expire(key_for_strangers, 43200)
                        self.pl.execute()
                    except Exception:
                        # Fix: bare except.
                        self._logger.exception('冷启动用户商铺推荐数据写入redis失败')
                # Offline job not finished yet: fall back to the sales
                # ranking stored under the area key.
                else:
                    shop_list = self.redis_conn.lrange(self.area_key, 0, -1)
        return self.shuffle_top_five(self.shop_filter(shop_list))

    @staticmethod
    def shuffle_top_five(origin_li):
        """Reorder the first five entries (random shuffle is disabled in
        favour of a stable reverse sort)."""
        top_five = origin_li[:5]
        # random.shuffle(top_five)  # intentionally disabled
        top_five.sort(reverse=True)
        return top_five + origin_li[5:]

    # Fetch the list of virtual area codes from the property service.
    def get_virtual_areas(self):
        url = '{}/xwj-property-house/house/area/getVirtualAreaCodes'.format(
            self.__conf['spring_cloud']['host_name'])
        resp_json = json.loads(requests.get(url).text)
        return resp_json.get('data', [])

    # Shop filter: restrict to the current area (plus virtual areas),
    # dedupe, keep ranking order, append area-only shops at the end.
    def shop_filter(self, origin_shop_list):
        origin_shop_set = set(origin_shop_list)
        area_shop_set_for_filtering = set(
            self.redis_conn.lrange(self.area_key, 0, -1))
        if not area_shop_set_for_filtering:
            # Redis lost the area data: rebuild the filter set from MySQL
            # and re-import it into Redis.
            area_shop_set_for_filtering = self.get_shop_set_from_mysql()
        # Include shops attached to virtual areas.
        virtual_area_li = self.get_virtual_areas()
        for area_code in virtual_area_li:
            shop_set_in_virtual_area = set(
                self.redis_conn.lrange(area_code + '_shop', 0, -1))
            area_shop_set_for_filtering |= shop_set_in_virtual_area
        filtered_shop_list = list(
            origin_shop_set.intersection(area_shop_set_for_filtering))
        # Restore the incoming ranking order.
        filtered_shop_list.sort(key=origin_shop_list.index)
        # Append shops known to the area but absent from the ranking.
        return filtered_shop_list + list(area_shop_set_for_filtering -
                                         origin_shop_set)

    def get_mysql_conn(self):
        mysql_conf = self.__conf['mysql_2']
        return connect(
            host=mysql_conf['ip'],
            port=int(mysql_conf['port']),
            user=mysql_conf['user'],
            password=mysql_conf['password'],
            database=mysql_conf['db'],
        )

    def get_shop_set_from_mysql(self):
        self._logger.info('redis数据丢失, 使用mysql数据用于过滤并重新导入数据到redis')
        # NOTE(review): a background reload guarded by the
        # 'loading_shop_flag' key (owned by cloudbrain-preprocess-python,
        # which deletes it when done) used to be triggered here; the
        # submit call is currently disabled.
        mysql_conn = self.get_mysql_conn()
        select_sql = '''
            SELECT shop_code_list
            FROM cb_area_shop_list
            WHERE area_code = %s
        '''
        try:
            with mysql_conn.cursor() as cur:
                cur.execute(select_sql, (self.area_code,))
                row_one = cur.fetchone()
        finally:
            # Fix: close the connection even when the query raises.
            mysql_conn.close()
        # Fix: the original indexed row_one[0] without checking for a
        # missing row, raising TypeError when the area was unknown.
        shop_li = row_one[0].split(',') if row_one and row_one[0] else []
        if shop_li:
            self.redis_conn.rpush(self.area_key, *shop_li)
        return set(shop_li)