class GenericConsumer(Base):
    def __init__(self, connection_str):
        super().__init__()
        self._logger = LoggingTool(
            name=self.get_class_name(),
            ttl=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_TTL'))
        self._connection_str = connection_str
        self._queue_consumers = []

    def bind_queue(self, exchange, queue):
        queue_consumer = GenericQueueConsumer(
            consumer_name=queue.get_queue_name() + '_consumer')
        self._logger.info("Adding queue [" + queue.get_queue_name() +
                          "] to consumer [" +
                          queue_consumer.get_consumer_name() + '].')
        queue_consumer.create_consumer(exchange=exchange,
                                       connection_str=self._connection_str)
        queue_consumer.bind_queue(queue=queue)
        self._queue_consumers.append(queue_consumer)

    def start_consumer(self):
        try:
            for consumer in self._queue_consumers:
                self._logger.info("Starting consumer [" +
                                  consumer.get_consumer_name() + '].')
                consumer.start_consumer()
            while True:
                time.sleep(TimeSleepObj.ONE_SECOND)
        except KeyboardInterrupt:
            for consumer in self._queue_consumers:
                consumer.stop_consumer()
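
# --- Illustrative usage sketch (not part of the library) ---------------------
# Minimal wiring of a GenericConsumer. The broker URL and exchange name are
# hypothetical, and `queue` stands for any object exposing get_queue_name() and
# send_request(), which is what GenericQueueConsumer below expects.
def _example_start_generic_consumer(queue,
                                    connection_str='amqp://guest:guest@localhost:5672'):
    consumer = GenericConsumer(connection_str=connection_str)
    consumer.bind_queue(exchange='PSLX_EXAMPLE_EXCHANGE', queue=queue)
    # Blocks until KeyboardInterrupt, then stops every bound queue consumer.
    consumer.start_consumer()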
class EmailRPC(RPCBase):
    REQUEST_MESSAGE_TYPE = EmailPRCRequest

    def __init__(self, rpc_storage):
        super().__init__(service_name=self.get_class_name(),
                         rpc_storage=rpc_storage)
        self._logger = LoggingTool(
            name="PSLX_EMAIL_RPC",
            ttl=EnvUtil.get_pslx_env_variable(var='PSLX_INTERNAL_TTL'))
        self._credentials = {}
        self._email_servers = {}

    def _login(self, credentials):
        if not credentials.password:
            self._logger.error("Failed in logging to email [" +
                               credentials.user_name + '].')
        else:
            self._credentials[credentials.user_name] = credentials
            email_server = smtplib.SMTP(
                credentials.others['email_server'],
                int(credentials.others['email_server_port']))
            email_server.starttls()
            email_server.login(credentials.user_name, credentials.password)
            self._email_servers[credentials.user_name] = email_server
            self._logger.info("Successfully login to email [" +
                              credentials.user_name + '].')

    def add_email_credentials(self, credentials):
        self._credentials[credentials.user_name] = credentials
        self._login(credentials)

    def get_response_and_status_impl(self, request):
        if request.from_email not in self._credentials:
            self._logger.error("Email address is not logged in at all.")
            return None, Status.FAILED

        def _send_email():
            if not request.is_test and request.to_email and request.content:
                self._email_servers[request.from_email].sendmail(
                    from_addr=request.from_email,
                    to_addrs=request.to_email,
                    msg=request.content)

        try:
            _send_email()
            self._logger.info("Succeeded in sending email directly to " +
                              request.to_email + '.')
        except (smtplib.SMTPSenderRefused, smtplib.SMTPServerDisconnected,
                smtplib.SMTPConnectError,
                smtplib.SMTPAuthenticationError) as err:
            self._logger.error("Sending email with exception: " + str(err) +
                               '. Retry.')
            self._login(credentials=self._credentials[request.from_email])
            _send_email()
        return None, Status.SUCCEEDED
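
# --- Illustrative usage sketch (not part of the library) ---------------------
# _login expects a credentials object exposing user_name, password and an
# `others` mapping with 'email_server' / 'email_server_port'. The SimpleNamespace
# below is a stand-in used purely for illustration; in the real service the
# credentials come from whatever credentials proto the project defines. All
# values shown are hypothetical.
import types

def _example_register_email_account(email_rpc):
    credentials = types.SimpleNamespace(
        user_name='alerts@example.com',       # hypothetical account
        password='app-specific-password',     # hypothetical secret
        others={'email_server': 'smtp.example.com',
                'email_server_port': '587'},
    )
    # Stores the credentials and opens a TLS SMTP session for this account.
    email_rpc.add_email_credentials(credentials)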
class TTLCleanerOp(BatchOperator):
    def __init__(self):
        super().__init__(operator_name='ttl_cleaner_op')
        self._logger = LoggingTool(
            name='PSLX_TTL_CLEANER_OP',
            ttl=EnvUtil.get_pslx_env_variable(var='PSLX_INTERNAL_TTL'))
        self._ttl_dir = [EnvUtil.get_pslx_env_variable(var='PSLX_DATABASE')]

    def watch_dir(self, dir_name):
        self._ttl_dir.append(dir_name)

    def _recursively_check_dir_deletable(self, dir_name):
        if FileUtil.list_files_in_dir(dir_name=dir_name):
            return False
        sub_dirs = FileUtil.list_dirs_in_dir(dir_name=dir_name)
        if sub_dirs:
            for sub_dir in sub_dirs:
                if not self._recursively_check_dir_deletable(dir_name=sub_dir):
                    return False
        return True

    def _delete_file(self, cur_time, path_name):
        num_file_removed, num_file_failed = 0, 0
        if FileUtil.is_file(path_name=path_name):
            ttl = FileUtil.get_ttl_from_path(path_name=path_name)
            if ttl and cur_time - FileUtil.get_file_modified_time(
                    file_name=path_name) > ttl:
                self._logger.info("Removing file " + path_name + '...')
                try:
                    with FileLockTool(protected_file_path=path_name,
                                      read_mode=True,
                                      timeout=TimeSleepObj.ONE_TENTH_SECOND):
                        FileUtil.remove_file(file_name=path_name)
                    num_file_removed += 1
                    self.counter_increment("num_file_removed")
                except Exception as err:
                    num_file_failed += 1
                    self.counter_increment("num_file_failed_to_be_removed")
                    self._logger.error("Removing file " + path_name +
                                       ' failed with err ' + str(err) + '.')
        else:
            for file_name in FileUtil.list_files_in_dir(dir_name=path_name):
                stats = self._delete_file(cur_time=cur_time,
                                          path_name=file_name)
                num_file_removed += stats[0]
                num_file_failed += stats[1]
            for dir_name in FileUtil.list_dirs_in_dir(dir_name=path_name):
                stats = self._delete_file(cur_time=cur_time,
                                          path_name=dir_name)
                num_file_removed += stats[0]
                num_file_failed += stats[1]
        return num_file_removed, num_file_failed

    def _delete_dir(self, dir_name):
        num_dir_removed, num_dir_failed = 0, 0
        for sub_dir_name in FileUtil.list_dirs_in_dir(dir_name=dir_name):
            if FileUtil.does_dir_exist(
                    dir_name=sub_dir_name
            ) and self._recursively_check_dir_deletable(dir_name=sub_dir_name):
                self._logger.info("Removing directory " + sub_dir_name + '...')
                try:
                    FileUtil.remove_dir_recursively(dir_name=sub_dir_name)
                    self.counter_increment("num_directory_removed")
                    num_dir_removed += 1
                except Exception as err:
                    num_dir_failed += 1
                    self.counter_increment(
                        "num_directory_failed_to_be_removed")
                    self._logger.error("Removing directory " + sub_dir_name +
                                       ' failed with err ' + str(err) + '.')
            else:
                stats = self._delete_dir(dir_name=sub_dir_name)
                num_dir_removed += stats[0]
                num_dir_failed += stats[1]
        return num_dir_removed, num_dir_failed

    def execute_impl(self):
        start_time = TimezoneUtil.cur_time_in_local()
        self._logger.info("TTL cleaner started at " + str(start_time) + '.')

        num_file_removed, num_file_failed = 0, 0
        for ttl_dir_name in list(set(self._ttl_dir)):
            self._logger.info("TTL cleaner starts to check dir " +
                              ttl_dir_name + " for file deletion.")
            stats = self._delete_file(cur_time=start_time,
                                      path_name=ttl_dir_name)
            num_file_removed += stats[0]
            num_file_failed += stats[1]
        self._logger.info("Total number of file removed in this round is " +
                          str(num_file_removed) + '.')
        self._logger.info(
            "Total number of file failed to be removed in this round is " +
            str(num_file_failed) + '.')

        num_dir_removed, num_dir_failed = 0, 0
        for ttl_dir_name in list(set(self._ttl_dir)):
            self._logger.info("TTL cleaner starts to check dir " +
                              ttl_dir_name + " for directory deletion.")
            stats = self._delete_dir(dir_name=ttl_dir_name)
            num_dir_removed += stats[0]
            num_dir_failed += stats[1]
        self._logger.info(
            "Total number of directory removed in this round is " +
            str(num_dir_removed) + '.')
        self._logger.info(
            "Total number of directory failed to be removed in this round is " +
            str(num_dir_failed) + '.')
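
# --- Illustrative usage sketch (not part of the library) ---------------------
# The cleaner always sweeps PSLX_DATABASE plus any watched directories; a file
# is removed only when FileUtil.get_ttl_from_path can derive a TTL from its path
# and the file is older than that TTL. The watched path below is hypothetical,
# and execute_impl() is called directly here only for illustration; in a real
# deployment the operator runs inside a batch container on a schedule.
def _example_run_ttl_cleaner():
    cleaner_op = TTLCleanerOp()
    cleaner_op.watch_dir('/tmp/pslx_example_data')  # hypothetical extra dir
    cleaner_op.execute_impl()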
pslx_frontend_logger = LoggingTool(
    name=CLIENT_NAME,
    ttl=EnvUtil.get_pslx_env_variable(var='PSLX_INTERNAL_TTL'))

frontend_config_file = EnvUtil.get_pslx_env_variable(
    'PSLX_FRONTEND_CONFIG_PROTO_PATH')
assert frontend_config_file != ''

pslx_frontend_ui_app.config['frontend_config'] = FileUtil.read_proto_from_file(
    proto_type=FrontendConfig,
    file_name=frontend_config_file)
pslx_frontend_ui_app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
pslx_frontend_ui_app.config['SQLALCHEMY_DATABASE_URI'] = \
    'sqlite:///' + pslx_frontend_ui_app.config['frontend_config'].sqlalchemy_database_path
pslx_frontend_logger.info(
    "sqlalchemy database uri " +
    str(pslx_frontend_ui_app.config['SQLALCHEMY_DATABASE_URI']) + '.')

pslx_frontend_db = SQLAlchemy(pslx_frontend_ui_app)

pslx_partitioner_lru_cache = LRUCacheTool(
    max_capacity=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_CACHE'))
pslx_proto_table_lru_cache = LRUCacheTool(
    max_capacity=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_CACHE'))

pslx_dedicated_logging_storage_path = FileUtil.join_paths_to_dir_with_mode(
    root_dir=EnvUtil.get_pslx_env_variable('PSLX_DATABASE') +
    '/PSLX_DEDICATED_LOGGING',
    base_name='dedicated_logging.pb')

from pslx.micro_service.frontend.renderer import index_renderer, file_viewer_renderer, proto_viewer_renderer, \
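
# --- Illustrative configuration sketch (not part of the library) -------------
# The module-level initialization above requires PSLX_FRONTEND_CONFIG_PROTO_PATH
# to point at a serialized FrontendConfig proto (the assert fails otherwise) and
# reads PSLX_DATABASE for the dedicated logging path. The paths below are
# hypothetical and only show the expected shape of the environment.
import os

os.environ['PSLX_FRONTEND_CONFIG_PROTO_PATH'] = '/etc/pslx/frontend_config.pb'
os.environ['PSLX_DATABASE'] = '/var/pslx/database'
# Import the frontend package only after the environment is set, so the
# module-level config loading above succeeds.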
class GenericQueueConsumer(Base):
    def __init__(self, consumer_name):
        super().__init__()
        self._consumer_name = consumer_name
        self._logger = LoggingTool(
            name=consumer_name,
            ttl=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_TTL'))
        self._connection_str = ''
        self._exchange = ''
        self._connection = None
        self._queue = None
        self._thread = None
        self._has_added_queue = False

    def get_consumer_name(self):
        return self._consumer_name

    def get_connection_str(self):
        return self._connection_str

    def get_queue_name(self):
        if self._queue:
            return self._queue.get_queue_name()
        else:
            return ''

    def create_consumer(self, exchange, connection_str):
        self.sys_log("Create consumer connection_str [" + connection_str +
                     '] and exchange [' + exchange + '].')
        self._logger.info("Create consumer connection_str [" + connection_str +
                          '] and exchange [' + exchange + '].')
        self._connection_str = connection_str
        self._exchange = exchange
        self._connection = pika.SelectConnection(
            parameters=pika.URLParameters(connection_str),
            on_open_callback=self.on_open)

    def bind_queue(self, queue):
        if self._has_added_queue:
            self.sys_log("queue already exist, cannot bind any more.")
            self._logger.error("queue already exist, cannot bind any more.")
            raise QueueAlreadyExistException(
                "queue already exist, cannot bind any more.")
        self.sys_log("Binding to queue with name [" + queue.get_queue_name() +
                     '] to consumer [' + self.get_consumer_name() + '].')
        self._logger.info("Binding to queue with name [" +
                          queue.get_queue_name() + '] to consumer [' +
                          self.get_consumer_name() + '].')
        self._has_added_queue = True
        self._queue = queue

    def _process_message(self, ch, method, props, body):
        try:
            generic_request = ProtoUtil.string_to_message(
                message_type=GenericRPCRequest,
                string=base64.b64decode(body))
            self._logger.info("Getting request with uuid [" +
                              generic_request.uuid + '] in consumer [' +
                              self.get_consumer_name() + '].')
            response = self._queue.send_request(request=generic_request)
            response_str = ProtoUtil.message_to_string(proto_message=response)
            ch.basic_publish(exchange=self._exchange,
                             routing_key=props.reply_to,
                             properties=pika.BasicProperties(
                                 correlation_id=props.correlation_id),
                             body=base64.b64encode(response_str))
        except Exception as err:
            self._logger.error("Consumer [" + self.get_consumer_name() +
                               "] processing message with error: " +
                               str(err) + '.')

    def on_open(self, connection):
        connection.channel(on_open_callback=self._on_channel_open)

    def _on_channel_open(self, channel):
        channel.exchange_declare(exchange=self._exchange, durable=True)
        channel.queue_delete(queue=self.get_queue_name())
        channel.queue_declare(queue=self.get_queue_name(), durable=True)
        channel.queue_bind(exchange=self._exchange,
                           queue=self.get_queue_name(),
                           routing_key=self.get_queue_name())
        channel.basic_consume(queue=self.get_queue_name(),
                              on_message_callback=self._process_message,
                              auto_ack=True)

    def start_consumer(self):
        if not self._connection:
            raise QueueConsumerNotInitializedException(
                "Queue not initialized for consumer [" +
                self.get_consumer_name() + '].')
        self._thread = threading.Thread(target=self._connection.ioloop.start,
                                        name=self.get_consumer_name() +
                                        "_thread")
        self._thread.daemon = True
        self._thread.start()

    def stop_consumer(self):
        if self._thread:
            self._thread.join()
            os._exit(1)
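
# --- Illustrative sketch (not part of the library) ---------------------------
# _process_message expects the AMQP body to be a base64-encoded, serialized
# GenericRPCRequest, and replies with a base64-encoded serialized response on
# the reply_to queue. A publisher therefore encodes requests as below before
# publishing; only the encoding step mirrored from _process_message is shown,
# the actual publish call is omitted.
def _example_encode_request(generic_request):
    # Same ProtoUtil helpers as used by _process_message above.
    request_str = ProtoUtil.message_to_string(proto_message=generic_request)
    return base64.b64encode(request_str)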
class GenericServer(Base):
    def __init__(self, server_name):
        super().__init__()
        self._server_name = server_name
        self._logger = LoggingTool(name=self.get_server_name(),
                                   ttl=os.getenv('PSLX_INTERNAL_TTL', 7))
        self._url = None
        self._rpc_server = None
        self._has_added_rpc = False

    def get_server_name(self):
        return self._server_name

    def get_server_url(self):
        return self._url

    def create_server(self, max_worker, server_url):
        server_url = server_url.replace('http://', '').replace('https://', '')
        self.sys_log("Create server with num of workers = " + str(max_worker) +
                     " and url = " + server_url + ' for server [' +
                     self.get_server_name() + '].')
        self._logger.info("Create server with num of workers = " +
                          str(max_worker) + " and url = " + server_url +
                          ' for server [' + self.get_server_name() + '].')
        self._rpc_server = grpc.server(
            futures.ThreadPoolExecutor(max_workers=max_worker))
        self._url = server_url

    def bind_rpc(self, rpc):
        if self._has_added_rpc:
            self.sys_log("RPC already exist for server [" +
                         self.get_server_name() + "], cannot bind any more.")
            self._logger.error("RPC already exist for server [" +
                               self.get_server_name() +
                               "], cannot bind any more.")
            raise RPCAlreadyExistException(
                "RPC already exist for server [" + self.get_server_name() +
                "], cannot bind any more.")
        self.sys_log("Server " + self._url + " binding to server [" +
                     rpc.get_rpc_service_name() + '].')
        self._logger.info("Server " + self._url + " binding to server [" +
                          rpc.get_rpc_service_name() + '].')
        add_GenericRPCServiceServicer_to_server(rpc, self._rpc_server)
        self._has_added_rpc = True

    def start_server(self, private_key=None, certificate_chain=None):
        if self._rpc_server:
            self._logger.info("Starting server.")
            if not private_key or not certificate_chain:
                self.sys_log("Warning, channel is not secure.")
                self._rpc_server.add_insecure_port(self._url)
            else:
                server_credentials = grpc.ssl_server_credentials(
                    ((private_key, certificate_chain),))
                self._rpc_server.add_secure_port(self._url, server_credentials)
            self._rpc_server.start()
            self._rpc_server.wait_for_termination()
        else:
            self._logger.error("Please create server for " +
                               self.get_server_name() + " first.")
            self.sys_log("Please create server for " + self.get_server_name() +
                         " first.")
            raise RPCServerNotInitializedException(
                "Please create server for " + self.get_server_name() +
                " first.")
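
# --- Illustrative usage sketch (not part of the library) ---------------------
# Typical wiring of GenericServer: create the gRPC server, bind exactly one RPC
# servicer, then block serving requests. The URL and worker count are
# hypothetical, and `rpc` stands for any servicer accepted by bind_rpc (for
# example an RPCBase subclass such as EmailRPC or RPCIO in this section).
def _example_serve(rpc):
    server = GenericServer(server_name='example_server')
    server.create_server(max_worker=4, server_url='localhost:50051')
    server.bind_rpc(rpc=rpc)
    # Without a private key / certificate chain the port is added insecurely.
    server.start_server()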
class RPCIO(RPCBase):
    REQUEST_MESSAGE_TYPE = RPCIORequest
    PARTITIONER_TYPE_TO_IMPL = {
        PartitionerStorageType.YEARLY: partitioner.YearlyPartitionerStorage,
        PartitionerStorageType.MONTHLY: partitioner.MonthlyPartitionerStorage,
        PartitionerStorageType.DAILY: partitioner.DailyPartitionerStorage,
        PartitionerStorageType.HOURLY: partitioner.HourlyPartitionerStorage,
        PartitionerStorageType.MINUTELY: partitioner.MinutelyPartitionerStorage,
    }

    def __init__(self, rpc_storage):
        super().__init__(service_name=self.get_class_name(),
                         rpc_storage=rpc_storage)
        self._lru_cache_tool = LRUCacheTool(
            max_capacity=EnvUtil.get_pslx_env_variable(
                var='PSLX_INTERNAL_CACHE')
        )
        self._storage_type_to_impl_func = {
            StorageType.DEFAULT_STORAGE: self._default_storage_impl,
            StorageType.FIXED_SIZE_STORAGE: self._fixed_size_storage_impl,
            StorageType.PROTO_TABLE_STORAGE: self._proto_table_storage_impl,
            StorageType.PARTITIONER_STORAGE: self._partitioner_storage_impl,
        }
        self._logger = LoggingTool(
            name='PSLX_RPC_IO_RPC',
            ttl=EnvUtil.get_pslx_env_variable(var='PSLX_INTERNAL_TTL')
        )

    def _default_storage_impl(self, request):
        self._logger.info("Getting request of default storage read.")
        read_params = dict(request.params)
        if 'num_line' in read_params:
            read_params['num_line'] = int(read_params['num_line'])

        lru_key = (request.type, request.file_name)
        storage = self._lru_cache_tool.get(key=lru_key)
        if not storage:
            self.sys_log("Did not find the storage in cache. Making a new one...")
            storage = DefaultStorage()
            storage.initialize_from_file(file_name=request.file_name)
            self._lru_cache_tool.set(
                key=lru_key,
                value=storage
            )
        else:
            self.sys_log("Found key in LRU cache.")

        self._logger.info('Current cache size ' +
                          str(self._lru_cache_tool.get_cur_capacity()))
        response = RPCIOResponse()
        data = storage.read(params=read_params)
        rpc_list_data = RPCIOResponse.RPCListData()
        for item in data:
            rpc_data = rpc_list_data.data.add()
            rpc_data.string_data = item
        response.list_data.CopyFrom(rpc_list_data)
        return response

    def _fixed_size_storage_impl(self, request):
        self._logger.info("Getting request of fixed size storage read.")
        read_params = dict(request.params)
        if 'force_load' in read_params:
            read_params['force_load'] = ast.literal_eval(
                read_params['force_load'])
        if 'num_line' in read_params:
            read_params['num_line'] = int(read_params['num_line'])

        lru_key = (request.type, request.file_name)
        if 'fixed_size' in read_params:
            lru_key += (read_params['fixed_size'],)
        storage = self._lru_cache_tool.get(key=lru_key)
        if not storage:
            self.sys_log("Did not find the storage in cache. Making a new one...")
            if 'fixed_size' in read_params:
                storage = FixedSizeStorage(
                    fixed_size=int(read_params['fixed_size']))
            else:
                storage = FixedSizeStorage()
            storage.initialize_from_file(file_name=request.file_name)
            self._lru_cache_tool.set(
                key=lru_key,
                value=storage
            )
        else:
            self.sys_log("Found key in LRU cache.")

        self._logger.info('Current cache size ' +
                          str(self._lru_cache_tool.get_cur_capacity()))
        read_params.pop('fixed_size', None)
        response = RPCIOResponse()
        data = storage.read(params=read_params)
        rpc_list_data = RPCIOResponse.RPCListData()
        for item in data:
            rpc_data = rpc_list_data.data.add()
            rpc_data.string_data = item
        response.list_data.CopyFrom(rpc_list_data)
        return response

    def _proto_table_storage_impl(self, request):
        self._logger.info("Getting request of proto table storage read.")
        read_params = dict(request.params)
        if 'proto_module' in read_params:
            read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                message_type_str=read_params['message_type'],
                modules=read_params['proto_module']
            )
        else:
            read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                message_type_str=read_params['message_type']
            )

        lru_key = (request.type, request.file_name)
        storage = self._lru_cache_tool.get(key=lru_key)
        if not storage:
            self.sys_log("Did not find the storage in cache. Making a new one...")
            storage = ProtoTableStorage()
            storage.initialize_from_file(file_name=request.file_name)
            self._lru_cache_tool.set(
                key=lru_key,
                value=storage
            )
        else:
            self.sys_log("Found key in LRU cache.")

        self._logger.info('Current cache size ' +
                          str(self._lru_cache_tool.get_cur_capacity()))
        read_params.pop('proto_module', None)
        return storage.read(params=read_params)

    def _partitioner_storage_impl(self, request):
        self._logger.info("Getting request of partitioner storage read.")
        read_params = dict(request.params)
        is_proto_table = True if read_params['is_proto_table'] == '1' else False
        if 'base_name' in read_params:
            base_name = read_params['base_name']
        else:
            base_name = 'data.pb' if is_proto_table else 'data'

        lru_key = (read_params['PartitionerStorageType'], request.dir_name)
        self._logger.info("Partitioner type is " +
                          read_params['PartitionerStorageType'])
        storage = self._lru_cache_tool.get(key=lru_key)
        if not storage:
            self.sys_log("Did not find the storage in cache. Making a new one...")
            partitioner_type = ProtoUtil.get_value_by_name(
                enum_type=PartitionerStorageType,
                name=read_params['PartitionerStorageType']
            )
            storage = self.PARTITIONER_TYPE_TO_IMPL[partitioner_type]()
            storage.initialize_from_dir(dir_name=request.dir_name)
            self._lru_cache_tool.set(
                key=lru_key,
                value=storage
            )
        else:
            self.sys_log("Found key in LRU cache.")

        self._logger.info('Current cache size ' +
                          str(self._lru_cache_tool.get_cur_capacity()))
        read_params.pop('PartitionerStorageType', None)
        read_params.pop('is_proto_table', None)
        if is_proto_table:
            proto_table_storage = ProtoTableStorage()
            storage.set_underlying_storage(storage=proto_table_storage)
        else:
            read_params['num_line'] = -1

        response = RPCIOResponse()
        if 'start_time' not in read_params:
            # calling read function
            if is_proto_table:
                # if underlying storage is proto table.
                if 'message_type' in read_params:
                    assert 'proto_module' in read_params
                    read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                        message_type_str=read_params['message_type'],
                        modules=read_params['proto_module']
                    )
                proto_storage = ProtoTableStorage()
                if 'read_oldest' in read_params:
                    proto_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_oldest_dir_in_root_directory(),
                            base_name=base_name
                        )
                    )
                else:
                    proto_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_latest_dir(),
                            base_name=base_name
                        )
                    )
                data = proto_storage.read_all()
                for key, val in data.items():
                    rpc_list_data = RPCIOResponse.RPCListData()
                    rpc_data = rpc_list_data.data.add()
                    rpc_data.proto_data.CopyFrom(val)
                    response.dict_data[key].CopyFrom(rpc_list_data)
            else:
                # if underlying storage is not proto table.
                default_storage = DefaultStorage()
                if 'read_oldest' in read_params:
                    default_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_oldest_dir_in_root_directory(),
                            base_name=base_name
                        )
                    )
                else:
                    default_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_latest_dir(),
                            base_name=base_name
                        )
                    )
                data = default_storage.read(params={
                    'num_line': -1,
                })
                rpc_list_data = RPCIOResponse.RPCListData()
                for item in data:
                    rpc_data = rpc_list_data.data.add()
                    rpc_data.string_data = item
                response.list_data.CopyFrom(rpc_list_data)
        else:
            # calling read_range function
            if 'start_time' in read_params:
                read_params['start_time'] = TimezoneUtil.cur_time_from_str(
                    time_str=read_params['start_time']
                )
            if 'end_time' in read_params:
                read_params['end_time'] = TimezoneUtil.cur_time_from_str(
                    time_str=read_params['end_time']
                )
            data = storage.read_range(params=read_params)
            if data:
                for key, val in data.items():
                    rpc_list_data = RPCIOResponse.RPCListData()
                    if is_proto_table:
                        for proto_key, any_message in val.items():
                            rpc_data = rpc_list_data.data.add()
                            rpc_data.string_data = proto_key
                            rpc_data = rpc_list_data.data.add()
                            rpc_data.proto_data.CopyFrom(any_message)
                    else:
                        for entry in val:
                            rpc_data = rpc_list_data.data.add()
                            rpc_data.string_data = entry
                    response.dict_data[key].CopyFrom(rpc_list_data)
        return response

    def get_response_and_status_impl(self, request):
        if request.is_test:
            return self.REQUEST_MESSAGE_TYPE(), Status.SUCCEEDED
        response = self._storage_type_to_impl_func[request.type](
            request=request)
        return response, Status.SUCCEEDED
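
# --- Illustrative usage sketch (not part of the library) ---------------------
# RPCIO dispatches on request.type and interprets request.params as a string
# map that each handler casts as needed. The request below shows the shape
# consumed by _default_storage_impl: a file_name plus an optional 'num_line'.
# The file path is hypothetical; the field and parameter names mirror those
# read by the handlers above.
def _example_default_storage_request():
    request = RPCIORequest()
    request.type = StorageType.DEFAULT_STORAGE
    request.file_name = '/var/pslx/database/example_log/data'  # hypothetical
    request.params['num_line'] = '10'  # params are strings; the handler casts
    return request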