def get_service_list(service_only: str,
                     exclude_services: str,
                     service_directory: str) -> List[str]:
    """Construct the list of the names of the services to start in the poller,
    according to the arguments passed by the user.

    The available services are the '*.yml' files found directly inside
    the service directory; the name of a service is the part of the file
    name preceding the first dot.

    Args:
        service_only (str): space-separated list of the only services to
            run. When empty, all the available services are selected.
        exclude_services (str): space-separated list of the services to
            remove from the selection.
        service_directory (str): the directory where services are defined

    Raises:
        SqPollerConfError: raised if the service directory is not a
            directory, if a name in the 'include only' or in the exclude
            list is not an available service, or if the resulting list of
            services is empty.

    Returns:
        List[str]: the list of services to execute in the poller
    """
    if not os.path.isdir(service_directory):
        raise SqPollerConfError(
            'The service directory provided is not a directory'
        )

    allsvcs = [os.path.basename(svc_file).split('.')[0]
               for svc_file in Path(service_directory).glob('*.yml')]

    if service_only:
        svclist = service_only.split()

        # Check if all the given services are valid
        notvalid = [s for s in svclist if s not in allsvcs]
        if notvalid:
            raise SqPollerConfError(f'Invalid svcs specified: {notvalid}. '
                                    f'Should have been one of {allsvcs}')
    else:
        svclist = allsvcs

    if exclude_services:
        excluded_services = exclude_services.split()

        # Check if all the excluded services are valid
        notvalid = [e for e in excluded_services if e not in allsvcs]
        if notvalid:
            raise SqPollerConfError(f'Services {notvalid} excluded, but '
                                    'they are not valid.')
        svclist = [s for s in svclist if s not in excluded_services]

    if not svclist:
        raise SqPollerConfError('The list of services to execute is empty')
    return svclist
def __init__(self,
             add_task_fn: Callable,
             service_directory: str,
             schema_dir: str,
             output_queue: asyncio.Queue,
             run_mode: str,
             default_interval: int = 15,
             **kwargs) -> None:
    """Instantiate an instance of the ServiceManager class

    Args:
        add_task_fn (Callable): the function to call to schedule a task
            in the poller.
        service_directory (str): the directory where services are
            described.
        schema_dir (str): the directory containing the schema of the
            tables of the services. When empty, the 'schema' directory
            inside the service directory is used.
        output_queue (asyncio.Queue): it is the queue where the services
            are supposed to write to make a writer persist the output of
            a command.
        run_mode (str): a string representing the running policy:
            - gather (run once): gather the output without processing
            - processing (run once): gather the output and process it
            - forever: periodically poll the nodes.
        default_interval (int): the default time execution interval.
        **kwargs: 'service_only' and 'exclude_services' are the
            space-separated lists handed to the service list builder;
            any other key is ignored.

    Raises:
        SqPollerConfError: raised if the service or the schema directory
            is not a directory.
    """
    self._services = []
    self._running_svcs = []
    self.add_task_fn = add_task_fn
    self.output_queue = output_queue
    self.default_interval = default_interval
    self.run_mode = run_mode

    # Set and validate service and schema directories
    if not os.path.isdir(service_directory):
        raise SqPollerConfError(
            f"Service directory {service_directory} is not a directory"
        )
    self.service_directory = service_directory

    self.schema_dir = schema_dir
    if not self.schema_dir:
        self.schema_dir = f'{service_directory}/schema'
    if not os.path.isdir(self.schema_dir):
        # Name the schema directory in the message, so that the user knows
        # which of the two settings is wrong
        raise SqPollerConfError(
            f"Schema directory {self.schema_dir} is not a directory"
        )

    # Build the list of services to execute
    service_only = kwargs.pop('service_only', '')
    exclude_services = kwargs.pop('exclude_services', '')
    self._svcs_list = self._get_service_list(service_only,
                                             exclude_services)
def __init__(self, **kwargs):
    """Initialize the instance from the keyword arguments.

    Args:
        **kwargs: the recognized keys are:
            - type: stored as is in 'self.type' (None when missing)
            - output_dir: mandatory, the directory stored in
              'self.root_output_dir'. It is created, together with the
              missing parent directories, when it does not exist.

    Raises:
        SqPollerConfError: raised if 'output_dir' is missing or empty, or
            if it exists but it is not a directory.
    """
    self.type = kwargs.get('type', None)
    self.logger = logging.getLogger(__name__)

    output_dir = kwargs.get('output_dir', None)
    if not output_dir:
        raise SqPollerConfError('Need mandatory keyword arg: output_dir')

    self.root_output_dir = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    elif not os.path.isdir(output_dir):
        # The two literals are concatenated: the trailing space keeps the
        # path separated from the rest of the message
        raise SqPollerConfError(f'Output directory {output_dir} '
                                'is not a directory')
def _init_inventory(self, userargs, cfg):
    """Instantiate the inventory source selected by the arguments and the
    configuration.

    Args:
        userargs: the user arguments; 'ssh_config_file' and 'input_dir'
            are read from it.
        cfg: the configuration dictionary; the 'poller' section provides
            'connect-timeout' (15 when missing) and the 'manager'
            settings.

    Raises:
        SqPollerConfError: raised if the inventory type requested in the
            manager configuration is not among the available plugins.

    Returns:
        the instance of the selected inventory class
    """
    poller_cfg = cfg.get('poller', {})

    # Settings shared by any kind of inventory source
    common_args = {
        'connect_timeout': poller_cfg.get('connect-timeout', 15),
        'ssh_config_file': userargs.ssh_config_file,
    }

    plugins = Inventory.get_plugins()

    if userargs.input_dir:
        # 'dir' is not a real inventory source: it overrides the
        # Inventory class in order to simulate nodes providing the data
        # stored inside the given input directory.
        inv_class = plugins['dir']
        specific_args = {'input_dir': userargs.input_dir}
    else:
        manager_cfg = poller_cfg.get('manager', {})
        inv_type = manager_cfg.get('type', 'static')
        inv_class = plugins.get(inv_type)
        if not inv_class:
            raise SqPollerConfError(f'No inventory {inv_type} found')
        specific_args = dict(manager_cfg)
        specific_args['worker-id'] = self.worker_id

    return inv_class(self._add_poller_task,
                     **specific_args,
                     **common_args)
def chunk(self, glob_inv: dict, n_chunks: int, **kwargs) -> List[Dict]:
    """Split the inventory in chunks using the selected policy.

    Args:
        glob_inv (dict): the inventory to split
        n_chunks (int): the number of chunks to produce
        **kwargs: 'policy' overrides, for this call only, the policy
            stored in 'self.policy'.

    Raises:
        SqPollerConfError: raised if there is no chunking function for
            the policy, or if the 'sequential' or the 'namespace' policy
            produced less than 'n_chunks' non-empty chunks.

    Returns:
        List[Dict]: the non-empty chunks returned by the policy function
    """
    policy = kwargs.pop('policy', self.policy)
    chunk_fun = self.policies_fn.get(policy)
    if not chunk_fun:
        raise SqPollerConfError(
            f'Unknown chunking function for policy {policy}')

    inv_chunks = [c for c in chunk_fun(glob_inv, n_chunks) if c]
    if len(inv_chunks) < n_chunks:
        # Check the policy used for this call, which might differ from
        # self.policy when it is overridden via kwargs
        if policy == 'sequential':
            raise SqPollerConfError(
                'Not enough devices to split the inventory '
                f'into {n_chunks} chunks')
        if policy == 'namespace':
            raise SqPollerConfError(
                'Not enough namespaces to split the inventory '
                f'into {n_chunks} chunks')
    return inv_chunks
def _init_output_workers(self):
    """Instantiate an output worker for each of the requested output types.

    Every type in 'self.output_types' is looked up among the plugins
    returned by OutputWorker; the matching class is instantiated with
    'self.output_args' and appended to 'self._output_workers'.

    Raises:
        SqPollerConfError: raised if an output type is not among the
            available plugins.
    """
    available = OutputWorker.get_plugins()
    for otype in self.output_types:
        if otype in available:
            worker = available[otype](**self.output_args)
            self._output_workers.append(worker)
            continue
        raise SqPollerConfError(f'{otype} is not a valid output '
                                f'pick some of {available.keys()}')
def _validate_poller_args(self, userargs: Dict, _):
    """Validate the arguments and the configuration passed to the poller.
    The function produces a SqPollerConfError exception if there is
    something wrong in the configuration.

    Only the ssh configuration file is checked: when it is provided, the
    file must exist and the directory containing it must not have any
    permission bit set outside the owner ones.

    Args:
        userargs (Dict): the arguments passed to the poller; the
            'ssh_config_file' attribute is read from it.
        _ (Dict): the content of the Suzieq configuration file (unused)

    Raises:
        SqPollerConfError: raised when the configuration is not valid
    """
    if not userargs.ssh_config_file:
        return

    # Expand '~' before touching the filesystem, otherwise a path such as
    # '~/.ssh/config' never passes the existence check
    ssh_config_file = os.path.expanduser(userargs.ssh_config_file)
    if not os.access(ssh_config_file, os.F_OK):
        raise SqPollerConfError(
            f'Unable to read ssh config in {userargs.ssh_config_file}')

    # A bare file name has an empty dirname: check the current directory
    ssh_dir = os.path.dirname(ssh_config_file) or os.curdir

    # 0o40700 is the directory type flag plus the owner rwx bits: if any
    # other bit is set, the OR produces a different value
    if os.stat(ssh_dir).st_mode | 0o40700 != 0o40700:
        raise SqPollerConfError(
            'ssh directory has wrong permissions, must be 0700')
def init_plugins(cls, plugin_conf: Dict) -> List[Dict]:
    """Create the plugin described by the given configuration.

    The 'type' key of the configuration selects the plugin class, which
    is looked up via 'cls.get_plugins()' and instantiated with the whole
    configuration as its only argument.

    Args:
        plugin_conf (dict): plugin configuration

    Raises:
        RuntimeError: raised if the configuration is None
        SqPollerConfError: raised if the configuration has no type or if
            no plugin is returned for that type.

    Returns:
        List[Dict]: a list containing the generated plugin
    """
    if plugin_conf is None:
        raise RuntimeError('Plugin configuration cannot be None')

    plugin_type = plugin_conf.get('type')
    if not plugin_type:
        raise SqPollerConfError('No default type provided')

    matching = cls.get_plugins(plugin_name=plugin_type)
    if not matching:
        raise SqPollerConfError(f'Unknown plugin called {plugin_type}')

    plugin_class = matching[plugin_type]
    instance = plugin_class(plugin_conf)
    return [instance]
async def init_poller(self):
    """Initialize the poller, instantiating the services and setting up
    the connection with the nodes.

    The inventory and the services are initialized concurrently.
    This function should be called only at the beginning before calling
    run().

    Raises:
        SqPollerConfError: raised if the inventory returned no nodes or
            the service manager returned no services.
    """
    logger.info('Initializing poller')

    nodes, services = await asyncio.gather(
        self.inventory.build_inventory(),
        self.service_manager.init_services(),
    )

    if not nodes or not services:
        # Logging should've been done by init_nodes/services for details
        raise SqPollerConfError('Terminating because no nodes '
                                'or services found')
def __init__(self, config_data: dict = None):
    """Register the chunking policies and select the one to use.

    Each policy named in 'self.policies_list' must be backed by a
    callable attribute called 'split_<policy>', which is recorded in
    'self.policies_fn'.

    Args:
        config_data (dict, optional): the configuration; its 'policy' key
            selects the policy. When the configuration is missing or
            empty, or has no 'policy', the first policy of the list is
            used.

    Raises:
        RuntimeError: raised if a policy of the list has no callable
            'split_<policy>' attribute.
        SqPollerConfError: raised if the configured policy is not in the
            list of the policies.
    """
    self.policies_list = ['sequential', 'namespace']
    self.policies_fn = {}

    for name in self.policies_list:
        splitter = getattr(self, f'split_{name}', None)
        if not (splitter and callable(splitter)):
            raise RuntimeError(f'Unknown {name} policy')
        self.policies_fn[name] = splitter

    default_policy = self.policies_list[0]
    if not config_data:
        self.policy = default_policy
        return

    selected = config_data.get('policy', default_policy)
    if selected not in self.policies_list:
        raise SqPollerConfError(f'Unknown chunking policy {selected}')
    self.policy = selected
def __init__(self, **kwargs):
    """Select, and create if needed, the root directory of the parquet
    output.

    Args:
        **kwargs: 'data_dir' is the output directory; when it is missing
            or empty, '/tmp/suzieq/parquet-out/' is used and a warning is
            logged.

    Raises:
        SqPollerConfError: raised if the output path exists but it is not
            a directory.
    """
    output_dir = kwargs.get('data_dir')
    if not output_dir:
        logger.warning('No output directory for parquet specified, using '
                       '/tmp/suzieq/parquet-out')
        output_dir = '/tmp/suzieq/parquet-out/'

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if not os.path.isdir(output_dir):
        raise SqPollerConfError(
            f'Output directory {output_dir} is not a directory')

    logger.info(f'Parquet outputs will be under {output_dir}')
    self.root_output_dir = output_dir
async def build_inventory(self) -> Dict[str, Node]:
    """Retrieve the devices to poll and initialize the nodes of the
    inventory.

    The device list is obtained from '_get_device_list()' and passed to
    '_init_nodes()'; the result is stored in 'self._nodes'.

    Raises:
        SqPollerConfError: raised if the inventory source returned no
            hosts
        InventorySourceError: in case of error with the inventory source

    Returns:
        Dict[str, Node]: the nodes of the inventory, as returned by
            '_init_nodes()'
    """
    devices = await self._get_device_list()
    if devices:
        # Initialize the nodes in the inventory
        self._nodes = await self._init_nodes(devices)
        return self._nodes

    raise SqPollerConfError('The inventory source returned no hosts')
def __init__(self, config_data: Dict = None):
    """Set up the state used to launch and monitor the poller workers.

    Besides storing the settings, the constructor:
    - creates the coalescer launcher, unless 'no-coalescer' is set
    - generates a Fernet key, exported in the SQ_CONTROLLER_POLLER_CRED
      environment variable
    - creates the inventory directory, exported in SQ_INVENTORY_PATH
    - builds the command line arguments to pass to the worker

    Args:
        config_data (Dict): the configuration. It is accessed without
            checking for None, so it has to be provided despite the
            default value.

    Raises:
        SqPollerConfError: raised if the inventory path exists but it is
            not a directory.
    """
    self._workers_count = config_data.get("workers", 1)

    # Without a default factory these behave like plain dictionaries.
    # The first one holds the workers we are already monitoring, the
    # second one the workers we do not monitor yet.
    self._running_workers = defaultdict(None)
    self._waiting_workers = defaultdict(None)

    # The currently applied chunks
    self._active_chunks = []
    self._poller_tasks_ready = asyncio.Event()

    # Get the running mode
    self._input_dir = config_data.get('input-dir', None)
    self._run_once = config_data.get('run-once', None)
    self._no_coalescer = config_data.get('no-coalescer', False)

    # The debug mode overrides the run-once setting
    if config_data.get('debug'):
        self._run_once = 'debug'

    if not self._no_coalescer:
        self._coalescer_launcher = CoalescerLauncher(
            config_data['config'],
            config_data['config-dict'],
        )

    # Configure the encryption of the credential file and save the key
    # into an environment variable
    key = Fernet.generate_key()
    self._encryptor = Fernet(key)
    os.environ['SQ_CONTROLLER_POLLER_CRED'] = key.decode()

    # Configure the output directory for the inventory files
    inventory_dir = Path(f'/tmp/.suzieq/inventory.{os.getpid()}')
    self._inventory_path = inventory_dir.resolve()
    try:
        self._inventory_path.mkdir(parents=True, exist_ok=True)
    except FileExistsError:
        raise SqPollerConfError(
            f'The inventory dir is not a directory: {self._inventory_path}'
        )
    os.environ['SQ_INVENTORY_PATH'] = str(self._inventory_path)
    self._inventory_file_name = 'inv'

    # Define poller parameters: only these settings, when they have a
    # value, are forwarded to the worker
    forwarded = (
        'run-once',
        'exclude-services',
        'outputs',
        'output-dir',
        'service-only',
        'ssh-config-file',
        'config',
        'input-dir',
    )

    install_dir = get_sq_install_dir()
    self._args_to_pass = [f'{install_dir}/poller/worker/sq_worker.py']
    for arg, value in config_data.items():
        if arg not in forwarded or not value:
            continue
        values = value if isinstance(value, list) else [value]
        self._args_to_pass.append(f'--{arg}')
        # All the arguments should be string
        self._args_to_pass.extend(str(item) for item in values)