def send_selection_query(self, query: SelectionQuery) -> ResultSet:  # type: ignore
    """Sends a single `SELECT` or `DELETE` query to the influx server.

    Arguments:
        query {SelectionQuery} -- Query which should be executed

    Raises:
        ValueError: no SelectionQuery is given.

    Returns:
        ResultSet -- Result of the query, empty if `DELETE`
    """
    if(not query or not isinstance(query, SelectionQuery)):
        raise ValueError("a selection query must be given")

    # if any buffered table is selected, flush the buffer first
    for table in query.tables:
        if(table in self.__insert_buffer):
            self.flush_insert_buffer()
            break

    # convert the query to a string
    query_str = query.to_query()

    start_time = time.perf_counter()
    # send the query
    try:
        result = self.__client.query(  # type: ignore
            query=query_str, epoch='s', database=self.database.name)
    except (InfluxDBServerError, InfluxDBClientError) as err:  # type: ignore
        ExceptionUtils.exception_info(
            error=err, extra_message="error when sending select statement")  # type: ignore
        # empty result to maintain the return structure
        # raise_errors=False since we already caught the error
        result: ResultSet = ResultSet({}, raise_errors=False)  # type: ignore

    end_time = time.perf_counter()

    # if nothing is returned, add count = 0 per table.
    # also possible via `list(result.get_points())`, but that costs a lot of compute
    if(result):
        length = len(result.raw['series'][0]['values'])  # type: ignore
    else:
        length = 0

    tables_count: Dict[Table, int] = {}
    for table in query.tables:
        tables_count[table] = int(length / len(query.tables))

    self.__insert_metrics_to_buffer(query.keyword, tables_count, end_time - start_time)

    return result  # type: ignore
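# --- A minimal usage sketch (not part of the module), mirroring the call in
# site_name_by_id further below: table name 'sites' and the field names are
# taken from there, while the concrete where-clause value is illustrative.
def example_select_site(influx_client) -> None:
    table = influx_client.database['sites']
    query = SelectionQuery(
        keyword=Keyword.SELECT,
        tables=[table],
        fields=["siteId", "siteName"],
        where_str="siteId = '1000'",  # hypothetical site id
        order_direction="DESC",
        limit=1)
    result_set = influx_client.send_selection_query(query)
    for point in result_set.get_points():
        print(point)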
def check_create_rp(self) -> None:
    """Checks if any retention policy needs to be altered or added.

    Raises:
        ValueError: Multiple RPs declared as default
        ValueError: Check failed due to a database error
    """
    try:
        results: List[Dict[str, Any]] = self.__client.get_list_retention_policies(
            self.database.name)

        rp_dict: Dict[str, Dict[str, Any]] = {}
        for result in results:
            rp_dict[result['name']] = result

        add_rp_list: List[RetentionPolicy] = []
        alter_rp_list: List[RetentionPolicy] = []
        default_used = False

        for retention_policy in self.database.retention_policies:
            # make sure only one RP is default
            if(retention_policy.default):
                if(default_used):
                    raise ValueError("multiple Retention Policies are declared as default")
                default_used = True

            result_rp = rp_dict.get(retention_policy.name, None)
            if(result_rp is None):
                add_rp_list.append(retention_policy)
            elif(result_rp != retention_policy.to_dict()):
                alter_rp_list.append(retention_policy)
            # else: all good

        LOGGER.debug(f"missing {len(add_rp_list)} RPs. Adding {add_rp_list}")
        for retention_policy in add_rp_list:
            self.__client.create_retention_policy(  # type: ignore
                name=retention_policy.name,
                duration=retention_policy.duration,
                replication=retention_policy.replication,
                database=retention_policy.database.name,
                default=retention_policy.default,
                shard_duration=retention_policy.shard_duration)

        LOGGER.debug(f"altering {len(alter_rp_list)} RPs. Altering {alter_rp_list}")
        for retention_policy in alter_rp_list:
            self.__client.alter_retention_policy(  # type: ignore
                name=retention_policy.name,
                duration=retention_policy.duration,
                replication=retention_policy.replication,
                database=retention_policy.database.name,
                default=retention_policy.default,
                shard_duration=retention_policy.shard_duration)

    except (ValueError, InfluxDBClientError, InfluxDBServerError,
            requests.exceptions.ConnectionError) as error:  # type: ignore
        ExceptionUtils.exception_info(error=error)  # type: ignore
        raise ValueError("Retention Policies check failed")
def __job_logs_to_stats(self, list_with_logs: List[Dict[str, Any]]) -> None:
    """Parses job logs into their own statistic table, using the declared supported IDs.

    To parse more job logs, define additional entries in the attribute `supported_ids`.

    Arguments:
        list_with_logs {List[Dict[str, Any]]} -- List with all saved job logs
    """
    # only continue with job logs we want to save
    supported_log_iterator = filter(
        lambda log: log['messageId'] in self.__supported_ids.keys(), list_with_logs)
    sorted_log_iterator = sorted(supported_log_iterator, key=lambda entry: entry['logTime'])
    max_sec_timestamp = 0  # required to prevent duplicates

    for job_log in sorted_log_iterator:
        message_id = job_log['messageId']

        table_func_tuple = self.__supported_ids[message_id]
        (table_name, row_dict_func) = table_func_tuple

        if(not table_name):
            table_name = message_id

        try:
            row_dict = row_dict_func(job_log['messageParams'])
        except KeyError as error:
            ExceptionUtils.exception_info(
                error, extra_message="MessageID params are wrongly defined. Skipping one MessageId")
            continue

        row_dict['messageId'] = message_id

        # Issue 9: if all tag values duplicate another record, including the
        # timestamp, Influx throws the insert out as a duplicate. In some cases,
        # converting epoch timestamps from millisecond to second precision
        # causes duplicate timestamps. To avoid this for certain tables, add
        # seconds to the timestamp as needed to ensure uniqueness. Only use this
        # when some inaccuracy of the timestamps is acceptable.
        cur_timestamp = job_log['logTime']
        if(table_name == 'vmBackupSummary'):

            if(cur_timestamp is None):  # prevent None
                ExceptionUtils.error_message(
                    f"Warning: logTime is None, duplicate may be purged. Log: {job_log}")

            if(isinstance(cur_timestamp, str)):  # make sure it is an int
                cur_timestamp = int(cur_timestamp)

            cur_sec_timestamp = SppUtils.to_epoch_secs(cur_timestamp)
            if(cur_sec_timestamp <= max_sec_timestamp):
                digits = int(cur_timestamp / cur_sec_timestamp)
                max_sec_timestamp += 1  # increase by 1 second
                cur_timestamp = max_sec_timestamp * digits
            else:
                max_sec_timestamp = cur_sec_timestamp

        row_dict['time'] = cur_timestamp

        for (key, item) in row_dict.items():
            if(item in ('null', 'null(null)')):
                row_dict[key] = None

        self.__influx_client.insert_dicts_to_buffer(table_name, [row_dict])
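# --- A standalone sketch (not project code) of the duplicate-avoidance trick
# above: when two millisecond timestamps collapse onto the same second, the
# later one is bumped forward by a full second to stay unique. For simplicity
# the scale factor is fixed at 1000 here, where the original derives it.
def ensure_unique_seconds(ms_timestamps: List[int]) -> List[int]:
    max_sec = 0
    unique: List[int] = []
    for ts in sorted(ms_timestamps):
        sec = ts // 1000
        if sec <= max_sec:
            max_sec += 1            # bump by one second
            ts = max_sec * 1000     # scale back to millisecond precision
        else:
            max_sec = sec
        unique.append(ts)
    return unique

# ensure_unique_seconds([1000, 1200, 1900]) -> [1000, 2000, 3000]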
def connect(self) -> None:
    """Connect the client to the remote server. Call this before using any other methods.

    Raises:
        ValueError: Login failed
    """
    try:
        self.__client: InfluxDBClient = InfluxDBClient(  # type: ignore
            host=self.__address,
            port=self.__port,
            username=self.__user,
            password=self.__password,
            ssl=self.__use_ssl,
            verify_ssl=self.__verify_ssl,
            timeout=20)

        # ping to make sure the connection works
        version: str = self.__client.ping()
        LOGGER.debug(f"Connected to influxdb, version: {version}")

        # create the db; nothing happens if it already exists
        self.__client.create_database(self.database.name)

        # check for existing retention policies and continuous queries in the influxdb
        self.check_create_rp()
        self.check_create_cq()

    except (ValueError, InfluxDBClientError, InfluxDBServerError,
            requests.exceptions.ConnectionError) as error:  # type: ignore
        ExceptionUtils.exception_info(error=error)  # type: ignore
        raise ValueError("Login into influxdb failed")
def login(self) -> None:
    """Logs into the REST-API. Call this before using any other methods.

    Sets up the session id and the server URL.

    Raises:
        ValueError: Login was not successful.
    """
    http_auth: HTTPBasicAuth = HTTPBasicAuth(self.__username, self.__password)  # type: ignore
    self.__srv_url = "https://{srv_address}:{port}".format(
        srv_address=self.__srv_address, port=self.__srv_port)
    endpoint = "/api/endeavour/session"

    LOGGER.debug(f"login to SPP REST API server: {self.__srv_url}")
    if(self.__verbose):
        LOGGER.info(f"login to SPP REST API server: {self.__srv_url}")
    try:
        response_json = self.post_data(endpoint=endpoint, auth=http_auth)  # type: ignore
    except ValueError as error:
        ExceptionUtils.exception_info(error=error)
        ExceptionUtils.error_message(
            "Please make sure your host address, port, username and password for the REST-API (not SSH) login are correct."
            + "\nYou may test this by logging into the SPP website with the same credentials.")
        raise ValueError("REST API login request not successful.")

    self.__sessionid: str = response_json.get("sessionid", "")
    (version, build) = self.get_spp_version_build()

    LOGGER.debug(f"SPP-Version: {version}, build {build}")
    LOGGER.debug(f"REST API Session ID: {self.__sessionid}")
    if(self.__verbose):
        LOGGER.info(f"REST API Session ID: {self.__sessionid}")
        LOGGER.info(f"SPP-Version: {version}, build {build}")

    self.__headers['X-Endeavour-Sessionid'] = self.__sessionid
def ssh_execute_commands(cls, ssh_clients: List[SshClient], ssh_type: SshTypes,
                         command_list: List[SshCommand]) -> List[Tuple[str, List[Dict[str, Any]]]]:
    """Executes commands via ssh on several hosts.

    The hosts (other, vsnap, vadp) can be defined in the JSON configuration file.
    Commands which shall be executed on vsnap and/or vadp proxies go into the
    dedicated list of strings; 'otherCommands' is a list of commands which are
    executed on hosts which are not of type vsnap | vadp.
    If any hosts are not reachable, they are skipped. See the usage sketch below.
    """
    if(not command_list):
        LOGGER.debug("No commands specified, aborting command.")
        if(cls.verbose):
            LOGGER.info("No commands specified, aborting command.")
        return []

    client_list = list(filter(lambda client: client.client_type is ssh_type, ssh_clients))
    if(not client_list):
        LOGGER.debug(f"No {ssh_type.name} ssh client present. Aborting command")
        if(cls.verbose):
            LOGGER.info(f"No {ssh_type.name} ssh client present. Aborting command")
        return []

    ssh_cmd_response_list = []
    result_list = []
    for client in client_list:
        if(cls.verbose):
            LOGGER.info(f">> executing {ssh_type.name} command(s) on host {client.host_name}")

        try:
            result_commands = client.execute_commands(
                commands=command_list,
                verbose=cls.verbose)

        except ValueError as error:
            ExceptionUtils.exception_info(
                error=error, extra_message="Error when executing commands, skipping this client")
            continue

        for ssh_command in result_commands:
            insert_dict = {}
            insert_dict["host"] = ssh_command.host_name
            insert_dict["command"] = ssh_command.cmd
            insert_dict["output"] = json.dumps(ssh_command.result)
            insert_dict['ssh_type'] = ssh_type.name
            time_key, time_value = SppUtils.get_capture_timestamp_sec()
            insert_dict[time_key] = time_value

            ssh_cmd_response_list.append(insert_dict)

            try:
                table_result_tuple = ssh_command.parse_result(ssh_type=ssh_type)
                result_list.append(table_result_tuple)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error=error, extra_message="Error when parsing result, skipping parsing of this result")

    result_list.append(("sshCmdResponse", ssh_cmd_response_list))
    return result_list
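# --- Usage sketch: dispatching one command list to every vsnap client. The
# owning class name `SshMethods`, the enum member `SshTypes.VSNAP`, and the
# pre-built `command_list` are assumptions; the return shape is taken from the
# function above (it always ends with the raw ("sshCmdResponse", [...]) tuple).
def run_vsnap_commands(ssh_clients: List[SshClient],
                       command_list: List[SshCommand]) -> List[Tuple[str, List[Dict[str, Any]]]]:
    return SshMethods.ssh_execute_commands(
        ssh_clients=ssh_clients,
        ssh_type=SshTypes.VSNAP,
        command_list=command_list)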
def set_critial_configs(self, config_file: Dict[str, Any]) -> None:
    """Sets up any critical infrastructure; to be called within the init.

    Be aware that not everything may be initialized at call time.
    Add config here if the system should abort when it is missing.

    Arguments:
        config_file {Dict[str, Any]} -- Opened config file
    """
    if(not config_file):
        ExceptionUtils.error_message("missing or empty config file, aborting")
        self.exit(error_code=ERROR_CODE_CMD_LINE)
    try:
        # critical components only
        auth_influx = SppUtils.get_cfg_params(param_dict=config_file, param_name="influxDB")
        if(not isinstance(auth_influx, dict)):
            raise ValueError("influx config needs to be a dict")
        self.influx_client = InfluxClient(auth_influx=auth_influx)
        self.influx_client.connect()

    except ValueError as err:
        ExceptionUtils.exception_info(
            error=err, extra_message="error while setting up critical config. Aborting")
        self.influx_client = None  # set to None since it is unusable
        self.exit(error_code=ERROR_CODE)
def set_critial_configs(self, config_file: Dict[str, Any]) -> None:
    """Sets up any critical infrastructure; to be called within the init.

    Be aware that not everything may be initialized at call time.
    Add config here if the system should abort when it is missing.

    Arguments:
        config_file {Dict[str, Any]} -- Opened config file
    """
    if(not config_file):
        ExceptionUtils.error_message("missing or empty config file, aborting")
        self.exit(error_code=ERROR_CODE_START_ERROR)
    try:
        # critical components only
        self.influx_client = InfluxClient(config_file)

        if(not self.ignore_setup):
            # delay the connect into the testing phase
            self.influx_client.connect()

    except ValueError as err:
        ExceptionUtils.exception_info(
            error=err, extra_message="error while setting up critical config. Aborting")
        self.influx_client = None  # set to None, otherwise the variable is undeclared
        self.exit(error_code=ERROR_CODE)
def __init__(self, auth_influx: Dict[str, Any]):
    """Initialize the influx client from a config dict. Call `connect` before using the client.

    Arguments:
        auth_influx {Dict[str, Any]} -- Dictionary with the required parameters.

    Raises:
        ValueError: Raised if any important parameter is missing within the dict
    """
    try:
        self.__user: str = auth_influx["username"]
        self.__password: str = auth_influx["password"]
        self.__use_ssl: bool = auth_influx["ssl"]
        if(self.__use_ssl):
            self.__verify_ssl: bool = auth_influx["verify_ssl"]
        else:
            self.__verify_ssl = False
        self.__port: int = auth_influx["srv_port"]
        self.__address: str = auth_influx["srv_address"]
        self.__database: Database = Database(auth_influx["dbName"])
        # create table definitions in code
        Definitions.add_table_definitions(self.database)

        self.__metrics_table: Table = self.database['influx_metrics']
    except KeyError as key_error:
        ExceptionUtils.exception_info(error=key_error)
        raise ValueError("Missing Influx-Config arg", str(key_error))

    # declare for later use
    self.__client: InfluxDBClient
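# --- A minimal sketch of the "influxDB" config section the constructor expects,
# derived from the keys accessed above. The concrete values are illustrative
# assumptions, not defaults shipped with the project.
example_auth_influx: Dict[str, Any] = {
    "username": "influxAdmin",
    "password": "secret",
    "ssl": True,
    "verify_ssl": False,   # only read when "ssl" is true
    "srv_port": 8086,
    "srv_address": "influx.example.com",
    "dbName": "sppmonDB",
}
# influx_client = InfluxClient(auth_influx=example_auth_influx)
# influx_client.connect()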
def get_all_jobs(self) -> None:
    """Incrementally saves all stored job sessions, even those from before the first execution of sppmon."""

    job_list = MethodUtils.query_something(
        name="job list", source_func=self.__api_queries.get_job_list)

    for job in job_list:
        job_id = job.get("id", None)
        job_name = job.get("name", None)

        # written this way to make sure we also catch empty strings
        if(not job_id or not job_name):
            ExceptionUtils.error_message(f"skipping, missing name or id for job {job}")
            continue
        LOGGER.info(">> capturing Job information for Job \"{}\"".format(job_name))

        try:
            self.__job_by_id(job_id=job_id)
        except ValueError as error:
            ExceptionUtils.exception_info(
                error=error, extra_message=f"error when getting jobs for {job_name}, skipping it")
            continue
def filter_values_dict(
        cls,
        result_list: List[Dict[str, Any]],
        white_list: List[str] = None,
        ignore_list: List[str] = None) -> List[Dict[str, Any]]:
    """Removes unwanted values from a list of dicts.

    Use white_list to only pick the values specified.
    Use ignore_list to pick everything but the values specified.
    Both: white_list items overwrite ignore_list items, still getting all items not filtered.

    Args:
        result_list (List[Dict[str, Any]]): items to be filtered
        white_list (List[str], optional): items to be kept. Defaults to None.
        ignore_list (List[str], optional): items to be removed. Defaults to None.

    Raises:
        ValueError: no result list specified

    Returns:
        List[Dict[str, Any]]: list of filtered dicts
    """
    if(result_list is None):
        raise ValueError("need a value list to filter values")

    new_result_list: List[Dict[str, Any]] = []

    # a single object is treated as a one-element list
    for result in result_list:
        new_result: Dict[str, Any] = {}

        # only acquire the items wanted
        if(white_list):
            for white_key in white_list:
                (key, value) = SppUtils.get_nested_kv(key_name=white_key, nested_dict=result)
                if(key in new_result):
                    key = white_key
                new_result[key] = value

            # warn if something is missing
            if(len(new_result) != len(white_list)):
                ExceptionUtils.error_message(
                    f"Result does not have the same length as the whitelist, probably a typing error: {result_list}")

        # acquire everything but a few unwanted items
        if(ignore_list is not None):
            # add sub-dicts to the dictionary itself, filtering inclusive
            full_result = cls.get_with_sub_values(mydict=result, ignore_list=ignore_list)
            new_result.update(full_result)

        new_result_list.append(new_result)

    return new_result_list
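# --- Usage sketch with made-up data; `ConnectionUtils` as the owning class is
# an assumption based on its other helpers used alongside this module.
raw = [{"name": "vm1", "id": 42, "links": {"self": "http://example"}}]
kept = ConnectionUtils.filter_values_dict(result_list=raw, white_list=["name", "id"])
# kept == [{"name": "vm1", "id": 42}]
dropped = ConnectionUtils.filter_values_dict(result_list=raw, ignore_list=["links"])
# dropped == [{"name": "vm1", "id": 42}]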
def post_data(self, endpoint: str = None, url: str = None, post_data: str = None,
              auth: HTTPBasicAuth = None) -> Dict[str, Any]:  # type: ignore
    """Queries an endpoint via POST request. Only specify `auth` if you want to log in.

    Either specify endpoint or url.

    Keyword Arguments:
        endpoint {str} -- Endpoint to be queried (default: {None})
        url {str} -- URL to be queried (default: {None})
        post_data {str} -- data with filters/parameters (default: {None})
        auth {HTTPBasicAuth} -- auth if you want to log in (default: {None})

    Raises:
        ValueError: no endpoint or url specified
        ValueError: both url and endpoint specified
        ValueError: no post_data or auth specified
        ValueError: error when sending post data
        ValueError: wrong status code in response
        ValueError: failed to parse query

    Returns:
        Dict[str, Any] -- parsed JSON response body
    """
    if(not endpoint and not url):
        raise ValueError("neither url nor endpoint specified")
    if(endpoint and url):
        raise ValueError("both url and endpoint specified")
    if(not post_data and not auth):
        raise ValueError("either provide auth or post_data")

    if(not url):
        url = self.__srv_url + endpoint

    LOGGER.debug(f"post_data request {url} {post_data} {auth}")
    try:
        if(post_data):
            response_query: Response = requests.post(  # type: ignore
                url, headers=self.__headers, data=post_data, verify=False,
                timeout=(self.__initial_connection_timeout, self.__timeout))
        else:
            response_query: Response = requests.post(  # type: ignore
                url, headers=self.__headers, auth=auth, verify=False,
                timeout=(self.__initial_connection_timeout, self.__timeout))
    except requests.exceptions.RequestException as error:  # type: ignore
        ExceptionUtils.exception_info(error=error)  # type: ignore
        raise ValueError("Error when sending REST-API post data", endpoint, post_data)

    if response_query.status_code != 200:
        raise ValueError("Status Code Error in REST-API post data response",
                         response_query.status_code, response_query, endpoint, post_data)  # type: ignore

    try:
        response_json: Dict[str, Any] = response_query.json()
    except (json.decoder.JSONDecodeError, ValueError) as error:  # type: ignore
        raise ValueError("failed to parse query in restAPI post request",
                         response_query, endpoint, post_data)  # type: ignore

    return response_json
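# --- Usage sketch, mirroring the two call styles above: `login` passes only
# `auth`, while data queries pass `post_data`. The search payload is the one
# used by get_vms_per_sla below; everything else is illustrative.
def example_vm_search(rest_client) -> Dict[str, Any]:
    # filtered search without auth; the session header does the authentication
    return rest_client.post_data(
        endpoint="/api/hypervisor/search",
        post_data=json.dumps({"name": "*", "hypervisorType": "vmware"}))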
def get_vms_per_sla(self) -> List[Dict[str, Any]]:
    """Retrieves and counts all VMware VMs per SLA policy."""

    endpoint = "/ngp/slapolicy"
    white_list = ["name", "id"]
    array_name = "slapolicies"

    sla_policy_list = self.__rest_client.get_objects(
        endpoint=endpoint,
        white_list=white_list,
        array_name=array_name,
        add_time_stamp=False)

    result_list: List[Dict[str, Any]] = []
    for sla_policy in sla_policy_list:
        try:
            sla_name: str = sla_policy["name"]
        except KeyError as error:
            ExceptionUtils.exception_info(
                error, extra_message="skipping one sla entry due to a missing name.")
            continue
        sla_id: Optional[str] = sla_policy.get("id", None)

        result_dict: Dict[str, Any] = {}

        ## hotadd:
        sla_name = urllib.parse.quote_plus(sla_name)

        endpoint = "/api/hypervisor/search"
        endpoint = ConnectionUtils.url_set_param(
            url=endpoint, param_name="resourceType", param_value="vm")
        endpoint = ConnectionUtils.url_set_param(
            url=endpoint, param_name="from", param_value="hlo")
        filter_str: str = '[{"property":"storageProfileName","value": "' + sla_name + '", "op":"="}]'
        endpoint = ConnectionUtils.url_set_param(
            url=endpoint, param_name="filter", param_value=filter_str)

        # note: currently only vmware is queried per sla, not hyperV
        # need to check whether the hypervisor type must be specified
        post_data = json.dumps({"name": "*", "hypervisorType": "vmware"})
        response_json = self.__rest_client.post_data(endpoint=endpoint, post_data=post_data)

        result_dict["slaName"] = sla_name
        result_dict["slaId"] = sla_id
        result_dict["vmCountBySLA"] = response_json.get("total")

        time_key, time = SppUtils.get_capture_timestamp_sec()
        result_dict[time_key] = time

        result_list.append(result_dict)

    return result_list
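# For illustration (an assumption about url_set_param's behavior, not verified
# output): after the three url_set_param calls in get_vms_per_sla above, the
# queried endpoint resembles, before URL encoding,
#   /api/hypervisor/search?resourceType=vm&from=hlo&filter=[{"property":"storageProfileName","value":"<sla_name>","op":"="}]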
def check_create_cq(self) -> None:
    """Checks if any continuous query needs to be altered or added.

    Raises:
        ValueError: Check failed due to a database error
    """
    try:
        results: List[Dict[str, List[Dict[str, Any]]]] = self.__client.get_list_continuous_queries()
        cq_result_list: Optional[List[Dict[str, Any]]] = None
        for result in results:
            # check if this is the associated database
            cq_result_list = result.get(self.database.name, None)
            if(cq_result_list is not None):
                break

        if(cq_result_list is None):
            cq_result_list = []

        cq_dict: Dict[str, str] = {}
        for cq_result in cq_result_list:
            cq_dict[cq_result['name']] = cq_result['query']

        add_cq_list: List[ContinuousQuery] = []
        alter_cq_list: List[ContinuousQuery] = []

        for continuous_query in self.database.continuous_queries:
            result_cq = cq_dict.get(continuous_query.name, None)
            if(result_cq is None):
                add_cq_list.append(continuous_query)
            elif(result_cq != continuous_query.to_query()):
                alter_cq_list.append(continuous_query)
            # else: all good

        LOGGER.debug(f"altering {len(alter_cq_list)} CQs. Dropping {alter_cq_list}")
        # altering is not possible -> drop and re-add
        for continuous_query in alter_cq_list:
            self.__client.drop_continuous_query(  # type: ignore
                name=continuous_query.name,
                database=continuous_query.database.name)
        # extend to re-insert
        add_cq_list.extend(alter_cq_list)

        LOGGER.debug(f"adding {len(add_cq_list)} CQs. Adding {add_cq_list}")
        for continuous_query in add_cq_list:
            self.__client.create_continuous_query(  # type: ignore
                name=continuous_query.name,
                select=continuous_query.select,
                database=continuous_query.database.name,
                resample_opts=continuous_query.resample_opts)

    except (ValueError, InfluxDBClientError, InfluxDBServerError,
            requests.exceptions.ConnectionError) as error:  # type: ignore
        ExceptionUtils.exception_info(error=error)  # type: ignore
        raise ValueError("Continuous Query check failed")
def insert_dicts_to_buffer(self, table_name: str, list_with_dicts: List[Dict[str, Any]]) -> None:
    """Insert a list of dicts with data into influxdb. Splits according to the table definition.

    It is highly recommended to define the table beforehand in database_table.py.
    If it is not present, the data is split by type analysis.
    Important: queries are only buffered, not sent. Call flush_insert_buffer to flush.

    Arguments:
        table_name {str} -- Name of the table to be inserted
        list_with_dicts {List[Dict[str, Any]]} -- List of dicts with column names as keys.

    Raises:
        ValueError: No list with dictionaries is given or it is of the wrong type.
        ValueError: No table name is given
    """
    if(list_with_dicts is None):  # an empty list is allowed
        raise ValueError("missing list with dictionaries in insert")
    if(not table_name):
        raise ValueError("table name needs to be set in insert")

    # only insert if there is something to insert
    if(not list_with_dicts):
        LOGGER.debug("nothing to insert for table %s due to empty list", table_name)
        return

    # get the table instance
    table = self.database[table_name]

    # generate queries for each dict
    query_buffer = []
    for mydict in list_with_dicts:
        try:
            # split the dict according to the table definition
            (tags, values, timestamp) = table.split_by_table_def(mydict=mydict)

            if(isinstance(timestamp, str)):
                timestamp = int(timestamp)
            # create a query and append it to the query buffer
            query_buffer.append(InsertQuery(table, values, tags, timestamp))
        except ValueError as err:
            ExceptionUtils.exception_info(
                error=err, extra_message="skipping a single dict to insert")
            continue

    # extend existing inserts by the new ones and add them to the insert buffer
    table_buffer = self.__insert_buffer.get(table, list())
    table_buffer.extend(query_buffer)
    self.__insert_buffer[table] = table_buffer
    # safeguard to avoid a MemoryError
    if(len(self.__insert_buffer[table]) > 5 * self.__query_max_batch_size):
        self.flush_insert_buffer()
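# --- Usage sketch (hypothetical table and columns): records are only buffered
# here; nothing reaches the server until flush_insert_buffer runs.
def example_buffer_insert(influx_client) -> None:
    influx_client.insert_dicts_to_buffer(
        table_name="cpuram",
        list_with_dicts=[{"host": "spp-server", "cpuUtil": 12.5, "time": 1600000000}])
    influx_client.flush_insert_buffer()  # actually sends the buffered points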
def flush_insert_buffer(self) -> None:
    """Flushes the insert buffer, sending the queries to the influxdb server.

    Sends in batches defined by `__query_max_batch_size` to reduce HTTP overhead.
    Only the send statistics remain in the buffer; flush again to send those too.

    Raises:
        ValueError: Critical: The query buffer is None.
    """
    if(self.__insert_buffer is None):
        raise ValueError("query buffer is somehow None, this should never happen!")
    # only send if there is something to send
    if(not self.__insert_buffer):
        return

    # done beforehand to be able to clear the buffer before sending,
    # so the send statistics can be re-inserted
    insert_list: List[Tuple[Table, List[str]]] = []
    for (table, queries) in self.__insert_buffer.items():
        insert_list.append((table, list(map(lambda query: query.to_query(), queries))))

    # clear all queries which are now transformed
    self.__insert_buffer.clear()

    for (table, queries_str) in insert_list:
        # measure the time needed for the sending process
        start_time = time.perf_counter()
        try:
            # send batch_size queries at once
            self.__client.write_points(
                points=queries_str, database=self.database.name,
                retention_policy=table.retention_policy.name,
                batch_size=self.__query_max_batch_size,
                time_precision='s', protocol='line')
        except (InfluxDBServerError, InfluxDBClientError) as error:  # type: ignore
            ExceptionUtils.exception_info(
                error=error, extra_message="Error when sending the insert buffer")  # type: ignore

        end_time = time.perf_counter()

        # add metrics for the next sending process.
        # compute the duration; metrics are computed per batch
        self.__insert_metrics_to_buffer(
            Keyword.INSERT, {table: len(queries_str)}, end_time - start_time, len(queries_str))
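# For reference: with protocol='line' and time_precision='s', each query string
# sent above is standard InfluxDB line protocol, i.e. (illustrative values)
#   cpuram,host=spp-server cpuUtil=12.5 1600000000
# <measurement>,<tags> <fields> <epoch seconds> -- this is the format
# InsertQuery.to_query() is expected to produce (an assumption about that helper).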
def check_pid_file(self) -> bool:
    """Checks the pid file for other running SPPMon instances with the same arguments.

    Removes entries of processes which no longer exist and registers its own pid.

    Returns:
        bool -- False if another instance with the same arguments is running, True otherwise.
    """
    if(ARGS.verbose):
        LOGGER.info("Checking for other SPPMon instances")
    self.pid_file_path = SppUtils.mk_logger_file(ARGS.configFile, ".pid_file")
    try:
        try:
            with open(self.pid_file_path, "rt") as file:
                match_list = re.findall(r"(\d+) " + str(ARGS), file.read())
            deleted_processes: List[str] = []
            for match in match_list:
                # add spaces to make clear the whole number is matched
                match = f' {match} '
                try:
                    if(os.name == 'nt'):
                        args = ['ps', '-W']
                    else:
                        args = ['ps', '-p', match]
                    result = subprocess.run(args, check=True, capture_output=True)
                    if(re.search(match, str(result.stdout))):
                        return False
                    # not in there -> delete the entry
                    deleted_processes.append(match)
                except CalledProcessError as error:
                    deleted_processes.append(match)

            # delete processes which got killed; not often called
            if(deleted_processes):
                with open(self.pid_file_path, "rt") as file:
                    file_str = file.read()
                options = str(ARGS)
                for pid in deleted_processes:
                    file_str = file_str.replace(f"{pid} {options}", "")
                # do not delete the file if empty, since we will use it below
                with open(self.pid_file_path, "wt") as file:
                    file.write(file_str.strip())

        except FileNotFoundError:
            pass  # no file created yet

        # always write your own pid into it
        with open(self.pid_file_path, "at") as file:
            file.write(f"{os.getpid()} {str(ARGS)}")
        return True
    except Exception as error:
        ExceptionUtils.exception_info(error)
        raise ValueError("Error when checking pid file")
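# --- Intended usage pattern (a sketch, not verbatim project code): guard the
# run with the pid file and clean up via remove_pid_file (defined further
# below) when exiting.
# if not self.check_pid_file():
#     ExceptionUtils.error_message("Another SPPMon instance with the same arguments is running")
#     self.exit(error_code=ERROR_CODE)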
def exit(self, error_code: int = False) -> NoReturn:
    """Executes finishing tasks and exits sppmon. To be called every time.

    Executes finishing tasks and displays error messages.
    Specify an error code if something went wrong.
    Use the error codes specified at the top of the module.
    Does NOT return.

    Keyword Arguments:
        error_code {int} -- Error code if an error occurred. (default: {False})
    """
    # error with the command line arguments
    # don't store the runtime here
    if(error_code == ERROR_CODE_CMD_LINE):
        prog_args = []
        prog_args.append(sys.argv[0])
        prog_args.append("--help")
        os.execv(sys.executable, ['python'] + prog_args)
        sys.exit(ERROR_CODE_CMD_LINE)  # unreachable in theory

    script_end_time = SppUtils.get_actual_time_sec()
    LOGGER.debug("Script end time: %d", script_end_time)

    try:
        if(not self.ignore_setup):
            self.store_script_metrics()

        if(self.influx_client):
            self.influx_client.disconnect()
        if(self.rest_client):
            self.rest_client.logout()

    except ValueError as error:
        ExceptionUtils.exception_info(
            error=error, extra_message="Error occurred while exiting sppmon")
        error_code = ERROR_CODE

    if(not error_code):
        LOGGER.info("\n\n!!! script completed !!!\n")

    self.remove_pid_file()

    # both clauses are actually the same, but kept for clarity;
    # the second one is always true for any non-zero number
    if(error_code == ERROR_CODE or error_code):
        ExceptionUtils.error_message("Error occurred while executing sppmon")
        print(f"check log for details: grep \"PID {os.getpid()}\" {self.log_path} > sppmon.log.{os.getpid()}")

    sys.exit(error_code)
def ssh(self) -> None:
    """Executes all ssh-related functionality, once per type of client."""
    LOGGER.info("> executing ssh commands for each sshclient-type individually.")
    for ssh_type in SshTypes:
        try:
            LOGGER.info(f">> executing ssh commands which are labeled to be executed for {ssh_type.value} ssh clients")
            self.__exec_save_commands(
                ssh_type=ssh_type,
                command_list=self.__client_commands[ssh_type] + self.__all_command_list)
        except ValueError as error:
            ExceptionUtils.exception_info(
                error=error,
                extra_message=f"Top-level error when executing {ssh_type.value} ssh commands, skipping them all")
def execute_commands(self, commands: List[SshCommand], verbose: bool = False) -> List[SshCommand]:
    """Executes the given commands on this ssh client. Returns a new list of commands.

    Automatically connects and disconnects.

    Arguments:
        commands {List[SshCommand]} -- List of commands to be executed

    Keyword Arguments:
        verbose {bool} -- whether to print the result (default: {False})

    Raises:
        ValueError: No list of commands given.
    """
    if(not commands or not isinstance(commands, list)):
        raise ValueError("Need a list of commands to execute")

    LOGGER.debug(f"> connecting to {self.client_type.name} client on host {self.host_name}")
    if(verbose):
        LOGGER.info(f"> connecting to {self.client_type.name} client on host {self.host_name}")
    self.connect()
    LOGGER.debug("> connection successful")
    if(verbose):
        LOGGER.info("> connection successful")

    new_command_list = []
    for ssh_command in commands:
        try:
            LOGGER.debug(f"Executing command {ssh_command.cmd} on host {self.host_name}")
            result = self.__send_command(ssh_command.cmd)
            # save the result
            new_command = ssh_command.save_result(result, self.host_name)
            LOGGER.debug(f"Command result: {result}")
        except ValueError as error:
            ExceptionUtils.exception_info(
                error=error,
                extra_message=f"failed to execute command on host {self.host_name}, skipping it: {ssh_command.cmd}")
            # make sure the result is not set
            new_command = ssh_command.save_result(result=None, host_name=self.host_name)

        new_command_list.append(new_command)

    self.disconnect()
    return new_command_list
def process_stats(self) -> None:
    """Executes all server-process-stats-related functionality."""
    try:
        LOGGER.info("> executing process_stats ssh commands")
        self.__exec_save_commands(
            ssh_type=SshTypes.SERVER,
            command_list=self.__client_commands[SshTypes.SERVER] + self.__all_command_list)
    except ValueError as error:
        ExceptionUtils.exception_info(
            error=error,
            extra_message="Top-level error when executing process_stats ssh commands, skipping them all")
def disconnect(self) -> None:
    """Disconnects the client from the remote server and finally flushes the buffer."""
    LOGGER.debug("disconnecting Influx database")

    # flush twice to make sure all metrics are sent: the first flush leaves
    # its own send statistics in the buffer, which the second flush sends
    try:
        self.flush_insert_buffer()
        self.flush_insert_buffer()
    except ValueError as error:
        ExceptionUtils.exception_info(
            error=error,
            extra_message="Failed to flush buffer on logout, possible data loss")

    self.__client.close()
def query_something(
        cls, name: str,
        source_func: Callable[[], List[Dict[str, Any]]],
        rename_tuples: List[Tuple[str, str]] = None,
        deactivate_verbose: bool = False) -> List[Dict[str, Any]]:
    """Generic function to query from the REST-API and rename elements within the result.

    Use deactivate_verbose to deactivate any result printing, e.g. if you want
    to compute on the result and query yourself.

    Arguments:
        name {str} -- Name of the item you want to query, used for the logger.
        source_func {Function} -- Function which returns a list of dicts with the elements wanted.

    Keyword Arguments:
        rename_tuples {list} -- List of (old_name, new_name) tuples if you want to rename keys. (default: {None})
        deactivate_verbose {bool} -- deactivates result prints within the function. (default: {False})

    Raises:
        ValueError: No name is provided
        ValueError: No function is provided or it is not a function

    Returns:
        list -- List of dicts with the results.
    """
    # None checks
    if(rename_tuples is None):
        rename_tuples = []
    if(not name):
        raise ValueError("need a name to query something")
    if(not source_func):
        raise ValueError("need a source function to query data")

    LOGGER.info("> getting %s", name)

    # request all elements from the source function
    elem_list = source_func()

    if(not elem_list):
        ExceptionUtils.error_message(f">> No {name} are found")

    if(rename_tuples):
        for elem in elem_list:
            # rename fields to make them more informative.
            for (old_name, new_name) in rename_tuples:
                elem[new_name] = elem.pop(old_name)

    if(cls.verbose and not deactivate_verbose):
        MethodUtils.my_print(elem_list)
    return elem_list
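# --- Usage sketch: querying the site list and renaming the REST field "id" to
# the column name "siteId". The source function is a hypothetical stand-in.
def example_query_sites(api_queries) -> List[Dict[str, Any]]:
    return MethodUtils.query_something(
        name="sites",
        source_func=api_queries.get_site_list,  # assumed helper
        rename_tuples=[("id", "siteId")])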
def remove_pid_file(self) -> None:
    """Removes this instance's entry from the pid file; deletes the file if it is empty afterwards."""
    try:
        with open(self.pid_file_path, "rt") as file:
            file_str = file.read()
        new_file_str = file_str.replace(f"{os.getpid()} {str(ARGS)}", "").strip()
        if(not new_file_str):
            os.remove(self.pid_file_path)
        else:
            with open(self.pid_file_path, "wt") as file:
                file.write(new_file_str)
    except Exception as error:
        ExceptionUtils.exception_info(error, "Error when removing pid_file")
def __init__(self, name: str, database: Database, duration: str,
             replication: int = 1, shard_duration: str = "0s",
             default: bool = False) -> None:
    if(not name):
        raise ValueError("need retention policy name for creation")
    if(not database):
        raise ValueError("need retention policy database for creation")
    if(not duration):
        raise ValueError("need retention policy duration for creation")
    if(not replication):
        raise ValueError("need retention policy replication factor for creation")
    if(not shard_duration):
        raise ValueError("need retention policy shard duration for creation")
    if(default is None):
        raise ValueError("need retention policy default setting for creation")

    self.__name = name
    self.__database = database
    self.__replication = replication
    self.__default = default

    try:  # typed as str due to the transformation method
        self.__duration: str = InfluxUtils.transform_time_literal(
            duration, single_vals=False)
    except ValueError as error:
        ExceptionUtils.exception_info(error)
        raise ValueError(
            f"duration for retention policy {name} is not in the correct time format")

    try:  # typed as str due to the transformation method
        self.__shard_duration: str = InfluxUtils.transform_time_literal(
            shard_duration, single_vals=False)
    except ValueError as error:
        ExceptionUtils.exception_info(error)
        raise ValueError(
            f"shard duration for retention policy {name} is not in the correct time format")
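# --- Construction sketch with illustrative values; `database` is assumed to be
# an existing Database instance. Both time literals are normalized by
# transform_time_literal before being stored.
def example_retention_policy(database: Database) -> RetentionPolicy:
    return RetentionPolicy(
        name="rp_90d",
        database=database,
        duration="90d",
        replication=1,
        shard_duration="7d",
        default=True)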
def site_name_by_id(self, site_id: Union[int, str]) -> Optional[str]:
    """Returns the site name for an associated site id.

    Uses an already buffered result if possible, otherwise queries the influxdb for the name.

    Arguments:
        site_id {Union[int, str]} -- id of the site

    Returns:
        Optional[str] -- name of the site, None if not found.
    """
    if(site_id is None):
        ExceptionUtils.error_message("siteId is None, returning None")
        return None

    # if it is a string, parse it to an int
    if(isinstance(site_id, str)):
        site_id = site_id.strip(" ")
        if(re.match(r"\d+", site_id)):
            site_id = int(site_id)
        else:
            ExceptionUtils.error_message("siteId is of an unsupported string format")
            return None
    # if it is still not an int, error out
    if(not isinstance(site_id, int)):
        ExceptionUtils.error_message("site id is of an unsupported type")
        return None

    # return if already saved -> by a previous call or the `sites` call
    result = self.__site_name_dict.get(site_id, None)
    if(result is not None):  # an empty str is allowed
        return result

    table_name = 'sites'
    table = self.__influx_client.database[table_name]
    query = SelectionQuery(
        keyword=Keyword.SELECT,
        tables=[table],
        # description, throttleRates included since we need a field to query
        fields=["siteId", "siteName", "description", "throttleRates"],
        where_str=f"siteId = \'{site_id}\'",
        order_direction="DESC",
        limit=1)
    result_set = self.__influx_client.send_selection_query(query)  # type: ignore
    result_dict: Dict[str, Any] = next(result_set.get_points(), None)  # type: ignore
    if(not result_dict):
        ExceptionUtils.error_message(f"no site with the id {site_id} exists")
        return None

    # save the result and return it
    result = result_dict['siteName']
    self.__site_name_dict[site_id] = result
    return result
def __send_command(self, ssh_command: str) -> str:
    """Sends a command to the ssh client. Raises an error if it fails.

    You may need to json.load the result if it is a dict.

    Arguments:
        ssh_command {str} -- Command to be sent as str

    Raises:
        ValueError: No command given.
        ValueError: Result is empty
        ValueError: Error when executing command
        ValueError: Paramiko error

    Returns:
        str -- result of the command as str
    """
    if(not ssh_command):
        raise ValueError("need command to execute")
    LOGGER.debug(f">> executing command: {ssh_command}")

    try:
        (ssh_stdin, ssh_stdout, ssh_stderr) = self.__client_ssh.exec_command(ssh_command)  # type: ignore
        response_cmd = ssh_stdout.read()  # type: ignore
        if(not response_cmd):
            raise ValueError("Result of ssh command is empty.", ssh_command)

        sq_result: str = response_cmd.decode()
        if(re.match(r"ERROR:.*", sq_result)):
            raise ValueError("Error when executing command", ssh_command, sq_result)

        return sq_result
    except paramiko.ssh_exception.SSHException as error:  # type: ignore
        ExceptionUtils.exception_info(error=error)  # type: ignore
        raise ValueError("paramiko error when executing ssh-command", error)  # type: ignore
def setup_ssh_clients(config_file: Dict[str, Any]) -> List[SshClient]:
    """Creates a ssh client for each entry of the 'sshclients' config section, skipping broken entries."""
    auth_ssh = SppUtils.get_cfg_params(param_dict=config_file, param_name="sshclients")

    if(not isinstance(auth_ssh, list)):
        raise ValueError("not a list of ssh configs given", auth_ssh)

    ssh_clients: List[SshClient] = []
    for client_ssh in auth_ssh:
        try:
            ssh_clients.append(SshClient(client_ssh))
        except ValueError as error:
            ExceptionUtils.exception_info(
                error=error,
                extra_message="Setting up one ssh-client failed, skipping it. Client: "
                + f"{client_ssh.get('name', 'ERROR WHEN GETTING NAME')}")
    return ssh_clients
def __init__(self, config_file: Dict[str, Any]):
    """Initialize the influx client from a config file. Call `connect` before using the client.

    Arguments:
        config_file {Dict[str, Any]} -- Opened config file with the required parameters.

    Raises:
        ValueError: Raised if any important parameter is missing within the file
    """
    if(not config_file):
        raise ValueError("A config file is required to setup the InfluxDB client.")

    auth_influx = SppUtils.get_cfg_params(param_dict=config_file, param_name="influxDB")
    if(not isinstance(auth_influx, dict)):
        raise ValueError("The InfluxDB config is corrupted within the file: it needs to be a dictionary.")

    try:
        self.__user: str = auth_influx["username"]
        self.__password: str = auth_influx["password"]
        self.__use_ssl: bool = auth_influx["ssl"]
        if(self.__use_ssl):
            self.__verify_ssl: bool = auth_influx["verify_ssl"]
        else:
            self.__verify_ssl = False
        self.__port: int = auth_influx["srv_port"]
        self.__address: str = auth_influx["srv_address"]
        self.__database: Database = Database(auth_influx["dbName"])
        # create table definitions in code
        Definitions.add_table_definitions(self.database)

        self.__metrics_table: Table = self.database['influx_metrics']
    except KeyError as key_error:
        ExceptionUtils.exception_info(error=key_error)
        raise ValueError("Missing Influx-Config arg", str(key_error))

    # declare for later use
    self.__client: InfluxDBClient
    self.__version: str
def setup_args(self) -> None:
    """Sets up all required parameters and transforms arg groups into individual args."""
    # ## call functions based on cmdline parameters

    # Temporary features / Deprecated
    if(OPTIONS.minimumLogs):
        ExceptionUtils.error_message(
            "DEPRECATED: using deprecated argument '--minimumLogs'. Switch to '--loadedSystem'.")

    # incremental setup: a higher tier executes everything below it
    # (see the standalone sketch after this function)
    all_args: bool = OPTIONS.all
    daily: bool = OPTIONS.daily or all_args
    hourly: bool = OPTIONS.hourly or daily
    constant: bool = OPTIONS.constant or hourly

    # ######## All Methods #################
    self.sites: bool = OPTIONS.sites or all_args

    # ######## Daily Methods ###############
    self.vms: bool = OPTIONS.vms or daily
    self.job_logs: bool = OPTIONS.jobLogs or daily
    self.sla_stats: bool = OPTIONS.slaStats or daily
    self.vm_stats: bool = OPTIONS.vmStats or daily

    # ######## Hourly Methods ##############
    self.jobs: bool = OPTIONS.jobs or hourly
    self.vadps: bool = OPTIONS.vadps or hourly
    self.storages: bool = OPTIONS.storages or hourly
    # ssh vsnap pools ?

    # ######## Constant Methods ############
    self.ssh: bool = OPTIONS.ssh or constant
    self.process_stats: bool = OPTIONS.processStats or constant
    self.cpu: bool = OPTIONS.cpu or constant
    self.spp_catalog: bool = OPTIONS.sppcatalog or constant
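# --- A standalone sketch (not project code) of the same cascade, showing that
# enabling a higher tier enables all lower tiers as well.
def cascade(all_args: bool, daily: bool, hourly: bool, constant: bool) -> Tuple[bool, bool, bool, bool]:
    daily = daily or all_args
    hourly = hourly or daily
    constant = constant or hourly
    return (all_args, daily, hourly, constant)

# cascade(False, True, False, False) -> (False, True, True, True)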