def __init__(self, table: Structures.Table, fields: Dict[str, Any],
             tags: Dict[str, Any] = None,
             time_stamp: Union[int, str, None] = None):
    """Creates an insert query for the given table.

    Arguments:
        table {Structures.Table} -- table to insert into, must not be None
        fields {Dict[str, Any]} -- fields to insert, at least one usable value required
        tags {Dict[str, Any]} -- tags to insert (default: {None} -> empty dict)
        time_stamp {Union[int, str, None]} -- timestamp of the record (default: {None} -> current time)

    Raises:
        ValueError: no table or no fields given, or no usable field value remains after formatting.
    """
    if not table:
        raise ValueError("need table to create query")
    if not fields:
        raise ValueError("need at least one value to create query")
    if tags is None:
        tags = {}
    if time_stamp is None:
        time_stamp = SppUtils.get_actual_time_sec()

    # Keyword is always Insert since insert Statement
    self.__keyword = Keyword.INSERT
    self.__table = table
    self.__time_stamp = SppUtils.to_epoch_secs(time_stamp)

    fields = self.format_fields(fields)

    # make sure you have some fields if they are not provided
    if not any(value is not None for value in fields.values()):
        # need default def to be able to do anything
        if not table.fields:
            raise ValueError("fields after formatting empty, need at least one value!")

        # only works for strings, any other addition would corrupt the data
        for (key, datatype) in table.fields.items():
            if datatype is Structures.Datatype.STRING:
                fields[key] = '\"autofilled\"'
                break

        # re-check: autofill only succeeds when a STRING field is declared
        if not any(value is not None for value in fields.values()):
            raise ValueError("fields after formatting empty, need at least one value!")

    self.__fields: Dict[str, Union[float, str, bool]] = fields
    self.__tags: Dict[str, str] = self.format_tags(tags)
def __job_logs_to_stats(self, list_with_logs: List[Dict[str, Any]]) -> None:
    """Parses joblogs into their own statistic table, using declared supported ID's.

    To parse more jobLogs define additional entrys in the attribute `supported_ids`.

    Arguments:
        list_with_logs {List[Dict[str, Any]]} -- List with all saved joblogs
    """
    # only continue with joblogs we want to save
    supported_log_iterator = filter(lambda log: log['messageId'] in self.__supported_ids, list_with_logs)
    sorted_log_iterator = sorted(supported_log_iterator, key=lambda entry: entry['logTime'])
    max_sec_timestamp = 0  # required for preventing duplicates

    for job_log in sorted_log_iterator:
        message_id = job_log['messageId']

        table_func_tuple = self.__supported_ids[message_id]
        (table_name, row_dict_func) = table_func_tuple
        if not table_name:
            # fall back to the message id as table name
            table_name = message_id

        try:
            row_dict = row_dict_func(job_log['messageParams'])
        except KeyError as error:
            ExceptionUtils.exception_info(
                error, extra_message="MessageID params wrong defined. Skipping one MessageId")
            continue

        row_dict['messageId'] = message_id

        # Issue 9: in cases where all tag values duplicate another record, including the
        # timestamp, Influx silently throws the insert out as a duplicate. Reducing epoch
        # timestamps from millisecond to second precision can cause such duplicate
        # timestamps. To avoid this for certain tables, add seconds to the timestamp as
        # needed to ensure uniqueness. Only use this when some inaccuracy of the
        # timestamps is acceptable.
        cur_timestamp = job_log['logTime']
        if table_name == 'vmBackupSummary':
            if cur_timestamp is None:  # prevent None
                ExceptionUtils.error_message(f"Warning: logTime is None, duplicate may be purged. Log: {job_log}")
            if isinstance(cur_timestamp, str):  # make sure its int
                cur_timestamp = int(cur_timestamp)

            cur_sec_timestamp = SppUtils.to_epoch_secs(cur_timestamp)
            if cur_sec_timestamp <= max_sec_timestamp:
                # scale factor between the original precision and seconds (e.g. 1000 for ms)
                digits = int(cur_timestamp / cur_sec_timestamp)
                max_sec_timestamp += 1  # increase by 1 second
                cur_timestamp = max_sec_timestamp * digits
            else:
                max_sec_timestamp = cur_sec_timestamp

        row_dict['time'] = cur_timestamp

        # replace SPP's textual nulls by a real None so influx drops the field
        for (key, item) in row_dict.items():
            if item in ('null', 'null(null)'):
                row_dict[key] = None

        self.__influx_client.insert_dicts_to_buffer(table_name, [row_dict])
def format_fields(self, fields: Dict[str, Any]) -> Dict[str, Union[int, float, str]]:
    """Formats fields accordingly to the requirements of the influxdb.

    Cast and transforms all values to the required datatype, declared in the
    table stored on this query (`self.table`). Escapes all characters which
    are not allowed, applies to both key and value. Fields whose value is
    None or an empty string are dropped.

    Arguments:
        fields {Dict[str, Any]} -- Dict of all fields to be formatted, key is name, value is data

    Returns:
        Dict[str, Union[int, float, str]] -- Dict with field name as key and data as value
    """
    ret_dict: Dict[str, Union[int, float, str]] = {}
    for (key, value) in fields.items():
        # skip empty values, nothing useful to save
        if value is None or (isinstance(value, str) and not value):
            continue

        # get column datatype; if nothing is declared select it automatically
        datatype = self.table.fields.get(key, None)
        if datatype is None:
            datatype = Structures.Datatype.get_auto_datatype(value)

        # escape not allowed chars in key
        key = InfluxUtils.escape_chars(value=key, replace_list=self.__bad_name_characters)

        # format strings: escape inner quotes and wrap in double quotes
        if datatype == Structures.Datatype.STRING:
            value = InfluxUtils.escape_chars(value=value, replace_list=[(r'"', r'\"')])
            value = f"\"{value}\""

        # make time always be saved in seconds, save as int-literal
        if datatype == Structures.Datatype.TIMESTAMP:
            value = SppUtils.to_epoch_secs(value)
            value = f'{value}i'

        # make integer to an IntLiteral
        if datatype == Structures.Datatype.INT:
            value = f'{value}i'

        ret_dict[key] = value

    return ret_dict
def create_inventory_summary(self) -> None:
    """Retrieves and calculate VM inventory summary by influx catalog data.

    Queries the timestamp of the latest stored VM; if stats for that
    timestamp already exist the computation is skipped. Otherwise counts,
    sizes and uptimes are aggregated over all VMs of that timestamp and
    stored in the `vmStats` table.

    Raises:
        ValueError: no VM's are stored yet, or the stats computation failed.
    """
    LOGGER.info(
        "> computing inventory information (not from catalog, means not only backup data is calculated)")
    # ########## Part 1: Check if something need to be computed #############
    # query the timestamp of the last vm, commited as a field is always needed by influx rules.
    vms_table = self.__influx_client.database["vms"]
    time_query = SelectionQuery(
        keyword=Keyword.SELECT,
        tables=[vms_table],
        fields=['time', 'commited'],
        limit=1,
        order_direction="DESC"
    )
    result = self.__influx_client.send_selection_query(time_query)  # type: ignore
    last_vm: Dict[str, Any] = next(result.get_points(), None)  # type: ignore
    if not last_vm:
        raise ValueError("no VM's stored, either none are available or you have to store vm's first")

    # query the last vm stats to compare timestamps with last vm
    last_time_ms: int = last_vm["time"]
    last_time = SppUtils.to_epoch_secs(last_time_ms)
    where_str = "time = {}s".format(last_time)

    vm_stats_table = self.__influx_client.database["vmStats"]
    vm_stats_query = SelectionQuery(
        keyword=Keyword.SELECT,
        tables=[vm_stats_table],
        fields=['*'],
        where_str=where_str,
        limit=1
    )
    result = self.__influx_client.send_selection_query(vm_stats_query)  # type: ignore
    if len(list(result.get_points())) > 0:  # type: ignore
        LOGGER.info(">> vm statistics already computed, skipping")
        return

    # ####################### Part 2: Compute new Data ####################
    fields = [
        'uptime', 'powerState', 'commited', 'uncommited', 'memory', 'host',
        'vmVersion', 'isProtected', 'inHLO', 'isEncrypted', 'datacenterName',
        'hypervisorType',
    ]
    query = SelectionQuery(
        keyword=Keyword.SELECT,
        tables=[vms_table],
        fields=fields,
        where_str=where_str
    )
    result = self.__influx_client.send_selection_query(query)  # type: ignore
    all_vms_list: List[Dict[str, Union[str, int, float, bool]]] = list(result.get_points())  # type: ignore

    # skip if no new data can be computed
    if not all_vms_list:
        raise ValueError("no VM's stored, either none are available or store vms first")

    vm_stats: Dict[str, Any] = {}
    try:
        vm_stats['vmCount'] = len(all_vms_list)

        def count_where(predicate) -> int:
            """Counts the VMs of the queried list matching the given predicate."""
            return sum(1 for vm in all_vms_list if predicate(vm))

        # returns largest/smallest
        vm_stats['vmMaxSize'] = max(all_vms_list, key=lambda mydict: mydict['commited'])['commited']
        # on purpose zero size vm's are ignored
        vms_no_null_size = [vm for vm in all_vms_list if vm['commited'] > 0]
        if vms_no_null_size:
            vm_stats['vmMinSize'] = min(vms_no_null_size, key=lambda mydict: mydict['commited'])['commited']
        vm_stats['vmSizeTotal'] = sum(mydict['commited'] for mydict in all_vms_list)
        vm_stats['vmAvgSize'] = vm_stats['vmSizeTotal'] / vm_stats['vmCount']

        # returns largest/smallest
        vm_stats['vmMaxUptime'] = max(all_vms_list, key=lambda mydict: mydict['uptime'])['uptime']
        # on purpose zero uptime vm's are ignored
        vms_no_null_time = [vm for vm in all_vms_list if vm['uptime'] > 0]
        if vms_no_null_time:
            vm_stats['vmMinUptime'] = min(vms_no_null_time, key=lambda mydict: mydict['uptime'])['uptime']
        vm_stats['vmUptimeTotal'] = sum(mydict['uptime'] for mydict in all_vms_list)
        vm_stats['vmAvgUptime'] = vm_stats['vmUptimeTotal'] / vm_stats['vmCount']

        # NOTE(review): boolean flags appear to come back from influx as the
        # strings "True"/"False" — the comparisons below rely on that.
        vm_stats['vmCountProtected'] = count_where(lambda vm: vm['isProtected'] == "True")
        vm_stats['vmCountUnprotected'] = vm_stats['vmCount'] - vm_stats['vmCountProtected']

        vm_stats['vmCountEncrypted'] = count_where(lambda vm: vm['isEncrypted'] == "True")
        vm_stats['vmCountPlain'] = vm_stats['vmCount'] - vm_stats['vmCountEncrypted']

        vm_stats['vmCountHLO'] = count_where(lambda vm: vm['inHLO'] == "True")
        vm_stats['vmCountNotHLO'] = vm_stats['vmCount'] - vm_stats['vmCountHLO']

        vm_stats['vmCountVMware'] = count_where(lambda vm: vm['hypervisorType'] == "vmware")
        vm_stats['vmCountHyperV'] = count_where(lambda vm: vm['hypervisorType'] == "hyperv")

        vm_stats['nrDataCenters'] = len({vm['datacenterName'] for vm in all_vms_list})
        vm_stats['nrHosts'] = len({vm['host'] for vm in all_vms_list})

        vm_stats['time'] = all_vms_list[0]['time']

        if self.__verbose:
            MethodUtils.my_print([vm_stats])

    except (ZeroDivisionError, AttributeError, KeyError, ValueError) as error:
        ExceptionUtils.exception_info(error=error)
        # chain the original cause so the traceback shows what actually failed
        raise ValueError("error when computing extra vm stats", vm_stats) from error

    LOGGER.info(">> store vmInventory information in Influx DB")
    self.__influx_client.insert_dicts_to_buffer("vmStats", [vm_stats])