    def storages(self) -> None:
        """Saves all storages such as vsnaps."""

        table_name = 'storages'

        # deactivate verbose to avoid double print
        result = MethodUtils.query_something(
            name=table_name,
            source_func=self.__api_queries.get_storages,
            deactivate_verbose=True
        )

        # get calculated extra info
        for row in result:
            row['siteName'] = self.__system_methods.site_name_by_id(row['site'])
            if('free' in row and 'total' in row
               and row['free'] > 0 and row['total'] > 0):
                row['used'] = row['total'] - row['free']
                row['pct_free'] = row['free'] / row['total'] * 100
                row['pct_used'] = row['used'] / row['total'] * 100

        if(self.__verbose):
            MethodUtils.my_print(data=result)

        LOGGER.info(">> inserting storage info into database")

        self.__influx_client.insert_dicts_to_buffer(table_name=table_name, list_with_dicts=result)
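
    # Minimal sketch (not part of the original module): how the derived storage
    # fields above behave for a hypothetical vSnap row. All values are invented.
    example_storage_row = {'free': 250, 'total': 1000}
    example_storage_row['used'] = example_storage_row['total'] - example_storage_row['free']            # 750
    example_storage_row['pct_free'] = example_storage_row['free'] / example_storage_row['total'] * 100  # 25.0
    example_storage_row['pct_used'] = example_storage_row['used'] / example_storage_row['total'] * 100  # 75.0
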
    def sla_dumps(self) -> None:
        """Captures and saves SLA subpolicys."""
        # capture and display / store SLA dumps
        sla_dump_list = MethodUtils.query_something(
            name="slaDumps",
            source_func=self.__api_queries.get_sla_dump,
            rename_tuples=[
                ("id", "slaId"),
                ("subpolicy", "slaDump"),
                ("name", "slaName")
            ]
        )

        LOGGER.info(">> updating slaStat table with dump of SLA subpolicy")
        table_name = "slaStats"
        for row in sla_dump_list:
            sla_dump = row['slaDump']
            time_stamp = row[SppUtils.capture_time_key]
            sla_id = row['slaId']
            tag_dic = {}
            field_dic = {'slaDump': sla_dump}
            self.__influx_client.update_row(
                table_name=table_name,
                tag_dic=tag_dic,
                field_dic=field_dic,
                where_str="time = {}ms AND slaId = \'{}\'".format(
                    time_stamp, sla_id)
            )
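
    # Minimal sketch (not part of the original module): the WHERE clause built in
    # `sla_dumps` above, shown for an invented capture timestamp and SLA id.
    example_time_stamp = 1612345678000  # epoch time in ms
    example_sla_id = "2101"
    example_where_str = "time = {}ms AND slaId = '{}'".format(example_time_stamp, example_sla_id)
    # -> "time = 1612345678000ms AND slaId = '2101'"
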
Example No. 3
    def get_all_jobs(self) -> None:
        """incrementally saves all stored jobsessions, even before first execution of sppmon"""

        job_list = MethodUtils.query_something(
            name="job list", source_func=self.__api_queries.get_job_list)

        for job in job_list:
            job_id = job.get("id", None)
            job_name = job.get("name", None)

            # written this way to also catch empty strings, not only missing keys
            if (not job_id or not job_name):
                ExceptionUtils.error_message(
                    f"skipping, missing name or id for job {job}")
                continue
            LOGGER.info(
                ">> capturing Job information for Job \"{}\"".format(job_name))

            try:
                self.__job_by_id(job_id=job_id)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error=error,
                    extra_message=
                    f"error when getting jobs for {job_name}, skipping it")
                continue
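
    # Minimal sketch (not part of the original module): why the falsy check above
    # also skips empty strings, not only missing keys. The job dicts are invented.
    example_jobs = [
        {"id": "1001", "name": "vmwareBackup"},   # kept
        {"id": "", "name": "officeBackup"},       # skipped: empty id is falsy
        {"name": "sqlBackup"},                    # skipped: id missing entirely
    ]
    kept_jobs = [job for job in example_jobs
                 if job.get("id", None) and job.get("name", None)]
    # -> only the first entry remains
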
    def vadps(self) -> None:
        """Requests and stores all VAPD proxys from the SPP-server."""
        table_name = 'vadps'
        result = MethodUtils.query_something(
            name=table_name,
            source_func=self.__api_queries.get_vadps,
            rename_tuples=[
                ('id', 'vadpId'),
                ('displayName', 'vadpName')
            ],
            deactivate_verbose=True
            )
        for row in result:
            row['siteName'] = self.__system_methods.site_name_by_id(row['siteId'])
        if(self.__verbose):
            MethodUtils.my_print(result)

        self.__influx_client.insert_dicts_to_buffer(table_name=table_name, list_with_dicts=result)
Example No. 5
    def sppcatalog(self) -> None:
        """Saves the spp filesystem catalog information."""
        result = MethodUtils.query_something(
            name="sppcatalog stats",
            source_func=self.__api_queries.get_file_system,
            deactivate_verbose=True)
        value_renames = {
            'Configuration': "Configuration",
            'Search': "File",
            'System': "System",
            'Catalog': "Recovery"
        }
        for row in result:
            row['name'] = value_renames[row['name']]

        if (self.__verbose):
            MethodUtils.my_print(result)
        self.__influx_client.insert_dicts_to_buffer("sppcatalog", result)
    def vms_per_sla(self) -> None:
        """Calculates the number of VMs per SLA. Hypervisors are not supported yet."""
        LOGGER.info("> calculating number of VMs per SLA")
        result = MethodUtils.query_something(
            name="VMs per SLA",
            source_func=self.__api_queries.get_vms_per_sla
        )
        LOGGER.info(">> inserting number of VMs per SLA into DB")
        self.__influx_client.insert_dicts_to_buffer(
            table_name="slaStats", list_with_dicts=result)
Example No. 7
    def __exec_save_commands(self, ssh_type: SshTypes,
                             command_list: List[SshCommand]) -> None:
        """Helper method, executes and saves all commands via ssh for all clients of the given type.

        Used because the results are saved individually plus the verbose print.
        This functionality is not integrated in MethodUtils because of the missing InfluxClient in a static context.

        Arguments:
            ssh_type {SshTypes} -- all clients of this type are going to be queried
            command_list {List[SshCommand]} -- list of commands to be executed on all clients.
        """
        result_tuples = MethodUtils.ssh_execute_commands(
            ssh_clients=self.__ssh_clients,
            ssh_type=ssh_type,
            command_list=command_list)
        for (table_name, insert_list) in result_tuples:
            if (self.__verbose):
                MethodUtils.my_print(insert_list)

            self.__influx_client.insert_dicts_to_buffer(
                table_name=table_name, list_with_dicts=insert_list)
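
    # Minimal sketch (not part of the original module): the shape consumed above is
    # a list of (table_name, list_of_row_dicts) tuples, one per executed command.
    # The table names and rows below are invented placeholders.
    example_result_tuples = [
        ("processStats", [{"name": "mongod", "cpu": 1.5}]),
        ("sshStorageUsage", [{"filesystem": "/dev/sda1", "used": 1024}]),
    ]
    for (example_table_name, example_insert_list) in example_result_tuples:
        pass  # each list would be printed (if verbose) and inserted into its own table
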
Example No. 8
    def cpuram(self) -> None:
        """Saves the cpu and ram usage of the spp system."""
        table_name = 'cpuram'

        result = MethodUtils.query_something(
            name=table_name,
            rename_tuples=[
                ('data.size', 'dataSize'),
                ('data.util', 'dataUtil'),
                ('data2.size', 'data2Size'),
                ('data2.util', 'data2Util'),
                ('data3.size', 'data3Size'),
                ('data3.util', 'data3Util'),
                ('memory.size', 'memorySize'),
                ('memory.util', 'memoryUtil'),
            ],
            source_func=self.__api_queries.get_server_metrics)
        self.__influx_client.insert_dicts_to_buffer(table_name=table_name,
                                                    list_with_dicts=result)
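
    # Minimal sketch (not part of the original module): the rename_tuples above
    # assume the API response was flattened into dotted keys first (presumably
    # inside MethodUtils.query_something). Shown here on an invented metrics row.
    example_metrics_row = {'memory.size': 16384, 'memory.util': 42.5}
    example_rename_tuples = [('memory.size', 'memorySize'), ('memory.util', 'memoryUtil')]
    for (example_old_key, example_new_key) in example_rename_tuples:
        if example_old_key in example_metrics_row:
            example_metrics_row[example_new_key] = example_metrics_row.pop(example_old_key)
    # -> {'memorySize': 16384, 'memoryUtil': 42.5}
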
Example No. 9
    def sites(self) -> None:
        """Collects all site informations including throttle rate.

        This information does not contain much statistic information.
        It should only be called if new sites were added or changed.
        """
        table_name = 'sites'

        result = MethodUtils.query_something(
            name=table_name,
            source_func=self.__api_queries.get_sites,
            rename_tuples=[('id', 'siteId'), ('name', 'siteName'),
                           ('throttles', 'throttleRates')])
        # save results for renames later
        for row in result:
            self.__site_name_dict[row['siteId']] = row['siteName']

        self.__influx_client.insert_dicts_to_buffer(table_name=table_name,
                                                    list_with_dicts=result)
    def store_vms(self) -> None:
        """Stores all vms stats individually

        Those are reused later to compute vm_stats
        """
        all_vms_list = MethodUtils.query_something(
            name="all VMs",
            source_func=self.__api_queries.get_all_vms,
            rename_tuples=[
                ("properties.datacenter.name", "datacenterName")
            ],
            deactivate_verbose=True)

        if(self.__verbose):
            LOGGER.info(f"found {len(all_vms_list)} vm's.")

        self.__influx_client.insert_dicts_to_buffer(
            table_name="vms",
            list_with_dicts=all_vms_list
        )
Example No. 11
    def sites(self) -> None:
        """Collects all site informations including throttle rate.

        This information does not contain much statistic information.
        It should only be called if new sites were added or changed.
        """
        table_name = 'sites'

        result = MethodUtils.query_something(
            name=table_name,
            source_func=self.__api_queries.get_sites,
            rename_tuples=[('id', 'siteId'), ('name', 'siteName'),
                           ('throttles', 'throttleRates')])
        LOGGER.debug(f"sites: {result}")
        # save results into internal storage to avoid an additional request for IDs
        # used instead of `site_name_by_id`
        for row in result:
            self.__site_name_dict[row['siteId']] = row['siteName']
            # explicit None check since [] also needs to be converted into a str
            if (row['throttleRates'] is not None):
                row['throttleRates'] = str(row['throttleRates'])

        self.__influx_client.insert_dicts_to_buffer(table_name=table_name,
                                                    list_with_dicts=result)
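
    # Minimal sketch (not part of the original module): why the explicit None check
    # above matters. An empty throttle list [] is falsy but must still be stored as
    # the string "[]", while a real None stays None. The rows below are invented.
    example_site_rows = [{'throttleRates': []}, {'throttleRates': None},
                         {'throttleRates': [{'rate': 100}]}]
    for example_site_row in example_site_rows:
        if example_site_row['throttleRates'] is not None:
            example_site_row['throttleRates'] = str(example_site_row['throttleRates'])
    # -> "[]", None, "[{'rate': 100}]"
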
    def create_inventory_summary(self) -> None:
        """Retrieves and calculate VM inventory summary by influx catalog data."""

        LOGGER.info(
            "> computing inventory information (not from catalog, means not only backup data is calculated)")

        # ########## Part 1: Check if something needs to be computed #############
        # query the timestamp of the last vm; the 'commited' field is requested since influx always needs at least one field
        vms_table = self.__influx_client.database["vms"]

        time_query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[vms_table],
            fields=['time', 'commited'],
            limit=1,
            order_direction="DESC"
        )
        result = self.__influx_client.send_selection_query(time_query) # type: ignore
        last_vm: Dict[str, Any] = next(result.get_points(), None) # type: ignore

        if(not last_vm):
            raise ValueError("no VM's stored, either none are available or you have to store vm's first")

        # query the last vm stats to compare timestamps with last vm
        last_time_ms: int = last_vm["time"]
        last_time = SppUtils.epoch_time_to_seconds(last_time_ms)
        where_str = "time = {}s".format(last_time)

        vm_stats_table = self.__influx_client.database["vmStats"]

        vm_stats_query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[vm_stats_table],
            fields=['*'],
            where_str=where_str,
            limit=1
        )
        result = self.__influx_client.send_selection_query(vm_stats_query) # type: ignore
        if(len(list(result.get_points())) > 0): # type: ignore
            LOGGER.info(">> vm statistics already computed, skipping")
            return

        # ####################### Part 2: Compute new Data ####################
        fields = [
            'uptime',
            'powerState',
            'commited',
            'uncommited',
            'memory',
            'host',
            'vmVersion',
            'isProtected',
            'inHLO',
            'isEncrypted',
            'datacenterName',
            'hypervisorType',
        ]
        query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[vms_table],
            fields=fields,
            where_str=where_str
        )
        result = self.__influx_client.send_selection_query(query) # type: ignore

        all_vms_list: List[Dict[str, Union[str, int, float, bool]]] = list(result.get_points()) # type: ignore

        # abort if no VM data is available for this timestamp
        if(not all_vms_list):
            raise ValueError("no VM's stored, either none are available or store vms first")

        vm_stats: Dict[str, Any] = {}
        try:
            vm_stats['vmCount'] = len(all_vms_list)

            # max()/min() return the row with the largest/smallest value
            vm_stats['vmMaxSize'] = max(all_vms_list, key=(lambda mydict: mydict['commited']))['commited']
            # zero-size VMs are ignored on purpose
            vms_no_null_size = list(filter(lambda mydict: mydict['commited'] > 0, all_vms_list))
            if(vms_no_null_size):
                vm_stats['vmMinSize'] = min(vms_no_null_size, key=(lambda mydict: mydict['commited']))['commited']
            vm_stats['vmSizeTotal'] = sum(mydict['commited'] for mydict in all_vms_list)
            vm_stats['vmAvgSize'] = vm_stats['vmSizeTotal'] / vm_stats['vmCount']

            # max()/min() return the row with the largest/smallest value
            vm_stats['vmMaxUptime'] = max(all_vms_list, key=(lambda mydict: mydict['uptime']))['uptime']
            # zero-uptime VMs are ignored on purpose
            vms_no_null_time = list(filter(lambda mydict: mydict['uptime'] > 0, all_vms_list))
            if(vms_no_null_time):
                vm_stats['vmMinUptime'] = min(vms_no_null_time, key=(lambda mydict: mydict['uptime']))['uptime']
            vm_stats['vmUptimeTotal'] = sum(mydict['uptime'] for mydict in all_vms_list)
            vm_stats['vmAvgUptime'] = vm_stats['vmUptimeTotal'] / vm_stats['vmCount']

            vm_stats['vmCountProtected'] = len(list(filter(lambda mydict: mydict['isProtected'] == "True", all_vms_list)))
            vm_stats['vmCountUnprotected'] = vm_stats['vmCount'] - vm_stats['vmCountProtected']
            vm_stats['vmCountEncrypted'] = len(list(filter(lambda mydict: mydict['isEncrypted'] == "True", all_vms_list)))
            vm_stats['vmCountPlain'] = vm_stats['vmCount'] - vm_stats['vmCountEncrypted']
            vm_stats['vmCountHLO'] = len(list(filter(lambda mydict: mydict['inHLO'] == "True", all_vms_list)))
            vm_stats['vmCountNotHLO'] = vm_stats['vmCount'] - vm_stats['vmCountHLO']


            vm_stats['vmCountVMware'] = len(list(filter(lambda mydict: mydict['hypervisorType'] == "vmware", all_vms_list)))
            vm_stats['vmCountHyperV'] = len(list(filter(lambda mydict: mydict['hypervisorType'] == "hyperv", all_vms_list)))


            vm_stats['nrDataCenters'] = len(set(map(lambda vm: vm['datacenterName'], all_vms_list)))
            vm_stats['nrHosts'] = len(set(map(lambda vm: vm['host'], all_vms_list)))

            vm_stats['time'] = all_vms_list[0]['time']

            if self.__verbose:
                MethodUtils.my_print([vm_stats])

        except (ZeroDivisionError, AttributeError, KeyError, ValueError) as error:
            ExceptionUtils.exception_info(error=error)
            raise ValueError("error when computing extra vm stats", vm_stats)

        LOGGER.info(">> store vmInventory information in Influx DB")
        self.__influx_client.insert_dicts_to_buffer("vmStats", [vm_stats])
Example No. 13
class JobMethods:
    """Wrapper for all job-related functionality. You may implement new methods in here.

    Methods:
        get_all_jobs - incrementally saves all stored jobsessions, even before the first execution of sppmon.
        job_logs - saves all jobLogs for the jobsessions in the influx catalog.

    """

    # kept for later use, currently unused
    __job_log_allow_list = [
        "CTGGA2340", "CTGGA0071", "CTGGA2260", "CTGGA2315", "CTGGA0550",
        "CTGGA2384"
    ]

    # to be moved somewhere else
    # ######### Add new logs to be parsed here #######################################
    # Structure:
    # Dict with the messageID of the log as key.
    # The value is a tuple of
    # #1 the table name
    # #2 a lambda which maps each param to a name. Must accept at least one argument!
    # #3 a list of additional keys from the job log itself to be saved: either a plain key,
    #    or a (target name, source key) tuple when renaming.
    # The values are delivered by the param_list of the joblog.
    # If a value is something like 10sec or 10gb use `parse_unit` to parse it.
    # A worked example is sketched below, after this dict.
    __supported_ids: Dict[str, Tuple[str, Callable[[List[Any]], Dict[
        str, Any]], List[Union[Tuple[str, str], str]]]] = {
            'CTGGA2384': (
                'vmBackupSummary',
                lambda params: {
                    "name": params[0],
                    "proxy": params[1],
                    "vsnaps": params[2],
                    "type": params[3],
                    "transportType": params[4],
                    "transferredBytes": SppUtils.parse_unit(params[5]),
                    "throughputBytes/s": SppUtils.parse_unit(params[6]),
                    "queueTimeSec": SppUtils.parse_unit(params[7]),
                    "protectedVMDKs": params[8],
                    "TotalVMDKs": params[9],
                    "status": params[10]
                },
                ["messageId"]  # Additional Information from job-message itself
            ),
            'CTGGA0071': ('vmBackupSummary', lambda params: {
                'protectedVMDKs': params[0],
                'TotalVMDKs': int(params[1]) + int(params[0]),
                'transferredBytes': SppUtils.parse_unit(params[2]),
                'throughputBytes/s': SppUtils.parse_unit(params[3]),
                'queueTimeSec': SppUtils.parse_unit(params[4])
            }, ["messageId"]),
            'CTGGA0072': ('vmReplicateSummary', lambda params: {
                'total': params[0],
                'failed': params[1],
                'duration': SppUtils.parse_unit(params[2])
            }, []),
            'CTGGA0398':
            ('vmReplicateStats',
             lambda params: {
                 'replicatedBytes': SppUtils.parse_unit(params[0]),
                 'throughputBytes/sec': SppUtils.parse_unit(params[1]),
                 'duration': SppUtils.parse_unit(params[2], delimiter=':')
             }, []),
            'CTGGR0003': (
                'office365Stats',
                lambda params: {
                    'imported365Users': int(params[0]),
                },
                [  # Additional Information from job-message itself, including rename
                    "jobId",
                    "jobSessionId",
                    "jobName",
                    "jobExecutionTime"  # used to instantly integrate with other stats
                ]),
            'CTGGA2444': (
                'office365Stats',
                lambda params: {
                    'protectedItems': int(params[0]),
                    'selectedItems': int(params[0]),
                },
                [
                    "jobId",
                    "jobSessionId",
                    "jobName",
                    "jobExecutionTime"  # used to instantly integrate with other stats
                ]),
            'CTGGA2402': (
                'office365TransfBytes',
                lambda params:
                # If not matching, this will return an empty dict which is going to be ignored
                MethodUtils.joblogs_parse_params(
                    r"(\w+)\s*\(Server:\s*([^\s,]+), Transfer Size: (\d+(?:.\d*)?\s*\w*)\)",
                    params[1], lambda match_list: {
                        "itemName": params[0],
                        "itemType": match_list[1],
                        "serverName": match_list[2],
                        "transferredBytes": SppUtils.parse_unit(match_list[3]),
                    }),
                ["jobId", "jobSessionId", "jobName"]),
        }
    """LogLog messageID's which can be parsed by sppmon. Check detailed summary above the declaration."""
    def __init__(self, influx_client: Optional[InfluxClient],
                 api_queries: Optional[ApiQueries],
                 job_log_retention_time: str, job_log_types: List[str],
                 verbose: bool):

        if (not influx_client):
            raise ValueError(
                "Job Methods are not available, missing influx_client")
        if (not api_queries):
            raise ValueError(
                "Job Methods are not available, missing api_queries")

        self.__influx_client = influx_client
        self.__api_queries = api_queries
        self.__verbose = verbose

        self.__job_log_retention_time = job_log_retention_time
        """used to limit the time jobLogs are queried, only interestig for init call"""

        self.__job_log_types = job_log_types

    def get_all_jobs(self) -> None:
        """incrementally saves all stored jobsessions, even before first execution of sppmon"""

        job_list = MethodUtils.query_something(
            name="job list", source_func=self.__api_queries.get_job_list)

        for job in job_list:
            job_id = job.get("id", None)
            job_name = job.get("name", None)

            # written this way to also catch empty strings, not only missing keys
            if (not job_id or not job_name):
                ExceptionUtils.error_message(
                    f"skipping, missing name or id for job {job}")
                continue
            LOGGER.info(
                ">> capturing Job information for Job \"{}\"".format(job_name))

            try:
                self.__job_by_id(job_id=job_id)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error=error,
                    extra_message=
                    f"error when getting jobs for {job_name}, skipping it")
                continue

    def __job_by_id(self, job_id: str) -> None:
        """Requests and saves all jobsessions for a jobID"""
        if (not job_id):
            raise ValueError("need job_id to request jobs for that ID")

        keyword = Keyword.SELECT
        table = self.__influx_client.database['jobs']
        query = SelectionQuery(
            keyword=keyword,
            fields=['id', 'jobName'],
            tables=[table],
            where_str=
            f'jobId = \'{job_id}\' AND time > now() - {table.retention_policy.duration}'
            # unnecessary filter?
        )
        LOGGER.debug(query)
        result = self.__influx_client.send_selection_query(  # type: ignore
            query)
        id_list: List[int] = []
        row: Dict[str, Any] = {}  # make sure the var exists
        for row in result.get_points():  # type: ignore
            id_list.append(row['id'])  # type: ignore

        if (not row):
            LOGGER.info(
                f">>> no entries in Influx database found for job with id {job_id}"
            )

        # calculate time to be requested
        (rp_hours, rp_mins, rp_secs) = InfluxUtils.transform_time_literal(
            table.retention_policy.duration, single_vals=True)
        max_request_timestamp = datetime.datetime.now() - datetime.timedelta(
            hours=float(rp_hours),
            minutes=float(rp_mins),
            seconds=float(rp_secs))
        unixtime = int(time.mktime(max_request_timestamp.timetuple()))
        # make it ms instead of s
        unixtime *= 1000

        # retrieve all jobs in this category from the REST API; filter to avoid drops due to the retention policy
        LOGGER.debug(f">>> requesting job sessions for id {job_id}")
        all_jobs = self.__api_queries.get_jobs_by_id(job_id=job_id)

        # keep only jobs whose start time lies within the retention time limit
        latest_jobs = list(
            filter(lambda job: job['start'] > unixtime, all_jobs))

        missing_jobs = list(
            filter(lambda job_api: int(job_api['id']) not in id_list,
                   latest_jobs))

        if (len(missing_jobs) > 0):
            LOGGER.info(
                f">>> {len(missing_jobs)} datasets missing in DB for jobId: {job_id}"
            )

            # Removes `statistics` from jobs
            self.__compute_extra_job_stats(missing_jobs, job_id)

            LOGGER.info(
                f">>> inserting job information of {len(missing_jobs)} jobs into jobs table"
            )
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=missing_jobs, table_name="jobs")
        else:
            LOGGER.info(
                f">>> no new jobs to insert into DB for job with ID {job_id}")

        # TODO: artifact from older versions, not replaced yet
        if self.__verbose:
            display_number_of_jobs = 5
            keyword = Keyword.SELECT
            table = self.__influx_client.database['jobs']
            where_str = 'jobId = \'{}\''.format(job_id)
            query = SelectionQuery(keyword=keyword,
                                   fields=['*'],
                                   tables=[table],
                                   where_str=where_str,
                                   order_direction='DESC',
                                   limit=display_number_of_jobs)
            result = self.__influx_client.send_selection_query(  # type: ignore
                query)  # type: ignore
            result_list: List[Dict[str, Any]] = list(result.get_points())  # type: ignore

            job_list_to_print: List[Dict[str, Any]] = list(result_list)
            print()
            print(
                "displaying last {} jobs for job with ID {} from database (as available)"
                .format(display_number_of_jobs, job_id))
            MethodUtils.my_print(data=job_list_to_print)
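
    # Minimal sketch (not part of the original module): the retention cutoff used in
    # `__job_by_id`, assuming a hypothetical 14-day retention policy. Job sessions
    # whose 'start' (epoch ms) lies before the cutoff would be dropped by the
    # retention policy anyway, so they are filtered out before inserting.
    import datetime
    import time
    example_cutoff = datetime.datetime.now() - datetime.timedelta(days=14)
    example_cutoff_ms = int(time.mktime(example_cutoff.timetuple())) * 1000
    example_sessions = [{'id': '1', 'start': example_cutoff_ms + 1},
                        {'id': '2', 'start': 0}]
    example_recent = [job for job in example_sessions if job['start'] > example_cutoff_ms]
    # -> only the first session remains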

    def __compute_extra_job_stats(self, list_with_jobs: List[Dict[str, Any]],
                                  job_id: str) -> None:
        """Extracts additional `statistic` list from jobs and removes it from the original list.

        Computes an additional table out of the data.

        Args:
            list_with_jobs (List[Dict[str, Any]]): list with all jobs
        """

        LOGGER.info(
            f">>> computing additional job statistics for jobId: {job_id}")

        insert_list: List[Dict[str, Any]] = []
        # filter on None instead of a bool-check so that empty statistic lists [] are still popped from the job below
        for job in filter(lambda x: x.get("statistics", None) is not None,
                          list_with_jobs):
            job_statistics_list = job.pop('statistics')

            for job_stats in job_statistics_list:
                try:
                    insert_dict: Dict[str, Any] = {}

                    # fields
                    insert_dict['resourceType'] = job_stats.get(
                        'resourceType', None)
                    insert_dict['total'] = job_stats.get('total', 0)
                    insert_dict['success'] = job_stats.get('success', 0)
                    insert_dict['failed'] = job_stats.get('failed', 0)

                    skipped = job_stats.get('skipped', None)
                    if (skipped is None):
                        skipped = insert_dict["total"] - insert_dict[
                            "success"] - insert_dict["failed"]
                    insert_dict["skipped"] = skipped

                    # time key
                    insert_dict['start'] = job['start']

                    # regular tag values for grouping:
                    insert_dict['id'] = job.get('id', None)
                    insert_dict['jobId'] = job.get('jobId', None)
                    insert_dict['status'] = job.get('status', None)
                    insert_dict['indexStatus'] = job.get('indexStatus', None)
                    insert_dict['jobName'] = job.get('jobName', None)
                    insert_dict['type'] = job.get('type', None)
                    insert_dict['subPolicyType'] = job.get(
                        'subPolicyType', None)

                    insert_list.append(insert_dict)
                except KeyError as error:
                    ExceptionUtils.exception_info(
                        error=error,
                        extra_message=
                        f"failed to compute job-individual statistics due key error. report to developer. Job: {job} ; job_stats: {job_stats}"
                    )

        if (len(insert_list) > 0):
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=insert_list, table_name="jobs_statistics")
        else:
            LOGGER.info(
                f">>> no additional job statistics to insert into DB for jobId: {job_id}"
            )

    def __job_logs_to_stats(self, list_with_logs: List[Dict[str,
                                                            Any]]) -> None:
        """Parses joblogs into their own statisic table, using declared supported ID's

        To parse more jobLogs define additional entrys in the attribute `supported_ids`.

        Arguments:
            list_with_logs {List[Dict[str, Any]]} -- List with all saved joblogs
        """

        # only continue with joblogs we want to save
        supported_log_iterator = filter(
            lambda log: log['messageId'] in self.__supported_ids.keys(),
            list_with_logs)
        sorted_log_iterator = sorted(supported_log_iterator,
                                     key=lambda entry: entry['logTime'])
        max_sec_timestamp = 0  # required for preventing duplicates

        for job_log in sorted_log_iterator:
            message_id = job_log['messageId']

            table_func_triple = self.__supported_ids[message_id]

            (table_name, row_dict_func, additional_fields) = table_func_triple

            if (not table_name):
                table_name = message_id
                ExceptionUtils.error_message(
                    f"Warning: No tablename specified for message_id {message_id}. Please report to developer."
                )

            try:
                # Saving information from the message-params list within the job_log
                row_dict = row_dict_func(job_log['messageParams'])
                if (not row_dict):
                    # this was matched incorrectly, therefore it is skipped.
                    # No warning because this will happen often.
                    continue
                # Saving additional fields from the job_log struct itself.
                if (additional_fields):
                    for value in additional_fields:
                        # with rename
                        if (isinstance(value, Tuple)):
                            row_dict[value[0]] = job_log[value[1]]
                        else:
                            # without rename
                            row_dict[value] = job_log[value]
            except (KeyError, IndexError) as error:
                ExceptionUtils.exception_info(
                    error,
                    extra_message=
                    f"MessageID params wrong defined. Skipping message_id {message_id} with content: {job_log}"
                )
                continue

            # Issue 9: if all tag values duplicate another record, including the timestamp, Influx will discard the insert
            # as a duplicate. In some cases, changing epoch timestamps from millisecond to second precision can
            # cause duplicate timestamps. To avoid this for certain tables, seconds are added to the timestamp as needed to
            # ensure uniqueness. Only use this when some inaccuracy of the timestamps is acceptable (a sketch follows after this method).
            cur_timestamp = job_log['logTime']
            if (table_name == 'vmBackupSummary'):

                if (cur_timestamp is None):  # prevent None
                    ExceptionUtils.error_message(
                        f"Warning: logTime is None, duplicate may be purged. Log: {job_log}"
                    )

                if (isinstance(cur_timestamp, str)):  # make sure its int
                    cur_timestamp = int(cur_timestamp)

                cur_sec_timestamp = SppUtils.to_epoch_secs(cur_timestamp)
                if (cur_sec_timestamp <= max_sec_timestamp):
                    digits = int(cur_timestamp / cur_sec_timestamp)
                    max_sec_timestamp += 1  # increase by 1 second
                    cur_timestamp = max_sec_timestamp * digits
                else:
                    max_sec_timestamp = cur_sec_timestamp

            row_dict['time'] = cur_timestamp

            for (key, item) in row_dict.items():
                if (item in ('null', 'null(null)')):
                    row_dict[key] = None

            self.__influx_client.insert_dicts_to_buffer(table_name, [row_dict])
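
    # Minimal sketch (not part of the original module) of the duplicate-avoidance
    # referenced above, with two invented vmBackupSummary log timestamps that fall
    # into the same epoch second. The second one is pushed into the next free second.
    example_ms_timestamps = [1612345678111, 1612345678999]
    example_max_sec = 0
    example_adjusted = []
    for example_ts_ms in example_ms_timestamps:
        example_ts_sec = example_ts_ms // 1000
        if example_ts_sec <= example_max_sec:
            example_max_sec += 1
            example_ts_ms = example_max_sec * 1000
        else:
            example_max_sec = example_ts_sec
        example_adjusted.append(example_ts_ms)
    # -> [1612345678111, 1612345679000]: both rows keep unique timestamps in InfluxDB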

    def job_logs(self) -> None:
        """saves all jobLogs for the jobsessions in influx catalog.

        Make sure to call `get_all_jobs` before to aquire all jobsessions.
        In order to save them it deletes and rewrites all affected jobsession entrys.
        It automatically parses certain jobLogs into additional stats, defined by `supported_ids`.
        """

        # total count of requested logs
        logs_requested_total = 0
        # total count of inserted logs
        logs_to_stats_total = 0
        # should be equal, but on failure isn't (skipped logs)

        # list to be inserted after everything is updated
        job_update_list: List[Dict[str, Any]] = []

        LOGGER.info("> Requesting jobs with missing logs from influx database")

        table = self.__influx_client.database['jobs']
        # only store if there is something to store -> limited by the job log retention time.
        where_str = 'jobsLogsStored <> \'True\' and time > now() - %s' % self.__job_log_retention_time
        where_str += f' AND time > now() - {table.retention_policy.duration}'

        # Select all jobs without joblogs
        keyword = Keyword.SELECT
        query = SelectionQuery(keyword=keyword,
                               tables=[table],
                               fields=['*'],
                               where_str=where_str)

        # send query and compute
        missing_logs_jobs_rs = self.__influx_client.send_selection_query(  # type: ignore
            query)

        # this list contains all jobs which are missing their logs
        # Cast from resultset into list
        missing_logs_jobs: List[Dict[str, Any]] = list(
            missing_logs_jobs_rs.get_points())  # type: ignore

        LOGGER.info(
            f">>> Number of jobs with no joblogs stored in Influx database: {len(missing_logs_jobs)}"
        )

        LOGGER.info("> Requesting missing jobLogs from REST-API.")
        # request all jobLogs from REST-API
        # counter only for displaying purposes
        for counter, row in enumerate(missing_logs_jobs, 0):

            # Only print every 5 rows if not verbose
            # starts at 0, therefore already updated
            if (self.__verbose or counter % 5 == 0):
                LOGGER.info(
                    f">>> computed joblogs for {counter} / {len(missing_logs_jobs)} job sessions."
                )

            job_session_id: Optional[int] = row.get('id', None)

            # if somehow the jobSessionId is missing: skip
            # Should usually not happen
            if (job_session_id is None):
                ExceptionUtils.error_message(
                    f"Error: jobSessionId missing for row {row}")
                continue

            if (self.__verbose):
                LOGGER.info(
                    f">>> Requesting jobLogs {self.__job_log_types} for session {job_session_id}."
                )
            LOGGER.debug(
                f">>> Requesting jobLogs {self.__job_log_types} for session {job_session_id}."
            )

            try:
                # can't use `query_something` like in other places due to the extra params:
                # api_queries - query_something only works with no params

                # This list contains all joblogs for a single job-execution
                current_job_logs = self.__api_queries.get_job_log_details(
                    jobsession_id=job_session_id,
                    job_logs_types=self.__job_log_types,
                    request_ids=list(self.__supported_ids.keys()))
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error=error,
                    extra_message=
                    f"Error when api-requesting joblogs for job_session_id {job_session_id}, skipping it"
                )
                continue

            job_log_count = len(current_job_logs)
            logs_requested_total += job_log_count

            if (self.__verbose):
                LOGGER.info(
                    f">>> Found {job_log_count} logs for jobsessionId {job_session_id}"
                )
            LOGGER.debug(
                f"Found {job_log_count} logs for jobsessionId {job_session_id}"
            )

            # ####################################################################################
            # Compute results and save logs
            # #####################################################################################
            # The request of REST-API logs is finished here
            # To avoid crashing by holding 100,000+ logs in memory, results are computed and inserted directly
            # ######################################################################################

            for job_log in current_job_logs:
                # add additional information from job-session itself
                job_log["jobId"] = row.get("jobId", None)
                job_log["jobName"] = row.get("jobName", None)
                job_log["jobExecutionTime"] = row.get("start", None)

                # rename for clarity
                job_log["jobLogId"] = job_log.pop("id", None)
                job_log["jobSessionId"] = job_log.pop("jobsessionId", None)

            # ##########################################################
            # compute jobLog-Stats into each associated table
            # ##########################################################
            try:
                self.__job_logs_to_stats(current_job_logs)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error,
                    extra_message=
                    f"Failed parse jobLogs into its own table, skipping for jobsessionId {job_session_id}"
                )

            logs_to_stats_total += job_log_count

            # ##########################################################
            # save logs within the joblog-dump
            # ##########################################################

            # Only dump them after computing stats since they are read within the computing stats part
            for job_log in current_job_logs:
                # dump message params to allow saving as string
                job_log["messageParams"] = json.dumps(job_log["messageParams"])

            # if the list is empty (e.g. due to being erased) this simply returns and does nothing
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=current_job_logs, table_name="jobLogs")

            # shallow copy the dict to allow an update without errors
            copied_jobsession = dict(row.items())

            # update job table and set jobsLogsStored = True, jobLogsCount = len(jobLogDetails)
            update_fields = {
                "jobLogsCount": job_log_count,
                "jobsLogsStored": True
            }
            # update the fields
            for (key, value) in update_fields.items():
                copied_jobsession[key] = value
            job_update_list.append(copied_jobsession)

            # ##########################################################
            # End of For-Each
            # ##########################################################

        # ##########################################################
        # Delete each job, then re-insert
        # ##########################################################

        # Delete all jobs which got requested, no matter if failed
        delete_query = SelectionQuery(keyword=Keyword.DELETE,
                                      tables=[table],
                                      where_str=where_str)

        # the delete query is only sent now, after all processing, to prevent data loss
        self.__influx_client.send_selection_query(delete_query)  # type: ignore

        # Insert data after everything is completed
        self.__influx_client.insert_dicts_to_buffer(table.name,
                                                    job_update_list)

        if (logs_requested_total != logs_to_stats_total):
            LOGGER.info(
                f"> Requested a total of {logs_requested_total} but only computed {logs_to_stats_total} into sppmon statistics"
            )
        else:
            LOGGER.info(
                f">>> requested and computed a total of {logs_requested_total} logs"
            )

        LOGGER.info(f">> Updated a total of {len(job_update_list)} jobs")
Example No. 14
    def __job_by_id(self, job_id: str) -> None:
        """Requests and saves all jobsessions for a jobID"""
        if (not job_id):
            raise ValueError("need job_id to request jobs for that ID")

        keyword = Keyword.SELECT
        table = self.__influx_client.database['jobs']
        query = SelectionQuery(
            keyword=keyword,
            fields=['id', 'jobName'],
            tables=[table],
            where_str=
            f'jobId = \'{job_id}\' AND time > now() - {table.retention_policy.duration}'
            # unnecessary filter?
        )
        LOGGER.debug(query)
        result = self.__influx_client.send_selection_query(  # type: ignore
            query)
        id_list: List[int] = []
        row: Dict[str, Any] = {}  # make sure the var exists
        for row in result.get_points():  # type: ignore
            id_list.append(row['id'])  # type: ignore

        if (not row):
            LOGGER.info(
                f">>> no entries in Influx database found for job with id {job_id}"
            )

        # calculate time to be requested
        (rp_hours, rp_mins, rp_secs) = InfluxUtils.transform_time_literal(
            table.retention_policy.duration, single_vals=True)
        max_request_timestamp = datetime.datetime.now() - datetime.timedelta(
            hours=float(rp_hours),
            minutes=float(rp_mins),
            seconds=float(rp_secs))
        unixtime = int(time.mktime(max_request_timestamp.timetuple()))
        # make it ms instead of s
        unixtime *= 1000

        # retrieve all jobs in this category from the REST API; filter to avoid drops due to the retention policy
        LOGGER.debug(f">>> requesting job sessions for id {job_id}")
        all_jobs = self.__api_queries.get_jobs_by_id(job_id=job_id)

        # keep only jobs whose start time lies within the retention time limit
        latest_jobs = list(
            filter(lambda job: job['start'] > unixtime, all_jobs))

        missing_jobs = list(
            filter(lambda job_api: int(job_api['id']) not in id_list,
                   latest_jobs))

        if (len(missing_jobs) > 0):
            LOGGER.info(
                f">>> {len(missing_jobs)} datasets missing in DB for jobId: {job_id}"
            )

            # Removes `statistics` from jobs
            self.__compute_extra_job_stats(missing_jobs, job_id)

            LOGGER.info(
                f">>> inserting job information of {len(missing_jobs)} jobs into jobs table"
            )
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=missing_jobs, table_name="jobs")
        else:
            LOGGER.info(
                f">>> no new jobs to insert into DB for job with ID {job_id}")

        # TODO: artifact from older versions, not replaced yet
        if self.__verbose:
            display_number_of_jobs = 5
            keyword = Keyword.SELECT
            table = self.__influx_client.database['jobs']
            where_str = 'jobId = \'{}\''.format(job_id)
            query = SelectionQuery(keyword=keyword,
                                   fields=['*'],
                                   tables=[table],
                                   where_str=where_str,
                                   order_direction='DESC',
                                   limit=display_number_of_jobs)
            result = self.__influx_client.send_selection_query(  # type: ignore
                query)  # type: ignore
            result_list: List[Dict[str, Any]] = list(result.get_points())  # type: ignore

            job_list_to_print: List[Dict[str, Any]] = list(result_list)
            print()
            print(
                "displaying last {} jobs for job with ID {} from database (as available)"
                .format(display_number_of_jobs, job_id))
            MethodUtils.my_print(data=job_list_to_print)
Example No. 15
    def test_connection(influx_client: InfluxClient, rest_client: RestClient,
                        config_file: Dict[str, Any]):
        if (not config_file):
            raise ValueError("SPPmon does not work without a config file")

        LOGGER.info("Testing all connections required for SPPMon to work")
        working: bool = True  # SPPMon itself will finish successfully (no critical errors)
        no_warnings: bool = True  # SPPMon will finish without any warnings (no errors at all)

        # ## InfluxDB ##

        LOGGER.info("> Testing and configuring InfluxDB")
        try:
            influx_client.connect()
            influx_client.disconnect()
            if (not influx_client.use_ssl):
                ExceptionUtils.error_message(
                    "> WARNING: Mandatory SSL is disabled. We hightly recommend to enable it!"
                )
                no_warnings = False

            LOGGER.info("InfluxDB is ready for use")
        except ValueError as error:
            ExceptionUtils.exception_info(
                error,
                extra_message=
                "> Testing of the InfluxDB failed. This is a crictial component of SPPMon."
            )
            working = False

        # ## REST-API ##

        LOGGER.info("> Testing REST-API of SPP.")
        try:
            rest_client.login()
            (version_nr, build_nr) = rest_client.get_spp_version_build()
            LOGGER.info(
                f">> Sucessfully connected to SPP V{version_nr}, build {build_nr}."
            )
            rest_client.logout()
            LOGGER.info("> REST-API is ready for use")
        except ValueError as error:
            ExceptionUtils.exception_info(
                error,
                extra_message=
                "> Testing of the REST-API failed. This is a crictial component of SPPMon."
            )
            working = False

        # ## SSH-CLIENTS ##

        LOGGER.info(
            "> Testing all types of SSH-Clients: Server, VAPDs, vSnaps, Cloudproxy and others"
        )
        ssh_working = True  # The arg --ssh will finish without any error at all

        # Check the count of clients per type
        ssh_clients: List[SshClient] = SshMethods.setup_ssh_clients(
            config_file)
        if (not ssh_clients):
            ExceptionUtils.error_message(
                ">> No SSH-clients detected at all. At least the server itself should be added for process-statistics."
            )
            ssh_working = False
        else:
            for type in SshTypes:
                if (not list(
                        filter(lambda client: client.client_type == type,
                               ssh_clients))):
                    LOGGER.info(f">> No {type.name} client detected.")

                    if (type == SshTypes.SERVER):
                        ExceptionUtils.error_message(
                            ">> Critical: Without Server as ssh client you wont have any process statistics available. These are a key part of SPPMon."
                        )
                        ssh_working = False  # No error, but still critical

                    if (type == SshTypes.VSNAP):
                        LOGGER.info(
                            ">> WARNING: Without vSnap as ssh client you have no access to storage information. You may add vSnap's for additional monitoring and alerts."
                        )
                        no_warnings = False  # ssh will still work, but that's definitely a warning

            ssh_methods: SshMethods = SshMethods(influx_client, config_file,
                                                 False)
            # Connection check
            LOGGER.info(
                f">> Testing now connection and commands of {len(ssh_clients)} registered ssh-clients."
            )
            for client in ssh_clients:
                try:
                    client.connect()
                    client.disconnect()

                    error_count: int = len(ExceptionUtils.stored_errors)
                    MethodUtils.ssh_execute_commands(
                        ssh_clients=[client],
                        ssh_type=client.client_type,
                        command_list=ssh_methods.client_commands[
                            client.client_type] + ssh_methods.all_command_list)
                    if (len(ExceptionUtils.stored_errors) != error_count):
                        ssh_working = False
                        ExceptionUtils.error_message(
                            f"Not all commands available for client {client.host_name} with type: {client.client_type}.\n"
                            +
                            "Please check manually if the commands are installed and their output."
                        )

                except ValueError as error:
                    ExceptionUtils.exception_info(
                        error,
                        extra_message=
                        f"Connection failed for client {client.host_name} with type: {client.client_type}."
                    )
                    ssh_working = False

        if (ssh_working):
            LOGGER.info("> Testing of SSH-clients sucessfull.")
        else:
            LOGGER.info(
                "> Testing of SSH-clients failed! SPPMon will still work, not all informations are available."
            )
            no_warnings = False

        # #### Conclusion ####

        if (working and no_warnings):
            LOGGER.info(
                "> All components tested sucessfully. SPPMon is ready to be used!"
            )
        elif (working):
            LOGGER.info(
                "> Testing partially sucessful. SPPMon will run, but please check the warnings."
            )
        else:
            LOGGER.info(
                "> Testing failed. SPPMon is not ready to be used. Please fix the connection issues."
            )