    def send_selection_query(
            self, query: SelectionQuery) -> ResultSet:  # type: ignore
        """Sends a single `SELECT` or `DELETE` query to influx server.

        Arguments:
            query {SelectionQuery} -- Query which should be executed

        Raises:
            ValueError: no SelectionQuery is given.

        Returns:
            ResultSet -- Result of the query, empty if `DELETE`
        """
        if (not query or not isinstance(query, SelectionQuery)):
            raise ValueError("a selection query must be given")

        # if any queried table has buffered inserts, flush the buffer first
        for table in query.tables:
            if (table in self.__insert_buffer):
                self.flush_insert_buffer()
                break

        # Convert query to string
        query_str = query.to_query()

        start_time = time.perf_counter()
        # Send query
        try:
            result = self.__client.query(  # type: ignore
                query=query_str,
                epoch='s',
                database=self.database.name)

        except (InfluxDBServerError,
                InfluxDBClientError) as err:  # type: ignore
            ExceptionUtils.exception_info(
                error=err, extra_message="error when sending select statement"
            )  # type: ignore
            # empty result to maintain structure
            # raise_errors=False since we already caught the error
            result: ResultSet = ResultSet({},
                                          raise_errors=False)  # type: ignore

        end_time = time.perf_counter()

        # if nothing is returned, the count is 0 for every table
        # also possible via `list(result.get_points())`, but that requires far more computation
        if (result):
            length = len(result.raw['series'][0]['values'])  # type: ignore
        else:
            length = 0

        tables_count: Dict[Table, int] = {}
        for table in query.tables:
            tables_count[table] = int(length / len(query.tables))

        self.__insert_metrics_to_buffer(query.keyword, tables_count,
                                        end_time - start_time)

        return result  # type: ignore
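A minimal usage sketch (not part of the original source) of how the other examples in this collection drive `send_selection_query`: build a `SelectionQuery`, send it, then iterate the `ResultSet` via `get_points()`. The `influx_client` instance, the 'jobs' table and the selected fields are assumptions for illustration only.

def example_select_recent_jobs(influx_client) -> List[Dict[str, Any]]:
    # hypothetical helper: `influx_client` is assumed to be a connected
    # instance of the class that defines `send_selection_query` above
    jobs_table = influx_client.database['jobs']
    query = SelectionQuery(
        keyword=Keyword.SELECT,
        tables=[jobs_table],
        fields=['id', 'jobName', 'start'],
        where_str="time > now() - 7d",
        order_direction="DESC",
        limit=10)
    result_set = influx_client.send_selection_query(query)
    # empty for `DELETE` queries, otherwise an iterator of point-dicts
    return list(result_set.get_points())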
    def _CQ_TMPL(
            cls,
            fields: List[str],
            new_retention_policy: RetentionPolicy,
            group_time: str,
            group_args: List[str] = None,
            where_str: str = None) -> Callable[[Table, str], ContinuousQuery]:
        """Creates a CQ to do whatever you want with it.

        Args:
            fields (List[str]): Fields to be selected and aggregated, influx-keywords need to be escaped.
            new_retention_policy (RetentionPolicy): new retention policy to be inserted into
            group_time (str): time-literal on which the data should be grouped
            group_args (List[str], optional): additional grouping clauses. Defaults to ["*"].
            where_str (str, optional): a where clause in case one is needed. Defaults to None.

        Returns:
            Callable[[Table, str], ContinuousQuery]: Lambda which is transformed into a CQ later on.
        """
        if (not group_args):
            group_args = ["*"]
        return lambda table, name: ContinuousQuery(
            name=name,
            database=cls.__database,
            select_query=SelectionQuery(
                Keyword.SELECT,
                tables=[table],
                into_table=Table(cls.__database,
                                 table.name,
                                 retention_policy=new_retention_policy),
                fields=fields,
                where_str=where_str,
                group_list=[f"time({group_time})"] + group_args),
            for_interval="7d")
    def _CQ_DWSMPL(
        cls,
        fields: List[str],
        new_retention_policy: RetentionPolicy,
        group_time: str,
        group_args: List[str] = None
    ) -> Callable[[Table, str], ContinuousQuery]:
        """Creates a template CQ which groups by time, * . Always uses the base table it was created from.

        The callable shall aways have this format, the missing fields are filled by the `__add_table_def`-method.
        The need of this is that no table-instance is available, since CQ are defined together with the table.

        Args:
            fields (List[str]): Fields to be selected and aggregated, influx-keywords need to be escaped.
            new_retention_policy (RetentionPolicy): new retention policy to be inserted into
            group_time (str): time-literal on which the data should be grouped
            group_args (List[str], optional): additional grouping clauses. Defaults to ["*"].

        Returns:
            Callable[[Table, str], ContinuousQuery]: Lambda which is transformed into a CQ later on.
        """
        if (not group_args):
            group_args = ["*"]
        return lambda table, name: ContinuousQuery(
            name=name,
            database=cls.__database,
            select_query=SelectionQuery(
                Keyword.SELECT,
                tables=[table],
                into_table=Table(cls.__database,
                                 table.name,
                                 retention_policy=new_retention_policy),
                fields=fields,
                group_list=[f"time({group_time})"] + group_args),
            for_interval="7d")
Example #4
    def site_name_by_id(self, site_id: Union[int, str]) -> Optional[str]:
        """Returns a site_name by a associated site_id.

        Uses a already buffered result if possible, otherwise queries the influxdb for the name.

        Arguments:
            site_id {Union[int, str]} -- id of the site

        Returns:
            Optional[str] -- name of the site, None if not found.
        """
        if (site_id is None):
            ExceptionUtils.error_message("siteId is none, returning None")
            return None
        # if string, parse to int
        if (isinstance(site_id, str)):
            site_id = site_id.strip(" ")
            if (re.fullmatch(r"\d+", site_id)):
                site_id = int(site_id)
            else:
                ExceptionUtils.error_message(
                    "siteId is of unsupported string format")
                return None
        # if still not int, error
        if (not isinstance(site_id, int)):
            ExceptionUtils.error_message("site id is of unsupported type")
            return None

        # return if already saved -> previous call or `sites`-call
        result = self.__site_name_dict.get(site_id, None)
        if (result is not None):  # empty str allowed
            return result

        table_name = 'sites'
        table = self.__influx_client.database[table_name]
        query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[table],
            # description, throttleRates included because we need a field to query
            fields=["siteId", "siteName", "description", "throttleRates"],
            where_str=f"siteId = \'{site_id}\'",
            order_direction="DESC",
            limit=1)
        result_set = self.__influx_client.send_selection_query(
            query)  # type: ignore
        result_dict: Dict[str, Any] = next(result_set.get_points(),
                                           None)  # type: ignore
        if (not result_dict):
            ExceptionUtils.error_message(
                f"no site with the id {site_id} exists")
            return None

        # save result and return it
        result = result_dict['siteName']
        self.__site_name_dict[site_id] = result
        return result
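A short usage sketch (assumption, not from the source) for `site_name_by_id`: it accepts an int or a numeric string, caches resolved names, and returns None when the site cannot be resolved. `system_methods` is a placeholder for an instance of the surrounding class.

site_name = system_methods.site_name_by_id(" 101 ")  # surrounding spaces are stripped
if site_name is None:
    LOGGER.info("site 101 is unknown, keeping the raw id")
else:
    LOGGER.info(f"site 101 resolved to {site_name}")
# a second call with the same id is answered from the internal buffer dict
assert system_methods.site_name_by_id(101) == site_name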
    def create_inventory_summary(self) -> None:
        """Retrieves and calculate VM inventory summary by influx catalog data."""

        LOGGER.info(
            "> computing inventory information (not from catalog, means not only backup data is calculated)")

        # ########## Part 1: Check if something need to be computed #############
        # query the timestamp of the last vm; the 'commited' field is included since influx always requires a field
        vms_table = self.__influx_client.database["vms"]

        time_query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[vms_table],
            fields=['time', 'commited'],
            limit=1,
            order_direction="DESC"
        )
        result = self.__influx_client.send_selection_query(time_query) # type: ignore
        last_vm: Dict[str, Any] = next(result.get_points(), None) # type: ignore

        if (not last_vm):
            raise ValueError("no VMs stored; either none are available or you have to store VMs first")

        # query the last vm stats to compare timestamps with last vm
        last_time_ms: int = last_vm["time"]
        last_time = SppUtils.epoch_time_to_seconds(last_time_ms)
        where_str = "time = {}s".format(last_time)

        vm_stats_table = self.__influx_client.database["vmStats"]

        vm_stats_query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[vm_stats_table],
            fields=['*'],
            where_str=where_str,
            limit=1
        )
        result = self.__influx_client.send_selection_query(vm_stats_query) # type: ignore
        if (len(list(result.get_points())) > 0): # type: ignore
            LOGGER.info(">> vm statistics already computed, skipping")
            return

        # ####################### Part 2: Compute new Data ####################
        fields = [
            'uptime',
            'powerState',
            'commited',
            'uncommited',
            'memory',
            'host',
            'vmVersion',
            'isProtected',
            'inHLO',
            'isEncrypted',
            'datacenterName',
            'hypervisorType',
        ]
        query = SelectionQuery(
            keyword=Keyword.SELECT,
            tables=[vms_table],
            fields=fields,
            where_str=where_str
        )
        result = self.__influx_client.send_selection_query(query) # type: ignore

        all_vms_list: List[Dict[str, Union[str, int, float, bool]]] = list(result.get_points()) # type: ignore

        # skip if no new data can be computed
        if (not all_vms_list):
            raise ValueError("no VMs stored; either none are available or store VMs first")

        vm_stats: Dict[str, Any] = {}
        try:
            vm_stats['vmCount'] = len(all_vms_list)

            # returns largest/smallest
            vm_stats['vmMaxSize'] = max(all_vms_list, key=(lambda mydict: mydict['commited']))['commited']
            # zero-size VMs are ignored on purpose
            vms_no_null_size = list(filter(lambda mydict: mydict['commited'] > 0, all_vms_list))
            if (vms_no_null_size):
                vm_stats['vmMinSize'] = min(vms_no_null_size, key=(lambda mydict: mydict['commited']))['commited']
            vm_stats['vmSizeTotal'] = sum(mydict['commited'] for mydict in all_vms_list)
            vm_stats['vmAvgSize'] = vm_stats['vmSizeTotal'] / vm_stats['vmCount']

            # returns largest/smallest
            vm_stats['vmMaxUptime'] = max(all_vms_list, key=(lambda mydict: mydict['uptime']))['uptime']
            # zero-uptime VMs are ignored on purpose
            vms_no_null_time = list(filter(lambda mydict: mydict['uptime'] > 0, all_vms_list))
            if (vms_no_null_time):
                vm_stats['vmMinUptime'] = min(vms_no_null_time, key=(lambda mydict: mydict['uptime']))['uptime']
            vm_stats['vmUptimeTotal'] = sum(mydict['uptime'] for mydict in all_vms_list)
            vm_stats['vmAvgUptime'] = vm_stats['vmUptimeTotal'] / vm_stats['vmCount']

            vm_stats['vmCountProtected'] = len(list(filter(lambda mydict: mydict['isProtected'] == "True", all_vms_list)))
            vm_stats['vmCountUnprotected'] = vm_stats['vmCount'] - vm_stats['vmCountProtected']
            vm_stats['vmCountEncrypted'] = len(list(filter(lambda mydict: mydict['isEncrypted'] == "True", all_vms_list)))
            vm_stats['vmCountPlain'] = vm_stats['vmCount'] - vm_stats['vmCountEncrypted']
            vm_stats['vmCountHLO'] = len(list(filter(lambda mydict: mydict['inHLO'] == "True", all_vms_list)))
            vm_stats['vmCountNotHLO'] = vm_stats['vmCount'] - vm_stats['vmCountHLO']


            vm_stats['vmCountVMware'] = len(list(filter(lambda mydict: mydict['hypervisorType'] == "vmware", all_vms_list)))
            vm_stats['vmCountHyperV'] = len(list(filter(lambda mydict: mydict['hypervisorType'] == "hyperv", all_vms_list)))


            vm_stats['nrDataCenters'] = len(set(map(lambda vm: vm['datacenterName'], all_vms_list)))
            vm_stats['nrHosts'] = len(set(map(lambda vm: vm['host'], all_vms_list)))

            vm_stats['time'] = all_vms_list[0]['time']

            if self.__verbose:
                MethodUtils.my_print([vm_stats])

        except (ZeroDivisionError, AttributeError, KeyError, ValueError) as error:
            ExceptionUtils.exception_info(error=error)
            raise ValueError("error when computing extra vm stats", vm_stats)

        LOGGER.info(">> store vmInventory information in Influx DB")
        self.__influx_client.insert_dicts_to_buffer("vmStats", [vm_stats])
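The try-block above reduces to plain max/min/sum/filter calls over a list of dicts. A self-contained miniature with made-up sample values that can be run on its own to see the shape of the computed stats:

sample_vms = [
    {'commited': 40, 'uptime': 3600, 'isProtected': "True", 'hypervisorType': "vmware"},
    {'commited': 0, 'uptime': 0, 'isProtected': "False", 'hypervisorType': "hyperv"},
    {'commited': 10, 'uptime': 7200, 'isProtected': "True", 'hypervisorType': "vmware"},
]
stats = {'vmCount': len(sample_vms)}
stats['vmMaxSize'] = max(sample_vms, key=lambda vm: vm['commited'])['commited']              # 40
non_zero = [vm for vm in sample_vms if vm['commited'] > 0]                                   # zero-size VMs ignored
stats['vmMinSize'] = min(non_zero, key=lambda vm: vm['commited'])['commited']                # 10
stats['vmSizeTotal'] = sum(vm['commited'] for vm in sample_vms)                              # 50
stats['vmAvgSize'] = stats['vmSizeTotal'] / stats['vmCount']                                 # ~16.7
stats['vmCountProtected'] = len([vm for vm in sample_vms if vm['isProtected'] == "True"])    # 2
stats['vmCountVMware'] = len([vm for vm in sample_vms if vm['hypervisorType'] == "vmware"])  # 2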
Example #6
    def job_logs(self) -> None:
        """saves all jobLogs for the jobsessions in influx catalog.

        Make sure to call `get_all_jobs` before to aquire all jobsessions.
        In order to save them it deletes and rewrites all affected jobsession entrys.
        It automatically parses certain jobLogs into additional stats, defined by `supported_ids`.
        """

        # total count of requested logs
        logs_requested_total = 0
        # total count of inserted logs
        logs_to_stats_total = 0
        # should be equal, but on failure isn't (skipped logs)

        # list to be inserted after everything is updated
        job_update_list: List[Dict[str, Any]] = []

        LOGGER.info("> Requesting jobs with missing logs from influx database")

        table = self.__influx_client.database['jobs']
        # only store if there is something to store -> limited by the job log retention time.
        where_str = 'jobsLogsStored <> \'True\' and time > now() - %s' % self.__job_log_retention_time
        where_str += f' AND time > now() - {table.retention_policy.duration}'

        # Select all jobs without joblogs
        keyword = Keyword.SELECT
        query = SelectionQuery(keyword=keyword,
                               tables=[table],
                               fields=['*'],
                               where_str=where_str)

        # send query and compute
        missing_logs_jobs_rs = self.__influx_client.send_selection_query(  # type: ignore
            query)

        # this list contains all jobs which are missing its Logs
        # Cast from resultset into list
        missing_logs_jobs: List[Dict[str, Any]] = list(
            missing_logs_jobs_rs.get_points())  # type: ignore

        LOGGER.info(
            f">>> Number of jobs with no joblogs stored in Influx database: {len(missing_logs_jobs)}"
        )

        LOGGER.info("> Requesting missing jobLogs from REST-API.")
        # request all jobLogs from REST-API
        # counter only for displaying purposes
        for counter, row in enumerate(missing_logs_jobs, 0):

            # Only print every 5 rows if not verbose
            # counter starts at 0, so the printed value equals the already computed sessions
            if (self.__verbose or counter % 5 == 0):
                LOGGER.info(
                    f">>> computed joblogs for {counter} / {len(missing_logs_jobs)} job sessions."
                )

            job_session_id: Optional[int] = row.get('id', None)

            # if somehow the jobSessionId is missing: skip
            # Should usually not happen
            if (job_session_id is None):
                ExceptionUtils.error_message(
                    f"Error: jobSessionId missing for row {row}")
                continue

            if (self.__verbose):
                LOGGER.info(
                    f">>> Requesting jobLogs {self.__job_log_types} for session {job_session_id}."
                )
            LOGGER.debug(
                f">>> Requesting jobLogs {self.__job_log_types} for session {job_session_id}."
            )

            try:
                # can't use `query_something` like in other places due to the extra params:
                # api_queries' `query_something` only works with no params

                # This list contains all joblogs for a single job-execution
                current_job_logs = self.__api_queries.get_job_log_details(
                    jobsession_id=job_session_id,
                    job_logs_types=self.__job_log_types,
                    request_ids=list(self.__supported_ids.keys()))
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error=error,
                    extra_message=
                    f"Error when api-requesting joblogs for job_session_id {job_session_id}, skipping it"
                )
                continue

            job_log_count = len(current_job_logs)
            logs_requested_total += job_log_count

            if (self.__verbose):
                LOGGER.info(
                    f">>> Found {job_log_count} logs for jobsessionId {job_session_id}"
                )
            LOGGER.debug(
                f"Found {job_log_count} logs for jobsessionId {job_session_id}"
            )

            # ####################################################################################
            # Compute results and save logs
            # #####################################################################################
            # The request of REST-API Logs is finished here
            # To not crash by saving 100.000+ Logs, directly compute results and insert them
            # ######################################################################################

            for job_log in current_job_logs:
                # add additional information from job-session itself
                job_log["jobId"] = row.get("jobId", None)
                job_log["jobName"] = row.get("jobName", None)
                job_log["jobExecutionTime"] = row.get("start", None)

                # rename for clarity
                job_log["jobLogId"] = job_log.pop("id", None)
                job_log["jobSessionId"] = job_log.pop("jobsessionId", None)

            # ##########################################################
            # compute jobLog-Stats into each associated table
            # ##########################################################
            try:
                self.__job_logs_to_stats(current_job_logs)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error,
                    extra_message=
                    f"Failed parse jobLogs into its own table, skipping for jobsessionId {job_session_id}"
                )

            logs_to_stats_total += job_log_count

            # ##########################################################
            # save logs within the joblog-dump
            # ##########################################################

            # Only dump them after computing stats since they are read within the computing stats part
            for job_log in current_job_logs:
                # dump message params to allow saving as string
                job_log["messageParams"] = json.dumps(job_log["messageParams"])

            # if the list is empty (due to being erased etc.) it will simply return and do nothing
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=current_job_logs, table_name="jobLogs")

            # shallow copy dict to allow an update without errors
            copied_jobsession = dict(row.items())

            # update job table and set jobsLogsStored = True, jobLogsCount = len(jobLogDetails)
            update_fields = {
                "jobLogsCount": job_log_count,
                "jobsLogsStored": True
            }
            # update the fields
            for (key, value) in update_fields.items():
                copied_jobsession[key] = value
            job_update_list.append(copied_jobsession)

            # ##########################################################
            # End of For-Each
            # ##########################################################

        # ##########################################################
        # Delete each job, then re-insert
        # ##########################################################

        # Delete all jobs which got requested, whether they failed or not
        delete_query = SelectionQuery(keyword=Keyword.DELETE,
                                      tables=[table],
                                      where_str=where_str)

        # now send remove query to prevent data loss
        self.__influx_client.send_selection_query(delete_query)  # type: ignore

        # Insert data after everything is completed
        self.__influx_client.insert_dicts_to_buffer(table.name,
                                                    job_update_list)

        if (logs_requested_total != logs_to_stats_total):
            LOGGER.info(
                f"> Requested a total of {logs_requested_total} but only computed {logs_to_stats_total} into sppmon statistics"
            )
        else:
            LOGGER.info(
                f">>> requested and computed a total of {logs_requested_total} logs"
            )

        LOGGER.info(f">> Updated a total of {len(job_update_list)} jobs")
Example #7
    def __job_by_id(self, job_id: str) -> None:
        """Requests and saves all jobsessions for a jobID"""
        if (not job_id):
            raise ValueError("need job_id to request jobs for that ID")

        keyword = Keyword.SELECT
        table = self.__influx_client.database['jobs']
        query = SelectionQuery(
            keyword=keyword,
            fields=['id', 'jobName'],
            tables=[table],
            where_str=
            f'jobId = \'{job_id}\' AND time > now() - {table.retention_policy.duration}'
            # unnecessary filter?
        )
        LOGGER.debug(query)
        result = self.__influx_client.send_selection_query(  # type: ignore
            query)
        id_list: List[int] = []
        row: Dict[str, Any] = {}  # make sure the var exists
        for row in result.get_points():  # type: ignore
            id_list.append(row['id'])  # type: ignore

        if (not row):
            LOGGER.info(
                f">>> no entries in Influx database found for job with id {job_id}"
            )

        # calculate time to be requested
        (rp_hours, rp_mins, rp_secs) = InfluxUtils.transform_time_literal(
            table.retention_policy.duration, single_vals=True)
        max_request_timestamp = datetime.datetime.now() - datetime.timedelta(
            hours=float(rp_hours),
            minutes=float(rp_mins),
            seconds=float(rp_secs))
        unixtime = int(time.mktime(max_request_timestamp.timetuple()))
        # make it ms instead of s
        unixtime *= 1000

        # retrieve all jobs in this category from REST API, filter to avoid drops due RP
        LOGGER.debug(f">>> requesting job sessions for id {job_id}")
        all_jobs = self.__api_queries.get_jobs_by_id(job_id=job_id)

        # keep only jobs whose start time lies within the retention time limit
        latest_jobs = list(
            filter(lambda job: job['start'] > unixtime, all_jobs))

        missing_jobs = list(
            filter(lambda job_api: int(job_api['id']) not in id_list,
                   latest_jobs))

        if (len(missing_jobs) > 0):
            LOGGER.info(
                f">>> {len(missing_jobs)} datasets missing in DB for jobId: {job_id}"
            )

            # Removes `statistics` from jobs
            self.__compute_extra_job_stats(missing_jobs, job_id)

            LOGGER.info(
                f">>> inserting job information of {len(missing_jobs)} jobs into jobs table"
            )
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=missing_jobs, table_name="jobs")
        else:
            LOGGER.info(
                f">>> no new jobs to insert into DB for job with ID {job_id}")

        # TODO: artifact from older versions, not replaced yet
        if self.__verbose:
            display_number_of_jobs = 5
            keyword = Keyword.SELECT
            table = self.__influx_client.database['jobs']
            where_str = 'jobId = \'{}\''.format(job_id)
            query = SelectionQuery(keyword=keyword,
                                   fields=['*'],
                                   tables=[table],
                                   where_str=where_str,
                                   order_direction='DESC',
                                   limit=display_number_of_jobs)
            result = self.__influx_client.send_selection_query(  # type: ignore
                query)  # type: ignore
            result_list: List[str] = list(result.get_points())  # type: ignore

            job_list_to_print: List[str] = []
            for row_str in result_list:
                job_list_to_print.append(row_str)
            print()
            print(
                "displaying last {} jobs for job with ID {} from database (as available)"
                .format(display_number_of_jobs, job_id))
            MethodUtils.my_print(data=job_list_to_print)
    def job_logs(self) -> None:
        """saves all jobLogs for the jobsessions in influx catalog.

        Make sure to call `get_all_jobs` before to aquire all jobsessions.
        In order to save them it deletes and rewrites all affected jobsession entrys.
        It automatically parses certain jobLogs into additional stats, defined by `supported_ids`.
        """

        table = self.__influx_client.database['jobs']
        # only store if there is something to store -> limited by the job log retention time.
        where_str = 'jobsLogsStored <> \'True\' and time > now() - %s' % self.__job_log_retention_time
        where_str += f' AND time > now() - {table.retention_policy.duration}'

        jobs_updated = 0
        logs_total_count = 0
        LOGGER.info("> getting joblogs for jobsessions without saved logs")
        LOGGER.info(">> requesting jobList from database")

        # Select all jobs without joblogs
        keyword = Keyword.SELECT
        query = SelectionQuery(keyword=keyword,
                               tables=[table],
                               fields=['*'],
                               where_str=where_str)
        # send query and compute
        result = self.__influx_client.send_selection_query(  # type: ignore
            query)
        result_list: List[Dict[str, Any]] = list(
            result.get_points())  # type: ignore

        rows_affected = len(result_list)

        LOGGER.info(
            ">>> number of jobs with no joblogs stored in Influx database: {}".
            format(rows_affected))

        job_log_dict: Dict[int, List[Dict[str, Any]]] = {}

        # request all jobLogs from REST-API
        # if errors occur, skip single row and debug
        for row in result_list:
            job_session_id: Optional[int] = row.get('id', None)

            # if somehow id is missing: skip
            if (job_session_id is None):
                ExceptionUtils.error_message(
                    f"Error: joblogId missing for row {row}")
                continue

            if (job_session_id in job_log_dict):
                ExceptionUtils.error_message(
                    f"Error: joblogId duplicate, skipping.{job_session_id}")
                continue

            if (self.__verbose):
                LOGGER.info(
                    f">>> requested joblogs for {len(job_log_dict)} / {rows_affected} job sessions."
                )
            elif (len(job_log_dict) % 5 == 0):
                LOGGER.info(
                    f">>> requested joblogs for {len(job_log_dict)} / {rows_affected} job sessions."
                )

            # request job_session_id
            try:
                if (self.__verbose):
                    LOGGER.info(
                        f"requesting jobLogs {self.__job_log_type} for session {job_session_id}."
                    )
                LOGGER.debug(
                    f"requesting jobLogs {self.__job_log_type} for session {job_session_id}."
                )

                # can't use `query_something` like everywhere else due to the extra params needed
                job_log_list = self.__api_queries.get_job_log_details(
                    jobsession_id=job_session_id,
                    job_logs_type=self.__job_log_type)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error=error,
                    extra_message=
                    f"error when api-requesting joblogs for job_session_id {job_session_id}, skipping it"
                )
                continue

            if (self.__verbose):
                LOGGER.info(
                    f">>> Found {len(job_log_list)} logs for jobsessionId {job_session_id}"
                )

            LOGGER.debug(
                f"Found {len(job_log_list)} logs for jobsessionId {job_session_id}"
            )
            # default to an empty list if no details are available -> should not happen, kept in for safety reasons
            # if this is None, go down to the rest client and fix it. It should be an empty list.
            if (job_log_list is None):
                job_log_list = []
                ExceptionUtils.error_message(
                    "A joblog_list was none, even if the type does not allow it. Please report to developers."
                )
            job_log_dict[job_session_id] = job_log_list

        # list to be inserted after everything is updated
        insert_list: List[Dict[str, Any]] = []

        # Query data in ranges to avoid too many requests
        # Results from first select query above
        for row in result_list:
            job_id: int = row['id']
            job_log_list: Optional[List[Dict[str, Any]]] = job_log_dict.get(
                job_id, None)

            if (job_log_list is None):
                ExceptionUtils.error_message(
                    f"missing job_log_list even though it is in influxdb for jobId {job_id}. Skipping it"
                )
                continue

            # jobLogsCount will be zero if jobLogs are deleted after X days by maintenance jobs, GUI default is 60 days
            job_logs_count = len(job_log_list)
            if (self.__verbose):
                LOGGER.info(
                    ">>> storing {} joblogs for jobsessionId: {} in Influx database"
                    .format(len(job_log_list), job_id))
            LOGGER.debug(
                ">>> storing {} joblogs for jobsessionId: {} in Influx database"
                .format(len(job_log_list), job_id))

            for job_log in job_log_list:
                # rename log keys and add additional information
                job_log["jobId"] = row.get("jobId", None)
                job_log["jobName"] = row.get("jobName", None)
                job_log["jobExecutionTime"] = row.get("start", None)
                job_log["jobLogId"] = job_log.pop("id")
                job_log["jobSessionId"] = job_log.pop("jobsessionId")

            # compute other stats out of jobList
            try:
                self.__job_logs_to_stats(job_log_list)
            except ValueError as error:
                ExceptionUtils.exception_info(
                    error,
                    extra_message=
                    f"Failed to compute stats out of job logs, skipping for jobsessionId {job_id}"
                )

            for job_log in job_log_list:
                # dump message params to allow saving as string
                job_log["messageParams"] = json.dumps(job_log["messageParams"])

            # if the list is empty (due to being erased etc.) it will simply return and do nothing
            self.__influx_client.insert_dicts_to_buffer(
                list_with_dicts=job_log_list, table_name="jobLogs")

            jobs_updated += 1
            logs_total_count += job_logs_count
            # update job table and set jobsLogsStored = True, jobLogsCount = len(jobLogDetails)
            update_fields = {
                "jobLogsCount": job_logs_count,
                "jobsLogsStored": True
            }
            # copy dict to allow update without errors
            mydict = dict(row.items())
            # update fields
            for (key, value) in update_fields.items():
                mydict[key] = value
            insert_list.append(mydict)

        # Delete data to allow reinsert with different tags
        delete_query = SelectionQuery(keyword=Keyword.DELETE,
                                      tables=[table],
                                      where_str=where_str)

        # now send remove query to prevent data loss
        self.__influx_client.send_selection_query(delete_query)  # type: ignore

        # Insert data after everything is completed
        self.__influx_client.insert_dicts_to_buffer(table.name, insert_list)

        LOGGER.info(
            ">>> inserting a total of {} logs".format(logs_total_count))
    def update_row(self,
                   table_name: str,
                   tag_dic: Dict[str, str] = None,
                   field_dic: Dict[str, Union[str, int, float, bool]] = None,
                   where_str: str = None):
        """DEPRICATED: Updates a row of the given table by given tag and field dict.

        Applies on multiple rows if `where` clause is fullfilled.
        Updates row by row, causing a high spike in call times: 3 Influx-Querys per call.
        Simple overwrite if no tag is changed, otherwise deletes old row first.
        Possible to add new values to old records.
        No replacement method available yet, check jobLogs (jobs update) how to query, then delete / update all at once.

        Arguments:
            table_name {str} -- name of table to be updated

        Keyword Arguments:
            tag_dic {Dict[str, str]} -- new tag values (default: {None})
            field_dic {Dict[str, Union[str, int, float, bool]]} -- new field values (default: {None})
            where_str {str} -- clause which needs to be fulfilled, any matched rows are updated (default: {None})

        Raises:
            ValueError: No table name is given.
            ValueError: Neither tag nor field dic given.
        """
        # None or empty checks
        if (not table_name):
            raise ValueError("Need table_name to update row")
        if (not tag_dic and not field_dic):
            raise ValueError(
                f"Need either new field or tag to update row in table {table_name}"
            )

        keyword = Keyword.SELECT
        table = self.database[table_name]
        query = SelectionQuery(keyword=keyword,
                               fields=['*'],
                               tables=[table],
                               where_str=where_str)
        result = self.send_selection_query(query)  # type: ignore
        result_list: List[Dict[str, Union[int, float, bool, str]]] = list(
            result.get_points())  # type: ignore

        # no results found
        if (not result_list):
            return

        # split between remove and insert
        # if tags are replaced, the old row needs to be removed first
        if (tag_dic):
            # WHERE clause reused
            keyword = Keyword.DELETE
            table = self.database[table_name]
            query = SelectionQuery(keyword=keyword,
                                   tables=[table],
                                   where_str=where_str)
            self.send_selection_query(query)

        insert_list = []
        for row in result_list:
            if (tag_dic):
                for (key, value) in tag_dic.items():
                    row[key] = value
            if (field_dic):
                for (key, value) in field_dic.items():
                    row[key] = value
            insert_list.append(row)

        # default insert method
        self.insert_dicts_to_buffer(table_name, insert_list)
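A usage sketch (made-up values) for the deprecated `update_row`, mainly to document the call signature; new code should prefer the batched select/delete/re-insert pattern sketched after the first `job_logs` example. `influx_client` is an assumed instance of the class defining `update_row`.

influx_client.update_row(
    table_name="sites",
    tag_dic={"siteName": "primary-dc"},                   # changing a tag forces delete + re-insert
    field_dic={"description": "renamed via update_row"},  # fields are simply overwritten
    where_str="siteId = '101'")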