Example #1
    def test_get_parallel_number(self):
        # Swap the os module referenced by helper for a mock so that
        # cpu_count() can be controlled per case.
        helper.os = Mock()
        # The asserted cap is min(8, cpu_count + 4), and the result never
        # exceeds the number of chunks passed in.
        helper.os.cpu_count.return_value = 2
        self.assertEqual(helper.get_parallel_number(100), 6)
        self.assertEqual(helper.get_parallel_number(1), 1)
        helper.os.cpu_count.return_value = 4
        self.assertEqual(helper.get_parallel_number(100), 8)
        self.assertEqual(helper.get_parallel_number(1), 1)
        helper.os.cpu_count.return_value = 8
        self.assertEqual(helper.get_parallel_number(100), 8)
        self.assertEqual(helper.get_parallel_number(1), 1)
        helper.os.cpu_count.return_value = 12
        self.assertEqual(helper.get_parallel_number(100), 8)
        self.assertEqual(helper.get_parallel_number(1), 1)
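Taken together, the assertions pin the helper down: the worker count is cpu_count + 4, capped at 8, and never larger than the number of chunks to fetch. A minimal sketch consistent with this test (reconstructed from the assertions, not copied from the library, so the real mstrio-py implementation may differ in detail):

    import os

    def get_parallel_number(chunks_number):
        """Number of worker threads to use when fetching chunks in parallel.

        Sketch derived from the test above: cpu_count + 4, capped at 8,
        and at most one thread per chunk.
        """
        threads = min(8, (os.cpu_count() or 1) + 4)
        return max(1, min(threads, chunks_number))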
Example #2
    def __get_attr_elements_async(self, limit: int = 50000) -> list:
        """Get elements of report attributes asynchronously.

        Implements GET /reports/<report_id>/attributes/<attribute_id>/elements.
        """

        attr_elements = []
        if self.attributes:
            threads = helper.get_parallel_number(len(self.attributes))
            with FuturesSession(
                    executor=ThreadPoolExecutor(max_workers=threads),
                    session=self._connection.session) as session:
                # Fetch first chunk of attribute elements.
                futures = self.__fetch_attribute_elements_chunks(
                    session, limit)
                pbar = tqdm(futures,
                            desc="Loading attribute elements",
                            leave=False,
                            disable=(not self.progress_bar))
                for i, future in enumerate(pbar):
                    attr = self.attributes[i]
                    response = future.result()
                    if not response.ok:
                        helper.response_handler(
                            response, "Error getting attribute " +
                            attr['name'] + " elements")
                    elements = response.json()
                    # Get total number of rows from headers.
                    total = int(response.headers['x-mstr-total-count'])
                    for _offset in range(limit, total, limit):
                        response = reports.report_single_attribute_elements(
                            connection=self._connection,
                            report_id=self._report_id,
                            attribute_id=attr["id"],
                            offset=_offset,
                            limit=limit,
                        )
                        elements.extend(response.json())
                    # Append attribute data to the list of attributes.
                    attr_elements.append({
                        "attribute_name": attr['name'],
                        "attribute_id": attr['id'],
                        "elements": elements
                    })
                pbar.close()

            return attr_elements
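The enumerate() loop above pairs futures with self.attributes by position, so __fetch_attribute_elements_chunks must return one future per attribute, in the same order. A hypothetical sketch of such a helper (the endpoint path and Connection attributes here are illustrative, not the library's exact API):

    def __fetch_attribute_elements_chunks(self, session, limit):
        # One GET future per attribute, preserving the order of
        # self.attributes so the caller can match responses back up.
        return [
            session.get(
                f"{self._connection.base_url}/api/reports/{self._report_id}"
                f"/attributes/{attr['id']}/elements",
                params={"offset": 0, "limit": limit},
            )
            for attr in self.attributes
        ]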
Example #3
    def disconnect_all_databases(self,
                                 force: bool = False
                                 ) -> Union[List[dict], None]:
        """Disconnect all database connections.

        Args:
            force: if True, no additional prompt will be shown before
                disconnecting all connections

        Returns:
            - list of statuses of disconnecting all connections with their ids
               and messages from the I-Server
            - in case of error it returns None
        """
        if not force:
            user_input = input(
                "Are you sure you want to disconnect all database connections? [Y/N]: "
            )
            if user_input != "Y":
                return None

        connections = self.list_connections()
        threads = helper.get_parallel_number(len(connections))
        with FuturesSessionWithRenewal(connection=self.connection,
                                       max_workers=threads) as session:

            futures = [
                monitors.delete_database_connection_async(
                    session, self.connection, conn["id"])
                for conn in connections
            ]
            statuses: List[Dict[str, Union[str, int]]] = []
            for f in futures:
                response = f.result()
                statuses.append({
                    'id': response.url.rsplit("/").pop(-1),
                    'status': response.status_code
                })
        return self._prepare_disconnect_by_id_message(statuses=statuses)
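The connection id is recovered from the request URL: rsplit("/") breaks the URL into segments and pop(-1) takes the last one. A quick illustration with a made-up URL:

    url = "https://example.com/api/monitors/dbConnectionInstances/ABC123"
    # Equivalent to url.rsplit("/", 1)[-1]
    assert url.rsplit("/").pop(-1) == "ABC123"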
Example #4
File: cube.py  Project: jonaqp/mstrio-py
    def to_dataframe(self, limit=None, multi_df=False):
        """Extract contents of a cube into a Pandas `DataFrame`.

        Args:
            limit (None or int, optional): Used to control data extract
                behavior. By default (None) the limit is calculated
                automatically, based on an optimized physical size of one chunk.
                Setting limit manually will force the number of rows per chunk.
                Depending on system resources, a higher limit (e.g. 50,000) may
                reduce the total time required to extract the entire dataset.
            multi_df (bool, optional): If True, return a list of data frames
                resembling the table structure of the cube. If False (default),
                returns one data frame.

        Returns:
            Pandas Data Frame containing the cube contents
        """
        if limit:
            self._initial_limit = limit

        if self.instance_id is None:
            res = self.__initialize_cube(self._initial_limit)
        else:
            # try to get first chunk from already initialized instance of cube,
            # if not possible, initialize new instance
            try:
                res = self.__get_chunk(instance_id=self.instance_id,
                                       offset=0,
                                       limit=self._initial_limit)
            except requests.HTTPError:
                res = self.__initialize_cube(self._initial_limit)

        # Gets the pagination totals and instance_id from the response object
        _instance = res.json()
        _instance_id = _instance['instanceId']
        _pagination = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=True)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:
            if not limit:
                limit = max(
                    1000,
                    int((self._initial_limit * self._size_limit) /
                        len(res.content)))
            # Count the number of additional iterations
            it_total = int((_pagination['total'] - self._initial_limit) / limit) + \
                ((_pagination['total'] - self._initial_limit) % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(
                        executor=ThreadPoolExecutor(max_workers=threads),
                        session=self._connection.session) as session:
                    fetch_pbar = tqdm(desc="Downloading",
                                      total=it_total + 1,
                                      disable=(not self.progress_bar))
                    future = self.__fetch_chunks_future(
                        session, _pagination, _instance_id, limit)
                    fetch_pbar.update()
                    for i, f in enumerate(future, start=1):
                        response = f.result()
                        if not response.ok:
                            helper.response_handler(
                                response, "Error getting cube contents.")
                        fetch_pbar.update()
                        fetch_pbar.set_postfix(rows=str(
                            min(self._initial_limit +
                                i * limit, _pagination['total'])))
                        p.parse(response.json())
                    fetch_pbar.close()
            else:
                self.__fetch_chunks(p, _pagination, it_total, _instance_id,
                                    limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe
        # split dataframe to dataframes matching tables in Cube
        if multi_df:
            self._dataframes = [
                self._dataframe[columns].copy()
                for _, columns in self.__multitable_definition().items()
            ]
            return self._dataframes
        else:
            return self._dataframe
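The it_total expression above is ceiling division spelled out with int() and a remainder check: it counts the extra chunks needed after the first one. A worked example with made-up sizes:

    import math

    total, initial_limit, limit = 100_000, 25_000, 30_000
    remaining = total - initial_limit              # 75_000 rows still to fetch
    it_total = remaining // limit + (remaining % limit != 0)
    assert it_total == math.ceil(remaining / limit) == 3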
Example #5
    def to_dataframe(self, limit=None, multi_df=False):
        """Extract contents of a cube into a Pandas Data Frame. Previously `microstrategy.Connection.get_cube()`.

        Args:
            limit (None or int, optional): Used to control data extract behavior. By default (None)
                the limit is calculated automatically, based on an optimized physical size of one
                chunk. Setting limit manually will force the number of rows per chunk. Depending on
                system resources, a higher limit (e.g. 50,000) may reduce the total time required
                to extract the entire dataset.
            multi_df (bool, optional): If True, return a list of data frames resembling the table
                structure of the cube. If False (default), returns one data frame.

        Returns:
            Pandas Data Frame containing the cube contents
        """
        inst_pbar = tqdm(desc='Initializing an instance of a cube. Please wait...',
                         bar_format='{desc}', leave=False, ncols=280, disable=(not self.progress_bar))
        if limit:
            self._initial_limit = limit

        # Request a new instance, set instance id
        res = cubes.cube_instance(connection=self._connection,
                                  cube_id=self._cube_id,
                                  body=self._filter.filter_body(),
                                  offset=self.__OFFSET,
                                  limit=self._initial_limit)
        inst_pbar.close()

        # Gets the pagination totals and instance_id from the response object
        _instance = res.json()
        _instance_id = _instance['instanceId']
        _pagination = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=True)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:
            if not limit:
                limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
            # Count the number of additional iterations
            it_total = int((_pagination['total']-self._initial_limit)/limit) + ((_pagination['total']-self._initial_limit) % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                    fetch_pbar = tqdm(desc="Downloading", total=it_total+1, disable=(not self.progress_bar))
                    future = self.__fetch_chunks_future(session, _pagination, _instance_id, limit)
                    fetch_pbar.update()
                    for i, f in enumerate(future, start=1):
                        response = f.result()
                        if not response.ok:
                            current_offset = self._initial_limit+(i-1)*limit
                            response = cubes.cube_instance_id(connection=self._connection, cube_id=self._cube_id,
                                                              instance_id=_instance_id, offset=current_offset,
                                                              limit=limit)
                        fetch_pbar.update()
                        fetch_pbar.set_postfix(rows=str(min(self._initial_limit+i*limit, _pagination['total'])))
                        p.parse(response.json())
                    fetch_pbar.close()
            else:
                self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe
        # split dataframe to dataframes matching tables in Cube
        if multi_df:
            # save the multitable_definition response to the instance
            self.__multitable_definition()
            # split dataframe to dataframes matching tables in Cube
            self._dataframes = [self._dataframe[columns].copy() for _, columns in self._table_definition.items()]
            return self._dataframes
        else:
            return self._dataframe
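A hedged usage sketch covering both return shapes of to_dataframe, assuming a reachable I-Server (the URL, credentials, and cube id are placeholders, and import paths vary between mstrio-py versions):

    from mstrio.connection import Connection
    from mstrio.cube import Cube

    conn = Connection(base_url="https://example.com/MicroStrategyLibrary/api",
                      username="user", password="password",
                      project_name="MicroStrategy Tutorial")
    cube = Cube(connection=conn, cube_id="1234567890ABCDEF1234567890ABCDEF")
    df = cube.to_dataframe(limit=50000)       # one DataFrame
    dfs = cube.to_dataframe(multi_df=True)    # list of per-table DataFrames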
Example #6
    def to_dataframe(self, limit: int = None) -> pd.DataFrame:
        """Extract contents of a report instance into a Pandas `DataFrame`.

        Args:
            limit (None or int, optional): Used to control data extract
                behavior. By default (None) the limit is calculated
                automatically, based on an optimized physical size of one chunk.
                Setting limit manually will force the number of rows per chunk.
                Depending on system resources, a higher limit (e.g. 50,000) may
                reduce the total time required to extract the entire dataset.

        Returns:
            Pandas Data Frame containing the report contents.
        """
        if limit:
            self._initial_limit = limit
            self.instance_id = None

        if self.instance_id is None:
            res = self.__initialize_report(self._initial_limit)
        else:
            # try to get first chunk from already initialized instance of report
            # if not possible, initialize new instance
            try:
                res = self.__get_chunk(instance_id=self.instance_id,
                                       offset=0,
                                       limit=self._initial_limit)
            except requests.HTTPError:
                res = self.__initialize_report(self._initial_limit)

        # Gets the pagination totals from the response object
        _instance = res.json()
        self.instance_id = _instance['instanceId']
        paging = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=False)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if paging['current'] != paging['total']:
            if not limit:
                limit = max(
                    1000,
                    int((self._initial_limit * self._size_limit) /
                        len(res.content)))
            # Count the number of additional iterations
            it_total = int((paging['total'] - self._initial_limit) / limit) + \
                ((paging['total'] - self._initial_limit) % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(
                        executor=ThreadPoolExecutor(max_workers=threads),
                        session=self._connection.session) as session:
                    fetch_pbar = tqdm(desc="Downloading",
                                      total=it_total + 1,
                                      disable=(not self.progress_bar))
                    future = self.__fetch_chunks_future(
                        session, paging, self.instance_id, limit)
                    fetch_pbar.update()
                    for i, f in enumerate(future, start=1):
                        response = f.result()
                        if not response.ok:
                            helper.response_handler(
                                response, "Error getting report contents.")
                        fetch_pbar.update()
                        fetch_pbar.set_postfix(rows=str(
                            min(self._initial_limit +
                                i * limit, paging['total'])))
                        p.parse(response.json())
                    fetch_pbar.close()
            else:
                self.__fetch_chunks(p, paging, it_total, self.instance_id,
                                    limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe

        # filter dataframe if report had crosstabs and filters were applied
        if self.cross_tab_filter != {}:
            if self.cross_tab_filter['metrics'] is not None:
                # drop metrics columns from dataframe
                metr_names = [
                    el['name'] for el in list(
                        filter(
                            lambda x: x['id'] not in self.cross_tab_filter[
                                'metrics'], self.metrics))
                ]
                self._dataframe = self._dataframe.drop(metr_names, axis=1)

            if self.cross_tab_filter['attr_elements'] is not None:
                # create dict of attributes and elements to iterate through
                attr_dict = {}
                for attribute in self.cross_tab_filter['attr_elements']:
                    key = attribute[:32]
                    attr_dict.setdefault(key, []).append(attribute[33:])
                # initialize indexes series for filter
                indexes = pd.Series([False] * len(self._dataframe))

                # logical OR for filtered attribute elements
                for attribute in attr_dict:
                    attr_name = list(
                        filter(lambda x: x['id'] in attribute,
                               self.attributes))[0]['name']
                    elements = attr_dict[attribute]
                    indexes = indexes | self._dataframe[attr_name].isin(
                        elements)
                # select dataframe rows matching the filtered elements
                self._dataframe = self._dataframe[indexes]

            if self.cross_tab_filter['attributes'] is not None:
                attr_names = [
                    el['name'] for el in list(
                        filter(
                            lambda x: x['id'] not in self.cross_tab_filter[
                                'attributes'], self.attributes))
                ]
                # attributes with forms appear as 'name@form' columns,
                # so drop the form columns instead of the bare name
                to_be_removed = []
                to_be_added = []
                for attr in attr_names:
                    forms = [
                        column for column in self._dataframe.columns
                        if column.startswith(attr + '@')
                    ]
                    if forms:
                        to_be_removed.append(attr)
                        to_be_added.extend(forms)
                for elem in to_be_removed:
                    attr_names.remove(elem)
                attr_names.extend(to_be_added)
                # drop filtered out columns
                self._dataframe = self._dataframe.drop(attr_names, axis=1)
        return self._dataframe
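Each entry in cross_tab_filter['attr_elements'] packs a 32-character attribute id, a one-character separator, and the element id into one string, which is why the code slices at [:32] and [33:]. Illustratively (the ':' separator is an assumption):

    attr_element = "8D679D3711D3E4981000E787EC6DE8A4:2020"
    attr_id, element_id = attr_element[:32], attr_element[33:]
    assert attr_id == "8D679D3711D3E4981000E787EC6DE8A4"
    assert element_id == "2020"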
Example #7
    def to_dataframe(self, limit=None):
        """Extract contents of a report instance into a Pandas Data Frame. Formerly `microstrategy.Connection.get_report()`.

        Args:
            limit (None or int, optional): Used to control data extract behavior. By default (None)
                the limit is calculated automatically, based on an optimized physical size of one
                chunk. Setting limit manually will force the number of rows per chunk. Depending on
                system resources, a higher limit (e.g. 50,000) may reduce the total time required
                to extract the entire dataset.

        Returns:
            Pandas Data Frame containing the report contents
        """
        inst_pbar = tqdm(desc='Initializing an instance of a report. Please wait...',
                         bar_format='{desc}', leave=False, ncols=285, disable=(not self.progress_bar))

        # Switch off subtotals if I-Server version is 11.2.1 (11.2.0100) or newer
        body = self._filter.filter_body()
        if version.parse(self._connection.iserver_version) >= version.parse("11.2.0100"):
            self._subtotals["visible"] = False
            body["subtotals"] = {"visible": self._subtotals["visible"]}

        if limit:
            self._initial_limit = limit
        # Request a new instance, set instance id
        res = reports.report_instance(connection=self._connection,
                                      report_id=self._report_id,
                                      body=body,
                                      offset=self.__OFFSET,
                                      limit=self._initial_limit)
        inst_pbar.close()

        # Gets the pagination totals from the response object
        _instance = res.json()
        _instance_id = _instance['instanceId']
        _pagination = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=False)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:
            if not limit:
                limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
            it_total = int((_pagination['total']-self._initial_limit)/limit) + ((_pagination['total']-self._initial_limit) % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                    fetch_pbar = tqdm(desc="Downloading", total=it_total+1, disable=(not self.progress_bar))
                    future = self.__fetch_chunks_future(session, _pagination, _instance_id, limit)
                    fetch_pbar.update()
                    for i, f in enumerate(future, start=1):
                        response = f.result()
                        if not response.ok:
                            current_offset = self._initial_limit+(i-1)*limit
                            response = reports.report_instance_id(connection=self._connection,
                                                                  report_id=self._report_id, instance_id=_instance_id,
                                                                  offset=current_offset, limit=limit)
                        fetch_pbar.update()
                        fetch_pbar.set_postfix(rows=str(min(self._initial_limit+i*limit, _pagination['total'])))
                        p.parse(response.json())
                    fetch_pbar.close()
            else:
                self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe

        # filter received dataframe if report had crosstabs and filters were applied
        if self.cross_tab_filter != {}:
            if self.cross_tab_filter['metrics'] is not None:
                # drop metrics columns from dataframe
                metr_names = [el['name'] for el in list(filter(lambda x: x['id'] not in self.cross_tab_filter['metrics'],
                              self._metrics))]
                self._dataframe = self._dataframe.drop(metr_names, axis=1)

            if self.cross_tab_filter['attr_elements'] is not None:
                # create dict of attributes and elements to iterate through
                attr_dict = {}
                for attribute in self.cross_tab_filter['attr_elements']:
                    key = attribute[:32]
                    attr_dict.setdefault(key, []).append(attribute[33:])
                # initialize indexes series for filter
                indexes = pd.Series([False] * len(self._dataframe))

                # logical OR for filtered attribute elements
                for attribute in attr_dict:
                    attr_name = list(filter(lambda x: x['id'] in attribute, self._attributes))[0]['name']
                    elements = attr_dict[attribute]
                    indexes = indexes | self._dataframe[attr_name].isin(elements)
                # select dataframe rows matching the filtered elements
                self._dataframe = self._dataframe[indexes]

            if self.cross_tab_filter['attributes'] is not None:
                attr_names = [el['name'] for el in list(filter(lambda x: x['id'] not in self.cross_tab_filter['attributes'],
                              self._attributes))]
                self._dataframe = self._dataframe.drop(attr_names, axis=1)

        return self._dataframe
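The version gate near the top of Example #7 works because packaging.version.parse compares dotted versions numerically rather than as strings, so the four-digit build segment orders as expected:

    from packaging import version

    assert version.parse("11.2.0200") >= version.parse("11.2.0100")
    assert version.parse("11.1.0400") < version.parse("11.2.0100")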