Example #1
0
 def __get_chunk(self, instance_id, offset, limit):
     """Request one slice of a cube instance through `cubes.cube_instance_id`.

     Args:
         instance_id: Identifier of the cube instance to read from.
         offset: Offset value forwarded unchanged to the request.
         limit: Limit value forwarded unchanged to the request.

     Returns:
         Whatever `cubes.cube_instance_id` returns for this request.
     """
     fetch = cubes.cube_instance_id
     request_args = {
         'connection': self._connection,
         'cube_id': self._cube_id,
         'instance_id': instance_id,
         'offset': offset,
         'limit': limit,
         # verbosity of the request follows the helper's debug switch
         'verbose': helper.debug(),
     }
     return fetch(**request_args)
 def generate_cube_instance_id(self, offset, limit=5000):
     """Fetch a slice of this object's cube instance and return its JSON body.

     Args:
         offset: Offset value forwarded to `cubes.cube_instance_id`.
         limit: Limit value forwarded to `cubes.cube_instance_id`
             (5000 unless overridden).

     Returns:
         The result of calling `.json()` on the response.
     """
     fetch = cubes.cube_instance_id
     response = fetch(
         connection=self.connection,
         cube_id=self.cube_id,
         instance_id=self.instance_id,
         offset=offset,
         limit=limit,
     )
     payload = response.json()
     return payload
Example #3
0
 def __fetch_chunks(self, parser, pagination, it_total, instance_id, limit):
     """Sequentially download the remaining slices of a cube instance.

     Each slice is requested with `cubes.cube_instance_id` and its JSON body
     is handed to `parser.parse`.

     Args:
         parser: Object whose `parse(response=...)` receives every slice.
         pagination: Mapping whose 'total' entry bounds the offsets requested.
         it_total: Number of slices to fetch; the progress bar is sized to
             `it_total + 1`.
         instance_id: Identifier of the cube instance to read from.
         limit: Step between offsets and the limit sent with each request.
     """
     with tqdm(desc="Downloading", total=it_total + 1,
               disable=not self._progress_bar) as progress:
         # One tick before any request is made here; presumably this stands
         # for the first slice already loaded by the caller - TODO confirm.
         progress.update()
         chunk_offsets = range(self._initial_limit, pagination['total'], limit)
         for chunk_start in chunk_offsets:
             chunk = cubes.cube_instance_id(
                 connection=self.connection,
                 cube_id=self._id,
                 instance_id=instance_id,
                 offset=chunk_start,
                 limit=limit,
             )
             progress.update()
             # the last slice may be short, so cap the displayed row count
             rows_so_far = min(chunk_start + limit, pagination['total'])
             progress.set_postfix(rows=str(rows_so_far))
             parser.parse(response=chunk.json())
Example #4
0
    def get_cube(self, cube_id, offset=0, limit=1000):
        """
        Extracts the contents of a cube into a Pandas Data Frame

        :param cube_id: Unique ID of the cube you wish to extract information from.
        :param offset: (optional) Row offset of the first chunk requested. To extract all data
        from the report, use 0 (default). Follow-up chunks continue from `offset + limit`.
        :param limit: (optional) Used to control data extract behavior on datasets with a large
        number of rows. The default is 1000. As an example, if the dataset has 50,000 rows,
        get_cube() will incrementally extract all 50,000 rows in 1,000 row chunks. Depending
        on system resources, a higher limit (e.g. 10,000) may reduce the total time
        required to extract the entire dataset
        :return: Pandas Data Frame containing the cube contents. None is returned when the
        initial request is not ok and the response handler returns instead of raising.
        """

        # warning for future deprecation / replacement by Cube class;
        # stacklevel=2 attributes the warning to the caller's line rather than this one
        warnings.warn(
            "This method will be deprecated. The Cube constructor is preferred and supports multi-table data.",
            DeprecationWarning,
            stacklevel=2)

        response = cubes.cube_instance(connection=self,
                                       cube_id=cube_id,
                                       offset=offset,
                                       limit=limit)

        if not response.ok:
            msg = "Error getting cube contents."
            self.__response_handler(response=response, msg=msg)
            return None

        json_response = response.json()
        instance_id = json_response['instanceId']

        # Gets the pagination totals from the response object
        pagination = json_response['result']['data']['paging']

        # Everything arrived in the first response: parse it and return
        if pagination['current'] == pagination['total']:
            return parsejson(response=json_response)

        # initialize a list to capture slices from each query, and append the first request's result to the list
        table_data = [parsejson(response=json_response)]

        # Fetch add'l rows from this object instance from the intelligence server.
        # The first request started at `offset`, so the next slice starts at
        # `offset + limit`; starting at `limit` would re-fetch rows whenever offset > 0.
        for _offset in range(offset + limit, pagination['total'], limit):
            response = cubes.cube_instance_id(connection=self,
                                              cube_id=cube_id,
                                              instance_id=instance_id,
                                              offset=_offset,
                                              limit=limit)
            table_data.append(parsejson(response=response.json()))
        return pd.concat(table_data)
Example #5
0
    def test_cube_instance_id(self, mock_get):
        """`cubes.cube_instance_id` returns a response carrying the status code
        configured on `mock_get`'s return value."""
        credentials = dict(base_url=BASE_URL,
                           username=USERNAME,
                           password=PASSWORD,
                           project_name=PROJECT_NAME)
        connection = microstrategy.Connection(**credentials)

        expected_status = 200
        mock_get.return_value.status_code = expected_status

        result = cubes.cube_instance_id(connection,
                                        cube_id=CUBE_ID,
                                        instance_id=INSTANCE_ID)

        self.assertEqual(result.status_code, expected_status)
Example #6
0
 def __get_chunk(self, instance_id, offset, limit):
     """Request one slice of a cube instance through `cubes.cube_instance_id`.

     Args:
         instance_id: Identifier of the cube instance to read from.
         offset: Offset value forwarded unchanged to the request.
         limit: Limit value forwarded unchanged to the request.

     Returns:
         Whatever `cubes.cube_instance_id` returns for this request.
     """
     fetch = cubes.cube_instance_id
     chunk = fetch(
         connection=self._connection,
         cube_id=self._cube_id,
         instance_id=instance_id,
         offset=offset,
         limit=limit,
     )
     return chunk
Example #7
0
    def to_dataframe(self, limit=None, multi_df=False):
        """Extract contents of a cube into a Pandas Data Frame. Previously `microstrategy.Connection.get_cube()`.

        Args:
            limit (None or int, optional): Used to control data extract behavior. By default (None)
                the limit is calculated automatically, based on an optimized physical size of one
                chunk. Setting limit manually will force the number of rows per chunk. Depending on
                system resources, a higher limit (e.g. 50,000) may reduce the total time required
                to extract the entire dataset. A truthy value is also stored on the object as the
                limit used for the initial request.
            multi_df (bool, optional): If True, return a list of data frames resembling the table
                structure of the cube. If False (default), returns one data frame.

        Returns:
            Pandas Data Frame containing the cube contents, or, when `multi_df` is True, a list of
            data frames, one per entry of the cube's table definition.
        """
        # The context manager closes the status line even if the instance request raises.
        with tqdm(desc='Initializing an instance of a cube. Please wait...',
                  bar_format='{desc}', leave=False, ncols=280, disable=(not self.progress_bar)):
            if limit:
                self._initial_limit = limit

            # Request a new instance, set instance id
            res = cubes.cube_instance(connection=self._connection,
                                      cube_id=self._cube_id,
                                      body=self._filter.filter_body(),
                                      offset=self.__OFFSET,
                                      limit=self._initial_limit)

        # Gets the pagination totals and instance_id from the response object
        _instance = res.json()
        _instance_id = _instance['instanceId']
        _pagination = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=True)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:
            if not limit:
                # derive the chunk size from the byte length of the first response, never below 1000 rows
                limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
            # Count the number of additional iterations (ceiling of remaining rows / limit)
            remaining = _pagination['total'] - self._initial_limit
            it_total = int(remaining / limit) + (remaining % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                    # `with` closes the download bar even if a chunk fails to load or parse
                    with tqdm(desc="Downloading", total=it_total+1,
                              disable=(not self.progress_bar)) as fetch_pbar:
                        future = self.__fetch_chunks_future(session, _pagination, _instance_id, limit)
                        fetch_pbar.update()
                        for i, f in enumerate(future, start=1):
                            response = f.result()
                            if not response.ok:
                                # re-request the failed chunk once, synchronously
                                current_offset = self._initial_limit+(i-1)*limit
                                response = cubes.cube_instance_id(connection=self._connection, cube_id=self._cube_id,
                                                                  instance_id=_instance_id, offset=current_offset,
                                                                  limit=limit)
                            fetch_pbar.update()
                            fetch_pbar.set_postfix(rows=str(min(self._initial_limit+i*limit, _pagination['total'])))
                            p.parse(response.json())
            else:
                self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe
        if multi_df:
            # save the multitable_definition response to the instance
            self.__multitable_definition()
            # split dataframe to dataframes matching tables in Cube
            self._dataframes = [self._dataframe[columns].copy() for _, columns in self._table_definition.items()]
            return self._dataframes
        else:
            return self._dataframe
Example #8
0
    def to_dataframe(self, limit: Optional[int] = None, multi_df: bool = False):
        """Extract contents of a cube into a Pandas `DataFrame`.

        If this object already holds an instance id, the first chunk is read
        from that instance; a new instance is created when there is none or
        when that read raises `requests.HTTPError`.

        Args:
            limit (None or int, optional): Used to control data extract
                behavior. By default (None) the limit is calculated
                automatically, based on an optimized physical size of one chunk.
                Setting limit manually will force the number of rows per chunk.
                Depending on system resources, a higher limit (e.g. 50,000) may
                reduce the total time required to extract the entire dataset.
                A truthy value is also stored on the object as the limit used
                for the initial request.
            multi_df (bool, optional): If True, return a list of data frames
                resembling the table structure of the cube. If False (default),
                returns one data frame.

        Returns:
            Pandas Data Frame containing the cube contents, or, when `multi_df`
            is True, a list of data frames, one per entry of the cube's
            multitable definition.
        """
        if limit:
            self._initial_limit = limit

        if self.instance_id is None:
            res = self.__create_cube_instance(self._initial_limit)
        else:

            # try to get first chunk from already initialized instance of cube,
            # if not possible, initialize new instance
            try:
                res = cubes.cube_instance_id(connection=self.connection, cube_id=self._id,
                                             instance_id=self.instance_id, offset=0,
                                             limit=self._initial_limit)
            except requests.HTTPError:
                res = self.__create_cube_instance(self._initial_limit)

        # Gets the pagination totals and instance_id from the response object
        _instance = res.json()
        self.instance_id = _instance['instanceId']
        paging = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=True)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if paging['current'] != paging['total']:
            if not limit:
                # derive the chunk size from the byte length of the first response, never below 1000 rows
                limit = max(1000, int((self._initial_limit * self._SIZE_LIMIT) / len(res.content)))
            # Count the number of additional iterations (ceiling of remaining rows / limit)
            remaining = paging['total'] - self._initial_limit
            it_total = int(remaining / limit) + (remaining % limit != 0)

            if self._parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSessionWithRenewal(connection=self._connection,
                                               max_workers=threads) as session:
                    # `with` closes the download bar even if the response handler
                    # or the parser raises part-way through
                    with tqdm(desc="Downloading", total=it_total + 1,
                              disable=(not self._progress_bar)) as fetch_pbar:
                        future = self.__fetch_chunks_future(session, paging, self.instance_id,
                                                            limit)
                        fetch_pbar.update()
                        for i, f in enumerate(future, start=1):
                            response = f.result()
                            if not response.ok:
                                helper.response_handler(response, "Error getting cube contents.")
                            fetch_pbar.update()
                            fetch_pbar.set_postfix(
                                rows=str(min(self._initial_limit + i * limit, paging['total'])))
                            p.parse(response.json())
            else:
                self.__fetch_chunks(p, paging, it_total, self.instance_id, limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe
        if multi_df:
            # split dataframe to dataframes matching tables in Cube
            self._dataframes = [
                self._dataframe[columns].copy()
                for _, columns in self.__multitable_definition().items()
            ]
            return self._dataframes
        else:
            return self._dataframe
Example #9
0
    def to_dataframe(self, limit=25000, progress_bar=True, multi_df=False):
        """
        Extract contents of a cube into a Pandas Data Frame. Previously `microstrategy.Connection.get_cube()`.

        Args:
            limit (int, optional): Used to control data extract behavior on datasets with a large number of rows.
                The default is 25000. As an example, if the dataset has 50,000 rows, to_dataframe() will
                incrementally extract all 50,000 rows in two 25,000 row chunks. Depending on system resources,
                a higher limit may reduce the total time required to extract the entire dataset.
            progress_bar (bool, optional): If True (default), show the connection status line and the download
                progress bar. If False, both are suppressed.
            multi_df (bool, optional): If True, return a list of df resembling the table structure of the cube.
                If False (default), returns one df.

        Returns:
            Pandas Data Frame containing the cube contents, or a list of data frames when `multi_df` is True.
            None is returned when the initial request is not ok and the response handler returns instead of
            raising.
        """
        # status line shown while the instance is requested; honours `progress_bar` like the download bar
        inst_pbar = tqdm(
            desc='Connecting to MicroStrategy I-Server. Please wait...',
            bar_format='{desc}',
            leave=False,
            ncols=310,
            disable=(not progress_bar))

        try:
            # Request a new instance, set instance id
            res = cubes.cube_instance(connection=self._connection,
                                      cube_id=self._cube_id,
                                      body=self._filter.filter_body(),
                                      offset=self.__OFFSET,
                                      limit=limit)
        finally:
            # close the status line even when the request raises
            inst_pbar.close()

        if not res.ok:
            msg = "Error getting cube contents."
            self.__response_handler(response=res, msg=msg)
            return None

        _instance = res.json()
        _instance_id = _instance['instanceId']

        # Gets the pagination totals from the response object
        _pagination = _instance['result']['data']['paging']

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:

            # initialize a list to capture slices from each query, and append the first request's result to the list
            table_data = [parsejson(response=_instance)]

            # Count the number of iterations (ceiling of total rows / limit)
            it_total = int(_pagination['total'] /
                           limit) + (_pagination['total'] % limit != 0)

            # Fetch add'l rows from this object instance from the intelligence server
            with tqdm(total=it_total,
                      disable=(not progress_bar)) as fetch_pbar:
                if progress_bar:
                    # first tick accounts for the chunk already loaded above
                    fetch_pbar.update()
                    fetch_pbar.set_description("Downloading")
                    fetch_pbar.set_postfix(rows=limit)
                for _offset in range(limit, _pagination['total'], limit):
                    if progress_bar:
                        fetch_pbar.update()
                        fetch_pbar.set_description("Downloading")
                        # the last chunk may be short, so cap the displayed row count
                        fetch_pbar.set_postfix(
                            rows=min(_offset +
                                     limit, _pagination['total']))
                    response = cubes.cube_instance_id(
                        connection=self._connection,
                        cube_id=self._cube_id,
                        instance_id=_instance_id,
                        offset=_offset,
                        limit=limit)
                    table_data.append(parsejson(response=response.json()))

            # concatenate and return the list of result data as a data frame
            self._dataframe = pd.concat(table_data).reset_index(drop=True)
        else:
            # otherwise parse the first result and return it as a dataframe
            self._dataframe = parsejson(response=_instance)

        # the table definition is requested on every call, also when multi_df is False
        _tables = self.multitable_definition()
        if multi_df:
            # split dataframe to dataframes matching tables in Cube
            self._dataframes = [
                self._dataframe[columns].copy()
                for _, columns in _tables.items()
            ]
            return self._dataframes
        else:
            return self._dataframe