def __get_chunk(self, instance_id, offset, limit):
    """Request one page of rows from an existing cube instance.

    Thin wrapper around ``cubes.cube_instance_id`` that forwards the stored
    connection and cube id together with the paging window, honouring the
    debug flag for verbose output.
    """
    return cubes.cube_instance_id(
        connection=self._connection,
        cube_id=self._cube_id,
        instance_id=instance_id,
        offset=offset,
        limit=limit,
        verbose=helper.debug(),
    )
def generate_cube_instance_id(self, offset, limit=5000):
    """Fetch one page of the stored cube instance and return its JSON body.

    Args:
        offset: Zero-based index of the first row to fetch.
        limit: Maximum number of rows in the returned page (default 5000).

    Returns:
        Parsed JSON payload of the REST response.
    """
    response = cubes.cube_instance_id(
        connection=self.connection,
        cube_id=self.cube_id,
        instance_id=self.instance_id,
        offset=offset,
        limit=limit,
    )
    return response.json()
def __fetch_chunks(self, parser, pagination, it_total, instance_id, limit):
    """Fetch add'l rows from this object instance from the Intelligence Server."""
    total_rows = pagination['total']
    progress = tqdm(desc="Downloading", total=it_total + 1,
                    disable=(not self._progress_bar))
    with progress:
        # the first chunk was already retrieved when the instance was created
        progress.update()
        chunk_offset = self._initial_limit
        while chunk_offset < total_rows:
            page = cubes.cube_instance_id(connection=self.connection,
                                          cube_id=self._id,
                                          instance_id=instance_id,
                                          offset=chunk_offset,
                                          limit=limit)
            progress.update()
            progress.set_postfix(rows=str(min(chunk_offset + limit, total_rows)))
            parser.parse(response=page.json())
            chunk_offset += limit
def get_cube(self, cube_id, offset=0, limit=1000):
    """ Extracts the contents of a cube into a Pandas Data Frame

    :param cube_id: Unique ID of the cube you wish to extract information from.
    :param offset: (optional) To extract all data from the report, use 0 (default)
    :param limit: (optional) Used to control data extract behavior on datasets with a large
        number of rows. The default is 1000. As an example, if the dataset has 50,000 rows,
        get_cube() will incrementally extract all 50,000 rows in 1,000 row chunks. Depending
        on system resources, a higher limit (e.g. 10,000) may reduce the total time required
        to extract the entire dataset
    :return: Pandas Data Frame containing the cube contents
    """
    # warning for future deprecation / replacement by Cube class
    warnings.warn(
        "This method will be deprecated. The Cube constructor is preferred and supports multi-table data.",
        DeprecationWarning)
    response = cubes.cube_instance(connection=self, cube_id=cube_id, offset=offset, limit=limit)
    if not response.ok:
        # delegate error reporting to the shared handler; nothing is returned
        self.__response_handler(response=response, msg="Error getting cube contents.")
        return None

    json_response = response.json()
    instance_id = json_response['instanceId']
    # Gets the pagination totals from the response object
    paging = json_response['result']['data']['paging']

    # everything already delivered in the first response: parse and return
    if paging['current'] == paging['total']:
        return parsejson(response=json_response)

    # more rows remain: collect a parsed frame per page, then concatenate
    frames = [parsejson(response=json_response)]
    for chunk_offset in range(limit, paging['total'], limit):
        chunk = cubes.cube_instance_id(connection=self, cube_id=cube_id,
                                       instance_id=instance_id,
                                       offset=chunk_offset, limit=limit)
        frames.append(parsejson(response=chunk.json()))
    return pd.concat(frames)
def test_cube_instance_id(self, mock_get):
    """cube_instance_id should hit the (mocked) endpoint and report HTTP 200."""
    connection = microstrategy.Connection(base_url=BASE_URL,
                                          username=USERNAME,
                                          password=PASSWORD,
                                          project_name=PROJECT_NAME)
    mock_get.return_value.status_code = 200
    result = cubes.cube_instance_id(connection,
                                    cube_id=CUBE_ID,
                                    instance_id=INSTANCE_ID)
    self.assertEqual(result.status_code, 200)
def __get_chunk(self, instance_id, offset, limit):
    """Retrieve one page of rows from an already-created cube instance."""
    return cubes.cube_instance_id(
        connection=self._connection,
        cube_id=self._cube_id,
        instance_id=instance_id,
        offset=offset,
        limit=limit,
    )
def to_dataframe(self, limit=None, multi_df=False):
    """Extract contents of a cube into a Pandas Data Frame. Previously
    `microstrategy.Connection.get_cube()`.

    Args:
        limit (None or int, optional): Used to control data extract behavior.
            By default (None) the limit is calculated automatically, based on
            an optimized physical size of one chunk. Setting limit manually
            will force the number of rows per chunk. Depending on system
            resources, a higher limit (e.g. 50,000) may reduce the total time
            required to extract the entire dataset.
        multi_df (bool, optional): If True, return a list of data frames
            resembling the table structure of the cube. If False (default),
            returns one data frame.

    Returns:
        Pandas Data Frame containing the cube contents
    """
    init_pbar = tqdm(desc='Initializing an instance of a cube. Please wait...',
                     bar_format='{desc}', leave=False, ncols=280,
                     disable=(not self.progress_bar))
    if limit:
        self._initial_limit = limit
    # Request a new instance and remember its id
    first_response = cubes.cube_instance(connection=self._connection,
                                         cube_id=self._cube_id,
                                         body=self._filter.filter_body(),
                                         offset=self.__OFFSET,
                                         limit=self._initial_limit)
    init_pbar.close()
    # Gets the pagination totals and instance_id from the response object
    payload = first_response.json()
    instance_id = payload['instanceId']
    paging = payload['data']['paging']
    # initialize parser and process first response
    parser = Parser(response=payload, parse_cube=True)
    parser.parse(response=payload)
    # If there are more rows to fetch, fetch them
    if paging['current'] != paging['total']:
        if not limit:
            # derive a chunk size from the physical size of the first response
            limit = max(1000, int((self._initial_limit * self._size_limit) / len(first_response.content)))
        # Count the number of additional iterations
        remaining = paging['total'] - self._initial_limit
        iterations = int(remaining / limit) + (remaining % limit != 0)
        if self.parallel and iterations > 1:
            workers = helper.get_parallel_number(iterations)
            with FuturesSession(executor=ThreadPoolExecutor(max_workers=workers)) as session:
                progress = tqdm(desc="Downloading", total=iterations + 1,
                                disable=(not self.progress_bar))
                futures = self.__fetch_chunks_future(session, paging, instance_id, limit)
                progress.update()
                for index, fut in enumerate(futures, start=1):
                    chunk = fut.result()
                    if not chunk.ok:
                        # one synchronous retry for a failed parallel chunk
                        retry_offset = self._initial_limit + (index - 1) * limit
                        chunk = cubes.cube_instance_id(connection=self._connection,
                                                       cube_id=self._cube_id,
                                                       instance_id=instance_id,
                                                       offset=retry_offset,
                                                       limit=limit)
                    progress.update()
                    progress.set_postfix(rows=str(min(self._initial_limit + index * limit, paging['total'])))
                    parser.parse(chunk.json())
                progress.close()
        else:
            self.__fetch_chunks(parser, paging, iterations, instance_id, limit)
    # return parsed data as a data frame
    self._dataframe = parser.dataframe
    if multi_df:
        # save the multitable_definition response to the instance
        self.__multitable_definition()
        # split dataframe to dataframes matching tables in Cube
        self._dataframes = [self._dataframe[columns].copy()
                            for _, columns in self._table_definition.items()]
        return self._dataframes
    return self._dataframe
def to_dataframe(self, limit: Optional[int] = None, multi_df: bool = False):
    """Extract contents of a cube into a Pandas `DataFrame`.

    Args:
        limit (None or int, optional): Used to control data extract behavior.
            By default (None) the limit is calculated automatically, based on
            an optimized physical size of one chunk. Setting limit manually
            will force the number of rows per chunk. Depending on system
            resources, a higher limit (e.g. 50,000) may reduce the total time
            required to extract the entire dataset.
        multi_df (bool, optional): If True, return a list of data frames
            resembling the table structure of the cube. If False (default),
            returns one data frame.

    Returns:
        Pandas Data Frame containing the cube contents
    """
    if limit:
        self._initial_limit = limit
    if self.instance_id is None:
        res = self.__create_cube_instance(self._initial_limit)
    else:
        # reuse the already-initialized instance if it is still alive;
        # fall back to a fresh instance on HTTP failure
        try:
            res = cubes.cube_instance_id(connection=self.connection,
                                         cube_id=self._id,
                                         instance_id=self.instance_id,
                                         offset=0,
                                         limit=self._initial_limit)
        except requests.HTTPError:
            res = self.__create_cube_instance(self._initial_limit)
    # Gets the pagination totals and instance_id from the response object
    payload = res.json()
    self.instance_id = payload['instanceId']
    paging = payload['data']['paging']
    # initialize parser and process first response
    parser = Parser(response=payload, parse_cube=True)
    parser.parse(response=payload)
    # If there are more rows to fetch, fetch them
    if paging['current'] != paging['total']:
        if not limit:
            # derive a chunk size from the physical size of the first response
            limit = max(1000, int((self._initial_limit * self._SIZE_LIMIT) / len(res.content)))
        # Count the number of additional iterations
        remaining = paging['total'] - self._initial_limit
        iterations = int(remaining / limit) + (remaining % limit != 0)
        if self._parallel and iterations > 1:
            workers = helper.get_parallel_number(iterations)
            with FuturesSessionWithRenewal(connection=self._connection,
                                           max_workers=workers) as session:
                progress = tqdm(desc="Downloading", total=iterations + 1,
                                disable=(not self._progress_bar))
                futures = self.__fetch_chunks_future(session, paging, self.instance_id, limit)
                progress.update()
                for index, fut in enumerate(futures, start=1):
                    chunk = fut.result()
                    if not chunk.ok:
                        helper.response_handler(chunk, "Error getting cube contents.")
                    progress.update()
                    progress.set_postfix(
                        rows=str(min(self._initial_limit + index * limit, paging['total'])))
                    parser.parse(chunk.json())
                progress.close()
        else:
            self.__fetch_chunks(parser, paging, iterations, self.instance_id, limit)
    # return parsed data as a data frame
    self._dataframe = parser.dataframe
    if multi_df:
        # split dataframe to dataframes matching tables in Cube
        self._dataframes = [
            self._dataframe[columns].copy()
            for _, columns in self.__multitable_definition().items()
        ]
        return self._dataframes
    return self._dataframe
def to_dataframe(self, limit=25000, progress_bar=True, multi_df=False):
    """Extract contents of a cube into a Pandas Data Frame. Previously
    `microstrategy.Connection.get_cube()`.

    Args:
        limit (int, optional): Used to control data extract behavior on
            datasets with a large number of rows. The default is 25000. As an
            example, if the dataset has 50,000 rows, to_dataframe() will
            incrementally extract all 50,000 rows in 25,000 row chunks.
            Depending on system resources, a higher limit may reduce the
            total time required to extract the entire dataset.
        progress_bar (bool, optional): If True (default), show the download
            progress bar.
        multi_df (bool, optional): If True, return a list of data frames
            resembling the table structure of the cube. If False (default),
            return one data frame.

    Returns:
        Pandas Data Frame containing the cube contents, or a list of data
        frames when `multi_df` is True.
    """
    # fix: honor `progress_bar` here too — previously this spinner was shown
    # even when progress_bar=False, unlike the download bar below
    inst_pbar = tqdm(
        desc='Connecting to MicroStrategy I-Server. Please wait...',
        bar_format='{desc}', leave=False, ncols=310,
        disable=(not progress_bar))
    # Request a new instance, set instance id
    res = cubes.cube_instance(connection=self._connection,
                              cube_id=self._cube_id,
                              body=self._filter.filter_body(),
                              offset=self.__OFFSET, limit=limit)
    inst_pbar.close()
    if not res.ok:
        # delegate error reporting to the shared handler; nothing to return
        self.__response_handler(response=res, msg="Error getting cube contents.")
        return None

    _instance = res.json()
    _instance_id = _instance['instanceId']
    # Gets the pagination totals from the response object
    _pagination = _instance['result']['data']['paging']

    # If there are more rows to fetch, fetch them
    if _pagination['current'] != _pagination['total']:
        # capture slices from each query, starting with the first response
        table_data = [parsejson(response=_instance)]
        # Count the number of iterations
        it_total = int(_pagination['total'] / limit) + (_pagination['total'] % limit != 0)
        # Fetch add'l rows from this object instance from the intelligence server
        with tqdm(total=it_total, disable=(not progress_bar)) as fetch_pbar:
            if progress_bar:
                fetch_pbar.update()
                fetch_pbar.set_description("Downloading")
                fetch_pbar.set_postfix(rows=limit)
            for _offset in range(limit, _pagination['total'], limit):
                if progress_bar:
                    fetch_pbar.update()
                    fetch_pbar.set_description("Downloading")
                    fetch_pbar.set_postfix(
                        rows=min(_offset + limit, _pagination['total']))
                response = cubes.cube_instance_id(connection=self._connection,
                                                  cube_id=self._cube_id,
                                                  instance_id=_instance_id,
                                                  offset=_offset, limit=limit)
                table_data.append(parsejson(response=response.json()))
        # concatenate and return the list of result data as a data frame
        self._dataframe = pd.concat(table_data).reset_index(drop=True)
    else:
        # otherwise parse the first result and return it as a dataframe
        self._dataframe = parsejson(response=_instance)

    # split dataframe to dataframes matching tables in Cube
    _tables = self.multitable_definition()
    if multi_df:
        self._dataframes = [self._dataframe[columns].copy()
                            for _, columns in _tables.items()]
        return self._dataframes
    return self._dataframe