def generate_report_instance_id(self, offset, limit=5000):
    """Fetch one page of this report instance's data from the I-Server.

    Args:
        offset: Starting row of the page to fetch.
        limit (int, optional): Maximum number of rows per page (default 5000).

    Returns:
        Parsed JSON body of the REST response.
    """
    response = reports.report_instance_id(
        self.connection,
        report_id=self.report_id,
        instance_id=self.instance_id,
        offset=offset,
        limit=limit,
    )
    return response.json()
def __get_chunk(self, instance_id, offset, limit):
    """Request a single offset/limit window of report data.

    Thin wrapper around the REST helper; verbosity follows the global
    debug flag.
    """
    chunk_response = reports.report_instance_id(
        connection=self._connection,
        report_id=self._report_id,
        instance_id=instance_id,
        offset=offset,
        limit=limit,
        verbose=helper.debug(),
    )
    return chunk_response
def __get_chunk(self, instance_id: str, offset: int, limit: int) -> requests.Response:
    """Return the raw REST response for one offset/limit window of the report."""
    response = reports.report_instance_id(
        connection=self._connection,
        report_id=self._id,
        instance_id=instance_id,
        offset=offset,
        limit=limit,
    )
    return response
def test_report_instance_id(self, mock_get):
    """report_instance_id() should surface the mocked 200 status code."""
    connection = microstrategy.Connection(
        base_url=BASE_URL,
        username=USERNAME,
        password=PASSWORD,
        project_name=PROJECT_NAME,
    )
    mock_get.return_value.status_code = 200
    result = reports.report_instance_id(
        connection,
        report_id=REPORT_ID,
        instance_id=INSTANCE_ID,
        offset=OFFSET,
        limit=LIMIT,
    )
    self.assertEqual(result.status_code, 200)
def get_report(self, report_id, offset=0, limit=1000):
    """Extract the contents of a report into a Pandas Data Frame.

    :param report_id: Unique ID of the report you wish to extract
        information from
    :param offset: (optional) To extract all data from the report, use 0
        (default)
    :param limit: (optional) Chunk size used when paging through datasets
        with many rows. With the default of 1000, a 50,000-row dataset is
        fetched in fifty 1,000-row requests. Depending on system resources,
        a higher limit (e.g. 10,000) may reduce the total extraction time.
    :return: Pandas Data Frame containing the report contents
    """
    # This method is slated for replacement by the Report class.
    warnings.warn(
        "This method will be deprecated. The Report constructor is preferred.",
        DeprecationWarning)

    response = reports.report_instance(connection=self, report_id=report_id,
                                       offset=offset, limit=limit)
    if not response.ok:
        msg = "Error getting report contents."
        self.__response_handler(response=response, msg=msg)
        return None

    json_response = response.json()
    instance_id = json_response['instanceId']
    # Pagination totals reported by the I-Server for this instance.
    paging = json_response['result']['data']['paging']

    if paging['current'] == paging['total']:
        # Everything arrived in the first response.
        return parsejson(response=json_response)

    # Otherwise page through the remaining rows, collecting each slice;
    # the first request's result seeds the list.
    table_slices = [parsejson(response=json_response)]
    for chunk_offset in range(limit, paging['total'], limit):
        chunk = reports.report_instance_id(
            connection=self, report_id=report_id, instance_id=instance_id,
            offset=chunk_offset, limit=limit)
        table_slices.append(parsejson(response=chunk.json()))
    return pd.concat(table_slices)
def __fetch_chunks(self, parser, pagination, it_total, instance_id, limit):
    """Sequentially download the remaining report chunks into the parser.

    The progress bar total is it_total + 1 because the first chunk was
    already fetched by the caller; the initial update() accounts for it.
    """
    with tqdm(desc="Downloading", total=it_total + 1,
              disable=(not self.progress_bar)) as progress:
        progress.update()  # first chunk already downloaded by the caller
        for chunk_offset in range(self._initial_limit, pagination['total'], limit):
            chunk = reports.report_instance_id(
                connection=self._connection, report_id=self._report_id,
                instance_id=instance_id, offset=chunk_offset, limit=limit)
            progress.update()
            progress.set_postfix(
                rows=str(min(chunk_offset + limit, pagination['total'])))
            parser.parse(response=chunk.json())
def to_dataframe(self, limit=25000, progress_bar=True):
    """Extract contents of a report instance into a Pandas Data Frame.

    Formerly `microstrategy.Connection.get_report()`.

    Args:
        limit (int, optional): Chunk size used when paging through datasets
            with many rows; defaults to 25000. A 50,000-row dataset is
            fetched incrementally in `limit`-row requests. Depending on
            system resources, a higher limit (e.g. 10,000) may reduce the
            total time required to extract the entire dataset.
        progress_bar (bool, optional): If True (default), show the upload
            progress bar.

    Returns:
        Pandas Data Frame containing the report contents.
    """
    inst_pbar = tqdm(
        desc='Connecting to MicroStrategy I-Server. Please wait...',
        bar_format='{desc}', leave=False, ncols=310)
    # Request a new report instance on the I-Server.
    res = reports.report_instance(connection=self._connection,
                                  report_id=self._report_id,
                                  body=self._filter.filter_body(),
                                  offset=self.__OFFSET, limit=limit)
    inst_pbar.close()

    if not res.ok:
        msg = "Error getting report contents."
        self.__response_handler(response=res, msg=msg)
    else:
        instance = res.json()
        instance_id = instance['instanceId']
        # Pagination totals reported by the I-Server for this instance.
        paging = instance['result']['data']['paging']

        if paging['current'] != paging['total']:
            # More rows remain on the server; the first request's result
            # seeds the list of slices.
            table_data = [parsejson(response=instance)]
            # Number of requests needed (ceiling division by limit).
            it_total = int(paging['total'] / limit) + (paging['total'] % limit != 0)

            with tqdm(total=it_total, disable=(not progress_bar)) as fetch_pbar:
                if progress_bar:
                    fetch_pbar.update()
                    fetch_pbar.set_description("Downloading")
                    fetch_pbar.set_postfix(rows=limit)
                for chunk_offset in range(limit, paging['total'], limit):
                    if progress_bar:
                        fetch_pbar.update()
                        fetch_pbar.set_description("Downloading")
                        fetch_pbar.set_postfix(
                            rows=min(chunk_offset + limit, paging['total']))
                    chunk = reports.report_instance_id(
                        connection=self._connection,
                        report_id=self._report_id,
                        instance_id=instance_id,
                        offset=chunk_offset, limit=limit)
                    table_data.append(parsejson(response=chunk.json()))
            # Concatenate slices into a single frame with a fresh index.
            self._dataframe = pd.concat(table_data).reset_index(drop=True)
        else:
            # Single response held everything: parse it directly.
            self._dataframe = parsejson(response=instance)
        return self._dataframe
def to_dataframe(self, limit=None):
    """Extract contents of a report instance into a Pandas Data Frame.

    Formerly `microstrategy.Connection.get_report()`.

    Args:
        limit (None or int, optional): Used to control data extract
            behavior. By default (None) the limit is calculated
            automatically, based on an optimized physical size of one chunk.
            Setting limit manually will force the number of rows per chunk.
            Depending on system resources, a higher limit (e.g. 50,000) may
            reduce the total time required to extract the entire dataset.

    Returns:
        Pandas Data Frame containing the report contents.
    """
    inst_pbar = tqdm(
        desc='Initializing an instance of a report. Please wait...',
        bar_format='{desc}', leave=False, ncols=285,
        disable=(not self.progress_bar))

    # Switch off subtotals on newer I-Servers.
    # NOTE(review): the comparison is >= "11.2.0100" — confirm this matches
    # the intended "higher than 11.2.1" cutoff.
    body = self._filter.filter_body()
    if version.parse(self._connection.iserver_version) >= version.parse("11.2.0100"):
        self._subtotals["visible"] = False
        body["subtotals"] = {"visible": self._subtotals["visible"]}

    if limit:
        self._initial_limit = limit

    # Request a new report instance.
    res = reports.report_instance(connection=self._connection,
                                  report_id=self._report_id, body=body,
                                  offset=self.__OFFSET,
                                  limit=self._initial_limit)
    inst_pbar.close()

    instance = res.json()
    instance_id = instance['instanceId']
    # Pagination totals reported by the I-Server for this instance.
    pagination = instance['data']['paging']

    # Initialize the parser and feed it the first chunk.
    parser = Parser(response=instance, parse_cube=False)
    parser.parse(response=instance)

    if pagination['current'] != pagination['total']:
        # More rows remain. When no explicit limit was given, derive a chunk
        # size from the physical size of the first response body.
        if not limit:
            limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
        remaining = pagination['total'] - self._initial_limit
        # Number of additional requests needed (ceiling division).
        it_total = int(remaining / limit) + (remaining % limit != 0)

        if self.parallel and it_total > 1:
            threads = helper.get_parallel_number(it_total)
            with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                fetch_pbar = tqdm(desc="Downloading", total=it_total + 1,
                                  disable=(not self.progress_bar))
                future = self.__fetch_chunks_future(session, pagination,
                                                    instance_id, limit)
                fetch_pbar.update()  # first chunk already downloaded above
                for i, f in enumerate(future, start=1):
                    response = f.result()
                    if not response.ok:
                        # Retry a failed chunk with a synchronous request.
                        current_offset = self._initial_limit + (i - 1) * limit
                        response = reports.report_instance_id(
                            connection=self._connection,
                            report_id=self._report_id,
                            instance_id=instance_id,
                            offset=current_offset, limit=limit)
                    fetch_pbar.update()
                    fetch_pbar.set_postfix(
                        rows=str(min(self._initial_limit + i * limit,
                                     pagination['total'])))
                    parser.parse(response.json())
                fetch_pbar.close()
        else:
            self.__fetch_chunks(parser, pagination, it_total, instance_id, limit)

    # Collect the parsed data as a data frame.
    self._dataframe = parser.dataframe

    # If the report had crosstabs and filters were applied, trim the frame.
    if self.cross_tab_filter != {}:
        if self.cross_tab_filter['metrics'] is not None:
            # Drop the metric columns that were filtered out.
            metr_names = [
                el['name'] for el in
                list(filter(lambda x: x['id'] not in self.cross_tab_filter['metrics'],
                            self._metrics))]
            self._dataframe = self._dataframe.drop(metr_names, axis=1)

        if self.cross_tab_filter['attr_elements'] is not None:
            # Group filtered element ids under their attribute id — element
            # ids appear to be "<32-char attribute id>:<element id>"
            # (TODO confirm the id layout against the REST API).
            attr_dict = {}
            for attribute in self.cross_tab_filter['attr_elements']:
                attr_dict.setdefault(attribute[:32], []).append(attribute[33:])
            # Keep rows matching any selected element (logical OR across
            # attributes).
            indexes = pd.Series([False] * len(self._dataframe))
            for attribute in attr_dict:
                attr_name = list(filter(lambda x: x['id'] in attribute,
                                        self._attributes))[0]['name']
                elements = attr_dict[attribute]
                indexes = indexes | self._dataframe[attr_name].isin(elements)
            self._dataframe = self._dataframe[indexes]

        if self.cross_tab_filter['attributes'] is not None:
            # Drop the attribute columns that were filtered out.
            attr_names = [
                el['name'] for el in
                list(filter(lambda x: x['id'] not in self.cross_tab_filter['attributes'],
                            self._attributes))]
            self._dataframe = self._dataframe.drop(attr_names, axis=1)

    return self._dataframe