def test_select_invalid_object_id(self):
    """Tests adding an invalid object id does not add the object id to selected filters."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    with self.assertRaises(ValueError):
        f._select(object_id=self.invalid_id)
    self.assertEqual(f.attr_elem_selected, [])
def test_select_attribute_id(self):
    """Test adding an object adds the id to filter property."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(object_id=self.attribute_sel)
    self.assertIn([self.attribute_sel], f.attr_selected)
    self.assertEqual(f.attr_elem_selected, [])
def test_filter_body_metric_list(self):
    """Test for presence of metric ids in the filter body."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(self.metric_sel_list)
    ro = f._requested_objects()
    fb = f._filter_body()
    self.assertListEqual(fb["requestedObjects"]["metrics"], ro["metrics"])
def test_select_metric_id_list(self):
    """Test adding an object via list of ids adds the ids to filter property."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(object_id=self.metric_sel_list)
    for obj_id in self.metric_sel_list:
        self.assertIn(obj_id, f.metr_selected)
    self.assertEqual(f.attr_elem_selected, [])
def test_requested_objects_two_metric(self):
    """Test that choosing two metrics returns the matching requested objects in the body."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(self.metric_sel_list)
    ro = f._requested_objects()
    self.assertIn("metrics", ro)
    self.assertCountEqual([m['id'] for m in ro['metrics']],
                          [m['id'] for m in self.metrics])
def test_requested_objects_one_attribute(self):
    """Test that choosing one attribute returns the matching requested objects in the body."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(self.attribute_sel)
    ro = f._requested_objects()
    self.assertIn("attributes", ro)
    self.assertCountEqual([a['id'] for a in ro['attributes']],
                          [a['id'] for a in self.attributes])
def test_filter_body_keys(self):
    """Test correctness of filter body."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    # the filter body should not be None
    self.assertIsNotNone(f._filter_body())
    # it should have requested objects
    f._select(self.attribute_sel)
    self.assertIn("requestedObjects", f._filter_body())
    self.assertNotIn("viewFilter", f._filter_body())
    f._clear()
    f._select(self.metric_sel)
    self.assertIn("requestedObjects", f._filter_body())
    self.assertNotIn("viewFilter", f._filter_body())
    f._clear()
    # it should have a view filter
    f._select_attr_el(self.element_sel)
    self.assertIn("viewFilter", f._filter_body())
    f._clear()
    # it should have both requested objects and a view filter
    f._select(self.attribute_sel)
    f._select(self.metric_sel)
    f._select_attr_el(self.element_sel)
    self.assertIn("requestedObjects", f._filter_body())
    self.assertIn("viewFilter", f._filter_body())
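# For reference, the assertions above imply a filter body shaped roughly like
# the sketch below. Only the top-level keys are exercised by these tests; the
# inner contents are an assumption for illustration:
#
#     {
#         "requestedObjects": {"attributes": [{"id": ...}], "metrics": [{"id": ...}]},
#         "viewFilter": {...}  # present only when attribute elements are selected
#     }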
def test_requested_objects_both_list(self):
    """Test that adding lists of attributes and metrics yields requested
    objects with correct elements.
    """
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(self.attribute_sel_list)
    f._select(self.metric_sel_list)
    ro = f._requested_objects()
    self.assertIn("attributes", ro)
    self.assertIn("metrics", ro)
    self.assertCountEqual([a['id'] for a in ro['attributes']],
                          [a['id'] for a in self.attributes])
    self.assertCountEqual([m['id'] for m in ro['metrics']],
                          [m['id'] for m in self.metrics])
def test_select_duplicate_attribute_id(self):
    """Tests adding a duplicate id does not add the second id to the selected filter."""
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(object_id=self.attribute_sel)
    # add a duplicate
    with self.assertWarns(Warning):
        f._select(object_id=self.attribute_sel)
    # the object id should be here, only once
    self.assertIn([self.attribute_sel], f.attr_selected)
    self.assertEqual(len(f.attr_selected), len(self.attributes))
    # the object id should not be here
    self.assertEqual(f.attr_elem_selected, [])
def test_clear(self):
    """Test that clearing filters works."""
    obj_id = self.attribute_sel_list + self.metric_sel_list
    el_id = self.element_sel_list
    f = Filter(attributes=self.attributes, metrics=self.metrics,
               attr_elements=self.elements)
    f._select(obj_id)
    f._select_attr_el(el_id)
    self.assertIsNotNone(f.attr_selected)
    self.assertIsNotNone(f.metr_selected)
    self.assertIsNotNone(f.attr_elem_selected)
    # reset
    f._clear()
    self.assertEqual(f.attr_elem_selected, [])
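# The fixtures referenced above (self.attributes, self.metrics, self.elements,
# the *_sel / *_sel_list ids, and self.invalid_id) are defined in the test
# case's setUp, which lies outside this excerpt. A minimal sketch of what it
# presumably provides (all ids below are made up for illustration):
#
#     def setUp(self):
#         self.attributes = [{'name': 'Region', 'id': 'A1'}]
#         self.metrics = [{'name': 'Revenue', 'id': 'M1'},
#                         {'name': 'Cost', 'id': 'M2'}]
#         self.elements = [{'attribute_id': 'A1', 'elements': [{'id': 'A1:North'}]}]
#         self.attribute_sel, self.attribute_sel_list = 'A1', ['A1']
#         self.metric_sel, self.metric_sel_list = 'M1', ['M1', 'M2']
#         self.element_sel, self.element_sel_list = 'A1:North', ['A1:North']
#         self.invalid_id = 'not-a-valid-id'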
class Cube:
    """Access, filter, publish, and extract data from MicroStrategy in-memory
    cubes.

    Create a Cube object to load basic information on a cube dataset. Specify
    a subset of the cube to be fetched through `Cube.apply_filters()` and
    `Cube.clear_filters()`. Fetch the dataset through the `Cube.to_dataframe()`
    method.

    Attributes:
        connection: MicroStrategy connection object returned by
            `connection.Connection()`.
        cube_id: Identifier of a pre-existing cube containing the required
            data.
        instance_id (str): Identifier of an instance if the cube instance has
            already been initialized, None by default.
        parallel (bool, optional): If True (default), utilize an optimal
            number of threads to increase the download speed. If False, this
            feature will be disabled.
        progress_bar (bool, optional): If True (default), show the download
            progress bar.
    """

    def __init__(self, connection, cube_id, instance_id=None, parallel=True,
                 progress_bar=True):
        """Initialize an instance of a cube.

        Args:
            connection: MicroStrategy connection object returned by
                `connection.Connection()`.
            cube_id (str): Identifier of a pre-existing cube containing the
                required data.
            instance_id (str): Identifier of an instance if the cube instance
                has already been initialized, None by default.
            parallel (bool, optional): If True (default), utilize an optimal
                number of threads to increase the download speed. If False,
                this feature will be disabled.
            progress_bar (bool, optional): If True (default), show the
                download progress bar.
        """
        if not connection.project_id:
            helper.exception_handler(
                "Please provide a project id or project name when creating the Connection object.",
                ConnectionError)
        self._connection = connection
        self._cube_id = cube_id
        self.instance_id = instance_id
        self.parallel = parallel
        self.progress_bar = True if progress_bar and config.progress_bar else False
        self._size_limit = 10000000  # this sets the desired chunk size in bytes
        self._initial_limit = 1000  # initial limit for the cube_instance request
        self._table_definition = {}
        self._dataframe = None
        self._dataframes = []
        self._attr_elements = None

        # load dataset information
        self.__info()
        self.__definition()
        self.__remove_row_count()
        self.__filter = Filter(attributes=self.attributes,
                               metrics=self.metrics,
                               row_count_metrics=self._row_counts)

    def to_dataframe(self, limit=None, multi_df=False):
        """Extract contents of a cube into a Pandas `DataFrame`.

        Args:
            limit (None or int, optional): Used to control data extract
                behavior. By default (None) the limit is calculated
                automatically, based on an optimized physical size of one
                chunk. Setting the limit manually will force the number of
                rows per chunk. Depending on system resources, a higher limit
                (e.g. 50,000) may reduce the total time required to extract
                the entire dataset.
            multi_df (bool, optional): If True, return a list of data frames
                resembling the table structure of the cube. If False
                (default), return one data frame.

        Returns:
            Pandas DataFrame containing the cube contents.
        """
        if limit:
            self._initial_limit = limit

        if self.instance_id is None:
            res = self.__initialize_cube(self._initial_limit)
        else:
            # try to get the first chunk from an already initialized instance
            # of the cube; if that is not possible, initialize a new instance
            try:
                res = self.__get_chunk(instance_id=self.instance_id, offset=0,
                                       limit=self._initial_limit)
            except requests.HTTPError:
                res = self.__initialize_cube(self._initial_limit)

        # Get the pagination totals and instance_id from the response object
        _instance = res.json()
        _instance_id = _instance['instanceId']
        _pagination = _instance['data']['paging']

        # initialize the parser and process the first response
        p = Parser(response=_instance, parse_cube=True)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:
            if not limit:
                limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
            # Count the number of additional iterations
            it_total = int((_pagination['total'] - self._initial_limit) / limit) + \
                ((_pagination['total'] - self._initial_limit) % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads),
                                    session=self._connection.session) as session:
                    fetch_pbar = tqdm(desc="Downloading", total=it_total + 1,
                                      disable=(not self.progress_bar))
                    future = self.__fetch_chunks_future(session, _pagination,
                                                        _instance_id, limit)
                    fetch_pbar.update()
                    for i, f in enumerate(future, start=1):
                        response = f.result()
                        if not response.ok:
                            helper.response_handler(response, "Error getting cube contents.")
                        fetch_pbar.update()
                        fetch_pbar.set_postfix(rows=str(
                            min(self._initial_limit + i * limit, _pagination['total'])))
                        p.parse(response.json())
                    fetch_pbar.close()
            else:
                self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

        # return the parsed data as a data frame
        self._dataframe = p.dataframe
        if multi_df:
            # split the dataframe into dataframes matching the tables in the cube
            self._dataframes = [
                self._dataframe[columns].copy()
                for _, columns in self.__multitable_definition().items()
            ]
            return self._dataframes
        else:
            return self._dataframe

    def __fetch_chunks_future(self, future_session, pagination, instance_id, limit):
        # Fetch additional rows from this object instance from the Intelligence Server
        return [
            cubes.cube_instance_id_coroutine(future_session,
                                             connection=self._connection,
                                             cube_id=self._cube_id,
                                             instance_id=instance_id,
                                             offset=_offset,
                                             limit=limit)
            for _offset in range(self._initial_limit, pagination['total'], limit)
        ]

    def __fetch_chunks(self, parser, pagination, it_total, instance_id, limit):
        # Fetch additional rows from this object instance from the Intelligence Server
        with tqdm(desc="Downloading", total=it_total + 1,
                  disable=(not self.progress_bar)) as fetch_pbar:
            fetch_pbar.update()
            for _offset in range(self._initial_limit, pagination['total'], limit):
                response = self.__get_chunk(instance_id=instance_id,
                                            offset=_offset, limit=limit)
                fetch_pbar.update()
                fetch_pbar.set_postfix(rows=str(min(_offset + limit, pagination['total'])))
                parser.parse(response=response.json())

    def __initialize_cube(self, limit):
        inst_pbar = tqdm(
            desc='Initializing an instance of a cube. Please wait...',
            bar_format='{desc}', leave=False, ncols=280,
            disable=(not self.progress_bar))
        # Request a new instance, set instance id
        response = cubes.cube_instance(connection=self._connection,
                                       cube_id=self._cube_id,
                                       body=self.__filter._filter_body(),
                                       offset=0,
                                       limit=self._initial_limit)
        inst_pbar.close()
        return response

    def __get_chunk(self, instance_id, offset, limit):
        return cubes.cube_instance_id(connection=self._connection,
                                      cube_id=self._cube_id,
                                      instance_id=instance_id,
                                      offset=offset,
                                      limit=limit)

    def apply_filters(self, attributes=None, metrics=None, attr_elements=None,
                      operator='In'):
        """Apply filters on the cube's objects.

        Filter by attributes, metrics and attribute elements.

        Args:
            attributes (list or None, optional): ids of attributes to be
                included in the filter. If the list is empty, no attributes
                will be selected and metric data will be aggregated.
            metrics (list or None, optional): ids of metrics to be included in
                the filter. If the list is empty, no metrics will be selected.
            attr_elements (list or None, optional): attribute elements to be
                included in the filter.
            operator (str, optional): a str flag used to specify if the
                attribute elements selected inside the filter should be
                included or excluded. Allowed values are: 'In', 'NotIn'.
        """
        params = [attributes, metrics, attr_elements]
        filtering_is_requested = bool(not all(el is None for el in params))

        if filtering_is_requested:
            self.__filter._clear(attributes=attributes, metrics=metrics,
                                 attr_elements=attr_elements)
            self.__filter.operator = operator
            self._select_attribute_filter_conditionally(attributes)
            self._select_metric_filter_conditionally(metrics)
            self._select_attr_el_filter_conditionally(attr_elements)

    def _select_attribute_filter_conditionally(self, attributes_filtered):
        if attributes_filtered:
            self.__filter._select(object_id=attributes_filtered)
        elif attributes_filtered is not None:
            self.__filter.attr_selected = []

    def _select_metric_filter_conditionally(self, metrics_filtered):
        if metrics_filtered:
            self.__filter._select(object_id=metrics_filtered)
        elif metrics_filtered is not None:
            self.__filter.metr_selected = []

    def _select_attr_el_filter_conditionally(self, attr_el_filtered):
        if attr_el_filtered is not None:
            self.__filter._select_attr_el(element_id=attr_el_filtered)

    def clear_filters(self):
        """Clear previously set filters, allowing all attributes, metrics, and
        attribute elements to be retrieved."""
        self.__filter._clear()
        # once again remove Row Count metrics
        metrics_ids = [metric_id['id'] for metric_id in self.metrics]
        self.__filter._select(metrics_ids)

    def update(self, update_policy='upsert'):
        """Update a single-table cube with the data frame stored in the Cube
        instance (`cube.dataframe`).

        Before the update, make sure that the data frame has been modified.

        Args:
            update_policy (str): Update operation to perform. One of 'add'
                (inserts new, unique rows), 'update' (updates data in existing
                rows and columns), 'upsert' (updates existing data and inserts
                new rows), or 'replace' (replaces the existing data with new
                data).
        """
        if len(self._tables) > 1:
            helper.exception_handler(
                msg="This feature works only for single-table cubes. "
                    "To update a multi-table cube use the Dataset class.")
        else:
            # `self._tables` holds the table names (keys of the table definition)
            table_name = list(self._tables)[0]
            dataset = Dataset(self._connection, dataset_id=self._cube_id)
            dataset.add_table(name=table_name, data_frame=self.dataframe,
                              update_policy=update_policy)
            dataset.update()

    def save_as(self, name, description=None, folder_id=None, table_name=None):
        """Create a new single-table cube with the data frame stored in the
        Cube instance (`cube.dataframe`).

        Before saving, make sure that the data exists.

        Args:
            name (str): Name of the cube.
            description (str): Description of the cube.
            folder_id (str, optional): ID of the shared folder in which the
                dataset should be created. If `None`, defaults to the user's
                My Reports folder.
            table_name (str, optional): Name of the table. If None (default),
                the first table name of the original cube will be used.
        """
        if len(self._tables) > 1:
            helper.exception_handler(
                msg="This feature works only for single-table cubes. "
                    "To export a multi-table cube use the Dataset class.")
        else:
            if table_name is None:
                table_name = list(self._tables)[0]
            dataset = Dataset(self._connection, name=name, description=description)
            dataset.add_table(name=table_name, data_frame=self.dataframe,
                              update_policy="add")
            dataset.create(folder_id=folder_id)

    def __multitable_definition(self):
        """Return all table names and columns as a dictionary."""
        if not self._table_definition:
            res_tables = datasets.dataset_definition(
                connection=self._connection,
                dataset_id=self._cube_id,
                fields=['tables', 'columns'],
                whitelist=[('ERR001', 500)])
            if res_tables.ok:
                ds_definition = res_tables.json()
                for table in ds_definition['result']['definition']['availableObjects']['tables']:
                    column_list = [
                        column['columnName']
                        for column in ds_definition['result']['definition']['availableObjects']['columns']
                        if table['name'] == column['tableName']
                    ]
                    self._table_definition[table['name']] = column_list
        return self._table_definition

    def __remove_row_count(self):
        """Remove all Row Count metrics from the cube."""
        row_counts = list(map(itemgetter('name'), self._row_counts))
        self._metrics = list(filter(lambda x: x['name'] not in row_counts, self.metrics))

    def __info(self):
        """Get metadata for specific cubes.

        Implements GET /cubes to retrieve basic metadata.
        """
        res = cubes.cube_info(connection=self._connection, cube_id=self._cube_id)
        _info = res.json()["cubesInfos"][0]
        self._name = _info["cubeName"]
        self._owner_id = _info["ownerId"]
        self._path = _info["path"]
        self._last_modified = _info["modificationTime"]
        self._server_mode = _info["serverMode"]
        self._size = _info["size"]
        self._status = _info["status"]

    def __definition(self):
        """Get the definition of a cube, including attributes and metrics.

        Implements GET /v2/cubes/<cube_id>.
        """
        res = cubes.cube_definition(connection=self._connection, cube_id=self._cube_id)
        _definition = res.json()
        full_attributes = _definition["definition"]["availableObjects"]["attributes"]
        full_metrics = _definition["definition"]["availableObjects"]["metrics"]
        self._attributes = [{'name': attr['name'], 'id': attr['id']}
                            for attr in full_attributes]
        self._metrics = [{'name': metr['name'], 'id': metr['id']}
                         for metr in full_metrics]
        self._tables = self.__multitable_definition().keys()
        row_counts = ['Row Count - {}'.format(table_name) for table_name in self._tables]
        self._row_counts = list(filter(lambda x: x['name'] in row_counts, self.metrics))

    def __get_attr_elements(self, limit=50000):
        """Get elements of cube attributes synchronously.

        Implements GET /cubes/<cube_id>/attributes/<attribute_id>/elements.
""" def fetch_for_attribute(attribute): @fallback_on_timeout() def fetch_for_attribute_given_limit(limit): response = cubes.cube_single_attribute_elements( connection=self._connection, cube_id=self._cube_id, attribute_id=attribute['id'], offset=0, limit=limit) # Get total number of rows from headers. total = int(response.headers['x-mstr-total-count']) # Get attribute elements from the response. elements = response.json() # If total number of elements is bigger than the chunk size (limit), fetch them incrementally. for _offset in range(limit, total, limit): response = cubes.cube_single_attribute_elements( connection=self._connection, cube_id=self._cube_id, attribute_id=attribute['id'], offset=_offset, limit=limit) elements.extend(response.json()) # Return attribute data. return { "attribute_name": attribute['name'], "attribute_id": attribute['id'], "elements": elements } return fetch_for_attribute_given_limit(limit)[0] attr_elements = [] if self.attributes: pbar = tqdm(self.attributes, desc="Loading attribute elements", leave=False, disable=(not self.progress_bar)) attr_elements = [ fetch_for_attribute(attribute) for attribute in pbar ] pbar.close() return attr_elements def __get_attr_elements_async(self, limit=50000): """Get attribute elements. Implements GET /cubes/<cube_id>/attributes/<attribute_id>/elements. """ attr_elements = [] if self.attributes: threads = helper.get_parallel_number(len(self.attributes)) with FuturesSession( executor=ThreadPoolExecutor(max_workers=threads), session=self._connection.session) as session: # Fetch first chunk of attribute elements. futures = self.__fetch_attribute_elements_chunks( session, limit) pbar = tqdm(futures, desc="Loading attribute elements", leave=False, disable=(not self.progress_bar)) for i, future in enumerate(pbar): attr = self.attributes[i] response = future.result() if not response.ok: helper.response_handler( response, "Error getting attribute " + attr["name"] + " elements") elements = response.json() # Get total number of rows from headers. total = int(response.headers['x-mstr-total-count']) for _offset in range(limit, total, limit): response = cubes.cube_single_attribute_elements( connection=self._connection, cube_id=self._cube_id, attribute_id=attr["id"], offset=_offset, limit=limit) elements.extend(response.json()) # Append attribute data to the list of attributes. attr_elements.append({ "attribute_name": attr['name'], "attribute_id": attr['id'], "elements": elements }) pbar.close() return attr_elements def __fetch_attribute_elements_chunks(self, future_session, limit): # Fetch add'l rows from this object instance from the intelligence server return [ cubes.cube_single_attribute_elements_coroutine( future_session, connection=self._connection, cube_id=self._cube_id, attribute_id=attribute['id'], offset=0, limit=limit) for attribute in self.attributes ] @property def name(self): return self._name @property def size(self): return self._size @property def status(self): return self._status @property def path(self): return self._path @property def last_modified(self): return self._last_modified @property def owner_id(self): return self._owner_id @property def attributes(self): return self._attributes @property def metrics(self): return self._metrics @property def attr_elements(self): if not self._attr_elements: if self.parallel is True: # TODO: move the fallback inside the function to apply per-attribute, like with non-async version. 
                self._attr_elements = fallback_on_timeout()(
                    self.__get_attr_elements_async)(50000)[0]
            else:
                self._attr_elements = self.__get_attr_elements()
            self.__filter.attr_elem_selected = self._attr_elements
        return self._attr_elements

    @property
    def selected_attributes(self):
        return self.__filter.attr_selected

    @property
    def selected_metrics(self):
        return self.__filter.metr_selected

    @property
    def selected_attr_elements(self):
        return self.__filter.attr_elem_selected

    @property
    def dataframe(self):
        if self._dataframe is None:
            helper.exception_handler(
                msg="Dataframe not loaded. Retrieve with Cube.to_dataframe().",
                exception_type=Warning, throw_error=False)
        return self._dataframe

    @property
    def dataframes(self):
        if len(self._dataframes) == 0:
            helper.exception_handler(
                msg="Dataframes not loaded. Retrieve with Cube.to_dataframe(multi_df=True).",
                exception_type=Warning, throw_error=False)
        return self._dataframes

    @property
    def table_definition(self):
        return self._table_definition
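# A minimal usage sketch for Cube (illustrative only: the URL, credentials and
# cube id are placeholders, and the import path follows the
# `connection.Connection()` reference in the docstring above):
#
#     conn = connection.Connection(
#         base_url="https://<host>/MicroStrategyLibrary/api",
#         username="<user>", password="<password>",
#         project_name="<project>")
#     cube = Cube(connection=conn, cube_id="<cube id>")
#     cube.apply_filters(metrics=[m['id'] for m in cube.metrics])  # keep all metrics
#     df = cube.to_dataframe()
#     cube.clear_filters()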
class Report:
    """Access, filter, publish, and extract data from in-memory reports.

    Create a Report object to load basic information on a report dataset.
    Specify a subset of the report to be fetched through
    `Report.apply_filters()` and `Report.clear_filters()`. Fetch the dataset
    through the `Report.to_dataframe()` method.

    Attributes:
        connection: MicroStrategy connection object returned by
            `connection.Connection()`.
        report_id: Identifier of a pre-existing report containing the required
            data.
        instance_id (str): Identifier of an instance if the report instance
            has already been initialized, None by default.
        parallel (bool, optional): If True (default), utilize an optimal
            number of threads to increase the download speed. If False, this
            feature will be disabled.
        progress_bar (bool, optional): If True (default), show the download
            progress bar.
    """

    def __init__(self, connection: "Connection", report_id: str,
                 instance_id: str = None, parallel: bool = True,
                 progress_bar: bool = True):
        """Initialize an instance of a report.

        Args:
            connection: MicroStrategy connection object returned by
                `connection.Connection()`.
            report_id (str): Identifier of a pre-existing report containing
                the required data.
            instance_id (str): Identifier of an instance if the report
                instance has already been initialized, None by default.
            parallel (bool, optional): If True (default), utilize an optimal
                number of threads to increase the download speed. If False,
                this feature will be disabled.
            progress_bar (bool, optional): If True (default), show the
                download progress bar.
        """
        if not connection.application_id:
            helper.exception_handler(
                ("Please provide an application id or application name when "
                 "creating the Connection object."), ConnectionError)
        self._connection = connection
        self._report_id = report_id
        self.instance_id = instance_id
        self.parallel = parallel
        self.progress_bar = True if progress_bar and config.progress_bar else False
        self._subtotals = None
        self.cross_tab = False
        self.cross_tab_filter = {}
        self._size_limit = 10000000  # this sets the desired chunk size in bytes
        self._initial_limit = 1000  # initial limit for the report_instance request
        self._dataframe = None
        self._attr_elements = None

        # load report information
        self.__definition()
        self.__filter = Filter(attributes=self.attributes, metrics=self.metrics)

    def to_dataframe(self, limit: int = None) -> pd.DataFrame:
        """Extract contents of a report instance into a Pandas `DataFrame`.

        Args:
            limit (None or int, optional): Used to control data extract
                behavior. By default (None) the limit is calculated
                automatically, based on an optimized physical size of one
                chunk. Setting the limit manually will force the number of
                rows per chunk. Depending on system resources, a higher limit
                (e.g. 50,000) may reduce the total time required to extract
                the entire dataset.

        Returns:
            Pandas DataFrame containing the report contents.
""" if limit: self._initial_limit = limit self.instance_id = None if self.instance_id is None: res = self.__initialize_report(self._initial_limit) else: # try to get first chunk from already initialized instance of report # if not possible, initialize new instance try: res = self.__get_chunk(instance_id=self.instance_id, offset=0, limit=self._initial_limit) except requests.HTTPError: res = self.__initialize_report(self._initial_limit) # Gets the pagination totals from the response object _instance = res.json() self.instance_id = _instance['instanceId'] paging = _instance['data']['paging'] # initialize parser and process first response p = Parser(response=_instance, parse_cube=False) p.parse(response=_instance) # If there are more rows to fetch, fetch them if paging['current'] != paging['total']: if not limit: limit = max( 1000, int((self._initial_limit * self._size_limit) / len(res.content))) # Count the number of additional iterations it_total = int((paging['total'] - self._initial_limit) / limit) + \ ((paging['total'] - self._initial_limit) % limit != 0) if self.parallel and it_total > 1: threads = helper.get_parallel_number(it_total) with FuturesSession( executor=ThreadPoolExecutor(max_workers=threads), session=self._connection.session) as session: fetch_pbar = tqdm(desc="Downloading", total=it_total + 1, disable=(not self.progress_bar)) future = self.__fetch_chunks_future( session, paging, self.instance_id, limit) fetch_pbar.update() for i, f in enumerate(future, start=1): response = f.result() if not response.ok: helper.response_handler( response, "Error getting report contents.") fetch_pbar.update() fetch_pbar.set_postfix(rows=str( min(self._initial_limit + i * limit, paging['total']))) p.parse(response.json()) fetch_pbar.close() else: self.__fetch_chunks(p, paging, it_total, self.instance_id, limit) # return parsed data as a data frame self._dataframe = p.dataframe # filter dataframe if report had crosstabs and filters were applied if self.cross_tab_filter != {}: if self.cross_tab_filter['metrics'] is not None: # drop metrics columns from dataframe metr_names = [ el['name'] for el in list( filter( lambda x: x['id'] not in self.cross_tab_filter[ 'metrics'], self.metrics)) ] self._dataframe = self._dataframe.drop(metr_names, axis=1) if self.cross_tab_filter['attr_elements'] is not None: # create dict of attributes and elements to iterate through attr_dict = {} for attribute in self.cross_tab_filter['attr_elements']: key = attribute[:32] attr_dict.setdefault(key, []).append(attribute[33:]) # initialize indexes series for filter indexes = pd.Series([False] * len(self._dataframe)) # logical OR for filtered attribute elements for attribute in attr_dict: attr_name = list( filter(lambda x: x['id'] in attribute, self.attributes))[0]['name'] elements = attr_dict[attribute] indexes = indexes | self._dataframe[attr_name].isin( elements) # select datframe indexes with self._dataframe = self._dataframe[indexes] if self.cross_tab_filter['attributes'] is not None: attr_names = [ el['name'] for el in list( filter( lambda x: x['id'] not in self.cross_tab_filter[ 'attributes'], self.attributes)) ] # filtering out attribute forms cloumns to_be_removed = [] to_be_added = [] for attr in attr_names: forms = [ column for column in self._dataframe.columns if column.startswith(attr + '@') ] if forms: to_be_removed.append(attr) to_be_added.extend(forms) for elem in to_be_removed: attr_names.remove(elem) attr_names.extend(to_be_added) # drop filtered out columns self._dataframe = 
                self._dataframe = self._dataframe.drop(attr_names, axis=1)

        return self._dataframe

    def __fetch_chunks_future(self, future_session, pagination, instance_id, limit):
        # Fetch additional rows from this object instance
        return [
            reports.report_instance_id_coroutine(
                future_session,
                connection=self._connection,
                report_id=self._report_id,
                instance_id=instance_id,
                offset=_offset,
                limit=limit,
            ) for _offset in range(self._initial_limit, pagination['total'], limit)
        ]

    def __fetch_chunks(self, parser, pagination, it_total, instance_id, limit):
        # Fetch additional rows from this object instance
        with tqdm(desc="Downloading", total=it_total + 1,
                  disable=(not self.progress_bar)) as fetch_pbar:
            fetch_pbar.update()
            for _offset in range(self._initial_limit, pagination['total'], limit):
                response = self.__get_chunk(instance_id=instance_id,
                                            offset=_offset, limit=limit)
                fetch_pbar.update()
                fetch_pbar.set_postfix(rows=str(min(_offset + limit, pagination['total'])))
                parser.parse(response=response.json())

    def __initialize_report(self, limit: int) -> requests.Response:
        inst_pbar = tqdm(
            desc='Initializing an instance of a report. Please wait...',
            bar_format='{desc}', leave=False, ncols=285,
            disable=(not self.progress_bar))

        # Switch off subtotals if the I-Server version is 11.2.1 or higher
        body = self.__filter._filter_body()
        if version.parse(self._connection.iserver_version) >= version.parse("11.2.0100"):
            self._subtotals["visible"] = False
            body["subtotals"] = {"visible": self._subtotals["visible"]}

        # Request a new instance, set instance id
        response = reports.report_instance(
            connection=self._connection,
            report_id=self._report_id,
            body=body,
            offset=0,
            limit=self._initial_limit,
        )
        inst_pbar.close()
        return response

    def __get_chunk(self, instance_id: str, offset: int,
                    limit: int) -> requests.Response:
        return reports.report_instance_id(
            connection=self._connection,
            report_id=self._report_id,
            instance_id=instance_id,
            offset=offset,
            limit=limit,
        )

    def apply_filters(self, attributes: list = None, metrics: list = None,
                      attr_elements: list = None, operator: str = 'In') -> None:
        """Apply filters on the report's objects.

        Filter by attributes, metrics and attribute elements.

        Args:
            attributes (list or None, optional): ids of attributes to be
                included in the filter. If the list is empty, no attributes
                will be selected and metric data will be aggregated.
            metrics (list or None, optional): ids of metrics to be included in
                the filter. If the list is empty, no metrics will be selected.
            attr_elements (list or None, optional): attribute elements to be
                included in the filter.
            operator (str, optional): a str flag used to specify if the
                attribute elements selected inside the filter should be
                included or excluded. Allowed values are: 'In', 'NotIn'.
""" filtering_is_requested = bool(not all( element is None for element in [attributes, metrics, attr_elements])) if self.cross_tab: self.cross_tab_filter = { 'attributes': attributes, 'metrics': metrics, 'attr_elements': attr_elements } elif filtering_is_requested: self.__filter._clear(attributes=attributes, metrics=metrics, attr_elements=attr_elements) self.__filter.operator = operator self._select_attribute_filter_conditionally(attributes) self._select_metric_filter_conditionally(metrics) self._select_attr_el_filter_conditionally(attr_elements) # Clear instance, to generate new with new filters self.instance_id = None def _select_attribute_filter_conditionally(self, attributes_filtered) -> None: if attributes_filtered: self.__filter._select(object_id=attributes_filtered) elif attributes_filtered is not None: self.__filter.attr_selected = [] def _select_metric_filter_conditionally(self, metrics_filtered) -> None: if metrics_filtered: self.__filter._select(object_id=metrics_filtered) elif metrics_filtered is not None: self.__filter.metr_selected = [] def _select_attr_el_filter_conditionally(self, attr_el_filtered) -> None: if attr_el_filtered is not None: self.__filter._select_attr_el(element_id=attr_el_filtered) def clear_filters(self) -> None: """Clear previously set filters, allowing all attributes, metrics, and attribute elements to be retrieved.""" self.__filter._clear() if self.cross_tab: self.__filter._select( object_id=[el['id'] for el in self.attributes]) self.__filter._select(object_id=[el['id'] for el in self.metrics]) # Clear instance, to generate new with new filters self.instance_id = None def __definition(self) -> None: """Get the definition of a report, including attributes and metrics. Implements GET /v2/reports/<report_id>. """ response = reports.report_definition(connection=self._connection, report_id=self._report_id).json() grid = response["definition"]["grid"] available_objects = response['definition']['availableObjects'] if version.parse(self._connection.iserver_version) >= version.parse( "11.2.0100"): self._subtotals = grid["subtotals"] self._name = response["name"] self.cross_tab = grid["crossTab"] # Check if report have custom groups or consolidations if available_objects['customGroups']: helper.exception_handler( msg="Reports with custom groups are not supported.", exception_type=ImportError) if available_objects['consolidations']: helper.exception_handler( msg="Reports with consolidations are not supported.", exception_type=ImportError) full_attributes = [] for row in grid["rows"]: if row["type"] == "attribute": full_attributes.append(row) for column in grid["columns"]: if column["type"] == "attribute": full_attributes.append(column) self._attributes = [{ 'name': attr['name'], 'id': attr['id'] } for attr in full_attributes] # Retrieve metrics from the report grid (only selected metrics) metrics_position = grid.get("metricsPosition") if metrics_position is None: self._metrics = [] else: full_metrics = grid[metrics_position["axis"]][ metrics_position["index"]]["elements"] self._metrics = [{ 'name': metr['name'], 'id': metr['id'] } for metr in full_metrics] def __get_attr_elements(self, limit: int = 50000) -> list: """Get elements of report attributes synchronously. Implements GET /reports/<report_id>/attributes/<attribute_id>/elements. 
""" def fetch_for_attribute(attribute): @fallback_on_timeout() def fetch_for_attribute_given_limit(limit): response = reports.report_single_attribute_elements( connection=self._connection, report_id=self._report_id, attribute_id=attribute['id'], offset=0, limit=limit, ) # Get total number of rows from headers. total = int(response.headers['x-mstr-total-count']) # Get attribute elements from the response. elements = response.json() # If total number of elements is bigger than the chunk size # (limit), fetch them incrementally. for _offset in range(limit, total, limit): response = reports.report_single_attribute_elements( connection=self._connection, report_id=self._report_id, attribute_id=attribute['id'], offset=_offset, limit=limit, ) elements.extend(response.json()) # Return attribute data. return { "attribute_name": attribute['name'], "attribute_id": attribute['id'], "elements": elements } return fetch_for_attribute_given_limit(limit)[0] attr_elements = [] if self.attributes: pbar = tqdm(self.attributes, desc="Loading attribute elements", leave=False, disable=(not self.progress_bar)) attr_elements = [ fetch_for_attribute(attribute) for attribute in pbar ] pbar.close() return attr_elements def __get_attr_elements_async(self, limit: int = 50000) -> list: """Get elements of report attributes asynchronously. Implements GET /reports/<report_id>/attributes/<attribute_id>/elements. """ attr_elements = [] if self.attributes: threads = helper.get_parallel_number(len(self.attributes)) with FuturesSession( executor=ThreadPoolExecutor(max_workers=threads), session=self._connection.session) as session: # Fetch first chunk of attribute elements. futures = self.__fetch_attribute_elements_chunks( session, limit) pbar = tqdm(futures, desc="Loading attribute elements", leave=False, disable=(not self.progress_bar)) for i, future in enumerate(pbar): attr = self.attributes[i] response = future.result() if not response.ok: helper.response_handler( response, "Error getting attribute " + attr['name'] + " elements") elements = response.json() # Get total number of rows from headers. total = int(response.headers['x-mstr-total-count']) for _offset in range(limit, total, limit): response = reports.report_single_attribute_elements( connection=self._connection, report_id=self._report_id, attribute_id=attr["id"], offset=_offset, limit=limit, ) elements.extend(response.json()) # Append attribute data to the list of attributes. attr_elements.append({ "attribute_name": attr['name'], "attribute_id": attr['id'], "elements": elements }) pbar.close() return attr_elements def __fetch_attribute_elements_chunks(self, future_session, limit: int) -> list: # Fetch add'l rows from this object instance return [ reports.report_single_attribute_elements_coroutine( future_session, connection=self._connection, report_id=self._report_id, attribute_id=attribute['id'], offset=0, limit=limit, ) for attribute in self.attributes ] @property def name(self): return self._name @property def attributes(self): return self._attributes @property def metrics(self): return self._metrics @property def attr_elements(self): if not self._attr_elements: if self.parallel is True: # TODO: move the fallback inside the function to apply # per-attribute, like with non-async version. 
                self._attr_elements = fallback_on_timeout()(
                    self.__get_attr_elements_async)(50000)[0]
            else:
                self._attr_elements = self.__get_attr_elements()
            self.__filter.attr_elem_selected = self._attr_elements
        return self._attr_elements

    @property
    def selected_attributes(self):
        return self.__filter.attr_selected

    @property
    def selected_metrics(self):
        return self.__filter.metr_selected

    @property
    def selected_attr_elements(self):
        return self.__filter.attr_elem_selected

    @property
    def dataframe(self):
        if self._dataframe is None:
            helper.exception_handler(
                msg="Dataframe not loaded. Retrieve with Report.to_dataframe().",
                exception_type=Warning)
        return self._dataframe
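# A minimal usage sketch for Report (illustrative only; `conn` is assumed to be
# an existing `connection.Connection()` and the report id is a placeholder):
#
#     report = Report(connection=conn, report_id="<report id>")
#     report.apply_filters(
#         attributes=[a['id'] for a in report.attributes],
#         metrics=[m['id'] for m in report.metrics])
#     df = report.to_dataframe()
#     report.clear_filters()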