def to_dataframe(self, limit=None, multi_df=False):
    """Extract contents of a cube into a Pandas `DataFrame`.

    Args:
        limit (None or int, optional): Used to control data extract behavior.
            By default (None) the limit is calculated automatically, based on
            an optimized physical size of one chunk. Setting limit manually
            will force the number of rows per chunk. Depending on system
            resources, a higher limit (e.g. 50,000) may reduce the total time
            required to extract the entire dataset.
        multi_df (bool, optional): If True, return a list of data frames
            resembling the table structure of the cube. If False (default),
            returns one data frame.

    Returns:
        Pandas Data Frame containing the cube contents.
    """
    if limit:
        self._initial_limit = limit

    if self.instance_id is None:
        res = self.__initialize_cube(self._initial_limit)
    else:
        # try to get first chunk from already initialized instance of cube;
        # if not possible, initialize new instance
        try:
            res = self.__get_chunk(instance_id=self.instance_id, offset=0,
                                   limit=self._initial_limit)
        except requests.HTTPError:
            res = self.__initialize_cube(self._initial_limit)

    # Get the pagination totals and instance_id from the response object
    _instance = res.json()
    _instance_id = _instance['instanceId']
    _pagination = _instance['data']['paging']

    # initialize parser and process first response
    p = Parser(response=_instance, parse_cube=True)
    p.parse(response=_instance)

    # If there are more rows to fetch, fetch them
    if _pagination['current'] != _pagination['total']:
        if not limit:
            limit = max(
                1000,
                int((self._initial_limit * self._size_limit) / len(res.content)))
        # Count the number of additional iterations
        it_total = int((_pagination['total'] - self._initial_limit) / limit) + \
            ((_pagination['total'] - self._initial_limit) % limit != 0)

        if self.parallel and it_total > 1:
            threads = helper.get_parallel_number(it_total)
            with FuturesSession(
                    executor=ThreadPoolExecutor(max_workers=threads),
                    session=self._connection.session) as session:
                fetch_pbar = tqdm(desc="Downloading", total=it_total + 1,
                                  disable=(not self.progress_bar))
                future = self.__fetch_chunks_future(
                    session, _pagination, _instance_id, limit)
                fetch_pbar.update()
                for i, f in enumerate(future, start=1):
                    response = f.result()
                    if not response.ok:
                        helper.response_handler(
                            response, "Error getting cube contents.")
                    fetch_pbar.update()
                    fetch_pbar.set_postfix(rows=str(
                        min(self._initial_limit + i * limit, _pagination['total'])))
                    p.parse(response.json())
                fetch_pbar.close()
        else:
            self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

    # return parsed data as a data frame
    self._dataframe = p.dataframe

    # split dataframe into dataframes matching the tables in the cube
    if multi_df:
        self._dataframes = [
            self._dataframe[columns].copy()
            for _, columns in self.__multitable_definition().items()
        ]
        return self._dataframes
    else:
        return self._dataframe
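# --- Usage sketch (illustrative, not part of the library source) -------------
# A minimal example of pulling a cube into pandas with the method above. The
# import paths and the `conn`/`cube` names are assumptions about the
# surrounding mstrio-py package layout, not guaranteed by this file.
from mstrio.connection import Connection  # assumed import path
from mstrio.cube import Cube              # assumed import path

conn = Connection(base_url="https://env.example.com/MicroStrategyLibrary",
                  username="user", password="***", project_name="Project")
cube = Cube(connection=conn, cube_id="1234567890ABCDEF1234567890ABCDEF")

df = cube.to_dataframe(limit=50000)     # one DataFrame, larger chunks per request
dfs = cube.to_dataframe(multi_df=True)  # list of DataFrames, one per cube table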
def to_dataframe(self, limit: int = None) -> pd.DataFrame:
    """Extract contents of a report instance into a Pandas `DataFrame`.

    Args:
        limit (None or int, optional): Used to control data extract behavior.
            By default (None) the limit is calculated automatically, based on
            an optimized physical size of one chunk. Setting limit manually
            will force the number of rows per chunk. Depending on system
            resources, a higher limit (e.g. 50,000) may reduce the total time
            required to extract the entire dataset.

    Returns:
        Pandas Data Frame containing the report contents.
    """
    if limit:
        self._initial_limit = limit
        self.instance_id = None

    if self.instance_id is None:
        res = self.__initialize_report(self._initial_limit)
    else:
        # try to get first chunk from already initialized instance of report;
        # if not possible, initialize new instance
        try:
            res = self.__get_chunk(instance_id=self.instance_id, offset=0,
                                   limit=self._initial_limit)
        except requests.HTTPError:
            res = self.__initialize_report(self._initial_limit)

    # Get the pagination totals from the response object
    _instance = res.json()
    self.instance_id = _instance['instanceId']
    paging = _instance['data']['paging']

    # initialize parser and process first response
    p = Parser(response=_instance, parse_cube=False)
    p.parse(response=_instance)

    # If there are more rows to fetch, fetch them
    if paging['current'] != paging['total']:
        if not limit:
            limit = max(
                1000,
                int((self._initial_limit * self._size_limit) / len(res.content)))
        # Count the number of additional iterations
        it_total = int((paging['total'] - self._initial_limit) / limit) + \
            ((paging['total'] - self._initial_limit) % limit != 0)

        if self.parallel and it_total > 1:
            threads = helper.get_parallel_number(it_total)
            with FuturesSession(
                    executor=ThreadPoolExecutor(max_workers=threads),
                    session=self._connection.session) as session:
                fetch_pbar = tqdm(desc="Downloading", total=it_total + 1,
                                  disable=(not self.progress_bar))
                future = self.__fetch_chunks_future(
                    session, paging, self.instance_id, limit)
                fetch_pbar.update()
                for i, f in enumerate(future, start=1):
                    response = f.result()
                    if not response.ok:
                        helper.response_handler(
                            response, "Error getting report contents.")
                    fetch_pbar.update()
                    fetch_pbar.set_postfix(rows=str(
                        min(self._initial_limit + i * limit, paging['total'])))
                    p.parse(response.json())
                fetch_pbar.close()
        else:
            self.__fetch_chunks(p, paging, it_total, self.instance_id, limit)

    # return parsed data as a data frame
    self._dataframe = p.dataframe

    # filter dataframe if the report had crosstabs and filters were applied
    if self.cross_tab_filter != {}:
        if self.cross_tab_filter['metrics'] is not None:
            # drop metrics columns from dataframe
            metr_names = [
                el['name'] for el in list(
                    filter(
                        lambda x: x['id'] not in self.cross_tab_filter['metrics'],
                        self.metrics))
            ]
            self._dataframe = self._dataframe.drop(metr_names, axis=1)

        if self.cross_tab_filter['attr_elements'] is not None:
            # create dict of attributes and elements to iterate through
            attr_dict = {}
            for attribute in self.cross_tab_filter['attr_elements']:
                key = attribute[:32]
                attr_dict.setdefault(key, []).append(attribute[33:])
            # initialize indexes series for filter
            indexes = pd.Series([False] * len(self._dataframe))
            # logical OR for filtered attribute elements
            for attribute in attr_dict:
                attr_name = list(
                    filter(lambda x: x['id'] in attribute,
                           self.attributes))[0]['name']
                elements = attr_dict[attribute]
                indexes = indexes | self._dataframe[attr_name].isin(elements)
            # select dataframe rows matching the filtered attribute elements
            self._dataframe = self._dataframe[indexes]

        if self.cross_tab_filter['attributes'] is not None:
            attr_names = [
                el['name'] for el in list(
                    filter(
                        lambda x: x['id'] not in self.cross_tab_filter['attributes'],
                        self.attributes))
            ]
            # replace filtered attributes with their attribute form columns
            to_be_removed = []
            to_be_added = []
            for attr in attr_names:
                forms = [
                    column for column in self._dataframe.columns
                    if column.startswith(attr + '@')
                ]
                if forms:
                    to_be_removed.append(attr)
                    to_be_added.extend(forms)
            for elem in to_be_removed:
                attr_names.remove(elem)
            attr_names.extend(to_be_added)
            # drop filtered out columns
            self._dataframe = self._dataframe.drop(attr_names, axis=1)

    return self._dataframe
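# --- Usage sketch (illustrative, not part of the library source) -------------
# Shows the report variant above together with the crosstab filtering it
# performs after parsing. The import paths, the `apply_filters` call and the
# object names are assumptions about the surrounding mstrio-py package layout.
from mstrio.connection import Connection  # assumed import path
from mstrio.report import Report          # assumed import path

conn = Connection(base_url="https://env.example.com/MicroStrategyLibrary",
                  username="user", password="***", project_name="Project")
report = Report(connection=conn, report_id="1234567890ABCDEF1234567890ABCDEF")

# Optionally restrict to selected attributes/metrics before extraction; for a
# crosstab report this populates `cross_tab_filter`, which the method above
# then applies to the parsed DataFrame.
report.apply_filters(attributes=["ATTR_ID"], metrics=["METRIC_ID"])

df = report.to_dataframe(limit=50000)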
def to_dataframe(self, limit=None, multi_df=False):
    """Extract contents of a cube into a Pandas Data Frame. Previously
    `microstrategy.Connection.get_cube()`.

    Args:
        limit (None or int, optional): Used to control data extract behavior.
            By default (None) the limit is calculated automatically, based on
            an optimized physical size of one chunk. Setting limit manually
            will force the number of rows per chunk. Depending on system
            resources, a higher limit (e.g. 50,000) may reduce the total time
            required to extract the entire dataset.
        multi_df (bool, optional): If True, return a list of data frames
            resembling the table structure of the cube. If False (default),
            returns one data frame.

    Returns:
        Pandas Data Frame containing the cube contents.
    """
    inst_pbar = tqdm(desc='Initializing an instance of a cube. Please wait...',
                     bar_format='{desc}', leave=False, ncols=280,
                     disable=(not self.progress_bar))

    if limit:
        self._initial_limit = limit

    # Request a new instance, set instance id
    res = cubes.cube_instance(connection=self._connection,
                              cube_id=self._cube_id,
                              body=self._filter.filter_body(),
                              offset=self.__OFFSET,
                              limit=self._initial_limit)
    inst_pbar.close()

    # Get the pagination totals and instance_id from the response object
    _instance = res.json()
    _instance_id = _instance['instanceId']
    _pagination = _instance['data']['paging']

    # initialize parser and process first response
    p = Parser(response=_instance, parse_cube=True)
    p.parse(response=_instance)

    # If there are more rows to fetch, fetch them
    if _pagination['current'] != _pagination['total']:
        if not limit:
            limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
        # Count the number of additional iterations
        it_total = int((_pagination['total'] - self._initial_limit) / limit) + \
            ((_pagination['total'] - self._initial_limit) % limit != 0)

        if self.parallel and it_total > 1:
            threads = helper.get_parallel_number(it_total)
            with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                fetch_pbar = tqdm(desc="Downloading", total=it_total + 1,
                                  disable=(not self.progress_bar))
                future = self.__fetch_chunks_future(session, _pagination, _instance_id, limit)
                fetch_pbar.update()
                for i, f in enumerate(future, start=1):
                    response = f.result()
                    if not response.ok:
                        # if the asynchronous chunk failed, retry it synchronously
                        current_offset = self._initial_limit + (i - 1) * limit
                        response = cubes.cube_instance_id(connection=self._connection,
                                                          cube_id=self._cube_id,
                                                          instance_id=_instance_id,
                                                          offset=current_offset,
                                                          limit=limit)
                    fetch_pbar.update()
                    fetch_pbar.set_postfix(rows=str(min(self._initial_limit + i * limit,
                                                        _pagination['total'])))
                    p.parse(response.json())
                fetch_pbar.close()
        else:
            self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

    # return parsed data as a data frame
    self._dataframe = p.dataframe

    # split dataframe into dataframes matching the tables in the cube
    if multi_df:
        # save the multitable_definition response to the instance
        self.__multitable_definition()
        self._dataframes = [self._dataframe[columns].copy()
                            for _, columns in self._table_definition.items()]
        return self._dataframes
    else:
        return self._dataframe
def to_dataframe(self, limit=None):
    """Extract contents of a report instance into a Pandas Data Frame.
    Formerly `microstrategy.Connection.get_report()`.

    Args:
        limit (None or int, optional): Used to control data extract behavior.
            By default (None) the limit is calculated automatically, based on
            an optimized physical size of one chunk. Setting limit manually
            will force the number of rows per chunk. Depending on system
            resources, a higher limit (e.g. 50,000) may reduce the total time
            required to extract the entire dataset.

    Returns:
        Pandas Data Frame containing the report contents.
    """
    inst_pbar = tqdm(desc='Initializing an instance of a report. Please wait...',
                     bar_format='{desc}', leave=False, ncols=285,
                     disable=(not self.progress_bar))

    # Switch off subtotals if the I-Server version is 11.2.1 or higher
    body = self._filter.filter_body()
    if version.parse(self._connection.iserver_version) >= version.parse("11.2.0100"):
        self._subtotals["visible"] = False
        body["subtotals"] = {"visible": self._subtotals["visible"]}

    if limit:
        self._initial_limit = limit

    # Request a new instance, set instance id
    res = reports.report_instance(connection=self._connection,
                                  report_id=self._report_id,
                                  body=body,
                                  offset=self.__OFFSET,
                                  limit=self._initial_limit)
    inst_pbar.close()

    # Get the pagination totals from the response object
    _instance = res.json()
    _instance_id = _instance['instanceId']
    _pagination = _instance['data']['paging']

    # initialize parser and process first response
    p = Parser(response=_instance, parse_cube=False)
    p.parse(response=_instance)

    # If there are more rows to fetch, fetch them
    if _pagination['current'] != _pagination['total']:
        if not limit:
            limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))
        # Count the number of additional iterations
        it_total = int((_pagination['total'] - self._initial_limit) / limit) + \
            ((_pagination['total'] - self._initial_limit) % limit != 0)

        if self.parallel and it_total > 1:
            threads = helper.get_parallel_number(it_total)
            with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                fetch_pbar = tqdm(desc="Downloading", total=it_total + 1,
                                  disable=(not self.progress_bar))
                future = self.__fetch_chunks_future(session, _pagination, _instance_id, limit)
                fetch_pbar.update()
                for i, f in enumerate(future, start=1):
                    response = f.result()
                    if not response.ok:
                        # if the asynchronous chunk failed, retry it synchronously
                        current_offset = self._initial_limit + (i - 1) * limit
                        response = reports.report_instance_id(connection=self._connection,
                                                              report_id=self._report_id,
                                                              instance_id=_instance_id,
                                                              offset=current_offset,
                                                              limit=limit)
                    fetch_pbar.update()
                    fetch_pbar.set_postfix(rows=str(min(self._initial_limit + i * limit,
                                                        _pagination['total'])))
                    p.parse(response.json())
                fetch_pbar.close()
        else:
            self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

    # return parsed data as a data frame
    self._dataframe = p.dataframe

    # filter received dataframe if the report had crosstabs and filters were applied
    if self.cross_tab_filter != {}:
        if self.cross_tab_filter['metrics'] is not None:
            # drop metrics columns from dataframe
            metr_names = [el['name'] for el in
                          list(filter(lambda x: x['id'] not in self.cross_tab_filter['metrics'],
                                      self._metrics))]
            self._dataframe = self._dataframe.drop(metr_names, axis=1)

        if self.cross_tab_filter['attr_elements'] is not None:
            # create dict of attributes and elements to iterate through
            attr_dict = {}
            for attribute in self.cross_tab_filter['attr_elements']:
                key = attribute[:32]
                attr_dict.setdefault(key, []).append(attribute[33:])
            # initialize indexes series for filter
            indexes = pd.Series([False] * len(self._dataframe))
            # logical OR for filtered attribute elements
            for attribute in attr_dict:
                attr_name = list(filter(lambda x: x['id'] in attribute,
                                        self._attributes))[0]['name']
                elements = attr_dict[attribute]
                indexes = indexes | self._dataframe[attr_name].isin(elements)
            # select dataframe rows matching the filtered attribute elements
            self._dataframe = self._dataframe[indexes]

        if self.cross_tab_filter['attributes'] is not None:
            # drop columns of attributes filtered out of the report
            attr_names = [el['name'] for el in
                          list(filter(lambda x: x['id'] not in self.cross_tab_filter['attributes'],
                                      self._attributes))]
            self._dataframe = self._dataframe.drop(attr_names, axis=1)

    return self._dataframe
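# --- Version-check sketch (illustrative) --------------------------------------
# The subtotals switch in the method above relies on packaging's version
# comparison; this standalone snippet only demonstrates that comparison. The
# sample version string is an assumption, not a value taken from this file.
from packaging import version

iserver_version = "11.2.0200"  # assumed value as reported by the I-Server
if version.parse(iserver_version) >= version.parse("11.2.0100"):
    # on 11.2.1 or newer the request body carries {"subtotals": {"visible": False}}
    print("Subtotals will be switched off for this report instance.")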