def eventize_and_enrich(commits, git_enrich):
    """Eventize a list of commits and enrich the resulting events.

    Pipeline: commits -> one event per file touched -> drop events whose
    filepath is "-" -> add file type info -> split file path into parts ->
    re-encode the "owner" field to valid UTF-8 (surrogate cleanup).

    :param commits: iterable of commit items to eventize.
    :param git_enrich: enrich helper handed to ``Git`` for eventizing.
    :return: DataFrame with one enriched event per file action.
    """
    # Lazy %-args: the message is only built if the INFO level is enabled.
    logging.info("New commits: %s", len(commits))

    # Create events from commits
    # TODO add tests for eventize method
    git_events = Git(commits, git_enrich)
    events_df = git_events.eventize(2)

    logging.info("New events: %s", len(events_df))

    # Filter information
    data_filtered = FilterRows(events_df)
    events_df = data_filtered.filter_(["filepath"], "-")

    logging.info("New events filtered: %s", len(events_df))

    # Add filetype info
    enriched_filetype = FileType(events_df)
    events_df = enriched_filetype.enrich('filepath')

    logging.info("New Filetype events: %s", len(events_df))

    # Split filepath info
    enriched_filepath = FilePath(events_df)
    events_df = enriched_filepath.enrich('filepath')

    logging.info("New Filepath events: %s", len(events_df))

    # Deal with surrogates
    convert = ToUTF8(events_df)
    events_df = convert.enrich(["owner"])

    logging.info("Final new events: %s", len(events_df))

    return events_df
def test_column_not_exists(self):
    """Filtering a column absent from an empty DataFrame raises ValueError."""

    df = pandas.DataFrame()
    data_filtered = FilterRows(df)
    # Raw string for the regex: "\[" in a plain literal is an invalid
    # escape sequence (DeprecationWarning since Python 3.6, SyntaxWarning
    # from 3.12). The unused "as context" binding is dropped.
    with self.assertRaisesRegex(
            ValueError,
            r"Column filepath not in DataFrame columns: \[\]"):
        data_filtered.filter_(["filepath"], "-")
def process(self, items_block):
    """Process items to add file related information.

    Eventize items creating one new item per each file found in the commit
    (excluding files with no actions performed on them). For each event,
    file path, file name, path parts, file type and file extension are
    added as fields.

    :param items_block: items to be processed. Expects to find ElasticSearch
        hits _source part only.
    :return: ``self.ProcessResults`` with the number of processed events and
        the resulting events DataFrame.
    """
    # Lazy %-args throughout: messages are only formatted when the DEBUG
    # level is actually enabled, instead of eagerly via str.format().
    logger.debug("%s New commits: %s", self.__log_prefix, len(items_block))

    # Create events from commits
    git_events = Git(items_block, self._git_enrich)
    events_df = git_events.eventize(2)

    logger.debug("%s New events: %s", self.__log_prefix, len(events_df))

    if len(events_df) > 0:
        # Filter information
        data_filtered = FilterRows(events_df)
        events_df = data_filtered.filter_(["filepath"], "-")
        logger.debug("%s New events filtered: %s",
                     self.__log_prefix, len(events_df))

        # Truncate commit messages to bound per-event payload size.
        events_df['message'] = events_df['message'].str.slice(
            stop=AreasOfCode.MESSAGE_MAX_SIZE)
        logger.debug("%s Remove message content", self.__log_prefix)

        # Add filetype info
        enriched_filetype = FileType(events_df)
        events_df = enriched_filetype.enrich('filepath')
        logger.debug("%s New Filetype events: %s",
                     self.__log_prefix, len(events_df))

        # Split filepath info
        enriched_filepath = FilePath(events_df)
        events_df = enriched_filepath.enrich('filepath')
        logger.debug("%s New Filepath events: %s",
                     self.__log_prefix, len(events_df))

        events_df['origin'] = events_df['repository']

        # Deal with surrogates
        convert = ToUTF8(events_df)
        events_df = convert.enrich(["owner"])
        logger.debug("%s Final new events: %s",
                     self.__log_prefix, len(events_df))

    # Outside the guard so an empty batch still yields a (0, empty) result.
    return self.ProcessResults(processed=len(events_df), out_items=events_df)
def test_filter_rows(self):
    """ Test several cases for filtering rows by column value """

    # One column, values of different types; only exact "-" matches drop.
    df = pandas.DataFrame()
    filepaths = [
        '', None, '-', '/file/path', 1, True,
        float('nan'),  # pandas.np was deprecated in pandas 1.0, removed in 2.0
        '-', [1, 2]
    ]
    df["filepath"] = filepaths
    data_filtered = FilterRows(df)
    df = data_filtered.filter_(["filepath"], "-")
    # 9 input rows minus the two "-" entries
    self.assertEqual(len(df), 7)

    # One empty column
    df = pandas.DataFrame()
    df["filepath"] = []
    data_filtered = FilterRows(df)
    df = data_filtered.filter_(["filepath"], "-")
    self.assertEqual(len(df), 0)

    # Several columns and just one empty
    # NOTE(review): assigning 4-element columns after an empty "filepath"
    # column relies on pandas accepting the length mismatch — verify this
    # sub-case actually runs on the pandas version in use.
    df = pandas.DataFrame()
    df["filepath"] = []
    df["name"] = ["name", "-", "other", "-"]
    df["dirname"] = ["dir", "-", "-", "-"]
    data_filtered = FilterRows(df)
    df = data_filtered.filter_(["filepath", "name", "dirname"], "-")
    self.assertEqual(len(df), 1)
def process(self, items_block):
    """Process items to add file related information.

    Eventize items creating one new item per each file found in the commit
    (excluding files with no actions performed on them). For each event,
    file path, file name, path parts, file type and file extension are
    added as fields.

    :param items_block: items to be processed. Expects to find ElasticSearch
        hits _source part only.
    :return: ``self.ProcessResults`` with the number of processed events and
        the resulting events DataFrame.
    """
    # Lazy %-args: avoids building the message string when INFO is disabled,
    # instead of eager "..." + str(...) concatenation.
    logger.info("New commits: %s", len(items_block))

    # Create events from commits
    git_events = Git(items_block, self._git_enrich)
    events_df = git_events.eventize(2)

    logger.info("New events: %s", len(events_df))

    if len(events_df) > 0:
        # Filter information
        data_filtered = FilterRows(events_df)
        events_df = data_filtered.filter_(["filepath"], "-")
        logger.info("New events filtered: %s", len(events_df))

        # Add filetype info
        enriched_filetype = FileType(events_df)
        events_df = enriched_filetype.enrich('filepath')
        logger.info("New Filetype events: %s", len(events_df))

        # Split filepath info
        enriched_filepath = FilePath(events_df)
        events_df = enriched_filepath.enrich('filepath')
        logger.info("New Filepath events: %s", len(events_df))

        # Deal with surrogates
        convert = ToUTF8(events_df)
        events_df = convert.enrich(["owner"])
        logger.info("Final new events: %s", len(events_df))

    # Outside the guard so an empty batch still yields a (0, empty) result.
    return self.ProcessResults(processed=len(events_df), out_items=events_df)