Python summarizeの例

プログラミング言語: Python

名前空間/パッケージ名: plydata

メソッド/関数: summarize

hotexamples.comのコード掲載数: 7

Python summarize - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのplydata.summarizeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

def summarize_fd_by_subject(df):
    """Summarize framewise displacement per group and return it in long form.

    The frame is grouped by subject_id, condition, data_id and headcase, and
    five summary expressions over the FramewiseDisplacement column are
    evaluated for each group. The five summary columns are then melted into
    "measure"/"val" pairs, the rows are arranged by subject_id and the index
    is reset (dropping the old one).

    NOTE(review): the expression strings reference filter_mean,
    filter_median and perc_high_motion, none of which are defined in this
    block -- presumably helpers available where the expressions are
    evaluated; confirm against the surrounding module.
    """
    identifier_columns = ["subject_id", "data_id", "condition", "headcase"]
    summary_columns = [
        "fd_mean",
        "fd_median",
        "fd_mean_filter",
        "fd_median_filter",
        "perc_spikes",
    ]

    def to_long_format(frame):
        # One output row per (identifier combination, summary column).
        return frame.melt(
            id_vars=identifier_columns,
            value_vars=summary_columns,
            var_name="measure",
            value_name="val",
        )

    grouped = df >> p.group_by("subject_id", "condition", "data_id", "headcase")
    summarized = grouped >> p.summarize(
        fd_mean="mean(FramewiseDisplacement)",
        fd_median="median(FramewiseDisplacement)",
        fd_mean_filter="filter_mean(FramewiseDisplacement)",
        fd_median_filter="filter_median(FramewiseDisplacement)",
        perc_spikes="perc_high_motion(FramewiseDisplacement)",
    )
    melted = summarized >> p.do(to_long_format)
    ordered = melted >> p.arrange("subject_id")
    return ordered >> p.call(".reset_index", drop=True)

コード例 #2

ファイルを表示

def test_count():
    """Check count() bare, by column, with weights, by expression and after group_by."""
    frame = pd.DataFrame({
        'x': list(range(1, 7)),
        'y': ['a', 'b'] * 3,
        'w': [1, 2] * 3,
    })

    # No arguments: a single row holding the total number of rows.
    overall = frame >> count()
    assert overall.loc[0, 'n'] == 6

    per_label = frame >> count('y')
    assert per_label['n'].tolist() == [3, 3]

    per_label_and_weight = frame >> count('y', 'w')
    assert per_label_and_weight['n'].tolist() == [3, 3]

    # Weighted counting must agree with an explicit grouped sum of the weights.
    weighted = frame >> count('y', weights='w')
    assert weighted['n'].tolist() == [3, 6]

    grouped_sum = frame >> group_by('y') >> summarize(n='sum(w)')
    assert weighted.equals(grouped_sum)

    # Counting by an expression names the output column after the expression.
    by_expression = frame >> count('w-1')
    assert by_expression['w-1'].tolist() == [0, 1]
    assert by_expression['n'].tolist() == [3, 3]

    # Counting on an already grouped frame keeps the grouping but yields the
    # same table as counting by both columns at once.
    counted_after_grouping = frame >> group_by('y') >> count('w')
    counted_directly = frame >> count('y', 'w')
    assert counted_after_grouping.plydata_groups == ['y']
    assert pd.DataFrame(counted_after_grouping).equals(counted_directly)

コード例 #3

ファイルを表示

def test_tally():
    """Check tally() bare, grouped, weighted, with external weights and sorted."""
    frame = pd.DataFrame({
        'x': list(range(1, 7)),
        'y': ['a', 'b'] * 3,
        'w': [1, 2] * 3,
    })

    # No grouping and no weights: a single row holding the total row count.
    overall = frame >> tally()
    assert overall.loc[0, 'n'] == 6

    per_label = frame >> group_by('y') >> tally()
    assert per_label['n'].tolist() == [3, 3]

    # A weighted tally must agree with an explicit grouped sum of the weights.
    weighted = frame >> group_by('y') >> tally('w')
    assert weighted['n'].tolist() == [3, 6]

    grouped_sum = frame >> group_by('y') >> summarize(n='sum(w)')
    assert weighted.equals(grouped_sum)

    # External weights
    externally_weighted = frame >> tally(range(5))
    assert externally_weighted.loc[0, 'n'] == 10

    # Sort
    sorted_tally = frame >> group_by('y') >> tally('w', sort=True)
    assert sorted_tally['n'].tolist() == [6, 3]

コード例 #4

ファイルを表示

def summarize_mpars_by_subject(df):
    """Summarize the six motion parameters per group and return them in long form.

    The frame is grouped by subject_id, condition, data_id and headcase. For
    each of the columns x, y, z, pitch, roll and yaw the mean, median and std
    expressions are evaluated per group, giving 18 summary columns named
    "<parameter>_<statistic>". Those columns are melted into
    "measure"/"val" pairs, the rows are arranged by subject_id and the index
    is reset (dropping the old one).
    """
    translations = ("x", "y", "z")
    rotations = ("pitch", "roll", "yaw")
    statistics = ("mean", "median", "std")

    # Summary columns are created parameter by parameter:
    # x_mean, x_median, x_std, y_mean, ... , yaw_std.
    summary_expressions = {
        f"{parameter}_{statistic}": f"{statistic}({parameter})"
        for parameter in translations + rotations
        for statistic in statistics
    }

    # The melt order is different: within each parameter family the columns
    # are listed statistic by statistic (x_mean, y_mean, z_mean, x_median, ...).
    melt_columns = [
        f"{parameter}_{statistic}"
        for family in (translations, rotations)
        for statistic in statistics
        for parameter in family
    ]

    grouped = df >> p.group_by("subject_id", "condition", "data_id", "headcase")
    summarized = grouped >> p.summarize(**summary_expressions)
    melted = summarized >> p.call(
        ".melt",
        id_vars=["subject_id", "data_id", "condition", "headcase"],
        value_vars=melt_columns,
        var_name="measure",
        value_name="val",
    )
    ordered = melted >> p.arrange("subject_id")
    return ordered >> p.call(".reset_index", drop=True)

コード例 #5

ファイルを表示

def combine_sherlock_view_runs_and_replace(df):
    """
    Collapse the per-run 'view1'/'view2' rows of sherlock subjects into one 'view' row.

    The input is a summarized dataframe (i.e. 1 datapoint per subject per
    condition/motion-direction). Realignment is computed on a per run basis
    and sherlock subs have 2 'view' runs, so their summary statistics
    (e.g. mean FD) are combined as: (mean of run1 + mean of run2) / 2.

    Rows with data_id 'sherlock' and a condition other than 'recall' are
    averaged over `val` within each subject_id / measure / data_id / headcase
    group and relabelled with condition 'view'. They are then appended to the
    rows of `df` whose condition is already 'view' or 'recall', so the
    returned frame holds only 'view' and 'recall' conditions. The index of
    the result is reset.
    """
    output_columns = ("subject_id", "data_id", "condition", "headcase", "measure", "val")

    # Average the non-recall sherlock rows within each subject/measure group.
    sherlock_view_runs = df >> p.query("data_id == 'sherlock' and condition != 'recall'")
    averaged = (
        sherlock_view_runs
        >> p.group_by("subject_id", "measure", "data_id", "headcase")
        >> p.summarize(val="mean(val)")
    )
    # Relabel the averaged rows and put the columns in a fixed order.
    relabelled = averaged >> p.call(".assign", condition="view") >> p.select(*output_columns)

    # Rows that already carry a final condition label are kept unchanged.
    already_final = df.query("condition == 'view' or condition == 'recall'")

    stacked = pd.concat([already_final, relabelled], axis=0)
    return stacked.reset_index(drop=True)

コード例 #6

ファイルを表示

ファイル: test_dataframe.py プロジェクト: yassermustfa/plydata

    def test_no_groups(self):
        """Check summarize() on the ungrouped fixture frame.

        Positional expressions produce a column named after the expression
        string itself; a keyword argument names the output column explicitly.
        """
        # (expression, expected value in the single output row)
        positional_cases = [
            ('min(x)', 0),
            ('first(x)', 0),
            ('last(x)', 5),
            ('nth(y, 4)', 2),
            ('n_distinct(y)', 4),
            ('n()', 6),
        ]
        for expression, expected in positional_cases:
            summary = self.df >> summarize(expression)
            assert summary.loc[0, expression] == expected

        # An out-of-range position yields NaN rather than raising.
        out_of_range = self.df >> summarize(nth='nth(x, 100)')
        assert np.isnan(out_of_range.loc[0, 'nth'])

コード例 #7

ファイルを表示

ファイル: test_dataframe.py プロジェクト: yassermustfa/plydata

    def test_groups(self):
        """Check summarize() after group_by('y'): one output row per group."""
        grouped = self.df >> group_by('y')

        group_means = grouped >> summarize('mean(x)')
        assert (group_means['mean(x)'] == [0.5, 2.5, 4, 5]).all()

        group_sizes = grouped >> summarize('n()')
        assert (group_sizes['n()'] == [2, 2, 1, 1]).all()