Example #1
# Imports inferred from usage (this snippet is an excerpt; drop_null,
# chisquare, calc_cat_stats and calc_word_freq come from its surrounding
# module and are not shown here):
from typing import Any, Dict

import dask
import dask.dataframe as dd
def nom_comps(
    srs: dd.Series,
    ngroups: int,
    largest: bool,
    bins: int,
    top_words: int,
    stopword: bool,
    lemmatize: bool,
    stem: bool,
) -> Dict[str, Any]:
    """
    This function aggregates all of the computations required for plot(df, Nominal())

    Parameters
    ----------
    srs
        one categorical column
    ngroups
        Number of groups to return
    largest
        If True, show the groups with the largest count,
        else show the groups with the smallest count
    bins
        number of bins for the category length frequency histogram
    top_words
        Number of highest frequency words to show in the
        wordcloud and word frequency bar chart
    stopword
        If True, remove stop words, else keep them
    lemmatize
        If True, lemmatize the words before computing
        the word frequencies, else don't
    stem
        If True, extract the stem of the words before
        computing the word frequencies, else don't
    """  # pylint: disable=too-many-arguments

    data: Dict[str, Any] = {}

    # total rows
    data["nrows"] = srs.shape[0]
    # cast the column as string type if it contains a mutable type
    first_rows = srs.head()  # dd.Series.head() triggers a (small) data read
    try:
        first_rows.apply(hash)
    except TypeError:
        srs = srs.astype(str)
    # drop null values
    srs = drop_null(srs)

    # persist the cleaned series so the work above is computed once and
    # shared by every downstream aggregation
    (srs,) = dask.persist(srs)

    ## if cfg.bar_enable or cfg.pie_enable
    # counts of unique values in the series
    grps = srs.value_counts(sort=False)
    # total number of groups
    data["nuniq"] = grps.shape[0]
    # select the largest or smallest groups
    data["bar"] = grps.nlargest(ngroups) if largest else grps.nsmallest(
        ngroups)
    ##     if cfg.barchart_bars == cfg.piechart_slices:
    data["pie"] = data["bar"]
    ##     else
    ##     data["pie"] = grps.nlargest(ngroups) if largest else grps.nsmallest(ngroups)
    ##     if cfg.insights.evenness_enable
    data["chisq"] = chisquare(grps.values)

    if not first_rows.apply(lambda x: isinstance(x, str)).all():
        # srs must be a string series to compute the value lengths
        srs = srs.astype(str)
    ## if cfg.stats_enable
    data.update(calc_cat_stats(srs, bins, data["nrows"], data["nuniq"]))
    ## if cfg.word_freq_enable
    data.update(calc_word_freq(srs, top_words, stopword, lemmatize, stem))

    return data
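A minimal usage sketch (not from the original source): nom_comps comes from dataprep-style EDA code, so it relies on the module-level helpers noted in the imports above; the column name and parameter values here are illustrative only.

import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({"city": ["Tokyo", "Osaka", "Tokyo", "Kyoto", None]})
srs = dd.from_pandas(pdf["city"], npartitions=2)

stats = nom_comps(
    srs,
    ngroups=10,     # keep the 10 most frequent categories
    largest=True,   # largest counts rather than smallest
    bins=20,        # bins for the category-length histogram
    top_words=30,   # words shown in the word cloud / frequency bar chart
    stopword=True,
    lemmatize=False,
    stem=False,
)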
Example #2
from typing import Any, Dict, Optional, Union

import pandas
from dask.dataframe import DataFrame, Series
def dataframe_from_series_of_pandas(series_of_pandas_dataframes: Series,
                                    schema: Optional[Union[pandas.DataFrame, DataFrame, Dict[Any, Any]]] = None):
    """
    Convert a dask.dataframe.Series whose rows each hold a pandas.DataFrame
    into a dask.dataframe.DataFrame.
    If schema is given, it is used as the schema of the result; if it is None,
    the dataframe held in the first row of the first partition is used instead.
    The index of each dataframe inside the series is ignored and overwritten
    with the index of the original series.

    Args:
        series_of_pandas_dataframes: dask series whose rows each hold a pandas dataframe.
        schema: schema of the result dataframe
    Returns: dask dataframe
    Examples:
        >>> import dask.dataframe
        >>> import pandas
        >>> df = pandas.DataFrame({'a': range(100), 'b': range(100, 200)})
        >>> print(df)
        ... # doctest: +NORMALIZE_WHITESPACE
             a    b
        0    0  100
        1    1  101
        2    2  102
        3    3  103
        4    4  104
        ..  ..  ...
        95  95  195
        96  96  196
        97  97  197
        98  98  198
        99  99  199
        [100 rows x 2 columns]
        >>> ddf = dask.dataframe.from_pandas(df, npartitions=4)
        >>> def build_df_from_row(row):
        ...     pdf = pandas.DataFrame({'values': [row.a, row.a + 1, row.a + 2], 'support': [row.b, row.b, row.b]})
        ...     return pdf
        >>> df_ds = ddf.apply(build_df_from_row, axis=1)
        >>> print(df_ds)
        ... # doctest: +NORMALIZE_WHITESPACE
        Dask Series Structure:
        npartitions=4
        0     object
        25       ...
        50       ...
        75       ...
        99       ...
        dtype: object
        Dask Name: apply, 8 tasks
        >>> result = dataframe_from_series_of_pandas(df_ds)
        >>> print(result)
        ... # doctest: +NORMALIZE_WHITESPACE
        Dask DataFrame Structure:
                      values support
        npartitions=4
        0              int64   int64
        25               ...     ...
        50               ...     ...
        75               ...     ...
        99               ...     ...
        Dask Name: create_pandas_dataframe_in_partition, 12 tasks
        >>> print(result.compute())
        ... # doctest: +NORMALIZE_WHITESPACE
            values  support
        0        0      100
        0        1      100
        0        2      100
        1        1      101
        1        2      101
        ..     ...      ...
        98      99      198
        98     100      198
        99      99      199
        99     100      199
        99     101      199
        [300 rows x 2 columns]
    """
    if schema is None:
        schema = series_of_pandas_dataframes.head(1).iloc[0]

    def create_pandas_dataframe_in_partition(series_chunk: pandas.Series):
        # Replace each inner dataframe's index with the index value of the
        # outer series row it came from, then stack the frames vertically.
        # (Series.iteritems and set_axis(..., inplace=True) were removed in
        # pandas 2.0, so build re-indexed copies instead of mutating.)
        frames = [v.set_axis([i] * len(v.index), axis=0) for i, v in series_chunk.items()]
        return pandas.concat(frames, axis=0)

    ddf = series_of_pandas_dataframes.map_partitions(create_pandas_dataframe_in_partition, meta=schema)
    return ddf
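A short follow-up sketch (not in the original): reusing df_ds from the doctest above, an explicit schema can be passed as an empty pandas DataFrame carrying only the expected dtypes, which spares the head(1) read that the None branch performs for inference.

import pandas

explicit_schema = pandas.DataFrame({
    'values': pandas.Series(dtype='int64'),
    'support': pandas.Series(dtype='int64'),
})
result = dataframe_from_series_of_pandas(df_ds, schema=explicit_schema)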