Python concat Examples

Programming Language: Python

Namespace/Package Name: dask.dataframe.methods

Method/Function: concat

Examples at hotexamples.com: 6

Python concat - 6 examples found. These are the top-rated real-world Python examples of dask.dataframe.methods.concat, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: test_multi.py Project: floriango/dask

 def check_and_return(ddfs, dfs, join):
     """Concatenate both inputs, check they agree and return the dask result.

     ``dfs`` is combined with ``concat`` and ``ddfs`` with ``dd.concat``; the
     two results are compared with ``assert_eq``.  The names ``divisions``,
     ``known`` and ``cat_index`` are not parameters: they are read from the
     scope surrounding this function.
     """
     expected = concat(dfs, join=join)
     result = dd.concat(ddfs, join=join, interleave_partitions=divisions)
     assert_eq(result, expected)
     if known:
         computed = compute_as_if_collection(
             dd.DataFrame, result.dask, result.__dask_keys__()
         )
         empties = [part.iloc[:0] for part in computed]
         for empty in empties:
             # The comparison result is discarded on purpose.
             result._meta == empty  # will error if schemas don't align
     if cat_index:
         assert has_known_categories(result.index) == known
     return result

Example #2

Show file

File: categorical.py Project: m-rossi/dask

def _get_categories_agg(parts):
    """Merge per-partition category information into one deduplicated result.

    Each element of ``parts`` is indexed as ``part[0]`` (a mapping from key to
    a piece of values) and ``part[1]`` (an index-like object or ``None``).

    Returns a 2-tuple: a dict mapping every key to its concatenated,
    duplicate-free values, and either ``None`` (when the first part carries no
    index) or the first index extended by all later ones with duplicates
    dropped.
    """
    pieces_by_key = {}
    indexes = []
    for part in parts:
        for key, piece in part[0].items():
            pieces_by_key.setdefault(key, []).append(piece)
        indexes.append(part[1])

    merged = {}
    for key, pieces in pieces_by_key.items():
        joined = methods.concat(pieces, ignore_index=True)
        merged[key] = joined.drop_duplicates()

    first_index = indexes[0]
    if first_index is None:
        return merged, None
    combined_index = first_index.append(indexes[1:])
    return merged, combined_index.drop_duplicates()

Example #3

Show file

File: rolling.py Project: m-rossi/dask

def _tail_timedelta(prevs, current, before):
    """Collect the trailing rows of ``prevs`` that fall inside the window.

    A row is kept when its index is strictly greater than the smallest index
    value of ``current`` minus ``before``.

    Parameters
    ----------
    prevs : list of DataFrame objects
    current : DataFrame
    before : timedelta

    Returns
    -------
    overlapped : DataFrame
        The kept rows of every frame in ``prevs``, concatenated in order.
    """
    kept = []
    for prev in prevs:
        cutoff = current.index.min() - before
        kept.append(prev[prev.index > cutoff])
    return methods.concat(kept)

Example #4

Show file

File: test_multi.py Project: floriango/dask

def test_concat_datetimeindex():
    """Concatenating datetime-indexed categorical frames keeps the index dtype."""
    # https://github.com/dask/dask/issues/2932

    def make_frame(value, stamp):
        # One-row frame with a categorical ``x`` column and a datetime index.
        index = pd.DatetimeIndex([stamp], dtype='datetime64[ns]')
        frame = pd.DataFrame({'x': [value]}, index=index)
        frame['x'] = frame.x.astype('category').cat.set_categories(['a', 'c'])
        return frame

    first = make_frame('a', '2015-03-24 00:00:16')
    second = make_frame('c', '2015-03-29 00:00:44')

    dask_first = dd.from_pandas(first, 1)
    dask_second = dd.from_pandas(second, 1)

    # Concatenating the empty slices must still yield a datetime index.
    empty_concat = concat([first.iloc[:0], second.iloc[:0]])
    assert empty_concat.index.dtype == '<M8[ns]'

    assert_eq(dd.concat([dask_first, dask_second]), pd.concat([first, second]))

Example #5

Show file

File: rolling.py Project: m-rossi/dask

def _combined_parts(prev_part, current_part, next_part, before, after):
    """Concatenate a partition with whichever neighbours are present.

    When ``before`` (or ``after``) is an integer and the matching neighbour is
    given, that neighbour must have exactly that many rows; otherwise
    ``NotImplementedError`` is raised.

    Returns a ``CombinedOutput`` wrapping a 3-tuple: the concatenated frame,
    the length of ``prev_part`` (``None`` if absent) and the length of
    ``next_part`` (``None`` if absent).
    """
    msg = ("Partition size is less than overlapping "
           "window size. Try using ``df.repartition`` "
           "to increase the partition size.")

    # Both neighbours follow the same rule, so check them in one pass.
    for neighbour, window in ((prev_part, before), (next_part, after)):
        if neighbour is None or not isinstance(window, Integral):
            continue
        if neighbour.shape[0] != window:
            raise NotImplementedError(msg)

    present = [
        part
        for part in (prev_part, current_part, next_part)
        if part is not None
    ]
    combined = methods.concat(present)
    prev_len = None if prev_part is None else len(prev_part)
    next_len = None if next_part is None else len(next_part)

    return CombinedOutput((combined, prev_len, next_len))

Example #6

Show file

File: rolling.py Project: jakirkham/dask

def overlap_chunk(
    func, prev_part, current_part, next_part, before, after, args, kwargs
):
    """Apply ``func`` to a partition padded with its neighbours and trim the padding.

    The non-``None`` parts among ``prev_part``, ``current_part`` and
    ``next_part`` are concatenated, ``func(combined, *args, **kwargs)`` is
    called, and the leading/trailing rows that correspond to the neighbours
    are sliced off the result.

    Parameters
    ----------
    func : callable
        Called with the concatenated frame followed by ``*args, **kwargs``.
    prev_part, next_part : frame-like or None
        Neighbouring rows placed before / after ``current_part``.
    current_part : frame-like
    before, after : int or datetime.timedelta
        For integers the matching neighbour must have exactly that many rows.
        For timedeltas the neighbour's length is used as the row count.
    args, kwargs : sequence, mapping
        Extra arguments forwarded to ``func``.

    Returns
    -------
    The output of ``func`` with the rows attributed to the neighbours removed.

    Raises
    ------
    NotImplementedError
        If an integer ``before``/``after`` does not match the number of rows
        of the corresponding neighbour.
    """
    msg = (
        "Partition size is less than overlapping "
        "window size. Try using ``df.repartition`` "
        "to increase the partition size."
    )

    if prev_part is not None and isinstance(before, Integral):
        if prev_part.shape[0] != before:
            raise NotImplementedError(msg)

    if next_part is not None and isinstance(after, Integral):
        if next_part.shape[0] != after:
            raise NotImplementedError(msg)

    parts = [p for p in (prev_part, current_part, next_part) if p is not None]
    combined = methods.concat(parts)
    out = func(combined, *args, **kwargs)
    if prev_part is None:
        before = None
    if isinstance(before, datetime.timedelta):
        before = len(prev_part)

    # ``func`` may return a whole multiple of the input rows; scale the
    # number of rows to trim by that factor.
    expansion = None
    if combined.shape[0] != 0:
        expansion = out.shape[0] // combined.shape[0]
    if before and expansion:
        before *= expansion
    if next_part is None:
        return out.iloc[before:]
    if isinstance(after, datetime.timedelta):
        after = len(next_part)
    if after and expansion:
        after *= expansion
    # ``-0 == 0``, so ``out.iloc[before:-after]`` would select nothing when
    # there are no trailing rows to trim (e.g. an empty ``next_part`` with a
    # timedelta window).  Use an open-ended stop in that case.
    stop = -after if after else None
    return out.iloc[before:stop]