Example #1
0
def _hstack_date_internal(dates, subclass=TypeRegister.Date):
    '''
    hstacks Date / DateSpan objects and returns a new Date / DateSpan object.

    Will be called by riptable.hstack() if the first item in the sequence is a Date object.

    Parameters
    ----------
    dates : list or tuple of Date / DateSpan objects
        Every item must be an instance of `subclass`.
    subclass : type, default TypeRegister.Date
        Class every item is checked against, and which is used to build the result.

    Returns
    -------
    An instance of `subclass`:
      * an empty instance when `dates` is empty,
      * the single item itself (not a copy) when `dates` holds exactly one item,
      * otherwise the stacked data rewrapped via ``subclass.newclassfrominstance``
        using the first item as the template.

    Raises
    ------
    TypeError
        If any item is not an instance of `subclass`.

    Examples
    --------
    >>> d1 = Date('2015-02-01')
    >>> d2 = Date(['2016-02-01', '2017-02-01', '2018-02-01'])
    >>> hstack([d1, d2])
    Date([2015-02-01, 2016-02-01, 2017-02-01, 2018-02-01])
    '''
    if len(dates) == 0:
        return subclass([])
    for d in dates:
        if not isinstance(d, subclass):
            # maybe extend this to support stacking with regular DateTimeNano objects?
            raise TypeError(
                f'Could not perform Date.hstack() on item of type {type(d)}')
    if len(dates) == 1:
        # Nothing to stack: hand back the item, not the enclosing list/tuple.
        return dates[0]

    stacked = rc.HStack(dates)
    return subclass.newclassfrominstance(stacked, dates[0])
Example #2
0
def _hstack_timespan(tspans, destroy=False):
    '''
    Stack the given items with ``rc.HStack`` and rewrap the result in a TimeSpan.

    The items are not type checked (TODO: maybe add type checking?).
    NOTE: destroy ignored
    '''
    return TypeRegister.TimeSpan(rc.HStack(tspans))
Example #3
0
def _hstack_datetimenano(dtlist, destroy=False):
    '''
    Performs an hstack on a list of DateTimeNano objects.
    All items in list must have their display set to the same timezone.
    NOTE: destroy ignored
    '''
    # make sure all of the date time nano objects are set to be displayed relative to the same timezone
    timezone = dtlist[0]._timezone._timezone_str
    for dt in dtlist:
        if not isinstance(dt, TypeRegister.DateTimeNano):
            raise TypeError(
                f"Items to be hstacked must be DateTimeNano objects.")
        if dt._timezone._timezone_str != timezone:
            raise NotImplementedError(
                f"Can only hstack DateTimeNano objects in the same timezone.")
    if len(dtlist) == 1:
        return dtlist

    # hstack int64 utc nano arrays
    arr = rc.HStack(dtlist)

    # reconstruct with first item
    return TypeRegister.DateTimeNano.newclassfrominstance(arr, dtlist[0])
Example #4
0
def _hstack_categorical(cats: list,
                        verbose: bool = False,
                        destroy: bool = False):
    '''
    HStack Categoricals.

    The unique categories will be merged into a new unique list.
    The indices will be fixed to point to the new category array.
    The indices are hstacked and a new categorical is returned.

    Parameters
    ----------
    cats : list of Categorical
        Cats must be a list of categoricals.
    verbose : bool
        Enable verbose output. Defaults to False.
    destroy : bool
        This parameter is ignored by this function.

    Returns
    -------
    Categorical

    Examples
    --------
    >>> c1 = Categorical(['a','b','c'])
    >>> c2 = Categorical(['d','e','f'])
    >>> combined = Categorical.hstack([c1,c2])
    >>> combined
    Categorical([a, b, c, d, e, f]) Length: 6
        FastArray([1, 2, 3, 4, 5, 6]) Base Index: 1
        FastArray([b'a', b'b', b'c', b'd', b'e', b'f'], dtype='|S1') Unique count: 6
    '''
    def attrs_match(attrlist, name):
        # ensure certain attributes are the same for all categoricals being stacked
        attrs = set(attrlist)
        if len(attrs) != 1:
            raise TypeError(
                f"hstack found {len(attrlist)} different values of the '{name}' attribute in provided Categoricals. Must all be the same."
            )
        return list(attrs)[0]

    # collect all the categorical modes and all the base indexes
    modes = []
    bases = []
    for cat in cats:
        if not isinstance(cat, TypeRegister.Categorical):
            raise TypeError(
                f"Categorical hstack is for categoricals, not {type(cat)}")
        #if cat.base_index not in (1, None):
        #    raise TypeError(f"only categoricals with base index 1 can be merged (to preserve invalid values).")
        modes.append(cat.category_mode)
        bases.append(cat.base_index)

    # all categoricals must be in same mode and have same base index
    mode = attrs_match(modes, 'mode')
    base_index = attrs_match(bases, 'base index')

    # the first categorical determines the ordered kwarg
    ordered = cats[0].ordered
    sort_display = cats[0].sort_gb

    #==========================
    # todo: see _multistack_categoricals int rt_sds.py
    # stack indices
    # this will stack the fastarrays
    indices = rc.HStack(cats)
    idx_cutoffs = TypeRegister.FastArray([len(c._fa) for c in cats],
                                         dtype=np.int64).cumsum()

    #------------------------- start rebuild here
    if mode in (CategoryMode.Dictionary, CategoryMode.IntEnum):

        # -----------------------
        # use info from grouping objects to stack
        glist = [c.grouping for c in cats]

        underlying = hstack([[*g._grouping_dict.values()][0] for g in glist])
        # stack all unique string arrays
        listnames = hstack([g._enum.category_array for g in glist])

        # collect, measure, stack integer arrays
        listcodes = [g._enum.code_array for g in glist]
        unique_cutoffs = [
            TypeRegister.FastArray([len(c) for c in listcodes],
                                   dtype=np.int64).cumsum()
        ]
        listcodes = hstack(listcodes)

        # send in as two arrays
        listcats = [listcodes, listnames]

        # -----------------------
        base_index = None
        indices, listcats = merge_cats(indices,
                                       listcats,
                                       unique_cutoffs=unique_cutoffs,
                                       from_mapping=True,
                                       ordered=ordered,
                                       verbose=verbose)
        # TJD added check
        code = listcats[0][0]
        if isinstance(code, (int, np.integer)):
            # EXCEPT first value is string, and second is int
            newcats = dict(zip(listcats[1], listcats[0]))
        else:
            newcats = dict(zip(listcats[0], listcats[1]))

    else:
        category_dict = {}
        # now we need stack the unique cats
        for c in cats:

            # it might be multikey
            for i, v in enumerate(c.category_dict.values()):
                cv = category_dict.get(i, None)
                if cv is None:
                    category_dict[i] = [v]
                else:
                    cv.append(v)
                    category_dict[i] = cv

        listcats = []
        lastv = []
        for v in category_dict.values():
            listcats.append(hstack(v))
            lastv = v

        unique_cutoffs = [
            TypeRegister.FastArray([len(v) for v in lastv],
                                   dtype=np.int64).cumsum()
        ]

        indices, newcats = merge_cats(indices,
                                      listcats,
                                      idx_cutoffs=idx_cutoffs,
                                      unique_cutoffs=unique_cutoffs,
                                      verbose=verbose,
                                      base_index=base_index,
                                      ordered=ordered)

    newcats = TypeRegister.Grouping(indices,
                                    categories=newcats,
                                    _trusted=True,
                                    base_index=base_index,
                                    ordered=ordered,
                                    sort_display=sort_display)
    result = TypeRegister.Categorical(newcats)
    return result