def _hstack_date_internal(dates, subclass=TypeRegister.Date):
    '''
    Hstacks Date / DateSpan objects and returns a new Date / DateSpan object.
    Will be called by riptable.hstack() if the first item in the sequence is a Date object.

    Parameters
    ----------
    dates : list or tuple of Date / DateSpan objects

    Examples
    --------
    >>> d1 = Date('2015-02-01')
    >>> d2 = Date(['2016-02-01', '2017-02-01', '2018-02-01'])
    >>> hstack([d1, d2])
    Date([2015-02-01, 2016-02-01, 2017-02-01, 2018-02-01])
    '''
    if len(dates) == 0:
        return subclass([])

    for d in dates:
        if not isinstance(d, subclass):
            # maybe extend this to support stacking with regular DateTimeNano objects?
            raise TypeError(f'Could not perform Date.hstack() on item of type {type(d)}')

    if len(dates) == 1:
        # a single item needs no stacking; return it directly
        # (the docstring promises a Date / DateSpan, not the enclosing list)
        return dates[0]

    stacked = rc.HStack(dates)
    return subclass.newclassfrominstance(stacked, dates[0])
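
# Hedged usage sketch (not part of the original module; the _example_* helper name is
# hypothetical). It illustrates the `subclass` parameter above: when the inputs are
# DateSpan objects, passing subclass=TypeRegister.DateSpan keeps the stacked result a
# DateSpan. The DateSpan inputs are derived from Date subtraction, which is assumed to
# yield day-resolution DateSpan arrays.
def _example_hstack_datespan():
    # DateSpan arrays obtained from Date subtraction
    ds1 = TypeRegister.Date(['2018-01-05', '2018-02-05']) - TypeRegister.Date(['2018-01-01', '2018-02-01'])
    ds2 = TypeRegister.Date(['2018-03-08']) - TypeRegister.Date(['2018-03-01'])
    # result is a single DateSpan of length 3 built from the stacked underlying arrays
    return _hstack_date_internal([ds1, ds2], subclass=TypeRegister.DateSpan)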
def _hstack_timespan(tspans, destroy=False):
    '''
    TODO: maybe add type checking? This is a very simple class; rewrap the hstack result in the class.
    NOTE: destroy is ignored.
    '''
    ts = rc.HStack(tspans)
    return TypeRegister.TimeSpan(ts)
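
# Hedged usage sketch (not part of the original module; the _example_* helper name is
# hypothetical). It shows the kind of input _hstack_timespan expects: a list of TimeSpan
# arrays, which are concatenated and rewrapped. The TimeSpan inputs below are produced by
# DateTimeNano subtraction and scalar arithmetic, both assumed to return TimeSpan arrays.
def _example_hstack_timespan():
    start = TypeRegister.DateTimeNano(['2018-01-01 09:30:00', '2018-01-01 10:00:00'], from_tz='NYC')
    end = TypeRegister.DateTimeNano(['2018-01-01 16:00:00', '2018-01-01 16:00:00'], from_tz='NYC')
    ts1 = end - start          # elapsed durations as a TimeSpan
    ts2 = ts1 * 2              # scalar arithmetic on a TimeSpan also yields a TimeSpan
    # the helper concatenates the underlying values and rewraps them as a single TimeSpan
    return _hstack_timespan([ts1, ts2])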
def _hstack_datetimenano(dtlist, destroy=False):
    '''
    Performs an hstack on a list of DateTimeNano objects.
    All items in the list must have their display set to the same timezone.
    NOTE: destroy is ignored.
    '''
    # make sure all of the DateTimeNano objects are set to be displayed relative to the same timezone
    timezone = dtlist[0]._timezone._timezone_str
    for dt in dtlist:
        if not isinstance(dt, TypeRegister.DateTimeNano):
            raise TypeError("Items to be hstacked must be DateTimeNano objects.")
        if dt._timezone._timezone_str != timezone:
            raise NotImplementedError("Can only hstack DateTimeNano objects in the same timezone.")

    if len(dtlist) == 1:
        # a single item needs no stacking; return it directly rather than the enclosing list
        return dtlist[0]

    # hstack the int64 UTC nanosecond arrays
    arr = rc.HStack(dtlist)

    # reconstruct with the first item as the template instance
    return TypeRegister.DateTimeNano.newclassfrominstance(arr, dtlist[0])
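
# Hedged usage sketch (not part of the original module; the _example_* helper name is
# hypothetical). It demonstrates the same-timezone requirement enforced above: both
# inputs are built with the same from_tz, so their display timezone strings match and
# the stack succeeds; mixing timezones would raise NotImplementedError.
def _example_hstack_datetimenano():
    dtn1 = TypeRegister.DateTimeNano(['2018-01-01 09:30:00'], from_tz='NYC')
    dtn2 = TypeRegister.DateTimeNano(['2018-01-02 09:30:00', '2018-01-03 09:30:00'], from_tz='NYC')
    # result is a single DateTimeNano of length 3 reconstructed from dtn1's metadata
    return _hstack_datetimenano([dtn1, dtn2])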
def _hstack_categorical(cats: list, verbose: bool = False, destroy: bool = False):
    '''
    HStack Categoricals.

    The unique categories will be merged into a new unique list.
    The indices will be fixed to point to the new category array.
    The indices are hstacked and a new categorical is returned.

    Parameters
    ----------
    cats : list of Categorical
        Cats must be a list of categoricals.
    verbose : bool
        Enable verbose output. Defaults to False.
    destroy : bool
        This parameter is ignored by this function.

    Returns
    -------
    Categorical

    Examples
    --------
    >>> c1 = Categorical(['a','b','c'])
    >>> c2 = Categorical(['d','e','f'])
    >>> combined = Categorical.hstack([c1,c2])
    >>> combined
    Categorical([a, b, c, d, e, f]) Length: 6
      FastArray([1, 2, 3, 4, 5, 6]) Base Index: 1
      FastArray([b'a', b'b', b'c', b'd', b'e', b'f'], dtype='|S1') Unique count: 6
    '''
    def attrs_match(attrlist, name):
        # ensure certain attributes are the same for all categoricals being stacked
        attrs = set(attrlist)
        if len(attrs) != 1:
            raise TypeError(
                f"hstack found {len(attrs)} different values of the '{name}' attribute in the provided Categoricals. They must all be the same."
            )
        return list(attrs)[0]

    # collect all the categorical modes and all the base indexes
    modes = []
    bases = []
    for cat in cats:
        if not isinstance(cat, TypeRegister.Categorical):
            raise TypeError(f"Categorical hstack is for categoricals, not {type(cat)}")
        #if cat.base_index not in (1, None):
        #    raise TypeError(f"only categoricals with base index 1 can be merged (to preserve invalid values).")
        modes.append(cat.category_mode)
        bases.append(cat.base_index)

    # all categoricals must be in the same mode and have the same base index
    mode = attrs_match(modes, 'mode')
    base_index = attrs_match(bases, 'base index')

    # the first categorical determines the ordered kwarg
    ordered = cats[0].ordered
    sort_display = cats[0].sort_gb

    #==========================
    # todo: see _multistack_categoricals in rt_sds.py
    # stack indices
    # this will stack the fastarrays holding the indices
    indices = rc.HStack(cats)
    idx_cutoffs = TypeRegister.FastArray([len(c._fa) for c in cats], dtype=np.int64).cumsum()

    #------------------------- start rebuild here
    if mode in (CategoryMode.Dictionary, CategoryMode.IntEnum):
        # -----------------------
        # use info from grouping objects to stack
        glist = [c.grouping for c in cats]
        underlying = hstack([[*g._grouping_dict.values()][0] for g in glist])

        # stack all unique string arrays
        listnames = hstack([g._enum.category_array for g in glist])

        # collect, measure, stack integer code arrays
        listcodes = [g._enum.code_array for g in glist]
        unique_cutoffs = [TypeRegister.FastArray([len(c) for c in listcodes], dtype=np.int64).cumsum()]
        listcodes = hstack(listcodes)

        # send in as two arrays
        listcats = [listcodes, listnames]

        # -----------------------
        base_index = None
        indices, listcats = merge_cats(indices, listcats, unique_cutoffs=unique_cutoffs, from_mapping=True, ordered=ordered, verbose=verbose)

        # TJD added check: the order of the returned arrays may vary,
        # so detect which one holds the integer codes before building the mapping dict
        code = listcats[0][0]
        if isinstance(code, (int, np.integer)):
            # first array holds the integer codes, second holds the names
            newcats = dict(zip(listcats[1], listcats[0]))
        else:
            # first array holds the names, second holds the integer codes
            newcats = dict(zip(listcats[0], listcats[1]))
    else:
        category_dict = {}

        # now we need to stack the unique cats
        for c in cats:
            # it might be multikey
            for i, v in enumerate(c.category_dict.values()):
                cv = category_dict.get(i, None)
                if cv is None:
                    category_dict[i] = [v]
                else:
                    cv.append(v)
                    category_dict[i] = cv

        listcats = []
        lastv = []
        for v in category_dict.values():
            listcats.append(hstack(v))
            lastv = v

        unique_cutoffs = [TypeRegister.FastArray([len(v) for v in lastv], dtype=np.int64).cumsum()]
        indices, newcats = merge_cats(indices, listcats, idx_cutoffs=idx_cutoffs, unique_cutoffs=unique_cutoffs, verbose=verbose, base_index=base_index, ordered=ordered)

    newcats = TypeRegister.Grouping(indices, categories=newcats, _trusted=True, base_index=base_index, ordered=ordered, sort_display=sort_display)
    result = TypeRegister.Categorical(newcats)
    return result
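
# Hedged usage sketch (not part of the original module; the _example_* helper name is
# hypothetical). The docstring example above covers string-mode Categoricals; this sketch
# exercises the Dictionary / IntEnum branch, where the integer codes and names are merged
# via merge_cats(from_mapping=True). The Categorical(codes, mapping) constructor form is
# assumed here for illustration.
def _example_hstack_categorical_mapping():
    mapping = {'LOW': 1, 'HIGH': 2}
    c1 = TypeRegister.Categorical([1, 2, 1], mapping)
    c2 = TypeRegister.Categorical([2, 2], mapping)
    # result has 5 entries and a merged {name: code} mapping as its categories
    return _hstack_categorical([c1, c2])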