def make_symmetric(arg): """Make object symmetric along the diagonal.""" checks.assert_type(arg, (pd.Series, pd.DataFrame)) arg = to_2d(arg) checks.assert_same_type(arg.index, arg.columns) if isinstance(arg.index, pd.MultiIndex): checks.assert_same_len(arg.index.names, arg.columns.names) names1, names2 = tuple(arg.index.names), tuple(arg.columns.names) else: names1, names2 = arg.index.name, arg.columns.name if names1 == names2: new_name = names1 else: if isinstance(arg.index, pd.MultiIndex): new_name = tuple(zip(*[names1, names2])) else: new_name = (names1, names2) idx_vals = np.unique(np.concatenate((arg.index, arg.columns))) arg = arg.copy() if isinstance(arg.index, pd.MultiIndex): unique_index = pd.MultiIndex.from_tuples(idx_vals, names=new_name) arg.index.names = new_name arg.columns.names = new_name else: unique_index = pd.Index(idx_vals, name=new_name) arg.index.name = new_name arg.columns.name = new_name df_out = pd.DataFrame(index=unique_index, columns=unique_index) df_out.loc[:, :] = arg df_out[df_out.isnull()] = arg.transpose() return df_out
def __init__(self, param_mappers):
    # `param_names` and `indexing_func` are expected to come from the enclosing scope
    # (e.g., the factory that generates this class)
    checks.assert_same_len(param_names, param_mappers)

    class ParamLoc:
        def __init__(self, obj, mapper):
            checks.assert_type(mapper, pd.Series)
            self.obj = obj
            if mapper.dtype == 'O':
                # If params are objects, we must cast them to string first
                # The original mapper isn't touched
                mapper = mapper.astype(str)
            self.mapper = mapper

        def get_indices(self, key):
            if self.mapper.dtype == 'O':
                # We must also cast the key to string
                if isinstance(key, slice):
                    start = str(key.start) if key.start is not None else None
                    stop = str(key.stop) if key.stop is not None else None
                    key = slice(start, stop, key.step)
                elif isinstance(key, (list, np.ndarray)):
                    key = list(map(str, key))
                else:
                    # Tuples, objects, etc.
                    key = str(key)
            mapper = self.mapper
            # Use pandas to perform indexing
            mapper = pd.Series(np.arange(len(mapper.index)), index=mapper.values)
            indices = mapper.loc.__getitem__(key)
            if isinstance(indices, pd.Series):
                indices = indices.values
            return indices

        def __getitem__(self, key):
            indices = self.get_indices(key)
            is_multiple = isinstance(key, (slice, list, np.ndarray))
            level_name = self.mapper.name  # name of the mapper should contain level names of the params

            def pd_indexing_func(obj):
                new_obj = obj.iloc[:, indices]
                if not is_multiple:
                    # If we selected only one param, remove its column level to keep it clean
                    if level_name is not None:
                        if checks.is_frame(new_obj):
                            if isinstance(new_obj.columns, pd.MultiIndex):
                                new_obj.columns = index_fns.drop_levels(new_obj.columns, level_name)
                return new_obj

            return indexing_func(self.obj, pd_indexing_func)

    for i, param_name in enumerate(param_names):
        setattr(self, f'_{param_name}_loc', ParamLoc(self, param_mappers[i]))
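# Illustration (not part of the library): a self-contained toy of the mapper trick used in
# `get_indices` above. An integer-position Series is indexed by the parameter values, so
# pandas `.loc` can resolve a parameter key (scalar, list, or slice) into column positions.
# The parameter name 'window' and its values are made up for demonstration.
import numpy as np
import pandas as pd

mapper = pd.Series([2, 2, 3, 3], name='window')  # one parameter value per output column

# Positions indexed by parameter values, mirroring get_indices()
pos = pd.Series(np.arange(len(mapper.index)), index=mapper.values)

print(pos.loc[2].values)       # [0 1]     -> columns computed with window=2
print(pos.loc[[2, 3]].values)  # [0 1 2 3] -> columns for both windows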
def make_symmetric(arg): """Make `arg` symmetric. The index and columns of the resulting DataFrame will be identical. Requires the index and columns to have the same number of levels. Example: ```python-repl >>> import pandas as pd >>> from vectorbt.utils.reshape_fns import make_symmetric >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['c', 'd']) >>> print(make_symmetric(df)) a b c d a NaN NaN 1.0 2.0 b NaN NaN 3.0 4.0 c 1.0 3.0 NaN NaN d 2.0 4.0 NaN NaN ```""" checks.assert_type(arg, (pd.Series, pd.DataFrame)) arg = to_2d(arg) checks.assert_same_type(arg.index, arg.columns) if isinstance(arg.index, pd.MultiIndex): checks.assert_same_len(arg.index.names, arg.columns.names) names1, names2 = tuple(arg.index.names), tuple(arg.columns.names) else: names1, names2 = arg.index.name, arg.columns.name if names1 == names2: new_name = names1 else: if isinstance(arg.index, pd.MultiIndex): new_name = tuple(zip(*[names1, names2])) else: new_name = (names1, names2) idx_vals = np.unique(np.concatenate((arg.index, arg.columns))) arg = arg.copy() if isinstance(arg.index, pd.MultiIndex): unique_index = pd.MultiIndex.from_tuples(idx_vals, names=new_name) arg.index.names = new_name arg.columns.names = new_name else: unique_index = pd.Index(idx_vals, name=new_name) arg.index.name = new_name arg.columns.name = new_name df_out = pd.DataFrame(index=unique_index, columns=unique_index) df_out.loc[:, :] = arg df_out[df_out.isnull()] = arg.transpose() return df_out
def group_by_to_index(index, group_by):
    """Convert mapper to `pd.Index`.

    `group_by` can be an integer (level by position), a string (level by name), a tuple or
    list (multiple levels), an index or series (named index with groups), or a NumPy array
    (raw groups).

    !!! note
        Index and mapper must have the same length."""
    if isinstance(group_by, (int, str, tuple, list)):
        group_by = select_levels(index, group_by)
    if not isinstance(group_by, pd.Index):
        group_by = pd.Index(group_by)
    checks.assert_same_len(index, group_by)
    return group_by
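# Illustration (not part of the library): how `group_by_to_index` resolves different
# `group_by` inputs against a MultiIndex. The column values are made up, and `select_levels`
# is assumed to extract the named level's values, as suggested by the function above.
import numpy as np
import pandas as pd

columns = pd.MultiIndex.from_tuples(
    [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')], names=['first', 'second'])

# By level name: the level's values become the group labels
print(group_by_to_index(columns, 'first'))   # expected: [1, 1, 2, 2] with name 'first'

# By raw array of the same length: wrapped into a plain pd.Index
print(group_by_to_index(columns, np.array([0, 0, 1, 1])))   # expected: [0, 0, 1, 1]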
def test_assert_same_len(self):
    checks.assert_same_len([[1]], [[2]])
    checks.assert_same_len([[1]], [[2, 3]])
    # Mismatched lengths must raise
    raised = False
    try:
        checks.assert_same_len([[1]], [[2], [3]])
    except Exception:
        raised = True
    assert raised
def __init__(self, param_mappers, indexing_func):
    checks.assert_same_len(param_names, param_mappers)

    for i, param_name in enumerate(param_names):
        setattr(self, f'_{param_name}_loc', _ParamLoc(self, param_mappers[i], indexing_func))
def from_params_pipeline(ts_list, param_list, level_names, num_outputs, custom_func, *args,
                         pass_lists=False, param_product=False, broadcast_kwargs={},
                         return_raw=False, **kwargs):
    """A pipeline for calculating an indicator, used by `IndicatorFactory`.

    Does the following:

    * Takes one or multiple time series objects in `ts_list` and broadcasts them. For example:

        ```python-repl
        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> ts_list = [sr, df]

        >>> ts_list = vbt.utils.reshape_fns.broadcast(*ts_list)
        >>> print(ts_list[0])
           a  b
        x  1  1
        y  2  2
        >>> print(ts_list[1])
           a  b
        x  3  4
        y  5  6
        ```

    * Takes one or multiple parameters in `param_list`, converts them to NumPy arrays and
        broadcasts them. For example:

        ```python-repl
        >>> p1, p2, p3 = 1, [2, 3, 4], [False]
        >>> param_list = [p1, p2, p3]

        >>> param_list = vbt.utils.reshape_fns.broadcast(*param_list)
        >>> print(param_list[0])
        array([1, 1, 1])
        >>> print(param_list[1])
        array([2, 3, 4])
        >>> print(param_list[2])
        array([False, False, False])
        ```

    * Performs calculation using `custom_func` to build output arrays (`output_list`) and
        other objects (`other_list`, optional). For example:

        ```python-repl
        >>> def custom_func(ts1, ts2, p1, p2, p3, *args, **kwargs):
        ...     return pd.DataFrame.vbt.concat(
        ...         (ts1.values + ts2.values) + p1[0] * p2[0],
        ...         (ts1.values + ts2.values) + p1[1] * p2[1],
        ...         (ts1.values + ts2.values) + p1[2] * p2[2]
        ...     )

        >>> output = custom_func(*ts_list, *param_list)
        >>> print(output)
        array([[ 6,  7,  7,  8,  8,  9],
               [ 9, 10, 10, 11, 11, 12]])
        ```

    * Creates new column hierarchy based on parameters and level names. For example:

        ```python-repl
        >>> p1_columns = pd.Index(param_list[0], name='p1')
        >>> p2_columns = pd.Index(param_list[1], name='p2')
        >>> p3_columns = pd.Index(param_list[2], name='p3')
        >>> p_columns = vbt.utils.index_fns.stack(p1_columns, p2_columns, p3_columns)
        >>> new_columns = vbt.utils.index_fns.combine(p_columns, ts_list[0].columns)

        >>> output_df = pd.DataFrame(output, columns=new_columns)
        >>> print(output_df)
        p1      1      1      1      1      1      1
        p2      2      2      3      3      4      4
        p3  False  False  False  False  False  False
                a      b      a      b      a      b
        0       6      7      7      8      8      9
        1       9     10     10     11     11     12
        ```

    * Broadcasts objects in `ts_list` to match the shape of objects in `output_list` through
        tiling. This is done to be able to compare them and generate signals, since you cannot
        compare NumPy arrays that have totally different shapes, such as (2, 2) and (2, 6).
        For example:

        ```python-repl
        >>> new_ts_list = [
        ...     ts_list[0].vbt.tile(len(param_list[0]), as_columns=p_columns),
        ...     ts_list[1].vbt.tile(len(param_list[0]), as_columns=p_columns)
        ... ]
        >>> print(new_ts_list[0])
        p1      1      1      1      1      1      1
        p2      2      2      3      3      4      4
        p3  False  False  False  False  False  False
                a      b      a      b      a      b
        0       1      1      1      1      1      1
        1       2      2      2      2      2      2
        ```

    * Builds parameter mappers that will link parameters from `param_list` to columns in
        `ts_list` and `output_list`. This is done to enable column indexing using parameter values.

    Args:
        ts_list (list of array_like): A list of time series objects. At least one must be a pandas object.
        param_list (list of array_like): A list of parameters. Each element is either an array-like
            object or a single value of any type.
        level_names (list of str): A list of column level names corresponding to each parameter.
        num_outputs (int): The number of output arrays.
        custom_func (function): A custom calculation function. See `IndicatorFactory.from_custom_func`.
        *args: Arguments passed to the `custom_func`.
        pass_lists (bool): If True, arguments are passed to the `custom_func` as lists. Defaults to False.
        param_product (bool): If True, builds a Cartesian product out of all parameters. Defaults to False.
        broadcast_kwargs (dict, optional): Keyword arguments passed to `vectorbt.utils.reshape_fns.broadcast`
            on time series objects.
        return_raw (bool): If True, returns the raw output without post-processing. Defaults to False.
        **kwargs: Keyword arguments passed to the `custom_func`.

            Some common arguments include `return_cache` to return cache and `cache` to pass cache.
            Those are only applicable to `custom_func` that supports it (`custom_func` created using
            `IndicatorFactory.from_apply_func` are supported by default).

    Returns:
        A list of transformed inputs (`pandas_like`), a list of generated outputs (`pandas_like`),
        a list of parameter arrays (`numpy.ndarray`), a list of parameter mappers (`pandas.Series`),
        a list of other generated outputs that are outside of `num_outputs`.
    """
    # Check time series objects
    checks.assert_type(ts_list[0], (pd.Series, pd.DataFrame))
    for i in range(1, len(ts_list)):
        ts_list[i].vbt.timeseries.validate()
    if len(ts_list) > 1:
        # Broadcast time series
        ts_list = reshape_fns.broadcast(*ts_list, **broadcast_kwargs, writeable=True)

    # Check level names
    checks.assert_type(level_names, (list, tuple))
    checks.assert_same_len(param_list, level_names)
    for ts in ts_list:
        # Every time series object should be free of the specified level names in its columns
        for level_name in level_names:
            checks.assert_level_not_exists(ts, level_name)

    # Convert params to 1-dim arrays
    param_list = list(map(reshape_fns.to_1d, param_list))
    if len(param_list) > 1:
        if param_product:
            # Make Cartesian product out of all params
            param_list = list(map(reshape_fns.to_1d, param_list))
            param_list = list(zip(*list(itertools.product(*param_list))))
            param_list = list(map(np.asarray, param_list))
        else:
            # Broadcast such that each array has the same length
            param_list = reshape_fns.broadcast(*param_list, writeable=True)

    # Perform main calculation
    if pass_lists:
        output_list = custom_func(ts_list, param_list, *args, **kwargs)
    else:
        output_list = custom_func(*ts_list, *param_list, *args, **kwargs)

    if return_raw or kwargs.get('return_cache', False):
        return output_list  # return raw cache outputs
    if not isinstance(output_list, (tuple, list, List)):
        output_list = [output_list]
    else:
        output_list = list(output_list)

    # Other outputs should be returned without post-processing (for example cache_dict)
    if len(output_list) > num_outputs:
        other_list = output_list[num_outputs:]
    else:
        other_list = []
    # Process only the num_outputs outputs
    output_list = output_list[:num_outputs]

    if len(param_list) > 0:
        # Build new column levels on top of time series levels
        new_columns = build_column_hierarchy(
            param_list, level_names, reshape_fns.to_2d(ts_list[0]).columns)
        # Wrap into new pandas objects both time series and output objects
        new_ts_list = list(
            map(lambda x: broadcast_ts(x, param_list[0].shape[0], new_columns), ts_list))
        # Build mappers to easily map between parameters and columns
        mapper_list = [
            build_mapper(x, ts_list[0], new_columns, level_names[i])
            for i, x in enumerate(param_list)
        ]
    else:
        # Some indicators don't have any params
        new_columns = reshape_fns.to_2d(ts_list[0]).columns
        new_ts_list = list(ts_list)
        mapper_list = []
    output_list = list(
        map(lambda x: wrap_output(x, ts_list[0], new_columns), output_list))
    if len(mapper_list) > 1:
        # Tuple object is a mapper that accepts tuples of parameters
        tuple_mapper = build_tuple_mapper(mapper_list, new_columns, tuple(level_names))
        mapper_list.append(tuple_mapper)
    return new_ts_list, output_list, param_list, mapper_list, other_list
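# Illustration (not part of the library): a minimal end-to-end sketch of calling the pipeline
# above. `price`, `sma_func`, the window values and the level name are hypothetical; only the
# argument order follows the signature of `from_params_pipeline`. The call is left commented
# out because it relies on module-level helpers (e.g. `build_column_hierarchy`) defined elsewhere.
import numpy as np
import pandas as pd

price = pd.DataFrame({'a': [1., 2., 3., 4.], 'b': [5., 6., 7., 8.]})

def sma_func(ts, windows):
    # One (n_rows, n_cols) block per window, concatenated along columns
    return np.column_stack([ts.rolling(w).mean().values for w in windows])

# new_ts_list, output_list, param_list, mapper_list, other_list = from_params_pipeline(
#     [price], [[2, 3]], ['sma_window'], 1, sma_func)
# output_list[0] would carry an 'sma_window' column level on top of ['a', 'b'],
# and mapper_list[0] would map each of its four columns to the window it was computed with.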
def test_assert_same_len(self):
    checks.assert_same_len([[1]], [[2]])
    checks.assert_same_len([[1]], [[2, 3]])
    with pytest.raises(Exception) as e_info:
        checks.assert_same_len([[1]], [[2], [3]])