def get_chain(self, name=None, data_keys=None, filters=None, x=None, y=None,
              views=None, post_process=True, orient_on=None, select=None):
    """
    Construct a "chain" shaped subset of Links and their Views from the Stack.

    A chain is a one-to-one or one-to-many relation with an orientation that
    defines from which axis (x or y) it is built.

    Parameters
    ----------
    name : str, optional
        If not provided the name of the chain is generated automatically.
    data_keys, filters, x, y, views : str or list of str
        Views will be added reflecting the order in ``views`` parameter. If
        both ``x`` and ``y`` have multiple items, you must specify the
        ``orient_on`` parameter. ``filters`` is not converted to a list;
        without ``orient_on`` it is used as one key, and ``"no_filter"``
        is used when it is None.
    post_process : bool, default True
        If file meta is found, views inside the chain will get their Values-
        axes codes checked against the category lists and missing codes will
        be added.
    orient_on : {'x', 'y'}, optional
        Must be specified if both ``x`` and ``y`` are lists of multiple
        items. When given, the request is delegated to ``__get_chains``.
    select : iterable, optional
        Index labels to keep. Each entry is looked up in the index levels of
        a view's dataframe with ``functions.find_variable_level``; entries
        that are not found are ignored and a view is left untouched when no
        entry is found. The selection is applied only to the views of the
        last link that was added to the chain.

    Returns
    -------
    chain : Chain object instance
        When ``orient_on`` is given, the result of ``__get_chains`` is
        returned instead.

    Raises
    ------
    ValueError
        If the ``data_keys`` check against the stack fails.
    """
    # Make sure the given keys are in lists. ``filters`` is left as given.
    data_keys = self._force_key_as_list(data_keys)
    views = self._force_key_as_list(views)

    if orient_on:
        # Oriented request: fill in whatever was not supplied and delegate.
        if x is None:
            x = self.describe()['x'].drop_duplicates().values.tolist()
        if y is None:
            y = self.describe()['y'].drop_duplicates().values.tolist()
        if views is None:
            views = [
                v for v in self._Stack__view_keys if '|default|' not in v
            ]
        return self.__get_chains(name=name, data_keys=data_keys,
                                 filters=filters, x=x, y=y, views=views,
                                 post_process=post_process,
                                 orientation=orient_on, select=select)

    chain = Chain(name)
    # Only membership is needed, so a set is enough.
    found_views = set()

    # Make sure the given keys are in lists
    x = self._force_key_as_list(x)
    y = self._force_key_as_list(y)

    if data_keys is None:
        # Apply lazy data_keys if none given
        data_keys = self.keys()

    the_filter = "no_filter" if filters is None else filters

    if not self.__has_list(data_keys):
        raise ValueError(
            'One or more of your data_keys ({data_keys}) is not in the stack ({stack_keys})'
            .format(data_keys=data_keys, stack_keys=self.keys()))

    # Use the describe method to get x/y keys if not supplied. The result
    # does not depend on the data key, so it is computed once up front.
    if x is None or y is None:
        description = self.describe()
    if x is None:
        x_keys = description['x'].drop_duplicates().values.tolist()
    else:
        x_keys = x
    if y is None:
        y_keys = description['y'].drop_duplicates().values.tolist()
    else:
        y_keys = y

    # (data_key, x_key, y_key) of the most recently visited link; the
    # ``select`` step below works on that link only.
    last_link_path = None

    for key in data_keys:
        chain._validate_x_y_combination(x_keys, y_keys, orient_on)
        # ``views`` is passed through unchanged; None means that whole
        # links are copied below.
        chain._derive_attributes(key, the_filter, x_keys, y_keys, views)

        # Apply lazy name if none given
        if name is None:
            chain._lazy_name()

        for x_key in x_keys:
            for y_key in y_keys:
                last_link_path = (key, x_key, y_key)

                if views is None:
                    chain[key][the_filter][x_key][y_key] = self[
                        key][the_filter][x_key][y_key]
                    continue

                for view in views:
                    try:
                        chain[key][the_filter][x_key][y_key][view] = self[
                            key][the_filter][x_key][y_key][view]
                    except KeyError:
                        # Best effort: a view missing on this link is
                        # simply not added to the chain.
                        continue
                    found_views.add(view)

    if found_views:
        # Keep only the views that exist on at least one link.
        chain.views = [view for view in chain.views if view in found_views]

    if post_process:
        chain._post_process_shapes(self[chain.data_key].meta)

    # Skip the selection when no link was visited (nothing to select from).
    if select is not None and last_link_path is not None:
        key, x_key, y_key = last_link_path
        link = chain[key][the_filter][x_key][y_key]
        for view in link:
            df = link[view].dataframe
            levels = df.index.levels

            # Map each requested label to the index level it lives in.
            selection = {}
            for var in select:
                level = functions.find_variable_level(levels, var)
                if level is not None:
                    selection[var] = level

            # Don't do anything if the selection doesn't produce a result
            if not selection:
                continue

            # A real list keeps the concat order and the rebuilt index in
            # step and avoids handing a dict view to pandas.
            selected = list(selection)
            new_df = pd.concat(
                [df.xs(var, level=selection[var]) for var in selected])
            # Reconstruct the index
            new_df.index = pd.MultiIndex.from_product(
                [levels[0], selected], names=df.index.names)
            link[view].dataframe = new_df

    return chain
def get_chain(self, name=None, data_keys=None, filters=None, x=None, y=None,
              views=None, post_process=True, orient_on=None, select=None):
    """
    Construct a "chain" shaped subset of Links and their Views from the Stack.

    A chain is a one-to-one or one-to-many relation with an orientation that
    defines from which axis (x or y) it is built.

    Parameters
    ----------
    name : str, optional
        If not provided the name of the chain is generated automatically.
    data_keys, filters, x, y, views : str or list of str
        Views will be added reflecting the order in ``views`` parameter. If
        both ``x`` and ``y`` have multiple items, you must specify the
        ``orient_on`` parameter. ``filters`` is not converted to a list;
        without ``orient_on`` it is used as one key, and ``"no_filter"``
        is used when it is None.
    post_process : bool, default True
        If file meta is found, views inside the chain will get their Values-
        axes codes checked against the category lists and missing codes will
        be added.
    orient_on : {'x', 'y'}, optional
        Must be specified if both ``x`` and ``y`` are lists of multiple
        items. When given, the request is delegated to ``__get_chains``.
    select : iterable, optional
        Index labels to keep. Each entry is looked up in the index levels of
        a view's dataframe with ``functions.find_variable_level``; entries
        that are not found are ignored and a view is left untouched when no
        entry is found. The selection is applied only to the views of the
        last link that was added to the chain.

    Returns
    -------
    chain : Chain object instance
        When ``orient_on`` is given, the result of ``__get_chains`` is
        returned instead.

    Raises
    ------
    ValueError
        If the ``data_keys`` check against the stack fails.
    """
    # Make sure the given keys are in lists. ``filters`` is left as given.
    data_keys = self._force_key_as_list(data_keys)
    views = self._force_key_as_list(views)

    if orient_on:
        # Oriented request: fill in whatever was not supplied and delegate.
        if x is None:
            x = self.describe()['x'].drop_duplicates().values.tolist()
        if y is None:
            y = self.describe()['y'].drop_duplicates().values.tolist()
        if views is None:
            views = [
                v for v in self._Stack__view_keys if '|default|' not in v
            ]
        return self.__get_chains(name=name, data_keys=data_keys,
                                 filters=filters, x=x, y=y, views=views,
                                 post_process=post_process,
                                 orientation=orient_on, select=select)

    chain = Chain(name)
    # Only membership is needed, so a set is enough.
    found_views = set()

    # Make sure the given keys are in lists
    x = self._force_key_as_list(x)
    y = self._force_key_as_list(y)

    if data_keys is None:
        # Apply lazy data_keys if none given
        data_keys = self.keys()

    the_filter = "no_filter" if filters is None else filters

    if not self.__has_list(data_keys):
        raise ValueError(
            'One or more of your data_keys ({data_keys}) is not in the stack ({stack_keys})'
            .format(data_keys=data_keys, stack_keys=self.keys()))

    # Use the describe method to get x/y keys if not supplied. The result
    # does not depend on the data key, so it is computed once up front.
    if x is None or y is None:
        description = self.describe()
    if x is None:
        x_keys = description['x'].drop_duplicates().values.tolist()
    else:
        x_keys = x
    if y is None:
        y_keys = description['y'].drop_duplicates().values.tolist()
    else:
        y_keys = y

    # (data_key, x_key, y_key) of the most recently visited link; the
    # ``select`` step below works on that link only.
    last_link_path = None

    for key in data_keys:
        chain._validate_x_y_combination(x_keys, y_keys, orient_on)
        # ``views`` is passed through unchanged; None means that whole
        # links are copied below.
        chain._derive_attributes(key, the_filter, x_keys, y_keys, views)

        # Apply lazy name if none given
        if name is None:
            chain._lazy_name()

        for x_key in x_keys:
            for y_key in y_keys:
                last_link_path = (key, x_key, y_key)

                if views is None:
                    chain[key][the_filter][x_key][y_key] = self[
                        key][the_filter][x_key][y_key]
                    continue

                for view in views:
                    try:
                        chain[key][the_filter][x_key][y_key][view] = self[
                            key][the_filter][x_key][y_key][view]
                    except KeyError:
                        # Best effort: a view missing on this link is
                        # simply not added to the chain.
                        continue
                    found_views.add(view)

    if found_views:
        # Keep only the views that exist on at least one link.
        chain.views = [view for view in chain.views if view in found_views]

    if post_process:
        chain._post_process_shapes(self[chain.data_key].meta)

    # Skip the selection when no link was visited (nothing to select from).
    if select is not None and last_link_path is not None:
        key, x_key, y_key = last_link_path
        link = chain[key][the_filter][x_key][y_key]
        for view in link:
            df = link[view].dataframe
            levels = df.index.levels

            # Map each requested label to the index level it lives in.
            selection = {}
            for var in select:
                level = functions.find_variable_level(levels, var)
                if level is not None:
                    selection[var] = level

            # Don't do anything if the selection doesn't produce a result
            if not selection:
                continue

            # A real list keeps the concat order and the rebuilt index in
            # step and avoids handing a dict view to pandas.
            selected = list(selection)
            new_df = pd.concat(
                [df.xs(var, level=selection[var]) for var in selected])
            # Reconstruct the index
            new_df.index = pd.MultiIndex.from_product(
                [levels[0], selected], names=df.index.names)
            link[view].dataframe = new_df

    return chain