def mdf_advance(self, parameter_s=""):
    """
    Advance the current context one timestep (see %mdf_timestep).

    %mdf_advance [nodes...]

    If nodes are specified their values after the time has been advanced
    are returned, eg::

        %mdf_advance mdf.now
    """
    args = tokenize(parameter_s)

    nodes = []
    if args:
        # build a list (not a lazy py3 'map' iterator): 'nodes' is
        # len()'d, indexed and iterated more than once below
        nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
                 for x in args]
        for node in nodes:
            assert isinstance(node, MDFNode)

    # advance the date on the root context; shifted child contexts pick
    # the new date up from their parent
    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx
    root_ctx.set_date(root_ctx.get_date() + self.__timestep)

    if len(nodes) > 0:
        if len(nodes) == 1:
            return cur_ctx[nodes[0]]
        return [cur_ctx[node] for node in nodes]
def mdf_advance(self, parameter_s=""):
    """
    Advance the current context one timestep (see %mdf_timestep).

    %mdf_advance [nodes...]

    If nodes are specified their values after the time has been advanced
    are returned, eg::

        %mdf_advance mdf.now
    """
    args = tokenize(parameter_s)

    nodes = []
    if args:
        # materialize as a list -- on Python 3 a bare map() is a one-shot
        # iterator and the len()/indexing below would fail
        nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
                 for x in args]
        for node in nodes:
            assert isinstance(node, MDFNode)

    # the date is set on the root context and propagates to any shifted
    # child contexts
    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx
    root_ctx.set_date(root_ctx.get_date() + self.__timestep)

    if len(nodes) > 0:
        if len(nodes) == 1:
            return cur_ctx[nodes[0]]
        return [cur_ctx[node] for node in nodes]
def mdf_evalto(self, parameter_s=""):
    """
    Advances the current context to the end date and return a pandas
    dataframe of nodes evaluated on each timestep.

    %mdf_evalto <end_date> [nodes...]

    eg: %mdf_evalto 2020-01-01 <my node 1> <my node 2>
    """
    args = tokenize(parameter_s)
    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx

    end_date, nodes = args[0], args[1:]
    end_date = _parse_datetime(end_date,
                               self.shell.user_global_ns,
                               self.shell.user_ns)
    # build a list (not a py3 'map' iterator): the shift-set handling
    # below uses len(), negative indexing and pop()
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in nodes]

    # the trailing argument may optionally be a shift set to apply to
    # the context the dataframe is built in
    df_ctx = root_ctx
    if len(nodes) > 0 and isinstance(nodes[-1], (dict, list, tuple)):
        shift_sets = _get_shift_sets(args[-1], nodes.pop())
        assert len(shift_sets) <= 1, "Only one shift set allowed for %mdf_evalto"
        if shift_sets:
            unused, shift_set = shift_sets[0]
            df_ctx = df_ctx.shift(shift_set=shift_set)

    # step the root context through the date range, collecting node
    # values on each timestep
    df_builder = DataFrameBuilder(nodes, filter=True)
    date_range = pd.DateRange(cur_ctx.get_date(), end_date,
                              offset=self.__timestep)
    for dt in date_range:
        root_ctx.set_date(dt)
        df_builder(dt, df_ctx)

    return df_builder.get_dataframe(df_ctx)
def mdf_show(self, parameter_s=""):
    """
    Opens a new mdf viewer and adds nodes to it, or adds the nodes
    to an existing viewer if one is open.

    %mdf_show [nodes...]
    """
    args = tokenize(parameter_s)
    # pass a concrete list rather than a one-shot py3 'map' iterator so
    # the viewer can safely re-iterate the nodes
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in args]
    ctx = _get_current_context()
    viewer.show(nodes, ctx=ctx)
def mdf_now(self, parameter_s=""):
    """
    Gets or sets the date of the current context.

    %mdf_now [date]
    """
    ctx = _get_current_context()
    if parameter_s:
        # setter form: parse the argument and apply it to the root
        # context so shifted child contexts see the new date too
        new_date = _parse_datetime(parameter_s,
                                   self.shell.user_global_ns,
                                   self.shell.user_ns)
        root = ctx.get_parent() or ctx
        root.set_date(new_date)
    return ctx.get_date()
def mdf_vars(self, parameter_s=""):
    """
    Print the values of varnodes a node or list of nodes are dependent on.

    %mdf_vars [<node>] [[category,...]]

    If no nodes are specified all nodes that are currently known
    about will be examined.
    """
    # an optional trailing "[cat1, cat2, ...]" restricts the categories
    categories = None
    if parameter_s.strip().endswith("]") and "[" in parameter_s:
        parameter_s, categories = parameter_s.rstrip("]").rsplit("[", 1)
        categories = [x.strip() for x in categories.strip().split(",")]
        parameter_s = parameter_s.strip()

    nodes = parameter_s.strip().split(" ") if parameter_s else []
    # must be a list, not a py3 'map' iterator: 'nodes' is iterated in
    # the warning loop and then reused in visit_nodes() below -- an
    # iterator would already be exhausted (yet still truthy) there
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in nodes]

    curr_ctx = _get_current_context()
    for node in nodes:
        if not (node.has_value(curr_ctx) or node.was_called(curr_ctx)):
            # 'warning' - 'warn' is a deprecated logging alias
            _log.warning("%s has not yet been evaluated" % node.name)

    # get all the varnode values reachable from the root nodes
    varnode_values = {}
    def visitor(node, ctx):
        if isinstance(node, MDFVarNode) \
        and ctx is curr_ctx \
        and node is not now:
            varnode_values[node] = ctx[node]
        return True
    curr_ctx.visit_nodes(visitor,
                         root_nodes=nodes or None,
                         categories=categories or None)

    # put the results in a dataframe with the ctx ids as columns,
    # ordered by (categories, short name)
    nodes = sorted(varnode_values.keys(),
                   key=lambda x: (sorted(x.categories), x.short_name))
    df = pd.DataFrame(data={}, index=nodes,
                      columns=["Value", "Category"], dtype=object)
    for node, value in varnode_values.items():
        df["Value"][node] = value
        df["Category"][node] = ",".join(
            ["%s" % (c or "") for c in sorted(node.categories)])

    if df.index.size == 0:
        print("No matching dependencies found - has the node been evaluated?")
        return

    df.index = [n.short_name for n in df.index]
    print(df.to_string(float_format=lambda x: "%.3f" % x))
def mdf_show(self, parameter_s=""):
    """
    Opens a new mdf viewer and adds nodes to it, or adds the nodes
    to an existing viewer if one is open.

    %mdf_show [nodes...]
    """
    args = tokenize(parameter_s)
    # evaluate each token into a concrete list of nodes; a lazy py3
    # 'map' iterator could only be consumed once by the viewer
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in args]
    ctx = _get_current_context()
    viewer.show(nodes, ctx=ctx)
def mdf_ctx(self, parameter_s=""):
    """
    Gets or sets the current context.

    %mdf_ctx [new_ctx]
    """
    result = _get_current_context()
    if parameter_s:
        # evaluate the argument in the user's namespace and activate it
        # as the current context
        new_ctx = eval(parameter_s,
                       self.shell.user_global_ns,
                       self.shell.user_ns)
        assert isinstance(new_ctx, MDFContext)
        new_ctx._activate_ctx()
        result = new_ctx
    return result
def mdf_evalto(self, parameter_s=""):
    """
    Advances the current context to the end date and return a pandas
    dataframe of nodes evaluated on each timestep.

    %mdf_evalto <end_date> [nodes...]

    eg: %mdf_evalto 2020-01-01 <my node 1> <my node 2>
    """
    args = tokenize(parameter_s)
    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx

    end_date, nodes = args[0], args[1:]
    end_date = _parse_datetime(end_date,
                               self.shell.user_global_ns,
                               self.shell.user_ns)
    # a list, not a py3 'map' iterator: len(), nodes[-1] and pop()
    # below all require a real sequence
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in nodes]

    # an optional trailing shift set shifts the context the dataframe
    # is built in
    df_ctx = root_ctx
    if len(nodes) > 0 and isinstance(nodes[-1], (dict, list, tuple)):
        shift_sets = _get_shift_sets(args[-1], nodes.pop())
        assert len(shift_sets) <= 1, "Only one shift set allowed for %mdf_evalto"
        if shift_sets:
            unused, shift_set = shift_sets[0]
            df_ctx = df_ctx.shift(shift_set=shift_set)

    # advance over the date range collecting node values each timestep
    df_builder = DataFrameBuilder(nodes, filter=True)
    date_range = pd.DateRange(cur_ctx.get_date(), end_date,
                              offset=self.__timestep)
    for dt in date_range:
        root_ctx.set_date(dt)
        df_builder(dt, df_ctx)

    return df_builder.get_dataframe(df_ctx)
def mdf_vars(self, parameter_s=""):
    """
    Print the values of varnodes a node or list of nodes are dependent on.

    %mdf_vars [<node>] [[category,...]]

    If no nodes are specified all nodes that are currently known
    about will be examined.
    """
    # strip an optional trailing "[cat1, cat2, ...]" category filter
    categories = None
    if parameter_s.strip().endswith("]") and "[" in parameter_s:
        parameter_s, categories = parameter_s.rstrip("]").rsplit("[", 1)
        categories = [x.strip() for x in categories.strip().split(",")]
        parameter_s = parameter_s.strip()

    nodes = parameter_s.strip().split(" ") if parameter_s else []
    # use a list comprehension: 'nodes' is consumed twice (warning loop
    # and visit_nodes), which a py3 'map' iterator cannot support
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in nodes]

    curr_ctx = _get_current_context()
    for node in nodes:
        if not (node.has_value(curr_ctx) or node.was_called(curr_ctx)):
            # logging's 'warn' alias is deprecated; use 'warning'
            _log.warning("%s has not yet been evaluated" % node.name)

    # get all the varnode values
    varnode_values = {}
    def visitor(node, ctx):
        if isinstance(node, MDFVarNode) \
        and ctx is curr_ctx \
        and node is not now:
            varnode_values[node] = ctx[node]
        return True
    curr_ctx.visit_nodes(visitor,
                         root_nodes=nodes or None,
                         categories=categories or None)

    # put the results in a dataframe with the ctx ids as columns
    nodes = sorted(varnode_values.keys(),
                   key=lambda x: (sorted(x.categories), x.short_name))
    df = pd.DataFrame(data={}, index=nodes,
                      columns=["Value", "Category"], dtype=object)
    for node, value in varnode_values.items():
        df["Value"][node] = value
        df["Category"][node] = ",".join(
            ["%s" % (c or "") for c in sorted(node.categories)])

    if df.index.size == 0:
        print("No matching dependencies found - has the node been evaluated?")
        return

    df.index = [n.short_name for n in df.index]
    print(df.to_string(float_format=lambda x: "%.3f" % x))
def _magic_dataframe(self, parameter_s, widepanel=False, single_df=True):
    """
    Implementation for magic_dataframe and magic_widepanel.

    Parses ``parameter_s`` as::

        <start> <end> node, node, ... [shifts=[{x:1}, {x:2}]] [||N]

    runs the nodes over the date range (optionally once per shift set,
    optionally across N processes) and returns the resulting
    dataframe(s) or widepanel(s).
    """
    # the first two arguments are dates, and after that it's a list of
    # nodes with some optional keyword args,
    # ie %mdf_df <start> <end> node, node, node, shifts=[{x:1}, {x:2}]
    args = arg_names = tokenize(parameter_s)
    args = [_try_eval(x, self.shell.user_global_ns, self.shell.user_ns)
            for x in args]
    args = list(zip(arg_names, args))

    start = None
    if len(args) > 0:
        arg_name, arg = args.pop(0)
        start = _parse_datetime(arg_name,
                                self.shell.user_global_ns,
                                self.shell.user_ns)

    end = None
    if len(args) > 0:
        arg_name, arg = args.pop(0)
        end = _parse_datetime(arg_name,
                              self.shell.user_global_ns,
                              self.shell.user_ns)

    # the final argument can be the number of processes to use ("||N")
    num_processes = 0
    if len(args) > 0:
        arg_name, arg = args[-1]
        if isinstance(arg, basestring) and arg.startswith("||"):
            arg_name, arg = args.pop()
            num_processes = int(arg[2:])

    # the next to last parameter may be a shift set or list of shift sets
    has_shifts = False
    shift_sets = [{}]  # always have at least one empty shift set
    shift_names = ["_0"]

    arg_name, arg = args[-1] if len(args) > 0 else (None, None)
    # guard on 'args': when it's empty 'arg' is None (not an MDFNode)
    # and the unguarded pop() would raise IndexError
    if args and not isinstance(arg, MDFNode):
        arg_name, arg = args.pop()
        named_shift_sets = _get_shift_sets(arg_name, arg)
        if named_shift_sets:
            shift_names, shift_sets = zip(*named_shift_sets)
            has_shifts = True

    # any remaining arguments are the nodes
    nodes = []
    node_var_names = []
    for arg_name, node in args:
        assert isinstance(node, MDFNode), "%s is not a node" % arg_name
        nodes.append(node)
        node_var_names.append(arg_name)

    curr_ctx = _get_current_context()
    ctxs = [None] * len(nodes)

    if not nodes:
        # get the selected nodes from the viewer
        if _viewer_imported:
            selected = viewer.get_selected()
            # guard: zip(*[]) would fail to unpack into two names
            if selected:
                ctxs, nodes = zip(*selected)
                # zip yields tuples; ctxs must be a mutable list since
                # entries are replaced with None below
                ctxs = list(ctxs)
                for i, (ctx, node) in enumerate(selected):
                    assert ctx.is_shift_of(curr_ctx), \
                        "selected node '%s' is not in the current context" % node.name
                    # replace any contexts that are simply the current
                    # context with None so that shifting works correctly
                    if ctx is curr_ctx:
                        ctxs[i] = None

    # if there are shifts then all the contexts have to be None otherwise
    # the shifts won't work correctly. This could be relaxed later if it
    # causes problems, but for now this makes the code simpler.
    if has_shifts:
        assert np.array([x is None for x in ctxs]).all(), \
            "Can't apply shifts when contexts are explicitly specified"

    # list df_builders, one per node or group of nodes
    callbacks = []
    df_builders = []
    if widepanel or not single_df:
        # build multiple dataframes
        for node, ctx in zip(nodes, ctxs):
            if ctx is None:
                df_builder = DataFrameBuilder([node], filter=True)
            else:
                df_builder = DataFrameBuilder([node], contexts=[ctx],
                                              filter=True)
            df_builders.append(df_builder)
    else:
        # build a single dataframe
        if np.array([x is None for x in ctxs]).all():
            df_builder = DataFrameBuilder(nodes, filter=True)
        else:
            df_builder = DataFrameBuilder(nodes, contexts=ctxs, filter=True)
        df_builders.append(df_builder)

    # add all the dataframe builders to the callbacks
    callbacks.extend(df_builders)

    root_ctx = curr_ctx.get_parent() or curr_ctx
    date_range = pd.DateRange(start, end, offset=self.__timestep)

    # Add a progress bar to the callbacks
    callbacks.append(ProgressBar(date_range[0], date_range[-1]))

    shifted_ctxs = run(date_range, callbacks,
                       ctx=root_ctx, shifts=shift_sets,
                       num_processes=num_processes)

    if not has_shifts:
        shifted_ctxs = [root_ctx]

    # when returning a list of results because multiple shifts have been
    # specified use a named tuple with the items being the names of the
    # shifts
    tuple_ctr = tuple
    if has_shifts:
        tuple_ctr = partial(ShiftedResultsTuple, shift_names)

    if widepanel:
        wps = []
        for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                      shift_sets,
                                                      shifted_ctxs):
            wp_dict = {}
            for node_var_name, df_builder in zip(node_var_names,
                                                 df_builders):
                wp_dict[node_var_name] = df_builder.get_dataframe(shifted_ctx)
            wp = pd.WidePanel.from_dict(wp_dict)
            if has_shifts:
                wp = WidePanelWithShiftSet(wp, shift_name, shift_set)
            wps.append(wp)

        if len(wps) == 1:
            return wps[0]
        return tuple_ctr(*wps)

    # build a list of lists of dataframes
    # [[dfs for one shift set], [dfs for next shift set], ...]
    df_lists = []
    for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                  shift_sets,
                                                  shifted_ctxs):
        dfs = []
        for df_builder in df_builders:
            df = df_builder.get_dataframe(shifted_ctx)
            if has_shifts:
                df = DataFrameWithShiftSet(df, shift_name, shift_set)
            dfs.append(df)
        df_lists.append(dfs)

    if single_df:
        # flatten into a single list (one dataframe per shift)
        dfs = reduce(operator.add, df_lists, [])
        if len(dfs) == 1:
            return dfs[0]
        return tuple_ctr(*dfs)

    if len(df_lists) == 1:
        return df_lists[0]
    return tuple_ctr(*df_lists)
def _magic_dataframe(self, parameter_s, widepanel=False, single_df=True):
    """
    Implementation for magic_dataframe and magic_widepanel.

    ``parameter_s`` has the form::

        <start> <end> node, node, ... [shifts=[{x:1}, {x:2}]] [||N]

    The nodes are evaluated over the date range, once per shift set
    (optionally across N processes), returning dataframe(s) or
    widepanel(s).
    """
    # the first two arguments are dates, and after that it's a list of
    # nodes with some optional keyword args,
    # ie %mdf_df <start> <end> node, node, node, shifts=[{x:1}, {x:2}]
    args = arg_names = tokenize(parameter_s)
    args = [_try_eval(x, self.shell.user_global_ns, self.shell.user_ns)
            for x in args]
    args = list(zip(arg_names, args))

    start = None
    if len(args) > 0:
        arg_name, arg = args.pop(0)
        start = _parse_datetime(arg_name,
                                self.shell.user_global_ns,
                                self.shell.user_ns)

    end = None
    if len(args) > 0:
        arg_name, arg = args.pop(0)
        end = _parse_datetime(arg_name,
                              self.shell.user_global_ns,
                              self.shell.user_ns)

    # the final argument can be the number of processes to use ("||N")
    num_processes = 0
    if len(args) > 0:
        arg_name, arg = args[-1]
        if isinstance(arg, basestring) and arg.startswith("||"):
            arg_name, arg = args.pop()
            num_processes = int(arg[2:])

    # the next to last parameter may be a shift set or list of shift sets
    has_shifts = False
    shift_sets = [{}]  # always have at least one empty shift set
    shift_names = ["_0"]

    arg_name, arg = args[-1] if len(args) > 0 else (None, None)
    # the 'args' guard prevents popping from an empty list (None is not
    # an MDFNode, so the unguarded branch would be taken)
    if args and not isinstance(arg, MDFNode):
        arg_name, arg = args.pop()
        named_shift_sets = _get_shift_sets(arg_name, arg)
        if named_shift_sets:
            shift_names, shift_sets = zip(*named_shift_sets)
            has_shifts = True

    # any remaining arguments are the nodes
    nodes = []
    node_var_names = []
    for arg_name, node in args:
        assert isinstance(node, MDFNode), "%s is not a node" % arg_name
        nodes.append(node)
        node_var_names.append(arg_name)

    curr_ctx = _get_current_context()
    ctxs = [None] * len(nodes)

    if not nodes:
        # get the selected nodes from the viewer
        if _viewer_imported:
            selected = viewer.get_selected()
            # an empty selection would make the zip unpack fail
            if selected:
                ctxs, nodes = zip(*selected)
                # convert the tuple from zip to a list: entries are
                # overwritten with None below
                ctxs = list(ctxs)
                for i, (ctx, node) in enumerate(selected):
                    assert ctx.is_shift_of(curr_ctx), \
                        "selected node '%s' is not in the current context" % node.name
                    # replace any contexts that are simply the current
                    # context with None so that shifting works correctly
                    if ctx is curr_ctx:
                        ctxs[i] = None

    # if there are shifts then all the contexts have to be None otherwise
    # the shifts won't work correctly. This could be relaxed later if it
    # causes problems, but for now this makes the code simpler.
    if has_shifts:
        assert np.array([x is None for x in ctxs]).all(), \
            "Can't apply shifts when contexts are explicitly specified"

    # list df_builders, one per node or group of nodes
    callbacks = []
    df_builders = []
    if widepanel or not single_df:
        # build multiple dataframes
        for node, ctx in zip(nodes, ctxs):
            if ctx is None:
                df_builder = DataFrameBuilder([node], filter=True)
            else:
                df_builder = DataFrameBuilder([node], contexts=[ctx],
                                              filter=True)
            df_builders.append(df_builder)
    else:
        # build a single dataframe
        if np.array([x is None for x in ctxs]).all():
            df_builder = DataFrameBuilder(nodes, filter=True)
        else:
            df_builder = DataFrameBuilder(nodes, contexts=ctxs, filter=True)
        df_builders.append(df_builder)

    # add all the dataframe builders to the callbacks
    callbacks.extend(df_builders)

    root_ctx = curr_ctx.get_parent() or curr_ctx
    date_range = pd.DateRange(start, end, offset=self.__timestep)

    # Add a progress bar to the callbacks
    callbacks.append(ProgressBar(date_range[0], date_range[-1]))

    shifted_ctxs = run(date_range, callbacks,
                       ctx=root_ctx, shifts=shift_sets,
                       num_processes=num_processes)

    if not has_shifts:
        shifted_ctxs = [root_ctx]

    # when returning a list of results because multiple shifts have been
    # specified use a named tuple with the items being the names of the
    # shifts
    tuple_ctr = tuple
    if has_shifts:
        tuple_ctr = partial(ShiftedResultsTuple, shift_names)

    if widepanel:
        wps = []
        for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                      shift_sets,
                                                      shifted_ctxs):
            wp_dict = {}
            for node_var_name, df_builder in zip(node_var_names,
                                                 df_builders):
                wp_dict[node_var_name] = df_builder.get_dataframe(shifted_ctx)
            wp = pd.WidePanel.from_dict(wp_dict)
            if has_shifts:
                wp = WidePanelWithShiftSet(wp, shift_name, shift_set)
            wps.append(wp)

        if len(wps) == 1:
            return wps[0]
        return tuple_ctr(*wps)

    # build a list of lists of dataframes
    # [[dfs for one shift set], [dfs for next shift set], ...]
    df_lists = []
    for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                  shift_sets,
                                                  shifted_ctxs):
        dfs = []
        for df_builder in df_builders:
            df = df_builder.get_dataframe(shifted_ctx)
            if has_shifts:
                df = DataFrameWithShiftSet(df, shift_name, shift_set)
            dfs.append(df)
        df_lists.append(dfs)

    if single_df:
        # flatten into a single list (one dataframe per shift)
        dfs = reduce(operator.add, df_lists, [])
        if len(dfs) == 1:
            return dfs[0]
        return tuple_ctr(*dfs)

    if len(df_lists) == 1:
        return df_lists[0]
    return tuple_ctr(*df_lists)