def mdf_advance(self, parameter_s=""):
    """
    Advance the current context one timestep (see %mdf_timestep).

    %mdf_advance [nodes...]

    If node is specified the value of node after the time has
    been advanced is returned. If several nodes are specified a
    list of their values is returned, and if none are specified
    nothing is returned.

    eg: %mdf_advance mdf.now

    Raises AssertionError if any argument doesn't evaluate to an MDFNode.
    """
    args = tokenize(parameter_s)

    # Each argument is an expression evaluated in the interactive user's
    # namespaces. A real list is built (rather than a lazy map object)
    # because it is iterated here and then measured and indexed below.
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in args]

    # raise explicitly so the check isn't stripped when running with -O
    for arg_name, node in zip(args, nodes):
        if not isinstance(node, MDFNode):
            raise AssertionError("%s is not a node" % arg_name)

    # the date is always set on the parent of the current context when
    # there is one, otherwise on the current context itself
    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx
    root_ctx.set_date(root_ctx.get_date() + self.__timestep)

    if not nodes:
        return None
    if len(nodes) == 1:
        return cur_ctx[nodes[0]]
    return [cur_ctx[node] for node in nodes]
def mdf_advance(self, parameter_s=""):
    """
    Advance the current context one timestep (see %mdf_timestep).

    %mdf_advance [nodes...]

    If node is specified the value of node after the time has
    been advanced is returned. If several nodes are specified a
    list of their values is returned, and if none are specified
    nothing is returned.

    eg: %mdf_advance mdf.now

    Raises AssertionError if any argument doesn't evaluate to an MDFNode.
    """
    args = tokenize(parameter_s)

    # Each argument is an expression evaluated in the interactive user's
    # namespaces. A real list is built (rather than a lazy map object)
    # because it is iterated here and then measured and indexed below.
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in args]

    # raise explicitly so the check isn't stripped when running with -O
    for arg_name, node in zip(args, nodes):
        if not isinstance(node, MDFNode):
            raise AssertionError("%s is not a node" % arg_name)

    # the date is always set on the parent of the current context when
    # there is one, otherwise on the current context itself
    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx
    root_ctx.set_date(root_ctx.get_date() + self.__timestep)

    if not nodes:
        return None
    if len(nodes) == 1:
        return cur_ctx[nodes[0]]
    return [cur_ctx[node] for node in nodes]
def mdf_xl(self, parameter_s=""):
    """
    Export to excel a list of nodes evaluated over a date range, or DataFrames.

    %mdf_xl <start_date> <end_date> [nodes...]

    If no nodes are specified and the viewer is active the currently
    selected nodes are used.

    Alternatively, export one or more DataFrames directly:

    %mdf_xl df1 [, dfN ...]

    Returns the exported DataFrame when there is exactly one, otherwise
    the list of exported DataFrames.
    """
    tokens = tokenize(parameter_s)
    values = [_try_eval(token, self.shell.user_global_ns, self.shell.user_ns)
              for token in tokens]

    if not values:
        raise AssertionError("Usage: %mdf_xl <start> <end> nodes...")

    if isinstance(values[0], pd.DataFrame):
        # the first argument is a DataFrame: export every DataFrame
        # argument directly and ignore anything else
        frames = [value for value in values if isinstance(value, pd.DataFrame)]
    else:
        # otherwise build one DataFrame of nodes evaluated over a date range
        # NOTE(review): self is passed explicitly as well as being bound
        # by the attribute lookup - confirm this matches how mdf_df is
        # defined/registered.
        frames = [self.mdf_df(self, parameter_s)]

    excel.export_dataframe(frames)

    # a lone DataFrame is returned unwrapped
    return frames[0] if len(frames) == 1 else frames
def mdf_evalto(self, parameter_s=""):
    """
    Advances the current context to the end date and return a pandas
    dataframe of nodes evaluated on each timestep.

    %mdf_evalto <end_date> [nodes...]

    eg: %mdf_evalto 2020-01-01 <my node 1> <my node 2>

    If the final argument evaluates to a dict, list or tuple it is
    treated as a shift set and the nodes are evaluated in the root
    context shifted by it. At most one shift set is allowed.

    Raises AssertionError if no end date is given or if more than one
    shift set is supplied.
    """
    args = tokenize(parameter_s)
    if not args:
        # fail with a usage message rather than an IndexError on args[0]
        raise AssertionError("Usage: %mdf_evalto <end_date> [nodes...]")

    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx

    end_date = _parse_datetime(args[0],
                               self.shell.user_global_ns,
                               self.shell.user_ns)

    # Arguments are expressions evaluated in the interactive user's
    # namespaces. This must be a real list as it's measured, indexed
    # and popped from below.
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in args[1:]]

    df_ctx = root_ctx
    if nodes and isinstance(nodes[-1], (dict, list, tuple)):
        shift_sets = _get_shift_sets(args[-1], nodes.pop())
        # raise explicitly so the check isn't stripped when running with -O
        if len(shift_sets) > 1:
            raise AssertionError("Only one shift set allowed for %mdf_evalto")
        if shift_sets:
            _shift_name, shift_set = shift_sets[0]
            df_ctx = df_ctx.shift(shift_set=shift_set)

    df_builder = DataFrameBuilder(nodes, filter=True)
    date_range = pd.DateRange(cur_ctx.get_date(),
                              end_date,
                              offset=self.__timestep)

    # the date is advanced on the root context, but values are collected
    # from the (possibly shifted) dataframe context
    for dt in date_range:
        root_ctx.set_date(dt)
        df_builder(dt, df_ctx)

    return df_builder.get_dataframe(df_ctx)
def _get_shift_sets(arg_name, shifts): """ takes a string passed to a magic function, eg "[shift_set_1,shift_set_2,{a:2}" and it's evaluated form and returns a list of shift sets: (name, shift_dict) """ if not isinstance(shifts, (dict, tuple, list)): raise Exception("Couldn't convert %s to a shift set" % arg_name) shift_sets = [] if isinstance(shifts, dict): # the shifts parameter can be a single dict shift_sets.append((_clean_varname(arg_name), shifts)) return shift_sets # or a collection of dicts # try and get the names of the shifts shift_names = [None] * len(shifts) if arg_name.startswith("[") and arg_name.endswith("]"): tokens = tokenize(arg_name.strip("[]")) if len(tokens) == len(shift_names): shift_names = map(_clean_varname, tokens) for shift_name, shift in zip(shift_names, shifts): # normally the shifts are a list of dicts if isinstance(shift, dict): if shift_name is None: shift_name = "_%d" % len(shift_sets) shift_sets.append((shift_name, shift)) # but we also allow lists of lists of dicts elif isinstance(shift, (list, tuple)): for i, x in enumerate(shift): inner_shift_name = shift_name if inner_shift_name is None: inner_shift_name = "_%d" % len(shift_sets) else: inner_shift_name = "%s_%d" % (shift_name, i) if not isinstance(x, dict): raise AssertionError( "shift %s was expected to be a dict, " "but it's a %s (%s)" % (inner_shift_name, type(x), x) ) shift_sets.append((inner_shift_name, x)) # anything else we can't deal with else: if shift_name is None: shift_name = "_%d" % len(shift_sets) raise AssertionError( "shift %s was expected to be a dict, but it's a %s (%s)" % (shift_name, type(shift), shift) ) return shift_sets
def _get_shift_sets(arg_name, shifts): """ takes a string passed to a magic function, eg "[shift_set_1,shift_set_2,{a:2}" and it's evaluated form and returns a list of shift sets: (name, shift_dict) """ if not isinstance(shifts, (dict, tuple, list)): raise Exception("Couldn't convert %s to a shift set" % arg_name) shift_sets = [] if isinstance(shifts, dict): # the shifts parameter can be a single dict shift_sets.append((_clean_varname(arg_name), shifts)) return shift_sets # or a collection of dicts # try and get the names of the shifts shift_names = [None] * len(shifts) if arg_name.startswith("[") and arg_name.endswith("]"): tokens = tokenize(arg_name.strip("[]")) if len(tokens) == len(shift_names): shift_names = map(_clean_varname, tokens) for shift_name, shift in zip(shift_names, shifts): # normally the shifts are a list of dicts if isinstance(shift, dict): if shift_name is None: shift_name = "_%d" % len(shift_sets) shift_sets.append((shift_name, shift)) # but we also allow lists of lists of dicts elif isinstance(shift, (list, tuple)): for i, x in enumerate(shift): inner_shift_name = shift_name if inner_shift_name is None: inner_shift_name = "_%d" % len(shift_sets) else: inner_shift_name = "%s_%d" % (shift_name, i) if not isinstance(x, dict): raise AssertionError("shift %s was expected to be a dict, " "but it's a %s (%s)" % (inner_shift_name, type(x), x)) shift_sets.append((inner_shift_name, x)) # anything else we can't deal with else: if shift_name is None: shift_name = "_%d" % len(shift_sets) raise AssertionError( "shift %s was expected to be a dict, but it's a %s (%s)" % (shift_name, type(shift), shift)) return shift_sets
def mdf_show(self, parameter_s=""):
    """
    Opens a new mdf viewer and adds nodes to it, or adds the nodes
    to an existing viewer if one is open.

    %mdf_show [nodes...]
    """
    # Each argument is an expression evaluated in the interactive user's
    # namespaces. A list is built so the viewer always receives a concrete
    # sequence rather than a one-shot iterator.
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in tokenize(parameter_s)]
    ctx = _get_current_context()
    viewer.show(nodes, ctx=ctx)
def mdf_show(self, parameter_s=""):
    """
    Opens a new mdf viewer and adds nodes to it, or adds the nodes
    to an existing viewer if one is open.

    %mdf_show [nodes...]
    """
    # Each argument is an expression evaluated in the interactive user's
    # namespaces. A list is built so the viewer always receives a concrete
    # sequence rather than a one-shot iterator.
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in tokenize(parameter_s)]
    ctx = _get_current_context()
    viewer.show(nodes, ctx=ctx)
def mdf_evalto(self, parameter_s=""):
    """
    Advances the current context to the end date and return a pandas
    dataframe of nodes evaluated on each timestep.

    %mdf_evalto <end_date> [nodes...]

    eg: %mdf_evalto 2020-01-01 <my node 1> <my node 2>

    If the final argument evaluates to a dict, list or tuple it is
    treated as a shift set and the nodes are evaluated in the root
    context shifted by it. At most one shift set is allowed.

    Raises AssertionError if no end date is given or if more than one
    shift set is supplied.
    """
    args = tokenize(parameter_s)
    if not args:
        # fail with a usage message rather than an IndexError on args[0]
        raise AssertionError("Usage: %mdf_evalto <end_date> [nodes...]")

    cur_ctx = _get_current_context()
    root_ctx = cur_ctx.get_parent() or cur_ctx

    end_date = _parse_datetime(args[0],
                               self.shell.user_global_ns,
                               self.shell.user_ns)

    # Arguments are expressions evaluated in the interactive user's
    # namespaces. This must be a real list as it's measured, indexed
    # and popped from below.
    nodes = [eval(x, self.shell.user_global_ns, self.shell.user_ns)
             for x in args[1:]]

    df_ctx = root_ctx
    if nodes and isinstance(nodes[-1], (dict, list, tuple)):
        shift_sets = _get_shift_sets(args[-1], nodes.pop())
        # raise explicitly so the check isn't stripped when running with -O
        if len(shift_sets) > 1:
            raise AssertionError("Only one shift set allowed for %mdf_evalto")
        if shift_sets:
            _shift_name, shift_set = shift_sets[0]
            df_ctx = df_ctx.shift(shift_set=shift_set)

    df_builder = DataFrameBuilder(nodes, filter=True)
    date_range = pd.DateRange(cur_ctx.get_date(),
                              end_date,
                              offset=self.__timestep)

    # the date is advanced on the root context, but values are collected
    # from the (possibly shifted) dataframe context
    for dt in date_range:
        root_ctx.set_date(dt)
        df_builder(dt, df_ctx)

    return df_builder.get_dataframe(df_ctx)
def mdf_xl(self, parameter_s=""):
    """
    Export to excel a list of nodes evaluated over a date range, or DataFrames.

    %mdf_xl <start_date> <end_date> [nodes...]

    If no nodes are specified and the viewer is active the currently
    selected nodes are used.

    Alternatively, export one or more DataFrames directly:

    %mdf_xl df1 [, dfN ...]

    Returns the exported DataFrame when there is exactly one, otherwise
    the list of exported DataFrames.
    """
    tokens = tokenize(parameter_s)
    values = [_try_eval(token, self.shell.user_global_ns, self.shell.user_ns)
              for token in tokens]

    if not values:
        raise AssertionError("Usage: %mdf_xl <start> <end> nodes...")

    if isinstance(values[0], pd.DataFrame):
        # the first argument is a DataFrame: export every DataFrame
        # argument directly and ignore anything else
        frames = [value for value in values if isinstance(value, pd.DataFrame)]
    else:
        # otherwise build one DataFrame of nodes evaluated over a date range
        # NOTE(review): self is passed explicitly as well as being bound
        # by the attribute lookup - confirm this matches how mdf_df is
        # defined/registered.
        frames = [self.mdf_df(self, parameter_s)]

    excel.export_dataframe(frames)

    # a lone DataFrame is returned unwrapped
    return frames[0] if len(frames) == 1 else frames
def _magic_dataframe(self, parameter_s, widepanel=False, single_df=True):
    """
    Implementation for magic_dataframe and magic_widepanel.

    parameter_s is tokenized and interpreted as follows:

      - the first two tokens are parsed as the start and end dates
      - if the last token evaluates to a string starting with "||" the
        rest of that string is the number of processes to use
      - if the last remaining argument isn't an MDFNode it is treated as
        a shift set, or list of shift sets
      - everything left must evaluate to an MDFNode. If no nodes are given
        and the viewer was imported, the nodes selected in the viewer
        are used.

    widepanel: return WidePanels keyed by node name instead of DataFrames.
    single_df: put all nodes in one DataFrame rather than one per node
               (ignored if widepanel is set).

    Returns a single result when there is only one. With several named
    shift sets the results are wrapped in a ShiftedResultsTuple.

    Raises AssertionError if an argument that should be a node isn't one,
    if a selected node isn't in the current context, or if shifts are
    combined with explicitly specified contexts.
    """
    # the first two arguments are dates, and after that it's a list of nodes
    # with some optional keyword args, ie %mdf_df <start> <end> node, node, node, shifts=[{x:1}, {x:2}]
    arg_names = tokenize(parameter_s)
    arg_values = [_try_eval(x, self.shell.user_global_ns, self.shell.user_ns)
                  for x in arg_names]
    args = list(zip(arg_names, arg_values))

    # dates are parsed from the raw text rather than the evaluated value
    start = None
    if args:
        arg_name, _arg = args.pop(0)
        start = _parse_datetime(arg_name,
                                self.shell.user_global_ns,
                                self.shell.user_ns)

    end = None
    if args:
        arg_name, _arg = args.pop(0)
        end = _parse_datetime(arg_name,
                              self.shell.user_global_ns,
                              self.shell.user_ns)

    # the final argument can be the number of processes to use
    num_processes = 0
    if args:
        _arg_name, arg = args[-1]
        if isinstance(arg, basestring) and arg.startswith("||"):
            args.pop()
            num_processes = int(arg[2:])

    # the next to last parameter may be a shift set or list of
    # shift sets.
    has_shifts = False
    shift_sets = [{}]  # always have at least one empty shift set
    shift_names = ["_0"]
    # only look for shifts if there's an argument left to look at; popping
    # unconditionally raised IndexError when no nodes or shifts were given
    if args and not isinstance(args[-1][1], MDFNode):
        arg_name, arg = args.pop()
        named_shift_sets = _get_shift_sets(arg_name, arg)
        if named_shift_sets:
            shift_names, shift_sets = zip(*named_shift_sets)
            has_shifts = True

    # any remaining arguments are the nodes
    nodes = []
    node_var_names = []
    for arg_name, node in args:
        if not isinstance(node, MDFNode):
            raise AssertionError("%s is not a node" % arg_name)
        nodes.append(node)
        node_var_names.append(arg_name)

    curr_ctx = _get_current_context()
    ctxs = [None] * len(nodes)

    if not nodes and _viewer_imported:
        # get the selected nodes from the viewer. Lists are built up item
        # by item (rather than unzipping into tuples) so this also works
        # when nothing is selected and the contexts can be replaced.
        ctxs = []
        for ctx, node in viewer.get_selected():
            if not ctx.is_shift_of(curr_ctx):
                raise AssertionError(
                    "selected node '%s' is not in the current context"
                    % node.name)

            # replace any contexts that are simply the current context with None
            # so that shifting works correctly
            ctxs.append(None if ctx is curr_ctx else ctx)
            nodes.append(node)

            # there's no variable name for a selected node, so the node's
            # own name is used to label it in a widepanel
            node_var_names.append(node.name)

    # if there are shifts then all the contexts have to be None otherwise the
    # shifts won't work correctly. This could be relaxed later if it causes problems,
    # but for now this makes the code simpler.
    if has_shifts and any(ctx is not None for ctx in ctxs):
        raise AssertionError(
            "Can't apply shifts when contexts are explicitly specified")

    # list df_builders, one per node or group of nodes
    df_builders = []
    if widepanel or not single_df:
        # build multiple dataframes
        for node, ctx in zip(nodes, ctxs):
            if ctx is None:
                df_builder = DataFrameBuilder([node], filter=True)
            else:
                df_builder = DataFrameBuilder([node],
                                              contexts=[ctx],
                                              filter=True)
            df_builders.append(df_builder)
    else:
        # build a single dataframe
        if all(ctx is None for ctx in ctxs):
            df_builder = DataFrameBuilder(nodes, filter=True)
        else:
            df_builder = DataFrameBuilder(nodes, contexts=ctxs, filter=True)
        df_builders.append(df_builder)

    # add all the dataframe builders to the callbacks
    callbacks = list(df_builders)

    root_ctx = curr_ctx.get_parent() or curr_ctx
    date_range = pd.DateRange(start, end, offset=self.__timestep)

    # Add a progress bar to the callbacks
    callbacks.append(ProgressBar(date_range[0], date_range[-1]))

    shifted_ctxs = run(date_range,
                       callbacks,
                       ctx=root_ctx,
                       shifts=shift_sets,
                       num_processes=num_processes)

    # without shifts the results are read from the root context itself
    if not has_shifts:
        shifted_ctxs = [root_ctx]

    # when returning a list of results because multiple shifts have been specified
    # use a named tuple with the items being the names of the shifts
    tuple_ctr = tuple
    if has_shifts:
        tuple_ctr = partial(ShiftedResultsTuple, shift_names)

    if widepanel:
        wps = []
        for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                      shift_sets,
                                                      shifted_ctxs):
            wp_dict = {}
            for node_var_name, df_builder in zip(node_var_names, df_builders):
                wp_dict[node_var_name] = df_builder.get_dataframe(shifted_ctx)
            wp = pd.WidePanel.from_dict(wp_dict)

            if has_shifts:
                wp = WidePanelWithShiftSet(wp, shift_name, shift_set)
            wps.append(wp)

        if len(wps) == 1:
            return wps[0]
        return tuple_ctr(*wps)

    # list a list of lists of dataframes
    # [[dfs for one shift set], [dfs for next shift set], ...]
    df_lists = []
    for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                  shift_sets,
                                                  shifted_ctxs):
        dfs = []
        for df_builder in df_builders:
            df = df_builder.get_dataframe(shifted_ctx)
            if has_shifts:
                df = DataFrameWithShiftSet(df, shift_name, shift_set)
            dfs.append(df)
        df_lists.append(dfs)

    if single_df:
        # flatten into a single list (there should be one dataframe per shift)
        dfs = [df for shift_dfs in df_lists for df in shift_dfs]
        if len(dfs) == 1:
            return dfs[0]
        return tuple_ctr(*dfs)

    if len(df_lists) == 1:
        return df_lists[0]
    return tuple_ctr(*df_lists)
def test_func_index(self):
    # indexing and a chained method call with keyword arguments stay
    # together in a single token
    tokens = tokenize(
        "2001-01-01 T nodes[0], get_nodes()[0].delaynode(periods=1)")
    self.assertEqual(tokens, [
        "2001-01-01",
        "T",
        "nodes[0]",
        "get_nodes()[0].delaynode(periods=1)",
    ])
def test_simple_parse(self):
    # space separated arguments give one token each
    tokens = tokenize("2001-01-01 T node_a node_b")
    self.assertEqual(tokens, ["2001-01-01", "T", "node_a", "node_b"])
def test_multiproc(self):
    # the trailing "|| 2" is kept as one token, including the space
    tokens = tokenize("2001-01-01 T nodes[0], nodes[1] [a, b, c] || 2")
    self.assertEqual(tokens, [
        "2001-01-01",
        "T",
        "nodes[0]",
        "nodes[1]",
        "[a, b, c]",
        "|| 2",
    ])
def test_list(self):
    # a bracketed list is a single token despite its commas and spaces
    tokens = tokenize("2001-01-01 T nodes[0], nodes[1] [a, b, c]")
    self.assertEqual(tokens, [
        "2001-01-01",
        "T",
        "nodes[0]",
        "nodes[1]",
        "[a, b, c]",
    ])
def test_dict(self):
    # everything between the braces is a single token
    tokens = tokenize(
        "2001-01-01 T nodes[0], nodes[1] {a=foo(), b=hello[0], c=100}")
    self.assertEqual(tokens, [
        "2001-01-01",
        "T",
        "nodes[0]",
        "nodes[1]",
        "{a=foo(), b=hello[0], c=100}",
    ])
def _magic_dataframe(self, parameter_s, widepanel=False, single_df=True):
    """
    Implementation for magic_dataframe and magic_widepanel.

    parameter_s is tokenized and interpreted as follows:

      - the first two tokens are parsed as the start and end dates
      - if the last token evaluates to a string starting with "||" the
        rest of that string is the number of processes to use
      - if the last remaining argument isn't an MDFNode it is treated as
        a shift set, or list of shift sets
      - everything left must evaluate to an MDFNode. If no nodes are given
        and the viewer was imported, the nodes selected in the viewer
        are used.

    widepanel: return WidePanels keyed by node name instead of DataFrames.
    single_df: put all nodes in one DataFrame rather than one per node
               (ignored if widepanel is set).

    Returns a single result when there is only one. With several named
    shift sets the results are wrapped in a ShiftedResultsTuple.

    Raises AssertionError if an argument that should be a node isn't one,
    if a selected node isn't in the current context, or if shifts are
    combined with explicitly specified contexts.
    """
    # the first two arguments are dates, and after that it's a list of nodes
    # with some optional keyword args, ie %mdf_df <start> <end> node, node, node, shifts=[{x:1}, {x:2}]
    arg_names = tokenize(parameter_s)
    arg_values = [_try_eval(x, self.shell.user_global_ns, self.shell.user_ns)
                  for x in arg_names]
    args = list(zip(arg_names, arg_values))

    # dates are parsed from the raw text rather than the evaluated value
    start = None
    if args:
        arg_name, _arg = args.pop(0)
        start = _parse_datetime(arg_name,
                                self.shell.user_global_ns,
                                self.shell.user_ns)

    end = None
    if args:
        arg_name, _arg = args.pop(0)
        end = _parse_datetime(arg_name,
                              self.shell.user_global_ns,
                              self.shell.user_ns)

    # the final argument can be the number of processes to use
    num_processes = 0
    if args:
        _arg_name, arg = args[-1]
        if isinstance(arg, basestring) and arg.startswith("||"):
            args.pop()
            num_processes = int(arg[2:])

    # the next to last parameter may be a shift set or list of
    # shift sets.
    has_shifts = False
    shift_sets = [{}]  # always have at least one empty shift set
    shift_names = ["_0"]
    # only look for shifts if there's an argument left to look at; popping
    # unconditionally raised IndexError when no nodes or shifts were given
    if args and not isinstance(args[-1][1], MDFNode):
        arg_name, arg = args.pop()
        named_shift_sets = _get_shift_sets(arg_name, arg)
        if named_shift_sets:
            shift_names, shift_sets = zip(*named_shift_sets)
            has_shifts = True

    # any remaining arguments are the nodes
    nodes = []
    node_var_names = []
    for arg_name, node in args:
        if not isinstance(node, MDFNode):
            raise AssertionError("%s is not a node" % arg_name)
        nodes.append(node)
        node_var_names.append(arg_name)

    curr_ctx = _get_current_context()
    ctxs = [None] * len(nodes)

    if not nodes and _viewer_imported:
        # get the selected nodes from the viewer. Lists are built up item
        # by item (rather than unzipping into tuples) so this also works
        # when nothing is selected and the contexts can be replaced.
        ctxs = []
        for ctx, node in viewer.get_selected():
            if not ctx.is_shift_of(curr_ctx):
                raise AssertionError(
                    "selected node '%s' is not in the current context"
                    % node.name)

            # replace any contexts that are simply the current context with None
            # so that shifting works correctly
            ctxs.append(None if ctx is curr_ctx else ctx)
            nodes.append(node)

            # there's no variable name for a selected node, so the node's
            # own name is used to label it in a widepanel
            node_var_names.append(node.name)

    # if there are shifts then all the contexts have to be None otherwise the
    # shifts won't work correctly. This could be relaxed later if it causes problems,
    # but for now this makes the code simpler.
    if has_shifts and any(ctx is not None for ctx in ctxs):
        raise AssertionError(
            "Can't apply shifts when contexts are explicitly specified")

    # list df_builders, one per node or group of nodes
    df_builders = []
    if widepanel or not single_df:
        # build multiple dataframes
        for node, ctx in zip(nodes, ctxs):
            if ctx is None:
                df_builder = DataFrameBuilder([node], filter=True)
            else:
                df_builder = DataFrameBuilder([node],
                                              contexts=[ctx],
                                              filter=True)
            df_builders.append(df_builder)
    else:
        # build a single dataframe
        if all(ctx is None for ctx in ctxs):
            df_builder = DataFrameBuilder(nodes, filter=True)
        else:
            df_builder = DataFrameBuilder(nodes, contexts=ctxs, filter=True)
        df_builders.append(df_builder)

    # add all the dataframe builders to the callbacks
    callbacks = list(df_builders)

    root_ctx = curr_ctx.get_parent() or curr_ctx
    date_range = pd.DateRange(start, end, offset=self.__timestep)

    # Add a progress bar to the callbacks
    callbacks.append(ProgressBar(date_range[0], date_range[-1]))

    shifted_ctxs = run(date_range,
                       callbacks,
                       ctx=root_ctx,
                       shifts=shift_sets,
                       num_processes=num_processes)

    # without shifts the results are read from the root context itself
    if not has_shifts:
        shifted_ctxs = [root_ctx]

    # when returning a list of results because multiple shifts have been specified
    # use a named tuple with the items being the names of the shifts
    tuple_ctr = tuple
    if has_shifts:
        tuple_ctr = partial(ShiftedResultsTuple, shift_names)

    if widepanel:
        wps = []
        for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                      shift_sets,
                                                      shifted_ctxs):
            wp_dict = {}
            for node_var_name, df_builder in zip(node_var_names, df_builders):
                wp_dict[node_var_name] = df_builder.get_dataframe(shifted_ctx)
            wp = pd.WidePanel.from_dict(wp_dict)

            if has_shifts:
                wp = WidePanelWithShiftSet(wp, shift_name, shift_set)
            wps.append(wp)

        if len(wps) == 1:
            return wps[0]
        return tuple_ctr(*wps)

    # list a list of lists of dataframes
    # [[dfs for one shift set], [dfs for next shift set], ...]
    df_lists = []
    for shift_name, shift_set, shifted_ctx in zip(shift_names,
                                                  shift_sets,
                                                  shifted_ctxs):
        dfs = []
        for df_builder in df_builders:
            df = df_builder.get_dataframe(shifted_ctx)
            if has_shifts:
                df = DataFrameWithShiftSet(df, shift_name, shift_set)
            dfs.append(df)
        df_lists.append(dfs)

    if single_df:
        # flatten into a single list (there should be one dataframe per shift)
        dfs = [df for shift_dfs in df_lists for df in shift_dfs]
        if len(dfs) == 1:
            return dfs[0]
        return tuple_ctr(*dfs)

    if len(df_lists) == 1:
        return df_lists[0]
    return tuple_ctr(*df_lists)
def test_func(self):
    # a method call is one token even though its arguments contain
    # a comma and a space
    tokens = tokenize("2001-01-01 T node_a node_b.queuenode(a, size=c)")
    self.assertEqual(tokens, [
        "2001-01-01",
        "T",
        "node_a",
        "node_b.queuenode(a, size=c)",
    ])