def run(
        self,
        n_simulations,
        percentile=95,
        query=None,
        query_stage_cost=False,
        random_state=None):
    """Run a Monte Carlo simulation to evaluate the policy on the
    approximation model.

    Parameters
    ----------
    n_simulations: int/-1
        If int: the number of simulations;
        If -1: exhaustive evaluation over all sample paths.

    query: list, optional (default=None)
        The names of variables to query.

    query_stage_cost: bool, optional (default=False)
        Whether to query values of individual stage costs.

    percentile: float, optional (default=95)
        The percentile used to compute the confidence interval.

    random_state: int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by numpy.random.
    """
    random_state = check_random_state(random_state)
    query = [] if query is None else list(query)
    MSP = self.MSP
    if n_simulations == -1:
        n_sample_paths, sample_paths = MSP._enumerate_sample_paths(MSP.T - 1)
    else:
        n_sample_paths = n_simulations
    ub = [0] * n_sample_paths
    if query_stage_cost:
        stage_cost = [
            [0 for _ in range(n_sample_paths)] for _ in range(MSP.T)
        ]
    solution = {item: [[] for _ in range(MSP.T)] for item in query}
    # forward sampling
    for j in range(n_sample_paths):
        if n_simulations == -1:
            sample_path = sample_paths[j]
        state = 0
        # time loop
        for t in range(MSP.T):
            if MSP.n_Markov_states == 1:
                m = MSP.models[t]
            else:
                if n_simulations == -1:
                    m = MSP.models[t][sample_path[1][t]]
                else:
                    if t == 0:
                        m = MSP.models[t][0]
                    else:
                        state = random_state.choice(
                            range(MSP.n_Markov_states[t]),
                            p=MSP.transition_matrix[t][state],
                        )
                        m = MSP.models[t][state]
            if t > 0:
                m._update_link_constrs(forward_solution)
            if MSP.n_Markov_states == 1:
                scenario_index = (
                    sample_path[t]
                    if n_simulations == -1
                    else rand_int(m.n_samples, random_state, m.probability)
                )
            else:
                scenario_index = (
                    sample_path[0][t]
                    if n_simulations == -1
                    else rand_int(m.n_samples, random_state, m.probability)
                )
            m._update_uncertainty(scenario_index)
            m.optimize()
            if m.status not in [2, 11]:
                m.write_infeasible_model("evaluation_" + str(m.modelName))
            forward_solution = MSP._get_forward_solution(m, t)
            for var in m.getVars():
                if var.varName in query:
                    solution[var.varName][t].append(var.X)
            if query_stage_cost:
                stage_cost[t][j] = MSP._get_stage_cost(m, t)
            ub[j] += MSP._get_stage_cost(m, t)
        #! end time loop
    #! end forward sampling
    self.pv = ub
    if n_simulations == -1:
        self.epv = numpy.dot(
            ub,
            [
                MSP._compute_weight_sample_path(sample_paths[j])
                for j in range(n_sample_paths)
            ],
        )
    if n_simulations not in [-1, 1]:
        self.CI = compute_CI(ub, percentile)
    self._compute_gap()
    self.solution = {k: pandas.DataFrame(v) for k, v in solution.items()}
    if query_stage_cost:
        self.stage_cost = pandas.DataFrame(stage_cost)
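
# Illustrative usage sketch (not part of the library; the variable and query
# names below are hypothetical, and the sketch assumes an already-solved
# multi-stage model `MSP` wrapped by this Evaluation-style class):
#
#     result = Evaluation(MSP)
#     result.run(n_simulations=1000, query=['inventory'], random_state=888)
#     result.CI                     # confidence interval of the policy values
#     result.gap                    # gap against the deterministic bound
#     result.solution['inventory']  # DataFrame of queried values (stages x paths)
#
#     result.run(n_simulations=-1)  # exhaustive evaluation; exact value in result.epv
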
def run(
        self,
        n_simulations,
        query=None,
        query_stage_cost=False,
        random_state=None,
        percentile=95):
    """Run a Monte Carlo simulation to evaluate the policy on the true
    problem.

    Parameters
    ----------
    n_simulations: int
        The number of simulations.

    query: list, optional (default=None)
        The names of variables to query.

    percentile: float, optional (default=95)
        The percentile used to compute the confidence interval.

    query_stage_cost: bool, optional (default=False)
        Whether to query values of individual stage costs.

    random_state: int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by numpy.random.
    """
    MSP = self.MSP
    if MSP.__class__.__name__ == 'MSIP':
        MSP._back_binarize()
    # a discrete finite model should call the approximation-model evaluator instead
    if (
        MSP._type in ["stage-wise independent", "Markov chain"]
        and MSP._individual_type == "original"
        and not hasattr(MSP, "bin_stage")
    ):
        return super().run(
            n_simulations=n_simulations,
            query=query,
            query_stage_cost=query_stage_cost,
            percentile=percentile,
            random_state=random_state,
        )
    if n_simulations <= 0:
        raise ValueError("number of simulations must be bigger than 0")
    random_state = check_random_state(random_state)
    if MSP._type == "Markovian":
        samples = MSP.Markovian_uncertainty(random_state, n_simulations)
        label_all = numpy.zeros([n_simulations, MSP.T], dtype=int)
        for t in range(1, MSP.T):
            dist = numpy.empty([n_simulations, MSP.n_Markov_states[t]])
            for idx, markov_state in enumerate(MSP.Markov_states[t]):
                temp = samples[:, t, :] - markov_state
                dist[:, idx] = numpy.sum(temp ** 2, axis=1)
            label_all[:, t] = numpy.argmin(dist, axis=1)
    query = [] if query is None else list(query)
    ub = [0] * n_simulations
    if query_stage_cost:
        stage_cost = [
            [0 for _ in range(n_simulations)] for _ in range(MSP.T)
        ]
    solution = {item: [[] for _ in range(MSP.T)] for item in query}
    # forward sampling
    for j in range(n_simulations):
        # Markov chain uncertainty state
        if MSP._type == "Markov chain":
            state = 0
        # time loop
        for t in range(MSP.T):
            # sample Markovian uncertainties
            if MSP._type == "Markovian":
                if t == 0:
                    m = MSP.models[t][0]
                else:
                    # use the model with the closest Markov state
                    m = MSP.models[t][label_all[j][t]]
                # update Markovian uncertainty
                m._update_uncertainty_dependent(samples[j][t])
            elif MSP._type == "Markov chain":
                if t == 0:
                    m = MSP.models[t][0]
                else:
                    state = random_state.choice(
                        range(MSP.n_Markov_states[t]),
                        p=MSP.transition_matrix[t][state],
                    )
                    m = MSP.models[t][state]
            else:
                m = MSP.models[t]
            # sample independent uncertainties
            if t > 0:
                if m._type == "continuous":
                    m._sample_uncertainty(random_state)
                elif m._flag_discrete == 1:
                    m._update_uncertainty_discrete(
                        rand_int(
                            m.n_samples_discrete, random_state, m.probability)
                    )
                else:
                    m._update_uncertainty(
                        rand_int(m.n_samples, random_state, m.probability)
                    )
                m._update_link_constrs(forward_solution)
            m.optimize()
            if m.status not in [2, 11]:
                m.write_infeasible_model(
                    "evaluation_true_" + str(m.modelName))
            # get solutions
            forward_solution = MSP._get_forward_solution(m, t)
            for var in m.getVars():
                if var.varName in query:
                    solution[var.varName][t].append(var.X)
            if query_stage_cost:
                stage_cost[t][j] = MSP._get_stage_cost(m, t)
            ub[j] += MSP._get_stage_cost(m, t)
            if MSP._type == "Markovian":
                m._update_uncertainty_dependent(
                    MSP.Markov_states[t][label_all[j][t]])
        #! end time loop
    #! end forward sampling
    self.solution = {k: pandas.DataFrame(v) for k, v in solution.items()}
    if query_stage_cost:
        self.stage_cost = pandas.DataFrame(stage_cost)
    self.pv = ub
    if n_simulations != 1:
        self.CI = compute_CI(ub, percentile)
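
# Illustrative usage sketch (hypothetical names; assumes the enclosing class is
# the true-problem counterpart of the evaluator above, e.g. EvaluationTrue in
# msppy, and that `MSP` has already been solved):
#
#     result_true = EvaluationTrue(MSP)
#     result_true.run(n_simulations=1000, random_state=888)
#     result_true.CI   # confidence interval of policy values on the true problem
#
# For stage-wise independent or Markov chain models whose uncertainties are
# original and finite, the call simply delegates to the approximation-model
# evaluator via super().run(...).
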
def plot_bounds(db, pv, sense=1, percentile=95, start=0, window=1, smooth=0,
                ax=None):
    """Plot the evolution of bounds.

    Parameters
    ----------
    db: unidimensional array-like
        A T-length array of the deterministic bounds.

    pv: bidimensional array-like
        An (n_iterations * n_steps) array of the policy values.

    sense: -1/1 (default=1)
        The model sense: 1 indicates a minimization problem and -1 indicates a
        maximization problem.

    percentile: float (default=95)
        The percentile used to construct the confidence interval.

    ax: Matplotlib AxesSubplot instance, optional
        The specified subplot is used to plot; otherwise a new figure is
        created.

    window: int, optional (default=1)
        The length of the moving window used to aggregate the policy values.
        If the length is bigger than 1, an approximate confidence interval of
        the policy values and statistical bounds will be plotted.

    smooth: bool, optional (default=0)
        If 1, fit a smooth line to the policy values to better visualize the
        trend of the statistical values/bounds.

    start: int, optional (default=0)
        The start iteration of the plot. Setting start to a later iteration
        zooms in on the evolution of the bounds in the most recent iterations.

    Returns
    -------
    matplotlib.pyplot.figure instance
    """
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    else:
        fig = ax.figure
    from matplotlib.ticker import MaxNLocator
    from msppy.utils.statistics import compute_CI
    if smooth == 1:
        from msppy.utils.statistics import fit
    db = numpy.array(db)
    pv = numpy.array(pv)
    end = len(db)
    n_processes = pv.shape[1]
    x_value = range(start, end)
    ax.plot(x_value, db[start:end], '-b', label='deterministic bounds')
    pv_unpack = [item for alist in pv for item in alist]
    if n_processes != 1 or window != 1:
        x_value = range(max(start, window - 1), end)
        CI = [
            compute_CI(
                pv_unpack[n_processes * (i - window + 1):n_processes * (i + 1)],
                percentile,
            )
            for i in range(window - 1, end)
        ]
        CI = CI[max(start, window - 1) - window + 1:end]
        CI_lower_end = [item[0] for item in CI]
        CI_upper_end = [item[1] for item in CI]
        CI_mid = [sum(item) / len(item) for item in CI]
        ax.fill_between(
            x_value,
            CI_lower_end,
            CI_upper_end,
            facecolor='pink',
            alpha=0.5,
            edgecolor='none',
            label='expected policy values {}% CI'.format(percentile)
        )
        if sense == 1:
            ax.plot(
                x_value, CI_upper_end, '-r',
                label='statistical bounds ' + str(percentile) + '% CI'
            )
            if smooth == 1:
                ax.plot(
                    x_value, fit(CI_mid, convex=1), '--g',
                    label='smoothed policy values'
                )
        else:
            ax.plot(
                x_value, CI_lower_end, '-r',
                label='statistical bounds ' + str(percentile) + '% CI'
            )
            if smooth == 1:
                ax.plot(
                    x_value, fit(CI_mid, convex=-1), '--g',
                    label='smoothed policy values'
                )
    else:
        pv = pv[start:end]
        pv = [item[0] for item in pv]
        ax.plot(x_value, pv, '-r', label='policy values')
        if smooth == 1:
            ax.plot(
                x_value, fit(pv, sense), '--g',
                label='smoothed policy values'
            )
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Values')
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.legend(loc='best')
    ax.set_title('Evolution of bounds')
    return fig
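
# Illustrative usage sketch (hypothetical variable names; assumes an SDDP-style
# solver object that records its deterministic bounds in `solver.db` and its
# per-iteration policy values in `solver.pv`, as the solve() routine below does):
#
#     fig = plot_bounds(solver.db, solver.pv, sense=1, window=5, smooth=1)
#     fig.savefig('bounds.png')
#
# A window larger than 1 aggregates policy values over a moving window, so the
# plotted confidence band is only an approximate statistical bound.
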
def run(
        self,
        n_simulations,
        percentile=95,
        query=None,
        query_T=None,
        query_dual=None,
        query_stage_cost=False,
        n_processes=1):
    """Run a Monte Carlo simulation to evaluate the policy.

    Parameters
    ----------
    n_simulations: int/-1
        If int: the number of simulations;
        If -1: exhaustive evaluation.

    percentile: float, optional (default=95)
        The percentile used to compute the confidence interval.

    query: list, optional (default=None)
        The names of variables to query.

    query_dual: list, optional (default=None)
        The names of constraints whose dual variables are to be queried.

    query_stage_cost: bool, optional (default=False)
        Whether to query values of individual stage costs.

    n_processes: int, optional (default=1)
        The number of processes used to run the simulation.

    query_T: int, optional (default=None)
        For infinite-horizon problems, the number of stages over which to
        evaluate the policy.
    """
    MSP = self.MSP
    query_T = query_T if query_T else MSP.T
    if not MSP._flag_infinity:
        from msppy.solver import SDDP
        self.solver = SDDP(MSP)
    else:
        from msppy.solver import PSDDP
        self.solver = PSDDP(MSP)
        self.solver.forward_T = query_T
    self.n_simulations = n_simulations
    self._compute_sample_path_idx_and_markovian_path(query_T)
    self.pv = numpy.zeros(self.n_sample_paths)
    stage_cost = solution = solution_dual = None
    if query_stage_cost:
        stage_cost = [
            multiprocessing.RawArray("d", [0] * query_T)
            for _ in range(self.n_sample_paths)
        ]
    if query is not None:
        solution = {
            item: [
                multiprocessing.RawArray("d", [0] * query_T)
                for _ in range(self.n_sample_paths)
            ]
            for item in query
        }
    if query_dual is not None:
        solution_dual = {
            item: [
                multiprocessing.RawArray("d", [0] * query_T)
                for _ in range(self.n_sample_paths)
            ]
            for item in query_dual
        }
    n_processes = min(self.n_sample_paths, n_processes)
    jobs = allocate_jobs(self.n_sample_paths, n_processes)
    pv = multiprocessing.Array("d", [0] * self.n_sample_paths)
    procs = [None] * n_processes
    for p in range(n_processes):
        procs[p] = multiprocessing.Process(
            target=self.run_single,
            args=(pv, jobs[p], query, query_dual, query_stage_cost,
                  stage_cost, solution, solution_dual))
        procs[p].start()
    for proc in procs:
        proc.join()
    if self.n_simulations != 1:
        self.pv = [item for item in pv]
    else:
        self.pv = pv[0]
    if self.n_simulations == -1:
        self.epv = numpy.dot(
            pv,
            [
                MSP._compute_weight_sample_path(self.sample_path_idx[j])
                for j in range(self.n_sample_paths)
            ],
        )
    if self.n_simulations not in [-1, 1]:
        self.CI = compute_CI(self.pv, percentile)
    self._compute_gap()
    if query is not None:
        self.solution = {
            k: pandas.DataFrame(numpy.array(v))
            for k, v in solution.items()
        }
    if query_dual is not None:
        self.solution_dual = {
            k: pandas.DataFrame(numpy.array(v))
            for k, v in solution_dual.items()
        }
    if query_stage_cost:
        self.stage_cost = pandas.DataFrame(numpy.array(stage_cost))
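
# Note on the shared-memory pattern used above (a sketch of the intent, not a
# specification of allocate_jobs/run_single, whose internals are not shown
# here): the RawArray("d", ...) buffers are flat C double arrays shared with
# the worker processes, run_single is expected to fill the slots of its job
# slice in place, and the parent converts the buffers to pandas DataFrames only
# after every worker has joined. A minimal, hypothetical job-splitting helper
# in the same spirit could look like:
#
#     def allocate_jobs_sketch(n_paths, n_procs):
#         # hypothetical stand-in for allocate_jobs: round-robin index slices
#         return [range(p, n_paths, n_procs) for p in range(n_procs)]
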
def run(
        self,
        n_simulations,
        percentile=95,
        query=None,
        query_T=None,
        query_dual=None,
        query_stage_cost=False,
        random_state=None,
        n_processes=1):
    """Run a Monte Carlo simulation to evaluate the policy on the
    approximation model.

    Parameters
    ----------
    n_simulations: int/-1
        If int: the number of simulations;
        If -1: exhaustive evaluation.

    query: list, optional (default=None)
        The names of variables to query.

    query_dual: list, optional (default=None)
        The names of constraints whose dual variables are to be queried.

    query_stage_cost: bool, optional (default=False)
        Whether to query values of individual stage costs.

    percentile: float, optional (default=95)
        The percentile used to compute the confidence interval.

    random_state: int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by numpy.random.

    n_processes: int, optional (default=1)
        The number of processes used to run the simulation.

    query_T: int, optional (default=None)
        For infinite-horizon problems, the number of stages over which to
        evaluate the policy.
    """
    from solver_penalty import SDDPPenalty, SDDPPenalty_infinity
    MSP = self.MSP
    query_T = query_T if query_T else MSP.T
    if not MSP._flag_infinity:
        self.solver = SDDPPenalty(MSP)
        stage = query_T
    else:
        self.solver = SDDPPenalty_infinity(MSP)
        self.solver.forward_T = query_T
        stage = MSP.T - 1
    self.n_simulations = n_simulations
    random_state = check_random_state(random_state)
    query = [] if query is None else list(query)
    query_dual = [] if query_dual is None else list(query_dual)
    if n_simulations == -1:
        self.n_sample_paths, self.sample_path_idx = (
            MSP._enumerate_sample_paths(query_T - 1))
    else:
        self.n_sample_paths = n_simulations
        self.sample_path_idx = None
    self.pv = numpy.zeros(self.n_sample_paths)
    stage_cost = solution = solution_dual = None
    if query_stage_cost:
        stage_cost = [
            multiprocessing.RawArray("d", [0] * stage)
            for _ in range(self.n_sample_paths)
        ]
    if query is not None:
        solution = {
            item: [
                multiprocessing.RawArray("d", [0] * stage)
                for _ in range(self.n_sample_paths)
            ]
            for item in query
        }
    if query_dual is not None:
        solution_dual = {
            item: [
                multiprocessing.RawArray("d", [0] * stage)
                for _ in range(self.n_sample_paths)
            ]
            for item in query_dual
        }
    n_processes = min(self.n_sample_paths, n_processes)
    jobs = allocate_jobs(self.n_sample_paths, n_processes)
    pv = multiprocessing.Array("d", [0] * self.n_sample_paths)
    procs = [None] * n_processes
    for p in range(n_processes):
        procs[p] = multiprocessing.Process(
            target=self.run_single,
            args=(pv, jobs[p], random_state, query, query_dual,
                  query_stage_cost, stage_cost, solution, solution_dual)
        )
        procs[p].start()
    for proc in procs:
        proc.join()
    if self.n_simulations != 1:
        self.pv = [item for item in pv]
    else:
        self.pv = pv[0]
    if self.n_simulations == -1:
        self.epv = numpy.dot(
            pv,
            [
                MSP._compute_weight_sample_path(self.sample_path_idx[j])
                for j in range(self.n_sample_paths)
            ],
        )
    if self.n_simulations not in [-1, 1]:
        self.CI = compute_CI(self.pv, percentile)
    self._compute_gap()
    if query is not None:
        self.solution = {
            k: pandas.DataFrame(numpy.array(v))
            for k, v in solution.items()
        }
    if query_dual is not None:
        self.solution_dual = {
            k: pandas.DataFrame(numpy.array(v))
            for k, v in solution_dual.items()
        }
    if query_stage_cost:
        self.stage_cost = pandas.DataFrame(numpy.array(stage_cost))
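
# Illustrative usage sketch (hypothetical names; `solver_penalty`, SDDPPenalty
# and the evaluator class wrapping this method are assumed to live in the same
# project, and the constraint name is made up):
#
#     result = EvaluationPenalty(MSP)   # enclosing class name assumed
#     result.run(n_simulations=500, query_T=24, query_dual=['balance'],
#                n_processes=4, random_state=888)
#     result.CI
#
# Note that for infinite-horizon models the per-path buffers are sized by
# stage = MSP.T - 1 rather than by query_T.
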
def solve(
        self,
        n_processes=1,
        n_steps=1,
        max_iterations=10000,
        max_stable_iterations=10000,
        max_time=1000000.0,
        tol=0.001,
        freq_evaluations=None,
        percentile=95,
        tol_diff=float("-inf"),
        random_state=None,
        freq_evaluations_true=None,
        freq_comparisons=None,
        n_simulations=3000,
        n_simulations_true=3000,
        freq_clean=None,
        logFile=1,
        logToConsole=1):
    """Solve the approximation model.

    Parameters
    ----------
    n_processes: int, optional (default=1)
        The number of processes to run in parallel. Run serial SDDP if 1.
        If n_steps is 1, n_processes is coerced to be 1.

    n_steps: int, optional (default=1)
        The number of forward/backward steps to run in each cut iteration.
        It is coerced to be 1 if n_processes is 1.

    max_iterations: int, optional (default=10000)
        The maximum number of iterations to run SDDP.

    max_stable_iterations: int, optional (default=10000)
        The maximum number of consecutive iterations with the same
        deterministic bound.

    max_time: float, optional (default=1000000.0)
        The maximum amount of time (in seconds) to run SDDP.

    tol: float, optional (default=1e-3)
        Tolerance for convergence of the bounds.

    freq_evaluations: int, optional (default=None)
        The frequency of evaluating the gap on the approximation model. It is
        ignored unless the risk measure is risk neutral.

    percentile: float, optional (default=95)
        The percentile used to compute the confidence interval.

    tol_diff: float, optional (default=-inf)
        The stabilization threshold.

    freq_comparisons: int, optional (default=None)
        The frequency of comparing policies.

    n_simulations: int, optional (default=3000)
        The number of simulations to run when evaluating a policy on the
        approximation model.

    freq_clean: int/list, optional (default=None)
        The frequency of removing redundant cuts. If int, perform cleaning at
        the same frequency for all stages. If list, perform cleaning at a
        different frequency for each stage; must be of length T-1 (the last
        stage does not have any cuts).

    random_state: int, RandomState instance or None, optional (default=None)
        Used in evaluations and comparisons. (In the forward step, there is an
        internal random_state which is not supposed to be changed.)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by numpy.random.

    logFile: binary, optional (default=1)
        Whether to log to a file.

    logToConsole: binary, optional (default=1)
        Whether to log to the console.
    """
    MSP = self.MSP
    if freq_clean is not None:
        if isinstance(freq_clean, (numbers.Integral, numpy.integer)):
            freq_clean = [freq_clean] * (MSP.T - 1)
        if isinstance(freq_clean, (abc.Sequence, numpy.ndarray)):
            if len(freq_clean) != MSP.T - 1:
                raise ValueError("freq_clean list must be of length T-1!")
        else:
            raise TypeError(
                "freq_clean must be int/list instead of {}!".format(
                    type(freq_clean)))
    if not MSP._flag_update:
        MSP._update()
    stable_iterations = 0
    total_time = 0
    a = time.time()
    gap = 1.0
    right_end_of_CI = float("inf")
    db_past = MSP.bound
    self.percentile = percentile
    if MSP.measure != "risk neutral":
        freq_evaluations = None
    # distinguish pv_sim from pv
    pv_sim_past = None
    if n_processes != 1:
        self.n_steps = n_steps
        self.n_processes = min(n_steps, n_processes)
        self.jobs = allocate_jobs(self.n_steps, self.n_processes)
    logger_sddp = LoggerSDDP(
        logFile=logFile,
        logToConsole=logToConsole,
        n_processes=self.n_processes,
        percentile=self.percentile,
    )
    logger_sddp.header()
    if freq_evaluations is not None or freq_comparisons is not None:
        logger_evaluation = LoggerEvaluation(
            n_simulations=n_simulations,
            percentile=percentile,
            logFile=logFile,
            logToConsole=logToConsole,
        )
        logger_evaluation.header()
    if freq_comparisons is not None:
        logger_comparison = LoggerComparison(
            n_simulations=n_simulations,
            percentile=percentile,
            logFile=logFile,
            logToConsole=logToConsole,
        )
        logger_comparison.header()
    try:
        while (
            self.iteration < max_iterations
            and total_time < max_time
            and stable_iterations < max_stable_iterations
            and tol < gap
            and tol_diff < right_end_of_CI
        ):
            start = time.time()
            self._compute_cut_type()
            if self.n_processes == 1:
                pv = self._SDDP_single()
            else:
                pv = self._SDDP_multiprocessesing()
            m = (
                MSP.models[0]
                if MSP.n_Markov_states == 1
                else MSP.models[0][0]
            )
            m.optimize()
            if m.status not in [2, 11]:
                m.write_infeasible_model(
                    "backward_" + str(m._model.modelName) + ".lp")
            db = m.objBound
            self.db.append(db)
            MSP.db = db
            if self.n_processes != 1:
                CI = compute_CI(pv, percentile)
            self.pv.append(pv)
            if self.iteration >= 1:
                if db_past == db:
                    stable_iterations += 1
                else:
                    stable_iterations = 0
            self.iteration += 1
            db_past = db
            end = time.time()
            elapsed_time = end - start
            total_time += elapsed_time
            if self.n_processes == 1:
                logger_sddp.text(
                    iteration=self.iteration,
                    db=db,
                    pv=pv[0],
                    time=elapsed_time,
                )
            else:
                logger_sddp.text(
                    iteration=self.iteration,
                    db=db,
                    CI=CI,
                    time=elapsed_time,
                )
            if (
                freq_evaluations is not None
                and self.iteration % freq_evaluations == 0
                or freq_comparisons is not None
                and self.iteration % freq_comparisons == 0
            ):
                start = time.time()
                evaluation = Evaluation(MSP)
                evaluation.run(
                    n_simulations=n_simulations,
                    random_state=random_state,
                    query_stage_cost=False,
                    percentile=percentile,
                )
                pandas.DataFrame({'pv': evaluation.pv}).to_csv("evaluation.csv")
                elapsed_time = time.time() - start
                gap = evaluation.gap
                if n_simulations == -1:
                    logger_evaluation.text(
                        iteration=self.iteration,
                        db=db,
                        pv=evaluation.epv,
                        gap=gap,
                        time=elapsed_time,
                    )
                elif n_simulations == 1:
                    logger_evaluation.text(
                        iteration=self.iteration,
                        db=db,
                        pv=evaluation.pv,
                        gap=gap,
                        time=elapsed_time,
                    )
                else:
                    logger_evaluation.text(
                        iteration=self.iteration,
                        db=db,
                        CI=evaluation.CI,
                        gap=gap,
                        time=elapsed_time,
                    )
            if (
                freq_comparisons is not None
                and self.iteration % freq_comparisons == 0
            ):
                start = time.time()
                pv_sim = evaluation.pv
                if self.iteration / freq_comparisons >= 2:
                    diff = MSP.sense * (
                        numpy.array(pv_sim_past) - numpy.array(pv_sim))
                    if n_simulations == -1:
                        diff_mean = numpy.mean(diff)
                        right_end_of_CI = diff_mean
                    else:
                        diff_CI = compute_CI(diff, self.percentile)
                        right_end_of_CI = diff_CI[1]
                    elapsed_time = time.time() - start
                    if n_simulations == -1:
                        logger_comparison.text(
                            iteration=self.iteration,
                            ref_iteration=self.iteration - freq_comparisons,
                            diff=diff_mean,
                            time=elapsed_time,
                        )
                    else:
                        logger_comparison.text(
                            iteration=self.iteration,
                            ref_iteration=self.iteration - freq_comparisons,
                            diff_CI=diff_CI,
                            time=elapsed_time,
                        )
                pv_sim_past = pv_sim
            if freq_clean is not None:
                clean_stages = [
                    t
                    for t in range(1, MSP.T - 1)
                    if self.iteration % freq_clean[t] == 0
                ]
                if len(clean_stages) != 0:
                    self._remove_redundant_cut(clean_stages)
            # self._clean()
    except KeyboardInterrupt:
        stop_reason = "interruption by the user"
    # SDDP iteration stops
    MSP.db = self.db[-1]
    if self.iteration >= max_iterations:
        stop_reason = "iteration:{} has reached".format(max_iterations)
    if total_time >= max_time:
        stop_reason = "time:{} has reached".format(max_time)
    if stable_iterations >= max_stable_iterations:
        stop_reason = "stable iteration:{} has reached".format(
            max_stable_iterations)
    if gap <= tol:
        stop_reason = "convergence tolerance:{} has reached".format(tol)
    if right_end_of_CI <= tol_diff:
        stop_reason = "stabilization threshold:{} has reached".format(tol_diff)
    b = time.time()
    logger_sddp.footer(reason=stop_reason)
    if freq_evaluations is not None or freq_comparisons is not None:
        logger_evaluation.footer()
    if freq_comparisons is not None:
        logger_comparison.footer()
    self.total_time = total_time
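
# Illustrative usage sketch (hypothetical names; assumes an SDDP-style solver
# wrapping an already-built multi-stage model `MSP`):
#
#     solver = SDDP(MSP)
#     solver.solve(
#         max_iterations=100,
#         freq_evaluations=10,   # evaluate the policy every 10 iterations
#         n_simulations=1000,    # Monte Carlo sample size for each evaluation
#         tol=1e-3,              # stop once the evaluated gap falls below tol
#         random_state=888,
#         logToConsole=1,
#     )
#     plot_bounds(solver.db, solver.pv)   # see the plotting helper above
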