def get_stats(name, **kwargs):
    '''
    Return the mean and variance of a named distribution.

    Note: for stat-getting the only choice is to use *scipy*.
    We need to be careful to ensure the parametrization matches
    with that used in our get_generator fn, which runs on numpy
    Generator methods. Thus, kwargs here follow *numpy* namings.

    Parameters
    ----------
    name : str
        One of "lognormal", "normal" or "pareto".
    **kwargs
        Distribution parameters, by distribution:
        - "lognormal": ``mean`` and ``sigma`` (passed to scipy as
          ``scale=exp(mean)`` and ``s=sigma``).
        - "normal": ``loc`` and ``scale``.
        - "pareto": ``shape`` (passed to scipy as ``b``) and ``scale``.

    Returns
    -------
    dict
        ``{"mean": ..., "var": ...}`` exactly as returned by the scipy
        ``stats`` call with ``moments="mv"``.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported distributions.
    KeyError
        If a parameter required by the chosen distribution is missing.
    '''

    _sp = "mv"  # moment specification for scipy.stats computations.
    _supported = ("lognormal", "normal", "pareto")

    if name == "lognormal":
        mean, var = lognorm.stats(s=kwargs["sigma"],
                                  scale=np.exp(kwargs["mean"]),
                                  moments=_sp)
    elif name == "normal":
        mean, var = norm.stats(loc=kwargs["loc"],
                               scale=kwargs["scale"],
                               moments=_sp)
    elif name == "pareto":
        # NOTE(review): numpy's Generator.pareto samples the Lomax
        # (Pareto II) form, which starts at zero, whereas scipy's pareto
        # starts at `scale`. get_generator is not visible here; confirm
        # it applies the matching shift/scale.
        mean, var = pareto.stats(b=kwargs["shape"],
                                 scale=kwargs["scale"],
                                 moments=_sp)
    else:
        # Keep the original message as a prefix, but say what was
        # received and what would have been accepted.
        raise ValueError(
            "Please provide a proper distribution name. "
            "Got {!r}; expected one of {}.".format(name, ", ".join(_supported))
        )

    return {"mean": mean, "var": var}
def __init__(self, shape_parameter):
    """Store the shape parameter and, when it is usable, derived quantities.

    ``shape_parameter`` is always stored. If it is ``None`` nothing else is
    set. Otherwise ``bounds`` is set to ``[0.999, inf]``; and only when the
    shape is strictly positive are the frozen ``pareto`` object (``parent``),
    its first four moments and the x-grid used for the pdf populated.
    """
    self.shape_parameter = shape_parameter

    # Nothing further can be derived without a shape.
    if self.shape_parameter is None:
        return

    self.bounds = np.array([0.999, np.inf])

    # Non-positive shapes keep only the bounds; no moments are computed.
    if not self.shape_parameter > 0:
        return

    moments = pareto.stats(self.shape_parameter, moments='mvsk')
    self.parent = pareto(self.shape_parameter)
    self.mean, self.variance, self.skewness, self.kurtosis = moments

    # Grid runs from just below 1 up to 20 plus the shape value.
    upper = 20.0 + shape_parameter
    self.x_range_for_pdf = np.linspace(0.999, upper, RECURRENCE_PDF_SAMPLES)
import matplotlib matplotlib.use('Agg') import scipy.stats import matplotlib.pyplot as plt import seaborn as sns import numpy as np from scipy.stats import uniform, pareto, norm #mean, var, skew, kurt = b = 1.0 dists = [] dists += [[("pareto"), pareto.stats(2, moments='mvsk'), pareto]] dists += [[("uniform"), uniform.stats(moments='mvsk'), uniform]] dists += [[("normal"), norm.stats(moments='mvsk'), norm]] dists += [[("normal_sc"), norm.stats(moments='mvsk'), norm]] print(dists) size = 20000 for dist in dists: print dist[0] if (dist[0] == "pareto"): sample = dist[2].rvs(b, size=size) sample = sample[(sample < 8)] if (dist[0] == "normal"): sample = dist[2].rvs(size=size) if (dist[0] == "uniform"):
def __init__(self, seed, speed, nr_samples, interval):
    """Set up the "dancing bar" animation of a running Pareto mean estimate.

    Draws ``nr_samples`` Pareto samples (shape 3), computes the running
    mean and a 1.96-standard-error band (the usual 95% normal interval)
    on growing prefixes of the sample, and builds a three-panel figure:
    the estimate history, a single bar with its error bar, and the pdf.

    Args:
        seed: Seed passed to ``np.random.seed``.
        speed: Forwarded as ``interval`` to ``TimedAnimation.__init__``.
        nr_samples: Number of samples to draw; also the x-limit of the
            history panel.
        interval: Step between successive prefix sizes
            (``range(2, nr_samples, interval)``). The range must yield at
            least two sizes, because index 1 is used to initialise the
            bar and its error bar.
    """
    np.random.seed(seed)

    b = 3  # Pareto shape parameter
    # np.random.pareto samples start at 0; adding 1 moves them onto the
    # support [1, inf) of scipy's pareto(b), used below for the true mean.
    self.samples = np.random.pareto(b, nr_samples) + 1
    # Only the mean is needed, so request just that moment.
    self.gt_mean = pareto.stats(b, moments='m')

    # Running estimate and its band on prefixes samples[:i].
    self.x_values = range(2, nr_samples, interval)
    prefixes = [self.samples[:i] for i in self.x_values]
    self.y_values = np.array([np.mean(s) for s in prefixes])
    self.confidence = np.array(
        [(np.std(s) / math.sqrt(len(s))) * 1.96 for s in prefixes])

    fig = plt.figure(figsize=(10, 10))
    self.ax1 = fig.add_subplot(2, 2, (1, 2))
    self.ax2 = fig.add_subplot(2, 2, 3)
    self.ax3 = fig.add_subplot(2, 2, 4)

    lower_band = self.y_values - self.confidence
    upper_band = self.y_values + self.confidence

    # history plot
    self.ax1.set_title('dancing bar history')
    self.ax1.set_xlabel('iteration')
    self.ax1.set_ylabel('estimated mean')
    self.ax1.set_xlim(0, nr_samples)
    self.ax1.set_ylim(np.min(lower_band), np.max(upper_band))
    self.ax1_primitives = []
    # `closed` must be passed by keyword: it is keyword-only in current
    # Matplotlib releases.
    band = Polygon(self._history_polygon_xy(1), closed=True,
                   alpha=0.4, color='blue')
    self.ax1_primitives.append(band)
    self.ax1.add_patch(band)
    history_line = Line2D([], [], color='blue')
    self.ax1_primitives.append(history_line)
    self.ax1.add_line(history_line)
    self.ax1.axhline(y=self.gt_mean, color='black', linestyle='--',
                     linewidth=0.5)

    # bar plot
    self.ax2.set_title('dancing bar')
    self.ax2.set_ylabel('avg sales')
    self.ax2.set_xlim(-0.5, 1)
    self.ax2.set_xticks([0.25])
    self.ax2.set_xticklabels(['department XYZ'])
    self.ax2.set_ylim(0, np.max(upper_band))
    self.ax2_primitives = []
    bar = Rectangle((0, 0), 0.5, self.y_values[1], alpha=0.4, color='blue')
    self.ax2_primitives.append(bar)
    self.ax2.add_patch(bar)
    self.ax2.axhline(y=self.gt_mean, color='black', linestyle='--',
                     linewidth=0.5)
    error_bar = Line2D(
        [0.25, 0.25],
        [self.y_values[1] - self.confidence[1],
         self.y_values[1] + self.confidence[1]],
        color='black')
    self.ax2_primitives.append(error_bar)
    self.ax2.add_line(error_bar)

    # pdf plot
    self.ax3.set_title('pareto pdf')
    x = np.linspace(pareto.ppf(0.01, b), pareto.ppf(0.99, b), 100)
    # Plot the density itself. The +1 shift applies to the samples only;
    # adding 1 to the pdf values would no longer be a density.
    self.ax3.plot(x, pareto.pdf(x, b), 'blue', lw=1, alpha=0.6)

    animation.TimedAnimation.__init__(self, fig, interval=speed,
                                      blit=True, repeat=False)
def simulate():
    """Run the flow simulation and report flow completion times (FCT).

    Flows are pulled one at a time from the queue-like object returned by
    ``generate_flows(num_flows)``. Before each arrival, the currently
    active flows and the new flow's inter-arrival time are handed to
    ``update_flows``, which returns the still-active and the finished
    flows. Once every flow has arrived, ``update_flows`` is called in
    fixed steps of ``1.0 / lamb`` until ``num_flows`` flows have finished.

    Reads the module-level settings ``shape``, ``scale``, ``mean_npkts``,
    ``lamb``, ``num_flows``, ``notify_step`` and ``debug_flag``.

    The print calls use a single pre-formatted argument so that the output
    is the same under Python 2 and the code is also valid Python 3.

    Returns:
        list of ``(packet_length, average FCT, max FCT)`` tuples, sorted
        by packet length.
    """
    print("Pareto distribution with shape: %s mean: %s" % (shape, mean_npkts))
    print("Sanity check on pareto mean packets: %s"
          % (pareto.stats(shape, scale=scale, moments='m'),))
    print("Poisson process with lambda: %s" % (lamb,))

    # Only the flow queue is used; the other two returned values are not.
    _, all_flows, _ = generate_flows(num_flows)

    print("Starting simulation.")
    curr_flows = []
    all_done_flows = []
    curr_time = 0

    arrived = 0
    notified = 0
    while not all_flows.empty():
        new_flow = all_flows.get()

        arrived += 1
        # Floor division makes the original integer-division intent
        # explicit: announce once per `notify_step` arrivals.
        # Assumes notify_step is an integer -- TODO confirm.
        if arrived // notify_step > notified:
            notified += 1
            print("%d flows have arrived..." % arrived)

        inter_arrival = new_flow.inter_arrival
        init_active_count = len(curr_flows)

        # update flows
        curr_flows, done_flows = update_flows(curr_flows, inter_arrival,
                                              curr_time)

        # update loop state
        curr_time += inter_arrival
        all_done_flows += done_flows
        curr_flows.append(new_flow)  # newest arriving flow

        if debug_flag:
            print(update_log(curr_time, init_active_count, len(done_flows),
                             len(all_done_flows)))
            print(arrival_log(new_flow))
            print("")

    # all flows have arrived, so migrate to updating once every
    # E[arrival] = 1/lamb.
    if debug_flag:
        print("All flows have arrived. Updating remaining flows...")
    update_duration = 1.0 / lamb
    while len(all_done_flows) != num_flows:
        init_active_count = len(curr_flows)
        curr_flows, done_flows = update_flows(curr_flows, update_duration,
                                              curr_time)

        curr_time += update_duration
        all_done_flows += done_flows

        if debug_flag:
            print(update_log(curr_time, init_active_count, len(done_flows),
                             len(all_done_flows)))
            print("")

    print("Finished simulation. FCT results:")
    if debug_flag:
        for done_flow in all_done_flows:
            print("(packets: %d, fct: %.8f, bottleneck: %d flows)"
                  % (done_flow.packet_length, done_flow.fct,
                     done_flow.flow_bottleneck))

    fcts_aggregate = packet_info(all_done_flows)
    avg_fcts = average_fct(fcts_aggregate)
    max_fcts = max_fct(fcts_aggregate)

    ret_list = []
    # sorted() replaces keys() + in-place sort, which fails on dict views.
    for packet_length in sorted(avg_fcts):
        row = (packet_length, avg_fcts[packet_length],
               max_fcts[packet_length])
        print("Packet length: %d, average FCT: %.8f, max FCT: %.8f" % row)
        ret_list.append(row)

    return ret_list  # tuples (packet_length, average fct, max fct)
def simulate():
    """Run the flow simulation and report flow completion times (FCT).

    Flows are pulled one at a time from the queue-like object returned by
    ``generate_flows(num_flows)``. Before each arrival, the currently
    active flows and the new flow's inter-arrival time are handed to
    ``update_flows``, which returns the still-active and the finished
    flows. Once every flow has arrived, ``update_flows`` is called in
    fixed steps of ``1.0 / lamb`` until ``num_flows`` flows have finished.

    Reads the module-level settings ``shape``, ``scale``, ``mean_npkts``,
    ``lamb``, ``num_flows``, ``notify_step`` and ``debug_flag``.

    The print calls use a single pre-formatted argument so that the output
    is the same under Python 2 and the code is also valid Python 3.

    Returns:
        list of ``(packet_length, average FCT, max FCT)`` tuples, sorted
        by packet length.
    """
    print("Pareto distribution with shape: %s mean: %s" % (shape, mean_npkts))
    print("Sanity check on pareto mean packets: %s"
          % (pareto.stats(shape, scale=scale, moments='m'),))
    print("Poisson process with lambda: %s" % (lamb,))

    # Only the flow queue is used; the other two returned values are not.
    _, all_flows, _ = generate_flows(num_flows)

    print("Starting simulation.")
    curr_flows = []
    all_done_flows = []
    curr_time = 0

    arrived = 0
    notified = 0
    while not all_flows.empty():
        new_flow = all_flows.get()

        arrived += 1
        # Floor division makes the original integer-division intent
        # explicit: announce once per `notify_step` arrivals.
        # Assumes notify_step is an integer -- TODO confirm.
        if arrived // notify_step > notified:
            notified += 1
            print("%d flows have arrived..." % arrived)

        inter_arrival = new_flow.inter_arrival
        init_active_count = len(curr_flows)

        # update flows
        curr_flows, done_flows = update_flows(curr_flows, inter_arrival,
                                              curr_time)

        # update loop state
        curr_time += inter_arrival
        all_done_flows += done_flows
        curr_flows.append(new_flow)  # newest arriving flow

        if debug_flag:
            print(update_log(curr_time, init_active_count, len(done_flows),
                             len(all_done_flows)))
            print(arrival_log(new_flow))
            print("")

    # all flows have arrived, so migrate to updating once every
    # E[arrival] = 1/lamb.
    if debug_flag:
        print("All flows have arrived. Updating remaining flows...")
    update_duration = 1.0 / lamb
    while len(all_done_flows) != num_flows:
        init_active_count = len(curr_flows)
        curr_flows, done_flows = update_flows(curr_flows, update_duration,
                                              curr_time)

        curr_time += update_duration
        all_done_flows += done_flows

        if debug_flag:
            print(update_log(curr_time, init_active_count, len(done_flows),
                             len(all_done_flows)))
            print("")

    print("Finished simulation. FCT results:")
    if debug_flag:
        for done_flow in all_done_flows:
            print("(packets: %d, fct: %.8f, bottleneck: %d flows)"
                  % (done_flow.packet_length, done_flow.fct,
                     done_flow.flow_bottleneck))

    fcts_aggregate = packet_info(all_done_flows)
    avg_fcts = average_fct(fcts_aggregate)
    max_fcts = max_fct(fcts_aggregate)

    ret_list = []
    # sorted() replaces keys() + in-place sort, which fails on dict views.
    for packet_length in sorted(avg_fcts):
        row = (packet_length, avg_fcts[packet_length],
               max_fcts[packet_length])
        print("Packet length: %d, average FCT: %.8f, max FCT: %.8f" % row)
        ret_list.append(row)

    return ret_list  # tuples (packet_length, average fct, max fct)
# Example: moments, pdf, frozen distribution and cdf/ppf round trip for
# scipy.stats.pareto.
from scipy.stats import pareto
import matplotlib.pyplot as plt
import numpy as np  # needed for linspace/allclose below

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
b = 2.62
mean, var, skew, kurt = pareto.stats(b, moments='mvsk')

# Display the probability density function (``pdf``):
x = np.linspace(pareto.ppf(0.01, b),
                pareto.ppf(0.99, b), 100)
ax.plot(x, pareto.pdf(x, b),
        'r-', lw=5, alpha=0.6, label='pareto pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = pareto(b)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``. The result used to be computed
# and discarded; assert it so a failed round trip is actually noticed.
probs = [0.001, 0.5, 0.999]
vals = pareto.ppf(probs, b)
assert np.allclose(probs, pareto.cdf(vals, b)), "cdf(ppf(q)) != q"

# Generate random numbers: