def __get_mean_rate(stats, coeff, win):
    """Summarise how the newest sample of every series deviates from its recent mean.

    Each value of ``stats`` becomes one row of a 2-D array.  For every row the
    last column is compared against the (ceiled) mean of the ``win`` columns
    immediately preceding it, both as a percentage change and as an absolute
    difference.  Entries flagged by ``generalized_esd`` are replaced with NaN
    before ``__get_pd_mean`` reduces each vector to a pair of numbers.

    Args:
        stats: mapping whose values are numeric sequences; the row width is
            taken from the first value.
        coeff: accepted but not used by this function.
        win: number of columns, just before the last one, that form the
            reference window.

    Returns:
        Tuple ``(m_r, e_r, m_d, e_d)``: the two results of ``__get_pd_mean``
        for the percentage changes (rounded with ``np.round``) followed by the
        two results for the absolute differences (rounded up with ``np.ceil``).
    """
    nan = float('NaN')

    # Stack all series into a (rows, columns) table.
    n_rows = len(stats)
    for series in stats.values():
        n_cols = len(series)
        break
    table = np.empty((n_rows, n_cols))
    for row, series in enumerate(stats.values()):
        table[row, :] = series

    latest = table[:, -1]
    window = table[:, -(win + 1):-1]
    baseline = np.ceil(np.mean(window, axis=1))

    # Absolute differences are taken before the zero baselines are patched.
    deltas = latest - baseline

    # A zero baseline would divide by zero: use 1 for the division and blank
    # out the resulting percentage afterwards.
    no_baseline = baseline == 0
    baseline[no_baseline] = 1.
    percent = 100 * (latest / baseline - 1)
    percent[no_baseline] = nan

    # NOTE(review): generalized_esd is assumed to return an index/mask of
    # outliers usable for NumPy assignment -- confirm against its definition.
    percent[generalized_esd(percent, 10)] = nan
    mean_pct, err_pct = __get_pd_mean(percent)
    mean_pct = np.round(mean_pct)
    err_pct = np.round(err_pct)

    deltas[generalized_esd(deltas, 10)] = nan
    mean_delta, err_delta = __get_pd_mean(deltas)
    mean_delta = np.ceil(mean_delta)
    err_delta = np.ceil(err_delta)

    return (mean_pct, err_pct, mean_delta, err_delta)
def visualize_step1(stats, name):
    """Plot the values of ``stats`` against its keys and save the figure as PNG.

    The figure is titled "<name> With Respect to Size of K", the x axis is
    labelled 'Size of K' and the y axis ``name``.  The image is written to
    "<name>.png" in the current working directory, with spaces in ``name``
    replaced by underscores.  The figure is not closed by this function.

    Args:
        stats: mapping; its keys are used as x values and its values as y
            values.
        name: label of the plotted quantity; also used for the title, the
            legend entry and the output file name.
    """
    title = "{} With Respect to Size of K".format(name)

    fig, _ = plt.subplots(num=None, figsize=(16, 12), dpi=80,
                          facecolor='w', edgecolor='k')

    # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
    # later removed; the window title is set through the canvas manager, which
    # may be absent when no figure manager is attached.
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None:
        manager.set_window_title(title)

    plt.title(title)
    plt.xlabel('Size of K')
    plt.ylabel(name)

    plt.plot(list(stats), list(stats.values()))
    plt.legend([name], loc='upper left')

    plt.savefig("{}.png".format(name.replace(" ", "_")))
def informationGain(arrayItems, index, stats):
    """Compute the entropy reduction obtained by splitting on one attribute.

    Items are grouped by ``item.value[index]``; inside each group the
    occurrences of every ``item.label`` are counted.  The result is the
    entropy of ``stats.values()`` minus the size-weighted sum of the group
    entropies.

    Args:
        arrayItems: iterable of objects exposing ``value`` (indexable) and
            ``label``.
        index: position inside ``item.value`` of the attribute to split on.
        stats: mapping whose values are handed to ``entropy`` to obtain the
            entropy before the split (presumably label counts of the whole
            set -- verify against the caller).

    Returns:
        Tuple ``(index, gain)``.
    """
    # attribute value -> {label -> number of items}
    labelCountsByValue = {}
    for item in arrayItems:
        labelCounts = labelCountsByValue.setdefault(item.value[index], {})
        labelCounts[item.label] = labelCounts.get(item.label, 0) + 1

    rootEntropy = entropy(stats.values())

    partitions = labelCountsByValue.values()
    childEntropies = [entropy(counts.values()) for counts in partitions]
    childSizes = [sum(counts.values()) for counts in partitions]
    totalCount = sum(childSizes)

    # Each partition contributes its entropy weighted by its share of items.
    weightedChildEntropy = sum(
        float(size) / totalCount * childEntropy
        for size, childEntropy in zip(childSizes, childEntropies)
    )
    return (index, rootEntropy - weightedChildEntropy)
def add_info(data, variant, run, entry_list):
    """Append one summary record per episode in ``data`` to ``entry_list``.

    Args:
        data: iterable of episode records.  Each record is indexed with
            ``'stats'`` and ``'episode_info'``; the former must be a mapping
            with exactly four values and the latter one with exactly nine,
            otherwise the unpacking below raises ``ValueError``.
        variant: stored unchanged under the ``"variant"`` key of every record.
        run: run identifier; stored as ``int(run)``.
        entry_list: list that receives the new records (mutated in place).

    Each appended dict has the keys ``index`` (position of the episode in
    ``data``), ``ep``, ``scene``, ``spl``, ``success``, ``variant`` and
    ``run``.
    """
    for i, ep in enumerate(data):
        stats = ep['stats']
        episode_info = ep['episode_info']

        # Fields are picked by POSITION, so the result depends on the value
        # order of both mappings: success and spl are the 3rd and 4th values
        # of 'stats'; the episode id and scene id are the first two values of
        # 'episode_info'.  The remaining values are not used.
        _, _, success, spl = stats.values()
        ep_id, scene_id, _, _, _, _, _, _, _ = episode_info.values()

        entry_list.append({
            "index": i,
            "ep": ep_id,
            "scene": scene_id,
            "spl": spl,
            "success": success,
            "variant": variant,
            "run": int(run)
        })
def produce_stats(self, output_path):
    """Draw one cumulative histogram (CDF) per entry of ``self.stats`` into a PDF.

    Every entry of ``self.stats`` gets its own subplot showing a normalised,
    cumulative step histogram of its values.  All subplots share the y axis
    and are written together to ``<output_path>/cdfs.pdf``.  A one-line
    summary of the grid dimensions is printed to stdout.

    Args:
        output_path: directory that receives ``cdfs.pdf``.
    """
    num_plots = len(self.stats)
    cols = 3
    rows = int(math.ceil(num_plots / 3.0))

    # The grid is created with ``cols`` (3) subplot rows and ``rows`` subplot
    # columns; the indexing below follows that same layout.
    # squeeze=False keeps ``axes`` two-dimensional even when ``rows`` is 1;
    # without it Matplotlib returns a 1-D array and both ``len(axes[0])`` and
    # ``axes[col, row]`` fail whenever there are three or fewer stats.
    _fig, axes = plt.subplots(cols, rows, sharex=False, sharey=True,
                              squeeze=False)
    # Single pre-formatted argument: prints identically on Python 2 and 3.
    print('N-%d, R-%d, C-%d, 1-%d 2-%d' %
          (num_plots, rows, cols, len(axes), len(axes[0])))

    for i, (name, stats) in enumerate(self.stats.items()):
        ax = axes[i // rows, i % rows]
        if isinstance(stats, dict):
            # Materialise the values so they can be passed to hist() as data.
            stats = list(stats.values())

        min_value = np.floor(min(stats))
        max_value = np.ceil(max(stats))
        # Small margin so samples on the integer bounds fall inside the range.
        value_range = (min_value - 0.1, max_value + 0.1)
        num_bins = 1000

        # ``density`` replaces the ``normed`` keyword that newer Matplotlib
        # releases no longer accept; ``cumulative`` is passed as a real bool.
        ax.hist(stats, range=value_range, bins=num_bins, density=True,
                histtype='step', cumulative=True)
        ax.set_title('CDF: %s' % name.replace('_', ' '))
        ax.set_ylim(0, 1)
        ax.set_xlim(min_value, max_value)

    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    plt.savefig('%s/cdfs.pdf' % (output_path, ), bbox_inches='tight')
import pathlib
from collections import defaultdict

import numpy as np
import scipy.stats

# Estimate the size of the 'enwik8' text under an order-0 (per-character)
# entropy model: the Shannon entropy of the character distribution, in bits,
# multiplied by the number of characters.

filename = 'enwik8'
path = pathlib.Path(filename)
# Decode explicitly as UTF-8 so the character counts do not depend on the
# platform's default encoding.
text = path.read_text(encoding='utf-8')

# Sorted list of the distinct characters (not used by the computation below).
chars = sorted(set(text))

# Number of occurrences of every character.
stats = defaultdict(int)
for c in text:
    stats[c] += 1

# Relative frequencies, in ascending order of count.
freq = np.array(sorted(stats.values()))
freq = freq / freq.sum()

# Entropy in bits per character times the number of characters = total bits.
total = scipy.stats.entropy(freq, base=2) * len(text)

# bits -> bytes (/8) -> MiB (/1024/1024)
print(total / 1024 / 1024 / 8)
dataset = np.random.uniform(min, max, size) m = np.mean(dataset) s = np.std(dataset) if round(mean, delta) == round(m, delta) and round(std, delta) == round(s, delta): return dataset stats = { 'print': (5, 7, 5.95, 0.81), # 'press': (1, 1.5, 1.26, 0.16), 'cut': (1, 2, 1.55, 0.28), # 'sew': (2, 3, 2.51, 0.27), # 'package': (1, 2, 1.49, 0.25) } datasets = [generate_dataset(*params, size=50, delta=1) for params in stats.values()] for dataset in datasets: print dataset print round(np.mean(dataset), 2) print round(np.std(dataset), 2) def main(): print_dataset = np.random.uniform(5, 7, 200) press_dataset = np.random.uniform(1, 1.5, 200) cut_dataset = np.random.uniform(1, 2, 200) sew_dataset = np.random.uniform(2, 3, 200) package_dataset = np.random.uniform(1, 2, 200) print np.std(print_dataset) print np.std(press_dataset)
if round(mean, delta) == round(m, delta) and round( std, delta) == round(s, delta): return dataset stats = { 'print': (5, 7, 5.95, 0.81), # 'press': (1, 1.5, 1.26, 0.16), 'cut': (1, 2, 1.55, 0.28), # 'sew': (2, 3, 2.51, 0.27), # 'package': (1, 2, 1.49, 0.25) } datasets = [ generate_dataset(*params, size=50, delta=1) for params in stats.values() ] for dataset in datasets: print dataset print round(np.mean(dataset), 2) print round(np.std(dataset), 2) def main(): print_dataset = np.random.uniform(5, 7, 200) press_dataset = np.random.uniform(1, 1.5, 200) cut_dataset = np.random.uniform(1, 2, 200) sew_dataset = np.random.uniform(2, 3, 200) package_dataset = np.random.uniform(1, 2, 200)
def get_text_length_stat(self, method, label_stat_dict):
    """Return the mean of the values recorded for ``method``.

    Args:
        method: key to look up in ``label_stat_dict``.
        label_stat_dict: mapping from method to a mapping of numeric values;
            may be empty or ``None``.

    Returns:
        ``np.mean`` over ``label_stat_dict[method].values()``, or ``None``
        when ``label_stat_dict`` is falsy or has no entry for ``method``.
    """
    # Guard clause: nothing to average without an entry for this method.
    if not label_stat_dict or method not in label_stat_dict:
        return None

    per_label = label_stat_dict[method]
    values = list(per_label.values())
    return np.mean(values)