Esempio n. 1
0
def __get_mean_rate(stats, coeff, win):
    """Compare each series' latest sample with the mean of its preceding window.

    Every value of ``stats`` is one row of samples; all rows are stacked into
    a 2-D array whose width is the length of the first row.  For each row the
    last sample is compared with the rounded-up mean of the (up to) ``win``
    samples immediately before it, both as a percentage change and as an
    absolute difference.  Entries flagged by ``generalized_esd`` are replaced
    with NaN before ``__get_pd_mean`` summarises each vector.

    Args:
        stats: Non-empty mapping whose values are equally long numeric
            sequences.
        coeff: Not used by this function; kept so existing callers keep
            working.
        win: Number of samples before the last one to average over.

    Returns:
        Tuple ``(m_r, e_r, m_d, e_d)``: the two values returned by
        ``__get_pd_mean`` for the percentage ratios (rounded to the nearest
        integer) followed by the two values for the absolute differences
        (rounded up).

    Raises:
        ValueError: If ``stats`` is empty.
    """
    if not stats:
        # Previously this fell through to an UnboundLocalError on the row
        # length; fail with a message that names the real problem instead.
        raise ValueError("stats must contain at least one series")

    rows = list(stats.values())
    x = np.empty((len(rows), len(rows[0])))
    for i, numbers in enumerate(rows):
        x[i, :] = numbers

    last = x[:, -1]
    window = x[:, -(win + 1):-1]
    means = np.ceil(np.mean(window, axis=1))

    # Differences are taken against the true (rounded-up) means, i.e. before
    # the zero means are patched below.
    diffs = last - means

    # Avoid dividing by zero: use a dummy denominator, then blank those rows.
    zeros = means == 0
    means[zeros] = 1.
    ratios = 100 * (last / means - 1)
    ratios[zeros] = np.nan

    # NOTE(review): the meaning of the literal 10 depends on generalized_esd
    # (presumably the maximum number of outliers to look for) -- confirm.
    outliers = generalized_esd(ratios, 10)
    ratios[outliers] = np.nan
    m_r, e_r = __get_pd_mean(ratios)
    m_r = np.round(m_r)
    e_r = np.round(e_r)

    outliers = generalized_esd(diffs, 10)
    diffs[outliers] = np.nan
    m_d, e_d = __get_pd_mean(diffs)
    m_d = np.ceil(m_d)
    e_d = np.ceil(e_d)

    return (m_r, e_r, m_d, e_d)
Esempio n. 2
0
def visualize_step1(stats, name):
    """Plot a metric against the size of K and save the figure as a PNG.

    Args:
        stats: Mapping whose keys are plotted on the x axis ("Size of K") and
            whose values are plotted on the y axis.
        name: Metric name; used for the window title, plot title, y-axis
            label, legend entry and output file name.

    Side effects:
        Writes ``<name with spaces replaced by underscores>.png`` to the
        current working directory.  The figure is left open.
    """
    heading = "{} With Respect to Size of K".format(name)

    fig, _ = plt.subplots(num=None,
                          figsize=(16, 12),
                          dpi=80,
                          facecolor='w',
                          edgecolor='k')
    # FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and later
    # removed; the manager method is available in old and new releases alike.
    fig.canvas.manager.set_window_title(heading)

    plt.title(heading)
    plt.xlabel('Size of K')
    plt.ylabel(name)
    plt.plot(list(stats), list(stats.values()))
    plt.legend([name], loc='upper left')

    saveName = "{}.png".format(name.replace(" ", "_"))
    plt.savefig(saveName)
Esempio n. 3
0
def informationGain(arrayItems, index, stats):
    """Return ``(index, gain)`` for splitting ``arrayItems`` on one attribute.

    Items are grouped by ``item.value[index]`` and, within each group, counted
    per ``item.label``.  The gain is ``entropy(stats.values())`` minus the sum
    of each group's entropy weighted by the fraction of items in that group.

    Args:
        arrayItems: Iterable of objects exposing ``value`` (indexable) and
            ``label``.
        index: Position within ``item.value`` to partition on.
        stats: Mapping whose values are handed to ``entropy`` to obtain the
            entropy before the split.
    """
    # attribute value -> {label -> number of items}
    labelCountsByValue = {}
    for item in arrayItems:
        labelCounts = labelCountsByValue.setdefault(item.value[index], {})
        labelCounts[item.label] = labelCounts.get(item.label, 0) + 1

    rootEntropy = entropy(stats.values())

    groups = list(labelCountsByValue.values())
    groupEntropies = [entropy(counts.values()) for counts in groups]
    groupSizes = [sum(counts.values()) for counts in groups]
    totalCount = sum(groupSizes)

    weightedChildEntropy = sum(
        float(size) / totalCount * childEntropy
        for size, childEntropy in zip(groupSizes, groupEntropies))
    return (index, rootEntropy - weightedChildEntropy)
Esempio n. 4
0
def add_info(data, variant, run, entry_list):
    """Append one summary record per episode to ``entry_list``.

    Args:
        data: Iterable of episode records; each record is a mapping with a
            ``'stats'`` mapping (exactly 4 values) and an ``'episode_info'``
            mapping (exactly 9 values).
        variant: Stored unchanged under ``"variant"`` in every record.
        run: Converted with ``int()`` and stored under ``"run"``.
        entry_list: List that receives the records; mutated in place.

    Raises:
        ValueError: If either mapping does not have the expected number of
            values.
    """
    for i, ep in enumerate(data):
        stats = ep['stats']
        episode_info = ep['episode_info']

        # Both unpackings are positional, so they depend on the insertion
        # order of the mappings: the 3rd and 4th stats values are taken as
        # success and spl, and the first two episode_info values as the
        # episode id and scene id.  The remaining values are not needed here.
        _, _, success, spl = stats.values()
        ep_id, scene_id, _, _, _, _, _, _, _ = episode_info.values()

        entry_list.append({
            "index": i,
            "ep": ep_id,
            "scene": scene_id,
            "spl": spl,
            "success": success,
            "variant": variant,
            "run": int(run)
        })
0
    def produce_stats(self, output_path):
        """Plot a cumulative histogram for every entry of ``self.stats``.

        One subplot is drawn per entry of ``self.stats``; an entry that is a
        dict contributes its values.  All plots are saved together as
        ``<output_path>/cdfs.pdf``.

        Args:
            output_path: Directory the PDF is written into.
        """
        num_plots = len(self.stats)
        cols = 3
        rows = int(math.ceil(num_plots / float(cols)))

        # The grid is created with `cols` subplot rows and `rows` subplot
        # columns; the indexing below follows the same convention.
        # squeeze=False keeps `axes` two-dimensional even when `rows` is 1
        # (three or fewer stats); otherwise a 1-D array comes back and both
        # len(axes[0]) and axes[col, row] fail.
        fig, axes = plt.subplots(cols, rows, sharex=False, sharey=True,
                                 squeeze=False)

        print('N-%d, R-%d, C-%d, 1-%d 2-%d' % (num_plots, rows, cols,
                                               len(axes), len(axes[0])))

        for i, (name, stats) in enumerate(self.stats.items()):
            ax = axes[i // rows, i % rows]

            if isinstance(stats, dict):
                stats = list(stats.values())

            min_value = np.floor(min(stats))
            max_value = np.ceil(max(stats))
            value_range = (min_value - 0.1, max_value + 0.1)

            num_bins = 1000
            # NOTE(review): `normed` was removed in Matplotlib 3.1 in favour
            # of `density` (available since 2.1); switch once the minimum
            # supported Matplotlib version is confirmed.
            ax.hist(stats,
                    range=value_range,
                    bins=num_bins,
                    normed=1,
                    histtype='step',
                    cumulative=True)

            ax.set_title('CDF: %s' % name.replace('_', ' '))
            ax.set_ylim(0, 1)
            ax.set_xlim(min_value, max_value)

        plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
        plt.savefig('%s/cdfs.pdf' % (output_path, ), bbox_inches='tight')
Esempio n. 6
0
import pathlib
import scipy.stats
import numpy as np
from collections import defaultdict

# Estimate the order-0 (per-character) entropy of a text file and print the
# size the text would occupy at that entropy.
filename = 'enwik8'
path = pathlib.Path(filename)
# Decode explicitly so the character counts do not depend on the platform's
# locale encoding.
# NOTE(review): assumes the file is UTF-8 (true for the standard enwik8
# corpus) -- confirm if a different file is substituted.
text = path.read_text(encoding='utf-8')
chars = sorted(set(text))

# Occurrence count of every distinct character.
stats = defaultdict(int)
for c in text:
    stats[c] += 1

# Normalise the counts into a probability distribution.
freq = np.array(sorted(stats.values()))
freq = freq / freq.sum()

# Entropy in bits per character times the number of characters gives total
# bits; dividing by 8 and by 1024 twice expresses that in MiB.
total = scipy.stats.entropy(freq, base=2) * len(text)
print(total / 1024 / 1024 / 8)
Esempio n. 7
0
        dataset = np.random.uniform(min, max, size)
        m = np.mean(dataset)
        s = np.std(dataset)

        if round(mean, delta) == round(m, delta) and round(std, delta) == round(s, delta):
            return dataset

# Positional argument tuples for generate_dataset, one per enabled step.
# NOTE(review): presumably (min, max, mean, std) -- confirm against the
# signature of generate_dataset.
stats = {
    'print': (5, 7, 5.95, 0.81),
    # 'press': (1, 1.5, 1.26, 0.16),
    'cut': (1, 2, 1.55, 0.28),
    # 'sew': (2, 3, 2.51, 0.27),
    # 'package': (1, 2, 1.49, 0.25)
}

# One 50-element dataset per enabled entry above.
datasets = [
    generate_dataset(*params, size=50, delta=1)
    for params in stats.values()
]

# Show each dataset followed by its mean and standard deviation, both
# rounded to two decimals.
for dataset in datasets:
    print(dataset)
    print(round(np.mean(dataset), 2))
    print(round(np.std(dataset), 2))

def main():
    """Draw five 200-sample uniform datasets and print two standard deviations.

    Only the ``print`` and ``press`` samples are read in the lines shown
    here; the other three are drawn but not used.
    NOTE(review): the body may continue beyond this excerpt -- confirm.
    """
    # Each call draws 200 values uniformly from the given [low, high) range.
    print_dataset   = np.random.uniform(5, 7, 200)
    press_dataset   = np.random.uniform(1, 1.5, 200)
    cut_dataset     = np.random.uniform(1, 2, 200)
    sew_dataset     = np.random.uniform(2, 3, 200)
    package_dataset = np.random.uniform(1, 2, 200)

    print np.std(print_dataset)
    print np.std(press_dataset)
Esempio n. 8
0
        if round(mean, delta) == round(m, delta) and round(
                std, delta) == round(s, delta):
            return dataset


# Positional argument tuples for generate_dataset, one per enabled step.
# NOTE(review): presumably (min, max, mean, std) -- confirm against the
# signature of generate_dataset.
stats = {
    'print': (5, 7, 5.95, 0.81),
    # 'press': (1, 1.5, 1.26, 0.16),
    'cut': (1, 2, 1.55, 0.28),
    # 'sew': (2, 3, 2.51, 0.27),
    # 'package': (1, 2, 1.49, 0.25)
}

# One 50-element dataset per enabled entry above.
datasets = [
    generate_dataset(*params, size=50, delta=1)
    for params in stats.values()
]

# Show each dataset followed by its mean and standard deviation, both
# rounded to two decimals.
for dataset in datasets:
    print(dataset)
    print(round(np.mean(dataset), 2))
    print(round(np.std(dataset), 2))


def main():
    """Draw 200 uniform samples for each of five named ranges.

    NOTE(review): nothing is printed or returned in the lines shown here;
    the body may continue beyond this excerpt -- confirm.
    """
    # Each call draws 200 values uniformly from the given [low, high) range.
    print_dataset = np.random.uniform(5, 7, 200)
    press_dataset = np.random.uniform(1, 1.5, 200)
    cut_dataset = np.random.uniform(1, 2, 200)
    sew_dataset = np.random.uniform(2, 3, 200)
    package_dataset = np.random.uniform(1, 2, 200)
Esempio n. 9
0
 def get_text_length_stat(self, method, label_stat_dict):
     """Return the mean of the values stored for ``method``.

     ``label_stat_dict`` maps a method to a mapping of numeric values.
     Returns ``None`` when ``label_stat_dict`` is empty/falsy or has no entry
     for ``method``.
     """
     if not label_stat_dict or method not in label_stat_dict:
         return None
     per_method = label_stat_dict[method]
     return np.mean(list(per_method.values()))