Beispiel #1
0
    def test_results_withlabels(self):
        """Check that ``boxplot_stats`` stores labels only when they are given.

        With ``labels`` supplied, every result dict must carry the matching
        entry under ``"label"``; without it, no result may contain that key.
        """
        labels = ["Test1", 2, "ardvark", 4]
        labelled = cbook.boxplot_stats(self.data, labels=labels)
        for expected, stats in zip(labels, labelled):
            assert stats["label"] == expected

        unlabelled = cbook.boxplot_stats(self.data)
        for stats in unlabelled:
            assert "label" not in stats
Beispiel #2
0
    def test_results_withlabels(self):
        """Check that ``boxplot_stats`` stores labels only when they are given.

        With ``labels`` supplied, every result dict must carry the matching
        entry under ``'label'``; without it, no result may contain that key.
        """
        labels = ['Test1', 2, 'ardvark', 4]
        labelled = cbook.boxplot_stats(self.data, labels=labels)
        for expected, stats in zip(labels, labelled):
            assert stats['label'] == expected

        unlabelled = cbook.boxplot_stats(self.data)
        for stats in unlabelled:
            assert 'label' not in stats
Beispiel #3
0
    def test_boxplot_stats_autorange_false(self):
        """Compare whiskers and fliers with ``autorange`` switched off and on.

        The sample is 140 zeros flanked by a single -25 and a single 25.
        """
        sample = np.hstack([-25, np.zeros(shape=140), 25])
        without_autorange = cbook.boxplot_stats(sample, autorange=False)
        with_autorange = cbook.boxplot_stats(sample, autorange=True)

        # autorange off: whiskers stay at 0 and both extremes are fliers.
        fixed = without_autorange[0]
        assert fixed['whislo'] == 0
        assert fixed['whishi'] == 0
        assert_array_almost_equal(fixed['fliers'], [-25, 25])

        # autorange on: whiskers reach the extremes and no fliers remain.
        stretched = with_autorange[0]
        assert stretched['whislo'] == -25
        assert stretched['whishi'] == 25
        assert_array_almost_equal(stretched['fliers'], [])
Beispiel #4
0
    def setup(self):
        """Seed the RNG, draw the sample data and record reference values.

        The reference dictionaries hold the numbers the individual tests
        compare ``boxplot_stats`` results against.
        """
        # The seed must be set before the data is drawn.
        np.random.seed(937)
        self.nrows, self.ncols = 37, 4
        self.data = np.random.lognormal(
            mean=1.5, sigma=1.75, size=(self.nrows, self.ncols)
        )
        self.known_keys = sorted([
            "mean", "med", "q1", "q3", "iqr",
            "cilo", "cihi", "whislo", "whishi",
            "fliers", "label",
        ])
        self.std_results = cbook.boxplot_stats(self.data)

        # Expected statistics for the default (non-bootstrapped) call.
        self.known_nonbootstrapped_res = dict(
            cihi=6.8161283264444847,
            cilo=-0.1489815330368689,
            iqr=13.492709959447094,
            mean=13.00447442387868,
            med=3.3335733967038079,
            fliers=np.array(
                [92.55467075, 87.03819018, 42.23204914, 39.29390996]
            ),
            q1=1.3597529879465153,
            q3=14.85246294739361,
            whishi=27.899688243699629,
            whislo=0.042143774965502923,
        )

        # Expected confidence interval with bootstrapping.
        self.known_bootstrapped_ci = dict(
            cihi=8.939577523357828,
            cilo=1.8692703958676578,
        )

        # Expected whiskers and fliers for whis=3.
        self.known_whis3_res = dict(
            whishi=42.232049135969874,
            whislo=0.042143774965502923,
            fliers=np.array([92.55467075, 87.03819018]),
        )

        # Expected whiskers for percentile-based whis.
        self.known_res_percentiles = dict(
            whislo=0.1933685896907924,
            whishi=42.232049135969874,
        )

        # Expected whiskers when they span the full data range.
        self.known_res_range = dict(
            whislo=0.042143774965502923,
            whishi=92.554670752188699,
        )
Beispiel #5
0
 def test_results_bootstrapped(self):
     """Compare the bootstrapped confidence interval of the first result
     with the stored reference values."""
     first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
     for key, expected in self.known_bootstrapped_ci.items():
         assert_approx_equal(first[key], expected)
Beispiel #6
0
    def test_results_whiskers_percentiles(self):
        """Compare percentile-based whiskers (``whis=[5, 95]``) of the first
        result with the stored reference values."""
        first = cbook.boxplot_stats(self.data, whis=[5, 95])[0]
        for key, expected in self.known_res_percentiles.items():
            # Fliers are arrays; every other entry is a scalar.
            if key == "fliers":
                check = assert_array_almost_equal
            else:
                check = assert_approx_equal
            check(first[key], expected)
    def setup(self):
        """Seed the RNG, draw the sample data and record reference values.

        The reference dictionaries hold the numbers the individual tests
        compare ``boxplot_stats`` results against.
        """
        # The seed must be set before the data is drawn.
        np.random.seed(937)
        self.nrows, self.ncols = 37, 4
        self.data = np.random.lognormal(
            mean=1.5, sigma=1.75, size=(self.nrows, self.ncols)
        )
        self.known_keys = sorted(
            ['mean', 'med', 'q1', 'q3', 'iqr', 'cilo', 'cihi',
             'whislo', 'whishi', 'fliers', 'label']
        )
        self.std_results = cbook.boxplot_stats(self.data)

        # Expected statistics for the default (non-bootstrapped) call.
        self.known_nonbootstrapped_res = dict(
            cihi=6.8161283264444847,
            cilo=-0.1489815330368689,
            iqr=13.492709959447094,
            mean=13.00447442387868,
            med=3.3335733967038079,
            fliers=np.array(
                [92.55467075, 87.03819018, 42.23204914, 39.29390996]
            ),
            q1=1.3597529879465153,
            q3=14.85246294739361,
            whishi=27.899688243699629,
            whislo=0.042143774965502923,
            label=1,
        )

        # Expected confidence interval with bootstrapping.
        self.known_bootstrapped_ci = dict(
            cihi=8.939577523357828,
            cilo=1.8692703958676578,
        )

        # Expected whiskers and fliers for whis=3.
        self.known_whis3_res = dict(
            whishi=42.232049135969874,
            whislo=0.042143774965502923,
            fliers=np.array([92.55467075, 87.03819018]),
        )

        # Expected label of the first result when labels are supplied.
        self.known_res_with_labels = dict(label='Test1')

        # Expected whiskers for percentile-based whis.
        self.known_res_percentiles = dict(
            whislo=0.1933685896907924,
            whishi=42.232049135969874,
        )

        # Expected whiskers when they span the full data range.
        self.known_res_range = dict(
            whislo=0.042143774965502923,
            whishi=92.554670752188699,
        )
 def compute_boxplot(self, series):
     """
     Return boxplot statistics for the non-null values of a pandas Series.

     The result is the first dict from ``boxplot_stats`` with two changes:
     a ``'count'`` entry holding the number of non-null values, and the
     ``'fliers'`` entry replaced by its values joined with ``"|"``.
     An empty dict is returned when no non-null values remain.
     """
     from matplotlib.cbook import boxplot_stats
     present = series[series.notnull()]
     n_values = len(present.values)
     if n_values == 0:
         return {}
     stats = boxplot_stats(list(present.values))[0]
     stats['count'] = n_values
     stats['fliers'] = "|".join(str(flier) for flier in stats['fliers'])
     return stats
Beispiel #9
0
    def test_results_whiskers_range(self):
        """Compare full-range whiskers of the first result with the stored
        reference values.

        ``whis=[0, 100]`` (0th and 100th percentile) is used instead of the
        deprecated ``whis='range'`` spelling; both place the whiskers at the
        data minimum and maximum.
        """
        first = cbook.boxplot_stats(self.data, whis=[0, 100])[0]
        for key, expected in self.known_res_range.items():
            # Fliers are arrays; every other entry is a scalar.
            if key == 'fliers':
                check = assert_array_almost_equal
            else:
                check = assert_approx_equal
            check(first[key], expected)
Beispiel #10
0
def median_confidence_intervals(data):
    """Compute medians and the distances to their confidence-interval bounds.

    :param data: the data passed on to ``cbook.boxplot_stats``
    :return: a tuple ``(medians, lower, upper)`` where ``lower`` is
        ``median - cilo`` and ``upper`` is ``cihi - median`` for each result;
        ``([0], [0], [0])`` when ``data`` is ``None`` or empty
    """
    # An explicit length check is used because ``not data`` raises
    # ValueError for numpy arrays with more than one element.
    if data is None or len(data) == 0:
        return [0], [0], [0]
    bxpstats = cbook.boxplot_stats(data)
    medians = [stat['med'] for stat in bxpstats]
    lows = np.array([stat['cilo'] for stat in bxpstats])
    highs = np.array([stat['cihi'] for stat in bxpstats])
    return medians, medians - lows, highs - medians
Beispiel #11
0
def plt1(rpt, key='REL', log=True):
    """Draw a boxplot of benchmark values from a supervised learning report.

    :param rpt: the report, or a path ending in ``'pgz'`` that is loaded
        with ``lpz``
    :param key: only benchmark rows whose ``key`` column equals this value
        are plotted
    :param log: when true, the y axis uses a log scale
    :return: the report and the ``plt`` module
    """
    sim_cols = ['fam', 'frq', 'mdl', 'nxp']
    net_cols = ['gtp', 'xtp', 'nwk']
    mtd_cols = ['mtd', 'par']

    # load the report from file if necessary.
    if isinstance(rpt, str) and rpt.endswith('pgz'):
        rpt = lpz(rpt)

    # the benchmark records
    bmk = rpt.bmk

    # title built from the first record's simulation columns
    first_sim = bmk.iloc[0][sim_cols]
    ttl = ', '.join('{}={}'.format(k, v) for k, v in first_sim.items())

    # method grouping
    grp = net_cols + mtd_cols

    # rows for the requested key, without the 'nul' method
    err = bmk[bmk.key == key].loc[:, grp + ['val']]
    err = err[err.mtd != 'nul']

    # one sample column and one label per group
    samples, names = [], []
    for group_key, group in err.groupby(grp):
        # NOTE(review): this tests whether any value in the group key
        # equals 'nnt' -- confirm that is the intended condition.
        if 'nnt' in group_key:
            template = "{nwk:>10}.{mtd}"
        else:
            template = "{par:>10}.{mtd}"
        names.append(template.format(**group.iloc[0]))
        samples.append(np.array(group.val))
    stats = cbook.boxplot_stats(np.array(samples).T, labels=names)

    # plot
    plt.close('all')
    plt.title(ttl)
    ax = plt.axes()
    if log:
        ax.set_yscale('log')
    ax.bxp(stats)

    # draw a dashed line at y=1 across the x range
    x_lo, x_hi = ax.get_xbound()
    ax.plot(np.linspace(x_lo, x_hi, 10), np.ones(10),
            linestyle='--', color='red', linewidth=.5)
    for tick in ax.get_xticklabels():
        tick.set_rotation(90)
    return rpt, plt
Beispiel #12
0
def median_confidence_intervals(data: list):
    """ Compute the medians and the distances to their confidence-interval bounds.

    :param data: the data whose statistics are to be calculated
    :return: the medians, ``median - cilo`` and ``cihi - median`` for each
        result; ``([0], [0], [0])`` when ``data`` is ``None`` or empty
    """
    # An explicit length check is used because ``not data`` raises
    # ValueError for numpy arrays with more than one element.
    if data is None or len(data) == 0:
        return [0], [0], [0]
    bxpstats = cbook.boxplot_stats(data)
    medians = [stat['med'] for stat in bxpstats]
    lows = np.array([stat['cilo'] for stat in bxpstats])
    highs = np.array([stat['cihi'] for stat in bxpstats])
    return medians, medians - lows, highs - medians
def plot_boxplots(vectors, axs, s, plot_colors, pos_color, neg_color):
    """Draw paired "early" and "late" boxplots for row ``s`` and link them.

    ``vectors[s, 0, :]`` is drawn on ``axs[s, 0]`` (early) and
    ``vectors[s, 1, :]`` on ``axs[s, 1]`` (late). Points that are fliers in
    either boxplot are dropped; the remaining points are scattered with a
    small random x jitter and each early/late pair is joined by a dotted
    line coloured by the direction of change. Two solid lines join the top
    and bottom corners of the two axes, and three text boxes with summary
    numbers are written on the early axes.

    :param vectors: array indexed as ``vectors[s, 0 or 1, :]``
    :param axs: array of axes indexed as ``axs[s, 0 or 1]``
    :param s: row index into ``vectors`` and ``axs``
    :param plot_colors: one colour per data point
    :param pos_color: line colour when the late value is larger
    :param neg_color: line colour when the late value is smaller
    """
    y_early = vectors[s, 0, :]
    x_early = np.random.normal(1, 0.02, len(y_early))
    y_late = vectors[s, 1, :]
    x_late = np.random.normal(1, 0.02, len(y_late))
    axs_early = axs[s, 0]
    axs_late = axs[s, 1]

    # Summary numbers are computed on the full data, before flier removal.
    adaptive_changes = int(np.count_nonzero(y_early > y_late))
    adaptive_change_ratio = np.round(
        float(adaptive_changes) / len(x_early), 3)
    mean_diff = np.round(np.mean(y_late) - np.mean(y_early), 3)
    median_diff = np.round(np.median(y_late) - np.median(y_early), 3)

    mean_props = {"marker": "s", "markerfacecolor": "black",
                  "markeredgecolor": "black"}
    axs_early.boxplot(y_early, showmeans=True, meanprops=dict(mean_props),
                      showfliers=False)
    early_fliers = boxplot_stats(y_early)[0]['fliers']
    axs_late.boxplot(y_late, showmeans=True, meanprops=dict(mean_props),
                     showfliers=False)
    late_fliers = boxplot_stats(y_late)[0]['fliers']

    # Drop a pair when either of its two values is a flier.
    flier_idxs = [i for i in range(len(y_late))
                  if y_late[i] in late_fliers or y_early[i] in early_fliers]
    keep_mask = np.ones(len(y_late), dtype=bool)
    keep_mask[flier_idxs] = False

    x_early = x_early[keep_mask]
    y_early = y_early[keep_mask]
    x_late = x_late[keep_mask]
    y_late = y_late[keep_mask]

    plot_colors = np.array(plot_colors)[keep_mask]
    axs_early.scatter(x_early, y_early, marker='.', c=plot_colors)
    axs_late.scatter(x_late, y_late, marker='.', c=plot_colors)

    xy_early = np.column_stack((x_early, y_early))
    xy_late = np.column_stack((x_late, y_late))

    for early_point, late_point in zip(xy_early, xy_late):
        if late_point[1] < early_point[1]:
            line_color = neg_color
        elif late_point[1] == early_point[1]:
            line_color = 'black'
        else:
            line_color = pos_color
        con = ConnectionPatch(xyA=late_point, xyB=early_point,
                              coordsA='data', coordsB='data',
                              axesA=axs_late, axesB=axs_early, linewidth=0.5,
                              linestyle='dotted', color=line_color)
        axs_late.add_artist(con)

    # Each corner must use the limits of the axes it is drawn in; the
    # connection patches below interpret the points in those axes' data
    # coordinates.
    early_xlim = axs_early.get_xlim()
    early_ylim = axs_early.get_ylim()
    late_xlim = axs_late.get_xlim()
    late_ylim = axs_late.get_ylim()

    # (early corner, late corner) for the top edge and the bottom edge.
    corners = (
        ((early_xlim[0], early_ylim[1]), (late_xlim[1], late_ylim[1])),
        ((early_xlim[0], early_ylim[0]), (late_xlim[1], late_ylim[0])),
    )
    for early_corner, late_corner in corners:
        edge = ConnectionPatch(xyA=np.array(late_corner),
                               xyB=np.array(early_corner),
                               coordsA='data', coordsB='data',
                               axesA=axs_late, axesB=axs_early,
                               linewidth=0.7)
        axs_late.add_artist(edge)

    annotations = (
        (0.2, 0.9, "Adaptive Change \nRatio: " + str(adaptive_change_ratio)),
        (0.2, 0.5, "Mean Difference: \n" + str(mean_diff)),
        (0.21, 0.1, "Median Difference: \n" + str(median_diff)),
    )
    for x_pos, y_pos, message in annotations:
        axs_early.text(x_pos, y_pos, message, ha='center', va='center',
                       color='k', fontsize='medium', fontweight='semibold',
                       transform=axs_early.transAxes,
                       bbox=dict(facecolor='none', edgecolor='k', pad=3))
Beispiel #14
0
 def test_results_whiskers_percentiles(self):
     """Compare percentile-based whiskers (``whis=[5, 95]``) of the first
     result with the stored reference values."""
     first = cbook.boxplot_stats(self.data, whis=[5, 95])[0]
     expected = self.known_res_percentiles
     for key in expected:
         assert_array_almost_equal(first[key], expected[key])
Beispiel #15
0
 def test_results_whiskers_float(self):
     """Compare the first result for ``whis=3`` with the stored reference
     values."""
     first = cbook.boxplot_stats(self.data, whis=3)[0]
     expected = self.known_whis3_res
     for key in expected:
         assert_array_almost_equal(first[key], expected[key])
Beispiel #16
0
A good general reference on boxplots and their history can be found
here: http://vita.had.co.nz/papers/boxplots.pdf
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook

# Reproducible sample data: 37 rows, 4 columns.
np.random.seed(19680801)
data = np.random.lognormal(size=(37, 4), mean=1.5, sigma=1.75)
labels = ['A', 'B', 'C', 'D']

# Boxplot statistics, one dict per column.
stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000)

###############################################################################
# After we've computed the stats, we can go through and change anything.
# Just to prove it, I'll set the median of each set to the median of all
# the data, and double the means

overall_median = np.median(data)
for n, column_stats in enumerate(stats):
    column_stats['med'] = overall_median
    column_stats['mean'] *= 2

print(list(stats[0]))

fs = 10  # fontsize

###############################################################################
Beispiel #17
0
print('upper whisk group one: ', upper_whisk_group_one)
print('lower whisk group one: ', lower_whisk_group_one)
'''
Q 2(c): Boxplot of x
'''
sns.boxplot(col, orient='vertical', color='yellow')
plt.show()
'''
Q 2(d): boxplots for each group and overall boxplot
'''
# One copy of all rows tagged 'all data', plus one frame per group value.
overall_data = file.copy()
overall_data['group'] = 'all data'
group_zero = file.loc[file['group'] == 0, :]
group_one = file.loc[file['group'] == 1, :]

# Stack the three frames so a single call draws all three boxes.
combined = pd.concat([overall_data, group_zero, group_one], axis=0)
sns.boxplot(x=combined['group'], y=combined['x'])
plt.show()

# Print the fliers reported by boxplot_stats for each frame.
for caption, frame in (
        ('outliers for overall data: ', overall_data),
        ('outliers for group one: ', group_one),
        ('outliers for group zero: ', group_zero)):
    stats = boxplot_stats(frame['x'])
    print(caption, stats[0]['fliers'])
Beispiel #18
0
def boxplt(dataset):
    """Prepare data for a box plot.

    Drops the ``'class'`` column from ``dataset`` and returns the
    ``cbook.boxplot_stats`` result for the remaining values.

    :param dataset: a pandas DataFrame containing a ``'class'`` column
    :return: the list of statistics dicts from ``boxplot_stats``
    """
    # ``axis`` is passed by keyword and ``.values`` replaces ``as_matrix()``:
    # the positional axis argument and ``as_matrix`` were removed from pandas.
    features = dataset.drop('class', axis=1)
    return cbook.boxplot_stats(features.values)
def remove_outliers(x, of):
    """Return the rows of ``x`` whose ``of`` value lies within the whiskers.

    :param x: a pandas DataFrame
    :param of: name of the column the whiskers are computed from
    :return: the rows with ``whislo <= x[of] <= whishi``
    """
    stat = boxplot_stats(x[of])[0]
    low, high = stat["whislo"], stat["whishi"]
    # The whisker ends are themselves data points that are not fliers, so
    # the bounds are inclusive; strict comparisons would drop them.
    return x.loc[(x[of] >= low) & (x[of] <= high)]
 def test_label_error(self):
     """Call ``boxplot_stats`` on ``self.data`` with only two labels."""
     # NOTE(review): presumably this is expected to raise -- confirm where
     # that expectation is declared.
     results = cbook.boxplot_stats(self.data, labels=[1, 2])
Beispiel #21
0
 def test_bad_dims(self):
     """Call ``boxplot_stats`` on a three-dimensional random array."""
     # NOTE(review): presumably this is expected to raise -- confirm where
     # that expectation is declared.
     results = cbook.boxplot_stats(np.random.normal(size=(34, 34, 34)))
Beispiel #22
0
 def test_label_error(self):
     """Call ``boxplot_stats`` on ``self.data`` with only two labels."""
     # NOTE(review): presumably this is expected to raise -- confirm where
     # that expectation is declared.
     results = cbook.boxplot_stats(self.data, labels=[1, 2])
Beispiel #23
0
 def test_results_bootstrapped(self):
     """Compare the bootstrapped confidence interval of the first result
     with the stored reference values."""
     first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
     for key, expected in self.known_bootstrapped_ci.items():
         assert_approx_equal(first[key], expected)
Beispiel #24
0
def main():
    """Plot depth-error statistics from the files written by depth_map.py.

    Parses the command line, calls ``make_fig_1`` and then loads every
    ``graph_depth*.npy`` file plus ``bins.npy`` from the graph directory.
    For each bin the errors of all loaded graphs are pooled and summarised
    in a boxplot (``<sequence_name>2.png``) and in line plots of the mean,
    median and maximum (``3.png`` to ``5.png``). The medians are also saved
    to ``depth_info/medians<sequence_name>.npy``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('sequence_name', help='dataset sequence name')
    parser.add_argument(
        '--diff_list',
        help='string - name of diff_list file '
             '(needed for Graph 1, output of depth_map.py)')
    parser.add_argument(
        '--graph_depths',
        help='directory - graph files '
             '(needed for Graphs 2-5, output of depth_map.py)')
    parser.add_argument(
        '--x_axis_spacing',
        help='integer - separation among ticks in the x axis '
             '(for readability))')
    args = parser.parse_args()

    make_fig_1(args)

    # depth vs errors (amount, mean)
    # load all graph_depth*.npy files
    graph_depth_dir = args.graph_depths or "./depth_info/"

    # Convert before comparing: the option arrives as a string, and a value
    # below 1 would make the modulo used for the tick labels fail.
    x_axis_spacing = 5
    if args.x_axis_spacing:
        requested_spacing = int(float(args.x_axis_spacing))
        if requested_spacing >= 1:
            x_axis_spacing = requested_spacing

    # Single-argument print calls keep the output identical under
    # Python 2 and Python 3.
    print("Using graph depth dir:  {}".format(graph_depth_dir))
    npys = glob.glob(graph_depth_dir + 'graph_depth*.npy')

    if not npys:
        print("No data to collect..")
        return

    bins = np.load(graph_depth_dir + 'bins.npy')
    print(bins)

    fig, ax = plt.subplots(1, 1)
    bxpstats = []

    # Only non-empty graphs are kept, so this list can be shorter than npys.
    graphs = []
    for npy in npys:
        graph = np.load(npy)[1]
        if len(graph) > 0:
            graphs.append(graph)

    means = np.zeros(len(bins))
    medians = np.zeros(len(bins))
    maxs = np.zeros(len(bins))
    for i in range(len(bins)):
        # Pool the errors of bin i across the loaded graphs. Iterating
        # over graphs (not npys) avoids indexing past its end.
        graph_depth = []
        for graph in graphs:
            if i < len(graph) and len(graph[i]) > 0:
                graph_depth.extend(graph[i])

        if len(graph_depth) > 0:
            means[i] = np.mean(graph_depth)
            medians[i] = np.median(graph_depth)
            maxs[i] = np.max(graph_depth)
            bxpstats.extend(cbook.boxplot_stats(np.ravel(graph_depth)))
        else:
            # Keep one box per bin so positions stay aligned with bins.
            bxpstats.extend(cbook.boxplot_stats(np.ravel([0])))

        print("ITEM :  {} {}".format(i, len(graph_depth)))

    ax.bxp(bxpstats, showfliers=False)
    # A real list is needed here; a lazy map object is not a label sequence.
    bins_str = [str(int(bins[x])) if x % x_axis_spacing == 0 else ''
                for x in range(len(bins))]

    # bins-bins[0]+1 since it can start at any number
    plt.xticks(bins - bins[0] + 1, bins_str)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel("Error (m)")
    plt.savefig(args.sequence_name + "2.png")

    # mean
    plt.figure(3)
    plt.plot(bins, means)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel("Error - mean (m)")
    plt.savefig(args.sequence_name + "3.png")

    # median
    plt.figure(4)
    plt.plot(bins, medians)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel("Error - median (m)")
    plt.savefig(args.sequence_name + "4.png")

    np.save("depth_info/medians" + args.sequence_name + ".npy", medians)

    # maximum
    plt.figure(5)
    plt.plot(bins, maxs)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel(utf8("Error - máximo (m)"))
    plt.savefig(args.sequence_name + "5.png")

    print("")
    print("Saved " + args.sequence_name + "{1-5}.png files")
Beispiel #25
0
def season_boxplot(reload_data=False):
    """Draw per-season boxplots for every pollutant and dump the statistics.

    For each pollutant a two-row figure is saved: raw values on top and
    log-transformed values below, with one coloured box per station and
    season. The statistics, without their ``"fliers"`` entries, are written
    to two JSON files per pollutant.

    :param reload_data: when true, ``get_data_dict`` is called for each
        pollutant before the data is loaded
    """
    with open("log_sigma3_trim_bundarys_list.json", "r") as f:
        bundarys_list = json.load(f)

    for plt_i, pollutant in enumerate(pollutants):
        if reload_data:
            get_data_dict(plt_i, bundarys_list[pollutant], method="clip")

        data_dict = load_data_dict()

        # Total number of values over all stations, seasons and years.
        c = sum(
            len(data_dict[i][j][k])
            for i in stations
            for j in range(len(seasons))
            for k in data_dict[i][j]
        )
        print(
            "Size of {} data: {}MB\nCount of data: {}".format(
                pollutant, sys.getsizeof(data_dict) / 1024 ** 2, c
            )
        )

        fig, axs = plt.subplots(2, 1, figsize=[10, 8])
        box_width = 0.2
        offest = 0.05
        positions = np.linspace(0, 3, 4)
        box_data = []
        box_log_data = []
        line_props = ("boxprops", "whiskerprops", "capprops", "medianprops")
        for i_index, station in enumerate(stations):
            box_stats = []
            box_stats_log = []
            for j in range(len(seasons)):
                # Pool the values of all years for this station and season.
                all_years = []
                for k in data_dict[station][j]:
                    all_years += data_dict[station][j][k]
                box_stats += cbook.boxplot_stats(all_years)
                box_stats_log += cbook.boxplot_stats(np.log(all_years))

            box_color = box_colors[i_index]
            # Shift each station's boxes sideways so they sit next to
            # each other within a season.
            shifted = positions + i_index * (box_width + offest)
            for ax, season_stats in ((axs[0], box_stats),
                                     (axs[1], box_stats_log)):
                _ = ax.bxp(
                    season_stats,
                    widths=box_width,
                    showfliers=True,
                    flierprops={"color": box_color, "marker": "+"},
                    positions=shifted,
                    **{name: {"color": box_color} for name in line_props}
                )
            box_data.append(box_stats)
            box_log_data.append(box_stats_log)

        patches = [
            mpatches.Patch(color=box_colors[i], label=station_names[i])
            for i in range(len(stations))
        ]

        axs[0].set_ylabel(pollutant_labels[plt_i])
        axs[1].set_ylabel(pollutant_log_labels[plt_i])
        scale_ls = positions + box_width + offest
        axs[0].set_xticks([])
        axs[1].set_xticks(scale_ls)
        axs[1].set_xticklabels(seasons)
        axs[0].legend(handles=patches, bbox_to_anchor=(1.1, 1.3), ncol=3)
        filename = "{}_log_box_fliers.png".format(pollutant)
        fig.savefig(filename, dpi=300, bbox_inches="tight")

        # Remove the fliers entry from every statistics dict before dumping.
        for station_stats in box_data + box_log_data:
            for stat in station_stats:
                stat.pop("fliers")

        with open("{}_box_data.json".format(pollutant), "w") as f:
            json.dump(box_data, f)
        with open("{}_box_log_data.json".format(pollutant), "w") as f:
            json.dump(box_log_data, f)
 def test_results_withlabels(self):
     """Compare the first result of a labelled call with the stored
     reference values."""
     labels = ['Test1', 2, 3, 4]
     first = cbook.boxplot_stats(self.data, labels=labels)[0]
     for key, expected in self.known_res_with_labels.items():
         assert_equal(first[key], expected)
Beispiel #27
0
 def test_results_bootstrapped(self):
     """Compare the bootstrapped confidence interval of the first result
     with the stored reference values."""
     first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
     expected = self.known_bootstrapped_ci
     for key in expected:
         assert_approx_equal(first[key], expected[key])
 def test_bad_dims(self):
     """Call ``boxplot_stats`` on a three-dimensional random array."""
     # NOTE(review): presumably this is expected to raise -- confirm where
     # that expectation is declared.
     results = cbook.boxplot_stats(np.random.normal(size=(34, 34, 34)))
Beispiel #29
0
 def test_results_whiskers_float(self):
     """Compare the first result for ``whis=3`` with the stored reference
     values."""
     first = cbook.boxplot_stats(self.data, whis=3)[0]
     expected = self.known_whis3_res
     for key in expected:
         assert_array_almost_equal(first[key], expected[key])
Beispiel #30
0
def plotlyGCbias(file_name, frequencies, reads_per_gc, region_size):
    """Write a two-panel plotly figure of read counts against GC fraction.

    The top panel holds one box per GC bin with a label between 0.2 and
    0.7, built from the ``boxplot_stats`` summary of that bin. The bottom
    panel is a line of ``log2(frequencies[:, 2])`` over an evenly spaced
    0..1 axis.

    :param file_name: output file passed to ``plotly.offline.plot``
    :param frequencies: 2-D array; column 2 is plotted in the bottom panel
    :param reads_per_gc: array whose transpose unpacks into reads and GC
    :param region_size: region size shown in the top panel heading
    """
    import plotly.offline as py
    import plotly.graph_objs as go
    import matplotlib.cbook as cbook

    def _heading(text, y):
        # Centred, arrow-less annotation in paper coordinates at height y.
        return {
            'yanchor': 'bottom',
            'xref': 'paper',
            'xanchor': 'center',
            'yref': 'paper',
            'text': text,
            'y': y,
            'x': 0.5,
            'font': {
                'size': 16
            },
            'showarrow': False
        }

    fig = go.Figure()
    fig['layout']['xaxis1'] = {'domain': [0.0, 1.0],
                               'anchor': "y1",
                               'title': "GC fraction"}
    fig['layout']['yaxis1'] = {'domain': [0.55, 1.0],
                               'anchor': "x1",
                               'title': "Number of reads"}
    fig['layout']['xaxis2'] = {'domain': [0.0, 1.0],
                               'anchor': "y2",
                               'title': "GC fraction",
                               'range': [0.2, 0.7]}
    fig['layout']['yaxis2'] = {'domain': [0.0, 0.45],
                               'anchor': "x2",
                               'title': "log2(observed/expected)"}
    annos = [
        _heading("reads per {} base region".format(region_size), 1.0),
        _heading("normalized observed/expected read counts", 0.5),
    ]

    # prepare data for boxplot: keep only bins labelled within [0.2, 0.7]
    reads, GC = reads_per_gc.T
    binned_reads, all_labels = bin_by(reads, GC, nbins=100)
    kept = [idx for idx, label in enumerate(all_labels)
            if 0.2 <= label <= 0.7]
    binned_reads = [binned_reads[idx] for idx in kept]
    bin_labels = [all_labels[idx] for idx in kept]

    # Replace each bin by nine summary values so that plotly draws the same
    # box as matplotlib while the output file stays much smaller.
    bins = []
    for bin_reads in binned_reads:
        s = cbook.boxplot_stats(bin_reads)[0]
        bins.append(
            [s['whislo']] + [s['q1']] * 2 + [s['med']] * 3
            + [s['q3']] * 2 + [s['whishi']]
        )

    # top plot
    data = [
        go.Box(x=label,
               y=summary,
               xaxis='x1',
               yaxis='y1',
               boxpoints='outliers',
               showlegend=False,
               name="{}".format(label),
               line=dict(color='rgb(107,174,214)'))
        for label, summary in zip(bin_labels, bins)
    ]

    # bottom plot
    gc_axis = np.linspace(0, 1, frequencies.shape[0])
    data.append(go.Scatter(x=gc_axis,
                           y=np.log2(frequencies[:, 2]),
                           xaxis='x2',
                           yaxis='y2',
                           showlegend=False,
                           line=dict(color='rgb(107,174,214)')))
    fig['data'] = data
    fig['layout']['annotations'] = annos
    py.plot(fig, filename=file_name, auto_open=False)
Beispiel #31
0
 def test_results_whiskers_range(self):
     """Compare whiskers for ``whis=[0, 100]`` of the first result with the
     stored reference values."""
     first = cbook.boxplot_stats(self.data, whis=[0, 100])[0]
     expected = self.known_res_range
     for key in expected:
         assert_array_almost_equal(first[key], expected[key])
Beispiel #32
0
def main():
    """Plot depth-error statistics from the files written by depth_map.py.

    Parses the command line, calls ``make_fig_1`` and then loads every
    ``graph_depth*.npy`` file plus ``bins.npy`` from the graph directory.
    For each bin the errors of all loaded graphs are pooled and summarised
    in a boxplot (``<sequence_name>2.png``) and in line plots of the mean,
    median and maximum (``3.png`` to ``5.png``). The medians are also saved
    to ``depth_info/medians<sequence_name>.npy``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('sequence_name', help='dataset sequence name')
    parser.add_argument(
        '--diff_list',
        help='string - name of diff_list file '
             '(needed for Graph 1, output of depth_map.py)')
    parser.add_argument(
        '--graph_depths',
        help='directory - graph files '
             '(needed for Graphs 2-5, output of depth_map.py)')
    parser.add_argument(
        '--x_axis_spacing',
        help='integer - separation among ticks in the x axis '
             '(for readability))')
    args = parser.parse_args()

    make_fig_1(args)

    # depth vs errors (amount, mean)
    # load all graph_depth*.npy files
    graph_depth_dir = args.graph_depths or "./depth_info/"

    # Convert before comparing: the option arrives as a string, and a value
    # below 1 would make the modulo used for the tick labels fail.
    x_axis_spacing = 5
    if args.x_axis_spacing:
        requested_spacing = int(float(args.x_axis_spacing))
        if requested_spacing >= 1:
            x_axis_spacing = requested_spacing

    # Single-argument print calls keep the output identical under
    # Python 2 and Python 3.
    print("Using graph depth dir:  {}".format(graph_depth_dir))
    npys = glob.glob(graph_depth_dir + 'graph_depth*.npy')

    if not npys:
        print("No data to collect..")
        return

    bins = np.load(graph_depth_dir + 'bins.npy')
    print(bins)

    fig, ax = plt.subplots(1, 1)
    bxpstats = []

    # Only non-empty graphs are kept, so this list can be shorter than npys.
    graphs = []
    for npy in npys:
        graph = np.load(npy)[1]
        if len(graph) > 0:
            graphs.append(graph)

    means = np.zeros(len(bins))
    medians = np.zeros(len(bins))
    maxs = np.zeros(len(bins))
    for i in range(len(bins)):
        # Pool the errors of bin i across the loaded graphs. Iterating
        # over graphs (not npys) avoids indexing past its end.
        graph_depth = []
        for graph in graphs:
            if i < len(graph) and len(graph[i]) > 0:
                graph_depth.extend(graph[i])

        if len(graph_depth) > 0:
            means[i] = np.mean(graph_depth)
            medians[i] = np.median(graph_depth)
            maxs[i] = np.max(graph_depth)
            bxpstats.extend(cbook.boxplot_stats(np.ravel(graph_depth)))
        else:
            # Keep one box per bin so positions stay aligned with bins.
            bxpstats.extend(cbook.boxplot_stats(np.ravel([0])))

        print("ITEM :  {} {}".format(i, len(graph_depth)))

    ax.bxp(bxpstats, showfliers=False)
    # A real list is needed here; a lazy map object is not a label sequence.
    bins_str = [str(int(bins[x])) if x % x_axis_spacing == 0 else ''
                for x in range(len(bins))]

    # bins-bins[0]+1 since it can start at any number
    plt.xticks(bins - bins[0] + 1, bins_str)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel("Error (m)")
    plt.savefig(args.sequence_name + "2.png")

    # mean
    plt.figure(3)
    plt.plot(bins, means)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel("Error - mean (m)")
    plt.savefig(args.sequence_name + "3.png")

    # median
    plt.figure(4)
    plt.plot(bins, medians)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel("Error - median (m)")
    plt.savefig(args.sequence_name + "4.png")

    np.save("depth_info/medians" + args.sequence_name + ".npy", medians)

    # maximum
    plt.figure(5)
    plt.plot(bins, maxs)
    plt.xlabel(utf8("Distance to the camera (depth, m)"))
    plt.ylabel(utf8("Error - máximo (m)"))
    plt.savefig(args.sequence_name + "5.png")

    print("")
    print("Saved " + args.sequence_name + "{1-5}.png files")
Beispiel #33
0
 def test_label_error(self):
     """``boxplot_stats`` must raise ``ValueError`` when called on
     ``self.data`` with only two labels."""
     too_few_labels = [1, 2]
     with pytest.raises(ValueError):
         cbook.boxplot_stats(self.data, labels=too_few_labels)
Beispiel #34
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cbook
# Seed first so the sample drawn below is reproducible.
np.random.seed(0)

fig, ax = plt.subplots(figsize=(4, 6))
ax.set_yscale('log')
data = np.random.lognormal(-1.75, 2.75, size=37)

# Statistics of the raw data and of its logarithm.
stats = cbook.boxplot_stats(data, labels=['arithmetic'])
logstats = cbook.boxplot_stats(np.log(data), labels=['log-transformed'])

# Map every log-space statistic back with exp; the label is left as it is.
for lsdict in logstats:
    lsdict.update({
        key: np.exp(value)
        for key, value in lsdict.items()
        if key != 'label'
    })

# Draw both boxes on the same axes.
stats.extend(logstats)
ax.bxp(stats)
fig.show()
Beispiel #35
0
    st.markdown("# Missing Values")
    st.write(full_df.isnull().any())

    st.markdown("# Boxplots and Histograms")
    st.markdown("## Drop useless columns")
    drop_cols = [
        "LocID", "Country", "Time", "MidPeriod", "Code", "Unnamed: 0",
        "country", "year", "ranking"
    ]
    st.write(drop_cols)
    small_df = full_df.drop(columns=drop_cols)

    st.markdown("## Plot those with more than 3 outliers")
    plot_cols = [
        column for column in small_df.columns if len([
            y for stat in boxplot_stats(small_df[column])
            for y in stat['fliers']
        ]) > 3
    ]
    st.write(plot_cols)
    _, axes = plt.subplots(nrows=len(plot_cols), ncols=2, figsize=(10, 150))
    for i, column in enumerate(plot_cols):
        small_df.boxplot(column=column, ax=axes[i][0])
        small_df.hist(column=column, ax=axes[i][1])
    st.pyplot()

    st.markdown("## Print rows of max outliers")
    max_indices = small_df[plot_cols].idxmax(axis=0)
    for column in [
            "Deaths", "DeathsMale", "DeathsFemale", "CNMR", "GrowthRate",
            "RelMigrations", "change_from_previous_year"
Beispiel #36
0
 def test_results_bootstrapped(self):
     """Compare the bootstrapped confidence interval of the first result
     with the stored reference values."""
     first = cbook.boxplot_stats(self.data, bootstrap=10000)[0]
     expected = self.known_bootstrapped_ci
     for key in expected:
         assert_approx_equal(first[key], expected[key])
Beispiel #37
0
A good general reference on boxplots and their history can be found
here: http://vita.had.co.nz/papers/boxplots.pdf
"""

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cbook as cbook

# fake data (seeded so that every run draws the same sample)
np.random.seed(19680801)
data = np.random.lognormal(size=(37, 4), mean=1.5, sigma=1.75)
labels = ['A', 'B', 'C', 'D']

# compute the boxplot stats
stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000)

###############################################################################
# After we've computed the stats, we can go through and change anything.
# Just to prove it, I'll set the median of each set to the median of all
# the data, and double the means

overall_median = np.median(data)
for entry in stats:
    entry['med'] = overall_median
    entry['mean'] *= 2

# show which statistics are available for each box
print(list(stats[0].keys()))

fs = 10  # fontsize

###############################################################################
Beispiel #38
0
 def test_results_whiskers_range(self):
     """Compare the first whis='range' result with self.known_res_range.

     Every key listed in self.known_res_range must match the computed
     statistics (array comparison, almost-equal).
     """
     # NOTE(review): whis='range' was deprecated in Matplotlib 3.2 in favour
     # of whis=(0, 100) -- confirm which Matplotlib version this targets.
     first = cbook.boxplot_stats(self.data, whis='range')[0]
     for name, expected in self.known_res_range.items():
         assert_array_almost_equal(first[name], expected)
# Split the summary rows by experiment name. Names containing '_WCLP' also
# contain '_WCL' (and '_UbP' also contains '_Ub'), so the broad match is
# taken first and then divided with the more specific pattern.
wcls = sumbrief[sumbrief['Experiment'].str.contains('_WCL')]
is_wclp = wcls['Experiment'].str.contains('_WCLP')
wcl = wcls[~is_wclp]
wclp = wcls[is_wclp]

ubs = sumbrief[sumbrief['Experiment'].str.contains('_Ub')]
is_ubp = ubs['Experiment'].str.contains('_UbP')
ub = ubs[~is_ubp]
ubp = ubs[is_ubp]

# compute the boxplot stats
# whis=[0, 100] puts the whiskers at the data minimum and maximum. It replaces
# the string spelling whis='range', which was deprecated in Matplotlib 3.2 and
# is no longer accepted by later releases.
ubstats, ubpstats, wclstats, wclpstats = (
    cbook.boxplot_stats(
        frame[["MS/MS Identified"]].values,
        whis=[0, 100],
        bootstrap=None,
        labels=None,
    )
    for frame in (ub, ubp, wcl, wclp)
)

fs = 10 # fontsize

# one panel per experiment group, drawn from the precomputed statistics
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(4,4))
axes[0, 0].bxp(ubstats)
axes[0, 0].set_title('ub', fontsize=fs)

axes[0, 1].bxp(ubpstats)
axes[0, 1].set_title('ubp', fontsize=fs)

axes[1, 0].bxp(wclstats)
Beispiel #40
0
 def test_results_whiskers_percentiles(self):
     """Compare the first whis=[5, 95] result with self.known_res_percentiles.

     Every key listed in self.known_res_percentiles must match the computed
     statistics (array comparison, almost-equal).
     """
     first = cbook.boxplot_stats(self.data, whis=[5, 95])[0]
     for name, expected in self.known_res_percentiles.items():
         assert_array_almost_equal(first[name], expected)
#            flist.sort()   
            t_start.append(dt.datetime.strptime(name[:15], '%Y%m%d_%H%M%S'))
            t_end.append(dt.datetime.strptime(name[17:32], '%Y%m%d_%H%M%S'))                                          
            if file_paths.startswith(('/data/stor/basic_data/tri_data/rink/proc_data/d0728/INT/REC/')):
                with open(file_paths,'rb') as f:
                    temp= f.read()
                    pha_= np.fromfile(file_paths, dtype='>f')
                    pha_[pha_==0] = np.nan
                    pha_rectangle = np.reshape(pha_, (1559,845))
                    vlos = (-0.0175*pha_rectangle)/(4* 3.14159*(2.5/1440)) #LOS Speeds
                    gla.append(vlos[673,233])
                    rock0.append(vlos[832,328]) #rock velocities
                    #Atmosphere
                    rockfav = np.array(vlos[790:815,340:365]) #rock square for noise analysis
                    unravel = rockfav.ravel()
                    stats['C'] = cbook.boxplot_stats(unravel, labels='C')[0]
                    iqrstats.append(stats['C'])
                    median = st.median(unravel)
                    medians.append(median)
                    means.append(np.mean(unravel))
                    true_mean = np.mean(unravel) +2*(np.std(unravel)/np.sqrt(400))
                    t_mean.append(true_mean)
                    snratio= unravel/np.std(unravel)
                    snratios.append(snratio)
                    q1.append(np.percentile(unravel, 25, interpolation = 'midpoint'))
                    q2.append(np.percentile(unravel, 50, interpolation = 'midpoint'))
                    q3.append(np.percentile(unravel, 75, interpolation = 'midpoint'))
                    IQR = np.percentile(unravel, 75, interpolation = 'midpoint') - np.percentile(unravel, 25, interpolation = 'midpoint')
                    iqrs.append(IQR)
                    inter_quart = iqr(unravel)
                    inter.append(inter_quart)
Beispiel #42
0
 def test_bad_dims(self):
     """boxplot_stats must raise ValueError when given a 3-D array."""
     data = np.random.normal(size=(34, 34, 34))
     with pytest.raises(ValueError):
         # The return value is irrelevant here: the call is expected to
         # raise, so nothing is bound to a name.
         cbook.boxplot_stats(data)
Beispiel #43
0
 def test_label_error(self):
     """boxplot_stats must raise ValueError for labels=[1, 2] on self.data.

     Presumably this fails because the number of labels differs from the
     number of data columns -- verify against the fixture setup.
     """
     with pytest.raises(ValueError):
         cbook.boxplot_stats(self.data, labels=[1, 2])
Beispiel #44
0
 def test_bad_dims(self):
     """Feeding a 34 x 34 x 34 array to boxplot_stats must raise ValueError."""
     cube = np.random.normal(size=(34, 34, 34))
     with pytest.raises(ValueError):
         cbook.boxplot_stats(cube)
Beispiel #45
0
def _save_drip_boxplot(box_stats, tick_labels, title, ylim_range, out_name):
    """Draw one box plot from precomputed statistics and save it in ./figure."""
    y_axis_formatter = matplotlib.ticker.ScalarFormatter(useOffset=True,
                                                         useMathText=True,
                                                         useLocale=None)
    y_axis_formatter.set_powerlimits((-1, 1))
    y_axis_formatter.set_scientific(True)

    ax = plt.axes()
    ax.bxp(box_stats, widths=0.3, showfliers=False)
    # bxp places the boxes at x = 1..N by default, so derive the tick
    # positions from the number of labels instead of hard-coding five.
    plt.xticks(list(range(1, len(tick_labels) + 1)), tick_labels, fontsize=12)
    plt.title(title, fontsize=12)
    plt.ylabel('average DRIP-seq readcount', fontsize=12)
    plt.ylim(ylim_range)
    ax.yaxis.set_major_formatter(y_axis_formatter)
    ax.yaxis.set_offset_position('left')
    plt.savefig(os.path.join('.', 'figure', out_name))
    plt.close()


def figure1b(drip_boot):
    """Save box plots of per-row mean DRIP-seq signal for every sample.

    For 'control' and each name in the module-level ``samples`` list, the
    mean of every row of ``drip_boot[name]`` is taken over three column
    windows (60:180, 60:120 and 120:180). Boxplot statistics of those row
    means are plotted, one box per sample, and written to
    ``./figure/fig1b.png``, ``./figure/fig1b_ho.png`` and
    ``./figure/fig1b_cd.png``.

    Relies on the module-level names ``samples``, ``sample_dict``, ``plt``,
    ``os`` and ``matplotlib``.

    Parameters
    ----------
    drip_boot : mapping
        Maps each sample name to a 2-D array that is indexed as
        ``array[row, column_slice]``. Presumably each array has at least
        180 columns -- verify against the caller.

    Raises
    ------
    ValueError
        If an array's ``shape`` does not unpack into exactly two dimensions.
    """
    import matplotlib.cbook as cbook

    ylim_range = (0.29, 0.51)
    sample_names = ['control'] + samples
    # (column window, output file name, plot title) for each figure
    windows = [
        (slice(60, 180), 'fig1b.png',
         'Average DRIP-seq readcount in 12kb window'),
        (slice(60, 120), 'fig1b_ho.png',
         'Average DRIP-seq readcount in 6kb HO region'),
        (slice(120, 180), 'fig1b_cd.png',
         'Average DRIP-seq readcount in 6kb CD region'),
    ]

    # Compute every statistic before drawing anything, so a bad input fails
    # before any figure file is written.
    stats_per_window = [[] for _ in windows]
    for name in sample_names:
        matrix = drip_boot[name]
        _nrow, _ncol = matrix.shape  # unpacking fails unless the array is 2-D
        for collected, (columns, _out_name, _title) in zip(stats_per_window,
                                                           windows):
            row_means = [row[columns].mean() for row in matrix]
            collected.append(cbook.boxplot_stats(row_means)[0])

    # Spaces become line breaks so multi-word sample names wrap under the box.
    tick_labels = [sample_dict[name].replace(' ', '\n')
                   for name in sample_names]

    for collected, (_columns, out_name, title) in zip(stats_per_window,
                                                      windows):
        _save_drip_boxplot(collected, tick_labels, title, ylim_range, out_name)