def test_hist_no_overlap(self):
     """Histograms drawn into separate subplots must not add extra axes.

     Two series built from ``randn(2)`` are plotted into the first and
     second cell of a 1x2 subplot grid; the current figure is then expected
     to hold exactly two axes.
     """
     from matplotlib.pyplot import subplot, gcf
     # Both series are created up front, before any subplot is selected.
     panels = ((121, Series(randn(2))), (122, Series(randn(2))))
     for position, series in panels:
         subplot(position)
         series.hist()
     self.assertEqual(len(gcf().get_axes()), 2)
Beispiel #2
0
 def test_hist_no_overlap(self):
     """Check that two histograms in separate subplots yield two axes.

     Each series is drawn into its own cell of a 1x2 subplot grid. The axes
     list is read through ``fig.axes`` when ``self.mpl_ge_1_5_0`` is truthy
     and through ``fig.get_axes()`` otherwise.
     """
     from matplotlib.pyplot import subplot, gcf
     first = Series(randn(2))
     second = Series(randn(2))
     for position, series in zip((121, 122), (first, second)):
         subplot(position)
         series.hist()
     figure = gcf()
     if self.mpl_ge_1_5_0:
         axes = figure.axes
     else:
         axes = figure.get_axes()
     assert len(axes) == 2
Beispiel #3
0
# Plot histogram of tip_pct
tips['tip_pct'].hist(bins=50, alpha=0.3, color='r')

# Plot density plot (KDE = kernel density estimation)
tips['tip_pct'].plot(kind='kde')



# Bimodal example: mix samples from two normal distributions and compare
# the normalized histogram with the KDE estimate.
fig = plt.figure()
comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4)
values = Series(np.concatenate([comp1, comp2]))

# `density=True` replaces the `normed` keyword, which matplotlib removed.
values.hist(bins=100, alpha=0.3, color='g', density=True)
values.plot(kind='kde', style='r-')
draw()

# Scatterplot of the differenced log series
plt.figure()
macro = pd.read_csv('../../pydata-book/ch08/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()

plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs log %s' % ('m1', 'unemp'))

# Scatter matrix. The function lives in `pd.plotting`; the top-level
# `pd.scatter_matrix` alias was removed from pandas.
pd.plotting.scatter_matrix(trans_data, diagonal='kde', color='b', alpha=0.3)
    freqs = dict()
    total = float(sum(counts.values()))
    for ipos, count in counts.items():
        freqs[ipos] = count/total
    return freqs

def sequence_entropy(sequence):
    """Return the entropy, in bits, of the position frequencies of *sequence*.

    Positions are tallied with ``count_positions`` and converted to relative
    frequencies with ``relative_frequency``; every frequency ``f`` then
    contributes ``f * log2(1 / f)`` to the returned float.
    """
    frequencies = relative_frequency(count_positions(sequence))
    total = 0.0
    # Only the frequency values matter; the position keys are not used.
    for freq in frequencies.values():
        total += freq * math.log(1/freq, 2)
    return total

def main(sequences):
    """Compute the entropy of every sequence line.

    Each input string is cut at its first ``"["``; the leading part is
    wrapped in ``Sequence`` and passed to ``sequence_entropy``.

    Args:
        sequences: iterable of strings, one sequence per item.

    Returns:
        A list with one entropy per input item, in input order. A list is
        returned instead of a ``map`` object so that, on Python 3 as well,
        the result has a length and can be iterated more than once.
    """
    return [sequence_entropy(Sequence(line.split("[")[0]))
            for line in sequences]

if __name__ == '__main__':
    # Script entry point: read one sequence per line from the file named by
    # the first command-line argument, compute the per-sequence entropies and
    # save their histogram next to the input as "<stem>_entropy_hist.png".
    import sys
    import os
    sequence_file = sys.argv[1]
    # Use a context manager so the input file is closed deterministically
    # instead of leaking the handle.
    with open(sequence_file) as handle:
        seqs = handle.readlines()
    entropies = main(seqs)
    from matplotlib import pyplot as plt
    from pandas import Series
    # Materialize first: `main` may hand back a lazy iterator on Python 3.
    es = Series(list(entropies))
    es.hist()
    plt.savefig(os.path.splitext(sequence_file)[0] + "_entropy_hist.png")
p3_2 = normed[1] - normed[0]
np.allclose(p3_1, p3_2)

# p88: compare the binomial B(10, 0.5) with its normal approximation
# (mean n*p, standard deviation sqrt(n*p*(1-p))).
p4_binom = scipy.stats.binom.pmf(6, n=10, p=0.5)
params4 = {
    "loc": 10 * 0.5,
    "scale": np.sqrt(10 * 0.5 * (1 - 0.5))
}
# Approximate P(X = 6) by the normal probability mass between 5.5 and 6.5.
norm4 = norm.cdf([5.5, 6.5], **params4)
p4_norm = norm4[1] - norm4[0]
# Use np.linspace directly; `scipy.linspace` was only a deprecated alias
# for the NumPy function.
x4 = np.linspace(0, 10, 11)
data4_norm = Series(norm.pdf(x4, **params4), index=x4)
data4_norm.plot(ax=axes[0][1], kind="bar", width=1,
                title="B(10,0.5) and N(%.0f, %0.2f)" % (params4["loc"], params4["scale"]))
data4_binom = Series(scipy.stats.binom.pmf(x4, n=10, p=0.5), index=x4)
data4_binom.plot(ax=axes[0][1], color="r")

# p90: histogram of 200 sample means, each taken over 10 draws from a
# Student's t distribution with 5 degrees of freedom.
means_5 = []
for _ in range(200):
    samples = np.random.standard_t(5, 10)
    means_5.append(samples.mean())

data5 = Series(means_5)
# Sturges' rule for the bin count. np.ceil returns a float, but the
# histogram `bins` argument must be an integer, so convert explicitly.
n5 = int(np.ceil(1 + np.log2(data5.size)))
axes[1][1].set_title("Random samples with mean 0, variance 1.67")
# `density=True` replaces the `normed` keyword, which matplotlib removed.
data5.hist(bins=n5, ax=axes[1][1], density=True)

plt.show()
Beispiel #6
0
 def plot(self):
     """Draw a histogram of ``self.summary`` titled with the metric name.

     Returns the object produced by ``Series.hist`` after its title has
     been set to ``"<metric name> Histogram"``.
     """
     histogram_axes = Series(self.summary).hist()
     title = "%s Histogram" % self.metric.name
     histogram_axes.set_title(title)
     return histogram_axes
Beispiel #7
0
def histogram(data):
    """Show a 50-bin histogram of *data* in a new figure.

    The data is wrapped in a ``Series`` first, then a fresh figure is
    opened, the histogram is drawn in black at 50% opacity, and the figure
    is displayed with ``plt.show()``.
    """
    series = Series(data)
    plt.figure()
    hist_options = {'color': 'k', 'alpha': 0.5, 'bins': 50}
    series.hist(**hist_options)
    plt.show()
        
def slide_12_2():
    """Plot a bimodal sample as a normalized histogram with a KDE curve.

    Draws 200 values from a normal distribution with mean 0 and standard
    deviation 1 and 200 values from one with mean 10 and standard deviation
    2, concatenates them into one ``Series``, and plots both a 100-bin
    normalized histogram and a kernel density estimate of the combined data.
    """
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = Series(np.concatenate([comp1, comp2]))
    # `density=True` replaces the `normed` keyword, which matplotlib removed.
    values.hist(bins=100, alpha=0.3, color='k', density=True)
    values.plot(kind='kde', style='k--')