def plot_call_summary(self):
    """Plot a three-panel summary of the most expensive calls.

    The calls considered are those present among the 1000 records with the
    largest ``elapsed`` value. The figure shows, top to bottom: the records
    with ``elapsed >= 5`` over time, the min/max/mean ``elapsed`` per call,
    and the number of records per call. Finally
    ``self.plot_calls_distribution`` is invoked with the same calls.
    """
    # Names of the calls found among the 1000 slowest records.
    slowest = self.df.sort_index(by='elapsed', ascending=False)[:1000]
    calls = [name for name, _ in slowest.groupby('call')]

    def summarize(name):
        # Statistics cover every record of the call, not only the slow ones.
        elapsed = self.df[self.df['call'] == name]['elapsed']
        return {
            'call': name,
            'mean': elapsed.mean(),
            'count': elapsed.count(),
            'max': elapsed.max(),
            'min': elapsed.min(),
        }

    call_data_df = DataFrame([summarize(name) for name in calls])

    fig = plt.figure()
    fig.suptitle(
        '{0} - {1}'.format(self.min_timestamp, self.max_timestamp),
        fontsize=16,
    )

    # Panel 1: slow responses over time.
    slow_ax = fig.add_subplot(3, 1, 1)
    slow_records = self.df[self.df['elapsed'] >= 5].sort_index(by='timestamp')
    slow_records.plot(
        title='Response time > 5 seconds',
        ax=slow_ax,
        x='timestamp',
        y='elapsed',
    )

    # Panel 2: min / max / mean per call.
    stats_ax = fig.add_subplot(3, 1, 2)
    per_call_stats = call_data_df[['call', 'min', 'max', 'mean']].set_index('call')
    per_call_stats.plot(title='Response Time', ax=stats_ax, kind='barh')

    # Panel 3: number of records per call.
    count_ax = fig.add_subplot(3, 1, 3)
    call_data_df.plot(title='Call Count', ax=count_ax, x='call', y='count', kind='barh')

    fig.show()
    self.plot_calls_distribution(calls)
def test_time(self):
    """Tick labels of a ``datetime.time`` index are rendered as HH:MM:SS."""
    start = datetime(1, 1, 1, 3, 30, 0)
    offsets = np.random.randint(1, 20, 3).cumsum()
    ts = np.array(
        [(start + timedelta(minutes=int(minutes))).time() for minutes in offsets]
    )
    frame = DataFrame(
        {'a': np.random.randn(len(ts)), 'b': np.random.randn(len(ts))},
        index=ts,
    )
    _, ax = self.plt.subplots()
    frame.plot(ax=ax)

    def check_tick_labels():
        # Each tick value is converted to h/m/s and compared with the label
        # text; empty labels are skipped.
        for tick, label in zip(ax.get_xticks(), ax.get_xticklabels()):
            minutes, seconds = divmod(int(tick), 60)
            hours, minutes = divmod(minutes, 60)
            text = label.get_text()
            if len(text) > 0:
                expected = time(hours, minutes, seconds).strftime('%H:%M:%S')
                assert text == expected

    # verify tick labels
    check_tick_labels()

    # change xlim
    ax.set_xlim('1:30', '5:00')

    # check tick labels again
    check_tick_labels()
def test_fontsize_set_correctly(self):
    """``fontsize`` passed to ``plot`` is applied to x and y tick labels."""
    # For issue #8765
    frame = DataFrame(np.random.randn(10, 9), index=range(10))
    fig, ax = self.plt.subplots()
    frame.plot(fontsize=2, ax=ax)
    tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
    for tick_label in tick_labels:
        assert tick_label.get_fontsize() == 2
def test_bar_colors(self):
    """Bar plots use the rc colour cycle by default and ``color`` if given."""
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors

    default_colors = plt.rcParams.get('axes.color_cycle')
    custom_colors = 'rgcby'
    to_rgba = colors.colorConverter.to_rgba

    plt.close('all')
    df = DataFrame(np.random.randn(5, 5))

    # Default colours: every fifth patch is compared with the colour cycle.
    ax = df.plot(kind='bar')
    for pos, bar in enumerate(ax.patches[::5]):
        expected = to_rgba(default_colors[pos % len(default_colors)])
        actual = bar.get_facecolor()
        self.assert_(expected == actual)

    # Explicit colours, one character per column.
    plt.close('all')
    ax = df.plot(kind='bar', color=custom_colors)
    for pos, bar in enumerate(ax.patches[::5]):
        expected = to_rgba(custom_colors[pos])
        actual = bar.get_facecolor()
        self.assert_(expected == actual)

    # A single named colour on a one-column frame only has to plot.
    plt.close('all')
    df.ix[:, [0]].plot(kind='bar', color='DodgerBlue')
def test_line_colors(self):
    """Line colours follow ``color`` and the alternative ``colors`` keyword.

    Also checks that a single named colour can be used for a one-column
    frame.
    """
    import matplotlib.pyplot as plt
    import sys
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    custom_colors = 'rgcby'

    plt.close('all')
    df = DataFrame(np.random.randn(5, 5))

    ax = df.plot(color=custom_colors)

    lines = ax.get_lines()
    for i, l in enumerate(lines):
        xp = custom_colors[i]
        rs = l.get_color()
        self.assert_(xp == rs)

    # stderr is swapped out while plotting with ``colors=``, presumably to
    # hide a message emitted for that keyword; it is always restored.
    tmp = sys.stderr
    sys.stderr = StringIO()
    try:
        plt.close('all')
        ax2 = df.plot(colors=custom_colors)
        lines2 = ax2.get_lines()
        for l1, l2 in zip(lines, lines2):
            # assert_(a, b) would use ``b`` as the failure message and never
            # compare the two colours, so compare explicitly.
            self.assert_(l1.get_color() == l2.get_color())
    finally:
        sys.stderr = tmp

    # make color a list if plotting one column frame
    # handles cases like df.plot(color='DodgerBlue')
    plt.close('all')
    df.ix[:, [0]].plot(color='DodgerBlue')
def test_bar_colors(self):
    """Bars use the default colours, or the ones passed via ``color``."""
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors

    default_colors = 'brgyk'
    custom_colors = 'rgcby'

    plt.close('all')
    df = DataFrame(np.random.randn(5, 5))
    ax = df.plot(kind='bar')

    rects = ax.patches

    conv = colors.colorConverter
    # Sample every fifth patch, as the other test_bar_colors variants in this
    # file do; ``rects[:5]`` presumably inspected five bars of one column.
    for i, rect in enumerate(rects[::5]):
        xp = conv.to_rgba(default_colors[i])
        rs = rect.get_facecolor()
        # assert_(xp, rs) treated ``rs`` as the failure message and never
        # compared the colours.
        self.assert_(xp == rs)

    plt.close('all')

    ax = df.plot(kind='bar', color=custom_colors)

    rects = ax.patches

    conv = colors.colorConverter
    for i, rect in enumerate(rects[::5]):
        xp = conv.to_rgba(custom_colors[i])
        rs = rect.get_facecolor()
        self.assert_(xp == rs)
def test_bar_colors(self):
    """Bar colours match the default or the explicitly passed colours."""
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors

    default_colors = "brgyk"
    custom_colors = "rgcby"
    to_rgba = colors.colorConverter.to_rgba

    plt.close("all")
    df = DataFrame(np.random.randn(5, 5))

    # First without, then with an explicit ``color`` argument; every fifth
    # patch is compared with the colour at the same position.
    ax = df.plot(kind="bar")
    for pos, bar in enumerate(ax.patches[::5]):
        wanted = to_rgba(default_colors[pos])
        found = bar.get_facecolor()
        self.assert_(wanted == found)

    plt.close("all")
    ax = df.plot(kind="bar", color=custom_colors)
    for pos, bar in enumerate(ax.patches[::5]):
        wanted = to_rgba(custom_colors[pos])
        found = bar.get_facecolor()
        self.assert_(wanted == found)

    # One-column frame with a single named colour: must simply not raise.
    plt.close("all")
    df.ix[:, [0]].plot(kind="bar", color="DodgerBlue")
def fare_analyze(is_plot=True):
    """Print and plot fare statistics split by survival.

    Reads the module-level ``titanic_df``. Prints the head of the ``Fare``
    column and the mean and standard deviation of the fare for the rows
    with ``Survived == 0`` and ``Survived == 1``. A histogram of all fares
    is always drawn.

    Parameters
    ----------
    is_plot : bool
        When true, additionally draw a bar chart of the mean fares with the
        standard deviations as error bars.
    """
    fare_test = titanic_df['Fare']
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('+' * 40)
    print(fare_test.head())

    fare_not_survived = titanic_df['Fare'][titanic_df['Survived'] == 0]
    fare_survived = titanic_df['Fare'][titanic_df['Survived'] == 1]

    # Mean and standard deviation of the fare for non-survivors (row 0) and
    # survivors (row 1).
    average_fare = DataFrame([fare_not_survived.mean(), fare_survived.mean()])
    std_fare = DataFrame([fare_not_survived.std(), fare_survived.std()])
    print('-')
    print(average_fare)
    print(std_fare)

    # plot
    titanic_df['Fare'].plot(kind='hist', figsize=(15, 3), bins=100, xlim=(0, 50))

    average_fare.index.names = std_fare.index.names = ["Survived"]
    if is_plot:
        average_fare.plot(yerr=std_fare, kind='bar', legend=False)
def test_partially_invalid_plot_data(self):
    """Plotting an object frame that contains strings raises TypeError."""
    frame = DataFrame(randn(10, 2), dtype=object)
    # Overwrite a random subset of the rows with a non-numeric value.
    frame[np.random.rand(frame.shape[0]) > 0.5] = "a"
    for plot_kind in ("line", "bar", "barh", "kde", "density"):
        with tm.assertRaises(TypeError):
            frame.plot(kind=plot_kind)
def plot_class_distribution(target, ax=None):
    """ Plot the distribution of the classes.

    Parameters
    ----------
    target : array
        The target column of the dataset.
    ax : Matplotlib Axes object
        A matplotlib Axes instance. The current axes (``plt.gca()``) are
        used when it is omitted.

    Returns
    -------
    ax : Matplotlib Axes object
        The matplotlib Axes instance where the figure is drawn.
    """
    # Test for "not supplied" explicitly instead of relying on the
    # truthiness of an Axes object.
    if ax is None:
        ax = plt.gca()

    def format_thousands(x, pos):
        # Tick formatter: integer value with thousands separators.
        return format(int(x), ',')

    counts = DataFrame(target).apply(pd.value_counts)
    counts.plot(ax=ax, kind="bar", fontsize=12, legend=False)
    ax.set_xticklabels(labels=counts.index, rotation=0)
    ax.get_yaxis().set_major_formatter(FuncFormatter(format_thousands))
    ax.grid(False)
    return ax
def plot_scores(scores, title, x_label, classifier_names):
    """ Draw a bar chart of one performance measure per classifier.

    Parameters
    ----------
    scores : dict
        Maps classifier names to their scores.
    title : str
        Title of the plot.
    x_label : str
        Label for the x-axis; used as the index label of the single row.
    classifier_names : array
        Names of the classifiers; their order determines the order of the
        bars.
    """
    def as_percent(x, pos):
        # Tick formatter: fraction shown as a whole-number percentage.
        return "{:.0f}%".format(x * 100)

    score_table = DataFrame(scores, index=[x_label]).reindex(columns=classifier_names)

    fig, ax = plt.subplots(figsize=(9, 5))
    score_table.plot(ax=ax, kind="bar", title=title, fontsize=12)
    ax.legend(bbox_to_anchor=(1.5, 0.6))
    ax.set_xticklabels([], rotation=0)
    ax.get_yaxis().set_major_formatter(FuncFormatter(as_percent))
    plt.show()
def test_line_colors(self):
    """Line colours follow ``color`` and the alternative ``colors`` keyword."""
    import matplotlib.pyplot as plt
    import sys
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3

    custom_colors = 'rgcby'

    plt.close('all')
    df = DataFrame(np.random.randn(5, 5))

    ax = df.plot(color=custom_colors)

    lines = ax.get_lines()
    for i, l in enumerate(lines):
        xp = custom_colors[i]
        rs = l.get_color()
        self.assert_(xp == rs)

    # stderr is swapped out while plotting with ``colors=``, presumably to
    # hide a message emitted for that keyword; it is always restored.
    tmp = sys.stderr
    sys.stderr = StringIO()
    try:
        plt.close('all')
        ax2 = df.plot(colors=custom_colors)
        lines2 = ax2.get_lines()
        for l1, l2 in zip(lines, lines2):
            # assert_(a, b) would use ``b`` as the failure message and never
            # compare the two colours, so compare explicitly.
            self.assert_(l1.get_color() == l2.get_color())
    finally:
        sys.stderr = tmp
def test_from_resampling_area_line_mixed(self):
    """Stacked line/area plots of weekly and monthly frames share one axis.

    Both plotting orders (low then high frequency, and the reverse) and both
    kind combinations are checked: x data must carry the weekly frequency and
    y data must equal the running column sum.
    """
    idxh = date_range("1/1/1999", periods=52, freq="W")
    idxl = date_range("1/1/1999", periods=12, freq="M")
    high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2])
    low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2])

    # Expected x data of the lines drawn for ``low`` and for ``high``.
    low_x = np.array(
        [1514, 1519, 1523, 1527, 1531, 1536,
         1540, 1544, 1549, 1553, 1558, 1562],
        dtype=np.float64,
    )
    high_x = idxh.to_period().asi8.astype(np.float64)

    def check_lines(ax, first_line, frame, expected_x):
        # Verify the three consecutive lines starting at ``first_line``.
        stacked = np.zeros(len(expected_x), dtype=np.float64)
        for col in range(3):
            line = ax.lines[first_line + col]
            self.assertEqual(PeriodIndex(data=line.get_xdata()).freq, idxh.freq)
            self.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x)
            # check stacked values are correct
            stacked += frame[col].values
            self.assert_numpy_array_equal(line.get_ydata(orig=False), stacked)

    kind_pairs = [("line", "area"), ("area", "line")]

    # low to high
    for first_kind, second_kind in kind_pairs:
        ax = low.plot(kind=first_kind, stacked=True)
        ax = high.plot(kind=second_kind, stacked=True, ax=ax)
        check_lines(ax, 0, low, low_x)
        check_lines(ax, 3, high, high_x)

    # high to low
    for first_kind, second_kind in kind_pairs:
        ax = high.plot(kind=first_kind, stacked=True)
        ax = low.plot(kind=second_kind, stacked=True, ax=ax)
        check_lines(ax, 0, high, high_x)
        check_lines(ax, 3, low, low_x)
def slide_11():
    """Demonstrate bar plots for a Series, a DataFrame and a crosstab.

    Reads the tips data set from ``TIPSCSVPATH`` and plots, per day, the
    normalised share of each party size as stacked bars.
    """
    fig, axes = plt.subplots(2, 1)
    data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
    data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
    data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

    df = DataFrame(np.random.rand(6, 4),
                   index=['one', 'two', 'three', 'four', 'five', 'six'],
                   columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(df)
    df.plot(kind='bar')
    df.plot(kind='barh', stacked=True, alpha=0.5)

    tips = pd.read_csv(TIPSCSVPATH)
    print(tips.head())

    # Count the parties by day and party size.
    party_counts = pd.crosstab(index=tips.day, columns=tips.sizes)
    print('曜日とパーティの大きさ別に仕分け')
    print(party_counts)

    # Keep the columns labelled 2 through 5 (sizes 1 and 6 are rare).
    # ``.ix`` on an integer-labelled axis was label based, so ``.loc`` is the
    # equivalent that still exists in current pandas.
    party_counts = party_counts.loc[:, 2:5]
    print('サイズ1と6のパーティは少ないから除外')
    print(party_counts)

    # Normalise each row so that it sums to 1.
    print('正規化')
    party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
    print(party_pcts)
    party_pcts.plot(kind='bar', stacked=True)
class BJWorksAverageWage():
    """Scrape a table of yearly wage figures from a fixed page and plot it."""

    def __init__(self):
        self.url = 'http://www.bjrbj.gov.cn/bmfw/ywml/201601/t20160112_55858.html'
        # One row per year (datetime index) with a single 'wage' column.
        self.wagelist = DataFrame(columns=['wage'])

    def crawl(self):
        """Download the page and fill ``self.wagelist`` from its first tbody."""
        response = requests.get(self.url)
        document = etree.HTML(response.text)
        table_body = document.xpath('//*/tbody')[0]
        for row_no, row in enumerate(table_body):
            if row_no == 0:
                # The first row is skipped (presumably the header).
                continue
            # From the third row on the text sits one element deeper than in
            # the second row.
            nested = row_no != 1
            year_cell = row[0][0][0] if nested else row[0][0]
            year = datetime.strptime(year_cell.text, '%Y')
            wage_cell = row[1][0][0] if nested else row[1][0]
            wage = float(wage_cell.text)
            self.wagelist.loc[year] = [wage]

    def save(self):
        """Placeholder; nothing is persisted."""
        pass

    def plot(self):
        """Plot the collected wages on a new figure."""
        import matplotlib.pyplot as plt
        plt.figure()
        self.wagelist.plot()
def test_subplots(self):
    """Legend and tick-label visibility of ``subplots=True`` plots."""
    df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10]))

    def check_all_visible(labels):
        for label in labels:
            self.assert_(label.get_visible())

    # An explicit legend request gives every subplot a legend.
    axes = df.plot(subplots=True, sharex=True, legend=True)
    for ax in axes:
        self.assert_(ax.get_legend() is not None)

    # With a shared x axis the x tick labels of the leading axes are hidden;
    # the last axes keeps all of its labels.
    axes = df.plot(subplots=True, sharex=True)
    for ax in axes[:-2]:
        for label in ax.get_xticklabels():
            self.assert_(not label.get_visible())
        check_all_visible(ax.get_yticklabels())
    check_all_visible(axes[-1].get_xticklabels())
    check_all_visible(axes[-1].get_yticklabels())

    # Without sharing, every axes shows both sets of labels.
    axes = df.plot(subplots=True, sharex=False)
    for ax in axes:
        check_all_visible(ax.get_xticklabels())
        check_all_visible(ax.get_yticklabels())
def test_errorbar_plot(self):
    """``xerr``/``yerr`` accept frames, series, dicts, arrays and column names."""
    df = DataFrame({'x': np.arange(12), 'y': np.arange(12, 0, -1)})
    d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}
    df_err = DataFrame(d_err)

    # check line plots
    for log_axes in ({'logy': True}, {'logx': True, 'logy': True}):
        _check_plot_works(df.plot, yerr=df_err, **log_axes)

    for kind in ('line', 'bar', 'barh'):
        _check_plot_works(df.plot, yerr=df_err['x'], kind=kind)
        _check_plot_works(df.plot, yerr=d_err, kind=kind)
        _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind)
        _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], kind=kind)
        _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind)

    shifted = df + 1
    _check_plot_works(shifted.plot, yerr=df_err, xerr=df_err, kind='bar', log=True)

    # yerr is raw error values
    _check_plot_works(df['y'].plot, yerr=np.ones(12)*0.4)
    _check_plot_works(df.plot, yerr=np.ones((2, 12))*0.4)

    # yerr is column name
    df['yerr'] = np.ones(12)*0.2
    _check_plot_works(df.plot, y='y', x='x', yerr='yerr')

    # Error values of the wrong length are rejected.
    with tm.assertRaises(ValueError):
        df.plot(yerr=np.random.randn(11))

    # Non-numeric error values are rejected.
    text_err = DataFrame({'x': ['zzz']*12, 'y': ['zzz']*12})
    with tm.assertRaises(TypeError):
        df.plot(yerr=text_err)
def test_from_resampling_area_line_mixed(self):
    """Stacked line/area plots of weekly and monthly frames share one axis.

    Both plotting orders and both kind combinations are checked: the x data
    must have a frequency starting with 'W' and the y data must equal the
    running column sum.
    """
    idxh = date_range('1/1/1999', periods=52, freq='W')
    idxl = date_range('1/1/1999', periods=12, freq='M')
    high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2])
    low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2])

    # Expected x data of the lines drawn for ``low`` and for ``high``.
    low_x = np.array([1514, 1519, 1523, 1527, 1531, 1536,
                      1540, 1544, 1549, 1553, 1558, 1562])
    high_x = idxh.to_period().asi8

    def check_lines(ax, first_line, frame, expected_x):
        # Verify the three consecutive lines starting at ``first_line``.
        stacked = np.zeros(len(expected_x))
        for col in range(3):
            line = ax.lines[first_line + col]
            self.assertTrue(PeriodIndex(data=line.get_xdata()).freq.startswith('W'))
            self.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x)
            # check stacked values are correct
            stacked += frame[col].values
            self.assert_numpy_array_equal(line.get_ydata(orig=False), stacked)

    kind_pairs = [('line', 'area'), ('area', 'line')]

    # low to high
    for first_kind, second_kind in kind_pairs:
        ax = low.plot(kind=first_kind, stacked=True)
        ax = high.plot(kind=second_kind, stacked=True, ax=ax)
        check_lines(ax, 0, low, low_x)
        check_lines(ax, 3, high, high_x)

    # high to low
    for first_kind, second_kind in kind_pairs:
        ax = high.plot(kind=first_kind, stacked=True)
        ax = low.plot(kind=second_kind, stacked=True, ax=ax)
        check_lines(ax, 0, high, high_x)
        check_lines(ax, 3, low, low_x)
def test_partially_invalid_plot_data(self):
    """Every listed plot kind rejects a frame that contains strings."""
    data = DataFrame(randn(10, 2), dtype=object)
    # Replace a random selection of rows with a non-numeric value.
    row_mask = np.random.rand(data.shape[0]) > 0.5
    data[row_mask] = 'a'
    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        with tm.assertRaises(TypeError):
            data.plot(kind=plot_kind)
def test_bar_colors(self):
    """Bar colours for the default cycle, ``color`` and ``colormap``."""
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors
    from matplotlib import cm

    default_colors = plt.rcParams.get('axes.color_cycle')
    custom_colors = 'rgcby'
    to_rgba = colors.colorConverter.to_rgba

    df = DataFrame(randn(5, 5))

    def check_colors(ax, expected_for):
        # Every fifth patch is compared with the colour expected at that
        # position.
        for pos, bar in enumerate(ax.patches[::5]):
            self.assertEqual(expected_for(pos), bar.get_facecolor())

    ax = df.plot(kind='bar')
    check_colors(ax, lambda pos: to_rgba(default_colors[pos % len(default_colors)]))
    tm.close()

    ax = df.plot(kind='bar', color=custom_colors)
    check_colors(ax, lambda pos: to_rgba(custom_colors[pos]))
    tm.close()

    # Test str -> colormap functionality, then an actual colormap object
    for colormap in ('jet', cm.jet):
        ax = df.plot(kind='bar', colormap=colormap)
        rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
        check_colors(ax, lambda pos: rgba_colors[pos])
        tm.close()

    # A single named colour on a one-column frame only has to plot.
    df.ix[:, [0]].plot(kind='bar', color='DodgerBlue')
def test_rotation(self):
    """x tick labels are unrotated by default and follow ``rot`` if given."""
    frame = DataFrame(randn(5, 5))

    # Default rot 0
    default_axes = frame.plot()
    self._check_ticks_props(default_axes, xrot=0)

    rotated_axes = frame.plot(rot=30)
    self._check_ticks_props(rotated_axes, xrot=30)
def test_line_colors(self):
    """Line colours for ``color``, ``colors`` and ``colormap`` arguments."""
    import matplotlib.pyplot as plt
    import sys
    try:
        from StringIO import StringIO  # Python 2
    except ImportError:
        from io import StringIO  # Python 3
    from matplotlib import cm

    custom_colors = "rgcby"

    plt.close("all")
    df = DataFrame(np.random.randn(5, 5))

    ax = df.plot(color=custom_colors)

    lines = ax.get_lines()
    for i, l in enumerate(lines):
        xp = custom_colors[i]
        rs = l.get_color()
        self.assert_(xp == rs)

    # stderr is swapped out while plotting with ``colors=``, presumably to
    # hide a message emitted for that keyword; it is always restored.
    tmp = sys.stderr
    sys.stderr = StringIO()
    try:
        plt.close("all")
        ax2 = df.plot(colors=custom_colors)
        lines2 = ax2.get_lines()
        for l1, l2 in zip(lines, lines2):
            # assert_(a, b) would use ``b`` as the failure message and never
            # compare the two colours, so compare explicitly.
            self.assert_(l1.get_color() == l2.get_color())
    finally:
        sys.stderr = tmp

    # The colormap may be given by name or as a colormap object.
    for colormap in ("jet", cm.jet):
        plt.close("all")
        ax = df.plot(colormap=colormap)

        # Materialise the colours: on Python 3 ``map`` returns an iterator
        # that cannot be indexed.
        rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df))))

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = rgba_colors[i]
            rs = l.get_color()
            self.assert_(xp == rs)

    # make color a list if plotting one column frame
    # handles cases like df.plot(color='DodgerBlue')
    plt.close("all")
    df.ix[:, [0]].plot(color="DodgerBlue")
def test_df_series_secondary_legend(self):
    """Legend labels and y-axis visibility when a Series joins a frame plot."""
    # GH 9779
    df = DataFrame(np.random.randn(30, 3), columns=list('abc'))
    s = Series(np.random.randn(30), name='x')

    def plot_series_on_secondary(ax, pass_ax):
        # Plot ``s`` on the secondary axis, optionally naming the target axes.
        target = {'ax': ax} if pass_ax else {}
        s.plot(legend=True, secondary_y=True, **target)

    # primary -> secondary (first without, then with passing ax)
    for pass_ax in (False, True):
        ax = df.plot()
        plot_series_on_secondary(ax, pass_ax)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)'])
        self.assertTrue(ax.get_yaxis().get_visible())
        self.assertTrue(ax.right_ax.get_yaxis().get_visible())
        tm.close()

    # secondary -> secondary (first without, then with passing ax)
    all_right = ['a (right)', 'b (right)', 'c (right)', 'x (right)']
    for pass_ax in (False, True):
        ax = df.plot(secondary_y=True)
        plot_series_on_secondary(ax, pass_ax)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        self._check_legend_labels(ax.left_ax, labels=all_right)
        self.assertFalse(ax.left_ax.get_yaxis().get_visible())
        self.assertTrue(ax.get_yaxis().get_visible())
        tm.close()

    # secondary -> secondary (with passing ax), frame plotted with
    # mark_right=False
    ax = df.plot(secondary_y=True, mark_right=False)
    s.plot(ax=ax, legend=True, secondary_y=True)
    # both legends are drawn on left ax
    # left axis must be invisible and right axis must be visible
    unmarked = ['a', 'b', 'c', 'x (right)']
    self._check_legend_labels(ax.left_ax, labels=unmarked)
    self.assertFalse(ax.left_ax.get_yaxis().get_visible())
    self.assertTrue(ax.get_yaxis().get_visible())
    tm.close()
def slide_10():
    """Plot a random-walk Series and a four-column random-walk DataFrame."""
    s = Series(randn(10).cumsum(), index=np.arange(0, 100, 10))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(s)
    s.plot()

    df = DataFrame(randn(10, 4).cumsum(0),
                   columns=['A', 'B', 'C', 'D'],
                   index=np.arange(0, 100, 10))
    df.plot()
def test_nonzero_base(self):
    """Hourly data offset by 30 minutes keeps its offset when plotted."""
    # GH2571
    hourly = date_range('2012-12-20', periods=24, freq='H')
    idx = hourly + timedelta(minutes=30)
    frame = DataFrame(np.arange(24), index=idx)
    _, ax = self.plt.subplots()
    frame.plot(ax=ax)
    first_line = ax.get_lines()[0]
    plotted_x = first_line.get_xdata()
    assert not Index(plotted_x).is_normalized
def test_line_colors(self):
    """Line colours for ``color``, ``colors`` and ``colormap`` arguments."""
    import matplotlib.pyplot as plt
    import sys
    from matplotlib import cm

    custom_colors = 'rgcby'
    df = DataFrame(randn(5, 5))

    ax = df.plot(color=custom_colors)
    lines = ax.get_lines()
    for pos, line in enumerate(lines):
        self.assertEqual(custom_colors[pos], line.get_color())

    # stderr is replaced while plotting with ``colors=`` and restored
    # afterwards, whatever happens.
    saved_stderr = sys.stderr
    sys.stderr = StringIO()
    try:
        tm.close()
        ax2 = df.plot(colors=custom_colors)
        for first, second in zip(lines, ax2.get_lines()):
            self.assertEqual(first.get_color(), second.get_color())
    finally:
        sys.stderr = saved_stderr

    # The colormap may be given by name or as a colormap object.
    for colormap in ('jet', cm.jet):
        tm.close()
        ax = df.plot(colormap=colormap)
        rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
        for pos, line in enumerate(ax.get_lines()):
            self.assertEqual(rgba_colors[pos], line.get_color())

    # make color a list if plotting one column frame
    # handles cases like df.plot(color='DodgerBlue')
    tm.close()
    df.ix[:, [0]].plot(color='DodgerBlue')
def test_nonnumeric_exclude(self):
    """Non-numeric columns are left out; a non-numeric Series cannot plot."""
    idx = date_range('1/1/1987', freq='A', periods=3)
    frame = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}, idx)

    fig, ax = self.plt.subplots()
    frame.plot(ax=ax)  # it works
    plotted = ax.get_lines()
    assert len(plotted) == 1  # B was plotted
    self.plt.close(fig)

    with pytest.raises(TypeError):
        frame['A'].plot()
def test_kde(self):
    """KDE plots work, carry a legend and honour ``logy`` per subplot."""
    _skip_if_no_scipy()
    frame = DataFrame(randn(100, 4))

    for extra in ({}, {'subplots': True}):
        _check_plot_works(frame.plot, kind='kde', **extra)

    single_ax = frame.plot(kind='kde')
    self.assert_(single_ax.get_legend() is not None)

    log_axes = frame.plot(kind='kde', logy=True, subplots=True)
    for sub_ax in log_axes:
        self.assert_(sub_ax.get_yscale() == 'log')
def test_rotation(self):
    """x tick labels are unrotated by default and follow ``rot`` if given."""
    frame = DataFrame(randn(5, 5))

    # Default rot 0
    _, default_ax = self.plt.subplots()
    plotted = frame.plot(ax=default_ax)
    self._check_ticks_props(plotted, xrot=0)

    _, rotated_ax = self.plt.subplots()
    plotted = frame.plot(rot=30, ax=rotated_ax)
    self._check_ticks_props(plotted, xrot=30)
def test_label(self):
    """The x-axis label is the x column name, or ``label`` when passed."""
    import matplotlib.pyplot as plt

    plt.close('all')
    frame = DataFrame(np.random.randn(10, 3), columns=['a', 'b', 'c'])

    ax = frame.plot(x='a', y='b')
    x_label_text = ax.xaxis.get_label().get_text()
    self.assert_(x_label_text == 'a')

    plt.close('all')
    ax = frame.plot(x='a', y='b', label='LABEL')
    x_label_text = ax.xaxis.get_label().get_text()
    self.assert_(x_label_text == 'LABEL')
# report performance rmse = sqrt(mean_squared_error(raw_values[-12:], predictions)) print('%d) Test RMSE: %.3f' % (r + 1, rmse)) error_scores.append(rmse) return error_scores, raw_values[-12:], predictions # configure the experiment series = read_csv('sales-of-shampoo-over-a-three-ye.csv', header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser) print(series) # configure the experiment n_lag = 1 n_repeats = 30 n_epochs = 1000 n_batch = 4 n_neurons = 3 results = DataFrame() results['results'], raw_values1, predictions = experiment( series, n_lag, n_repeats, n_epochs, n_batch, n_neurons) # summarize results print(results.describe()) results.plot(title="LSTM RMSE Iteration") pyplot.show() pyplot.savefig('plot_lstm_rmse.png')
def test_invalid_colormap(self):
    """An unknown colormap name makes ``plot`` raise ValueError."""
    frame = DataFrame(randn(3, 2), columns=['A', 'B'])
    bad_name = 'invalid_colormap'
    with tm.assertRaises(ValueError):
        frame.plot(colormap=bad_name)
def demo2():
    """Show four cumulative random series drawn with the 'o--' style."""
    walk = np.random.randn(10, 4).cumsum(0)
    positions = np.arange(0, 100, 10)
    frame = DataFrame(walk, columns=['a', 'b', 'c', 'd'], index=positions)
    frame.plot(style='o--')
    plt.show()
def test_explicit_label(self):
    """An explicit ``label`` becomes the text of the x-axis label."""
    frame = DataFrame(randn(10, 3), columns=['a', 'b', 'c'])
    ax = frame.plot(x='a', y='b', label='LABEL')
    x_label = ax.xaxis.get_label()
    self.assertEqual(x_label.get_text(), 'LABEL')
# Names of the two CSV exports; they double as the S3 object keys.
key = filenamee+'_Filtered_boruta_FS.csv'
key_two = filenamee+'_Feature_selected.csv'
bucket = 'superlearner'

# Pair every feature column name with its Boruta ranking.
# NOTE(review): the name ``list`` shadows the builtin; it is kept because
# code outside this fragment may refer to it.
Selected = pd.DataFrame(S)
Features = New_File.iloc[:, 1:New_File.shape[1]-1]
names = pd.DataFrame(Features.columns)
list = pd.concat([names, Selected], axis=1)
list.columns = ['Feature', 'Boruta_ranking']

# call transform() on X to filter it down to selected features
# (the result is replaced right away by the ranking table below)
X_filtered = boruta.transform(Xs.values)
X_filtered = list.loc[(list['Boruta_ranking'] <= 15)]
X_filtered.to_csv(key)

# Keep only the selected feature columns, framed by the study and outcome
# columns.
df = pd.DataFrame(New_File)
df = df.loc[:, X_filtered['Feature']]
df = pd.concat([study_1, df, outc_1], axis=1)
df.to_csv(key_two)

# Line chart of the ranking per selected feature, saved as PDF.
X_filtered1 = DataFrame(X_filtered, columns=['Feature', 'Boruta_ranking'])
X_filtered1.plot(x='Feature', y='Boruta_ranking', kind='line')
plt.xticks(rotation=30, ha='right')
plt.ylabel('Boruta_Ranking')
plt.xlabel('Feature selected')
plt.tight_layout()
plt.savefig(filenamee+'_Filtered_boruta_FS.pdf')

# Upload both CSV files and build their URLs.
for upload_name in (key, key_two):
    s3_resource.meta.client.upload_file(
        Filename=upload_name, Bucket=bucket, Key=upload_name)

New_url = f"https://{bucket}.s3.eu-west-2.amazonaws.com/{key}"
New_url_two = f"https://{bucket}.s3.eu-west-2.amazonaws.com/{key_two}"
# 5.2 Measure the variance between the data and the values predicted by the model # You can measure whether the results of your model fit the underlying data by using the residual # sum of squares (RSS) metric. A small RSS indicates that the model fits tightly to the data. # # Yet another approach to validate the ARIMA model appropriateness is by performing residual analysis. # # Print the results of the ARIMA model and plot the residuals. A density plot of the residual error values # indicates a normal distribution centered around zero mean. Also, the residuals do not violate the assumptions of # constant location and scale with most values in the range (-1,1). print(results_ARIMA.summary()) # plot residual errors residuals = DataFrame(results_ARIMA.resid) residuals.plot(kind='kde') print(residuals.describe()) # 5.3 Scale predictions¶ # Now that the model is returning the results you want to see, you # can scale the model predictions back to the original scale. For this, you will remove the first order # differencing and take exponent to restore the predictions back to their original scale. # # The lower the root mean square error (RMSE) and the closer it is to 0, the better are the model # predictions in being closer to actual values. euro_predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True) print (euro_predictions_ARIMA_diff.head())
'Temperature': [data[0][2], data[1][2], data[2][2], data[3][2], data[4][2],
                data[5][2], data[6][2], data[7][2], data[8][2], data[9][2],
                data[10][2], data[11][2], data[12][2], data[13][2],
                data[14][2]]
}  # closes the ``data2`` dict literal that is opened before this fragment
df2 = DataFrame(data2, columns=['Machine', 'Temperature'])

# Main window that hosts both charts.
root = tk.Tk()
root.title("Machine's Temperatures")
root.grid()
# root.geometry("1200x1200")

# Chart 1: bar chart of the summed temperature per machine, taken from
# ``df1`` (defined before this fragment).
figure1 = plt.Figure(figsize=(4, 4), dpi=100)
ax1 = figure1.add_subplot(111)
bar1 = FigureCanvasTkAgg(figure1, root)
bar1.get_tk_widget().grid(row=14, column=10)
df1 = df1[['Machine', 'Temperature']].groupby('Machine').sum()
df1.plot(kind='bar', legend=True, ax=ax1)
ax1.set_title('Machine Vs. Temperature')

# Chart 2: line chart of the summed temperature per machine, taken from
# ``df2``.
figure2 = plt.Figure(figsize=(4, 4), dpi=100)
ax2 = figure2.add_subplot(111)
line2 = FigureCanvasTkAgg(figure2, root)
line2.get_tk_widget().grid(row=14, column=12)
df2 = df2[['Machine', 'Temperature']].groupby('Machine').sum()
df2.plot(kind='line', legend=True, ax=ax2, color='r', marker='o', fontsize=10)
ax2.set_title('Machine Vs. Temperature')

# Menu bar with a "File" cascade that has a single "New" entry; no command
# is attached to the entry here.
menu = Menu(root)
root.config(menu=menu)
filemenu = Menu(menu)
menu.add_cascade(label='File', menu=filemenu)
filemenu.add_command(label='New')
def plot_scatter(minsize, r):
    """Scatter the '#Positive' / '#Negative' counts of category groups.

    The category dictionary comes from ``load_catdict()`` and the grouping
    from ``get_strats(minsize, r, cd)``. Each group is drawn in its own
    colour, together with a dashed line through the points (x, x) and the
    categories that have a URL pattern.
    """
    cd = load_catdict()
    cat_strats, article_strats = get_strats(minsize, r, cd)
    lonelies_cats, tps_cats, tns_cats, mixed_cats = cat_strats
    positive_url_cats = [
        c for c in cd if cd[c]["URLPattern"] or cd[c]["URLBracesPattern"]
    ]

    count_columns = ['#Positive', '#Negative']

    def counts_frame(categories):
        # One row per category holding its two counts from ``cd``.
        frame = DataFrame(columns=count_columns, index=categories)
        for cat in categories:
            frame.loc[cat] = Series({
                '#Positive': cd[cat]["#Positive"],
                '#Negative': cd[cat]["#Negative"]
            })
        return frame

    lonelies = counts_frame(lonelies_cats)
    tps = counts_frame(tps_cats)
    tns = counts_frame(tns_cats)
    mixed = counts_frame(mixed_cats)
    positive_url = counts_frame(positive_url_cats)

    # Points (1, 1) .. (1999, 1999) for the diagonal; rows of any further
    # categories stay empty.
    linedf = DataFrame(columns=count_columns, index=cd.keys())
    for x, cat in enumerate(cd, start=1):
        if x < 2000:
            linedf.loc[cat] = Series({'#Positive': x, '#Negative': x})

    fig, ax = plt.subplots(nrows=1, ncols=1)
    mixed.plot(x="#Positive", y="#Negative", kind="scatter", ax=ax,
               color="blue", loglog=False)
    lonelies.plot(x="#Positive", y="#Negative", kind="scatter", ax=ax,
                  color="grey", marker="x")
    tps.plot(x="#Positive", y="#Negative", kind="scatter", ax=ax,
             color="green")
    tns.plot(x="#Positive", y="#Negative", kind="scatter", ax=ax,
             color="black")
    linedf.plot(x="#Positive", y="#Negative", ls="--", ax=ax, color="grey")
    positive_url.plot(x="#Positive", y="#Negative", kind="scatter",
                      marker="x", ax=ax, color="pink")

    ax.set_title('Assessing Categories - ' + str(r))
    legend_labels = ["Regression Line", "Mixed", "Too Small", "Positive", "Negative"]
    ax.legend(legend_labels)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.show()
def test_secondary_bar_frame(self):
    """Columns named in ``secondary_y`` get their y ticks on the right."""
    frame = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'])
    axes = frame.plot(kind='bar', secondary_y=['a', 'c'], subplots=True)
    # Expected tick position for the subplots of 'a', 'b' and 'c'.
    expected_positions = ('right', 'default', 'right')
    for pos, expected in enumerate(expected_positions):
        tick_side = axes[pos].get_yaxis().get_ticks_position()
        self.assertEqual(tick_side, expected)
def test_dataframe(self):
    """The x data of a plotted time-series frame equals its period index."""
    frame = DataFrame({'a': tm.makeTimeSeries()})
    ax = frame.plot()
    first_line = ax.get_lines()[0]
    plotted_x = first_line.get_xdata()
    assert_array_equal(frame.index.to_period(), plotted_x)
def test_invalid_kind(self):
    """An unknown ``kind`` passed to ``plot`` raises ValueError."""
    bogus_kind = 'aasdf'
    frame = DataFrame(randn(10, 2))
    with tm.assertRaises(ValueError):
        frame.plot(kind=bogus_kind)
def test_all_invalid_plot_data(self):
    """Plotting a frame holding only strings raises TypeError for each kind."""
    frame = DataFrame(list('abcd'))
    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        with tm.assertRaises(TypeError):
            frame.plot(kind=plot_kind)
def test_rotation(self):
    """``rot`` is applied to every x tick label."""
    angle = 30
    frame = DataFrame(randn(5, 5))
    axis = frame.plot(rot=angle)
    for tick_label in axis.get_xticklabels():
        self.assertEqual(tick_label.get_rotation(), angle)
# Script fragment: stores one fund's (time, value) rows in ``all_data`` and
# then exports/plots the collected data.
# NOTE(review): the nesting below was reconstructed from a flattened source;
# presumably the first part runs inside a per-fund loop defined above this
# fragment - confirm against the original file.
arr1 = np.array(data);
# Column 0 is converted with timeformat() and used as the key, column 1 is
# the value stored under that key.
indexs = [timeformat(x) for x in arr1[:,0:1].ravel()];
values = [x for x in arr1[:,1:2].ravel()]
count = 0
for index in indexs:
    # Create the per-fund dict the first time this fund code is seen.
    if((fundCode in all_data)==False):
        all_data[fundCode]={}
    all_data[fundCode][index]=values[count];
    count=count+1
# Save the data
fig,axes = plt.subplots(2, 1)
# Process the basic information
df2 = DataFrame(all_data_base)
print(df2)
df2.stack().unstack(0).to_excel(f'result_{time.time()}.xlsx',sheet_name='out')
df2.iloc[1:5,:].plot.barh(ax=axes[0],grid=True,fontsize=25)
# Process the returns
# NOTE(review): fillna(method='ffill') is deprecated in recent pandas releases
# in favour of .ffill() - confirm the pandas version in use.
df=DataFrame(all_data).sort_index().fillna(method='ffill')
print(df)
df.plot(ax=axes[1],grid=True,fontsize=25)
fig.set_size_inches(20, 20)
# time.time() is evaluated again here, so the PNG name differs from the XLSX name.
fig.savefig(f'result_{time.time()}.png')
# https://www.zhihu.com/question/25404709 garbled Chinese text in matplotlib legends
def test_subplots_multiple_axes(self):
    """Check ``subplots=True`` when the caller supplies the target axes.

    Covers drawing onto one row of an existing grid, passing the wrong number
    of axes, passing a 2-D axes array together with a conflicting ``layout``,
    and the single-column case.
    """
    # GH 5353, 6970, GH 7069
    fig, axes = self.plt.subplots(2, 3)
    df = DataFrame(np.random.rand(10, 3),
                   index=list(string.ascii_letters[:10]))

    # draw on the first row
    returned = df.plot(subplots=True, ax=axes[0], sharex=False,
                       sharey=False)
    self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
    assert returned.shape == (3, )
    assert returned[0].figure is fig
    # draw on second row
    returned = df.plot(subplots=True, ax=axes[1], sharex=False,
                       sharey=False)
    self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
    assert returned.shape == (3, )
    assert returned[0].figure is fig
    # the grid that was passed in keeps its 2x3 shape
    self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
    tm.close()

    with pytest.raises(ValueError):
        fig, axes = self.plt.subplots(2, 3)
        # pass different number of axes from required
        df.plot(subplots=True, ax=axes)

    # pass 2-dim axes and invalid layout
    # invalid layout should not affect the input and return value
    # (the warning itself is tested in
    # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes)
    fig, axes = self.plt.subplots(2, 2)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        df = DataFrame(np.random.rand(10, 4),
                       index=list(string.ascii_letters[:10]))

        returned = df.plot(subplots=True, ax=axes, layout=(2, 1),
                           sharex=False, sharey=False)
        self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
        assert returned.shape == (4, )

        returned = df.plot(subplots=True, ax=axes, layout=(2, -1),
                           sharex=False, sharey=False)
        self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
        assert returned.shape == (4, )

        returned = df.plot(subplots=True, ax=axes, layout=(-1, 2),
                           sharex=False, sharey=False)
        self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
        assert returned.shape == (4, )

    # single column
    fig, axes = self.plt.subplots(1, 1)
    df = DataFrame(np.random.rand(10, 1),
                   index=list(string.ascii_letters[:10]))

    axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False)
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
    assert axes.shape == (1, )
def _plot_metric_timeseries(metrics, labels, colors, style, fsize_fixed,
                            fsize_stacked, xlim=None):
    """Draw the metrics on one axis (fewer than 4) or as stacked subplots."""
    # Only forward xlim when the caller restricts the range.
    extra = {} if xlim is None else {"xlim": xlim}
    if len(labels) < 4:
        ax = metrics.plot(legend=True, sharex=True, grid=True, sharey=True,
                          markersize=10, figsize=fsize_fixed, color=colors,
                          style=style, **extra)
        ax.set_ylabel("Value")
    else:
        ax = metrics.plot(subplots=True, legend=False, sharex=True,
                          grid=True, sharey=True, markersize=10,
                          figsize=fsize_stacked, color=colors, style=style,
                          **extra)
        for i, label in enumerate(labels):
            ax[i].set_ylabel(label)
    pyplot.xlabel("Segment number")


def plot(prefix, header, row):
    """Plot the per-segment metric time series of one job and save the figures.

    ``header`` and ``row`` are zipped into a field mapping. The "jobid" field
    names the job; every other field is a ':'-separated list of numbers (one
    value per segment). Metrics whose values sum to zero are skipped.

    Three files may be written, all named ``prefix + <kind> + jobid``:
    "timeseries", "hist" and, when the series has more than 50 segments,
    "timeseries...-30" restricted to x in (0, 30).

    Raises KeyError if "jobid" is missing or a metric has no entry in the
    module-level style/colour maps.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")

    result = []
    # Length of the most recently parsed series; decides below whether the
    # extra "first 30 segments" figure is produced.
    segment_count = 0
    for metric, encoded in fields.items():
        values = [float(v) for v in encoded.split(":")]
        segment_count = len(values)
        if sum(values) == 0:
            continue
        result.extend(
            [metric, segment, value] for segment, value in enumerate(values))

    if len(result) == 0:
        print("Empty job! Cannot plot!")
        return

    data = DataFrame(result, columns=["metrics", "segment", "value"])
    # Group by the column name itself, not a one-element list: with a list,
    # pandas >= 2.0 yields 1-tuples as group keys, which would break the
    # lookups in linestyleMap/markerMap/colorMap below.
    groups = data.groupby("metrics")

    display_names = {
        "md_file_delete": "file_delete",
        "md_file_create": "file_create",
    }
    metrics = DataFrame()
    labels = []
    colors = []
    style = []
    for name, group in groups:
        style.append(linestyleMap[name] + markerMap[name])
        colors.append(colorMap[name])
        label = display_names.get(name, name)
        metrics[label] = group["value"].tolist()
        labels.append(label)

    fsize = (8, 1 + 1.1 * len(labels))
    fsizeFixed = (8, 2)
    fsizeHist = (8, 4)

    pyplot.close('all')

    _plot_metric_timeseries(metrics, labels, colors, style, fsizeFixed, fsize)
    pyplot.savefig(prefix + "timeseries" + jobid + fileformat,
                   bbox_inches='tight', dpi=150)

    metrics.hist(sharex=True, grid=True, sharey=True, figsize=fsizeHist,
                 bins=10)
    pyplot.savefig(prefix + "hist" + jobid + fileformat,
                   bbox_inches='tight', dpi=150)

    # Plot first 30 segments
    if segment_count <= 50:
        return

    _plot_metric_timeseries(metrics, labels, colors, style, fsizeFixed, fsize,
                            xlim=(0, 30))
    pyplot.savefig(prefix + "timeseries" + jobid + "-30" + fileformat,
                   bbox_inches='tight', dpi=150)
# Script fragment: inspects the raw ``ao``/``nao`` arrays, builds the NAO
# series and an AO/NAO frame, and plots them.
# NOTE(review): ``ao``, ``nao`` and ``AO`` are defined before this fragment.
print(ao[:2])
print(ao[-2:])
print("Dates:nao")
print(nao[:2])
print(nao[-2:])
print("Length")
print(len(ao))
print(len(nao))
## Create a series
# One date per row of ``nao``, starting in 1950-01; column 2 of ``nao``
# supplies the values.
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of
# 'ME' - confirm the pandas version in use.
dates_nao=pd.date_range('1950-01',periods=nao.shape[0],freq='M')
NAO=Series(nao[:,2],index=dates_nao)
print(NAO)
print(NAO.index)
aonao=DataFrame({'AO':AO,'NAO':NAO})
aonao.plot(subplots=True)
plt.close()
print(aonao.head())
print(aonao.tail())
print(aonao['NAO'])
print(aonao.NAO)
# Add a temporary difference column, show it, then remove it again.
aonao['Diff']=aonao['AO']-aonao['NAO']
print(aonao.head())
del aonao['Diff']
print(aonao.tail())
## Crazy Combination
# NAO values where AO > 0 and NAO < 0, restricted to dates strictly between
# 1980-10-01 and 1989-01-01, drawn as horizontal bars.
aonao.loc[(aonao.AO>0)&(aonao.NAO<0) &\
          (aonao.index>datetime.datetime(1980,10,1)) &\
          (aonao.index<datetime.datetime(1989,1,1)),\
          'NAO'].plot(kind='barh')
# Script fragment: prepares the corn and pork price frames and plots them
# together with the soybean-meal frame built before this fragment.

# Corn: mean price per date, divided by 100.
df_data_corn_price_ = df_data_corn_price.groupby(['日期'])[['价格']].mean()/100
df_data_corn_price = df_data_corn_price_.reset_index()
df_data_corn_price = df_data_corn_price.sort_values(by='日期')
df_data_corn_price.rename(columns={'价格': '玉米'}, inplace=True)

# Pork
# DataFrame.append was removed in pandas 2.0; collect the per-city frames and
# concatenate them once (this also avoids re-copying the frame per city).
from pandas import concat

city_frames = []
for i in range(18):
    temp1 = get_data_by_item_name('baby_data', cities[i])
    # First row holds the column names, the remaining rows the records.
    df_temp1 = DataFrame(temp1[1:], columns=temp1[0])
    city_frames.append(df_temp1)
df_data_baby_data = concat([DataFrame()] + city_frames, ignore_index=True)
del df_data_baby_data['地区']
del df_data_baby_data['品种']
del df_data_baby_data['分类']
df_data_baby_data['价格'] = df_data_baby_data['价格'].apply(float)
df_data_baby_data = df_data_baby_data.groupby(['日期'])[['价格']].mean()
# NOTE(review): the next line discards the pork data computed above and
# plots twice the corn price instead - confirm whether this is a placeholder
# or a bug before relying on the chart.
df_data_baby_data = df_data_corn_price_ * 2
df_data_baby_data = df_data_baby_data.reset_index()
df_data_baby_data = df_data_baby_data.sort_values(by='日期')
df_data_baby_data.rename(columns={'价格': '猪肉'}, inplace=True)

# Font settings so the Chinese labels and the minus sign render.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

fig,axes = plt.subplots(1)
df_data_corn_price.plot(ax=axes, kind='line', x='日期', color='green', title='全国价格走势图')
df_data_baby_data.plot(ax=axes, x='日期', kind='line', color='red')
df_data_bean_price.plot(ax=axes, kind='line', x='日期', color='blue')
# Units: pork in yuan/kg; soybean meal and corn in yuan/10kg
plt.show()
def test_from_resampling_area_line_mixed(self):
    """Mix stacked line and area plots of weekly and monthly frames on one axis.

    For both plotting orders (monthly first, weekly first) and both kind
    pairings, the x data of every line must carry the weekly frequency and
    the y data must equal the cumulative (stacked) column values.
    """
    idxh = date_range('1/1/1999', periods=52, freq='W')
    idxl = date_range('1/1/1999', periods=12, freq='M')
    high = DataFrame(np.random.rand(len(idxh), 3),
                     index=idxh, columns=[0, 1, 2])
    low = DataFrame(np.random.rand(len(idxl), 3),
                    index=idxl, columns=[0, 1, 2])

    # low to high
    for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
        ax = low.plot(kind=kind1, stacked=True)
        ax = high.plot(kind=kind2, stacked=True, ax=ax)

        # check low dataframe result
        expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540,
                               1544, 1549, 1553, 1558, 1562],
                              dtype=np.float64)
        expected_y = np.zeros(len(expected_x), dtype=np.float64)
        # the first three lines belong to the frame plotted first
        for i in range(3):
            l = ax.lines[i]
            self.assertEqual(PeriodIndex(l.get_xdata()).freq, idxh.freq)
            self.assert_numpy_array_equal(l.get_xdata(orig=False),
                                          expected_x)
            # check stacked values are correct
            expected_y += low[i].values
            self.assert_numpy_array_equal(
                l.get_ydata(orig=False), expected_y)

        # check high dataframe result
        expected_x = idxh.to_period().asi8.astype(np.float64)
        expected_y = np.zeros(len(expected_x), dtype=np.float64)
        # lines 3..5 belong to the frame plotted second
        for i in range(3):
            l = ax.lines[3 + i]
            self.assertEqual(PeriodIndex(data=l.get_xdata()).freq,
                             idxh.freq)
            self.assert_numpy_array_equal(l.get_xdata(orig=False),
                                          expected_x)
            expected_y += high[i].values
            self.assert_numpy_array_equal(l.get_ydata(orig=False),
                                          expected_y)

    # high to low
    for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
        ax = high.plot(kind=kind1, stacked=True)
        ax = low.plot(kind=kind2, stacked=True, ax=ax)

        # check high dataframe result
        expected_x = idxh.to_period().asi8.astype(np.float64)
        expected_y = np.zeros(len(expected_x), dtype=np.float64)
        for i in range(3):
            l = ax.lines[i]
            self.assertEqual(PeriodIndex(data=l.get_xdata()).freq,
                             idxh.freq)
            self.assert_numpy_array_equal(
                l.get_xdata(orig=False), expected_x)
            expected_y += high[i].values
            self.assert_numpy_array_equal(
                l.get_ydata(orig=False), expected_y)

        # check low dataframe result
        expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540,
                               1544, 1549, 1553, 1558, 1562],
                              dtype=np.float64)
        expected_y = np.zeros(len(expected_x), dtype=np.float64)
        for i in range(3):
            l = ax.lines[3 + i]
            self.assertEqual(PeriodIndex(data=l.get_xdata()).freq,
                             idxh.freq)
            self.assert_numpy_array_equal(l.get_xdata(orig=False),
                                          expected_x)
            expected_y += low[i].values
            self.assert_numpy_array_equal(l.get_ydata(orig=False),
                                          expected_y)
def test_rotation(self):
    """``rot`` is applied to every x tick label."""
    df = DataFrame(np.random.randn(5, 5))
    ax = df.plot(rot=30)
    for l in ax.get_xticklabels():
        # assertEqual instead of the deprecated ``assert_`` alias (removed in
        # Python 3.12); it also reports both values on failure.
        self.assertEqual(l.get_rotation(), 30)
# Script: shows a bar chart of totals per contact method inside a Tk window.
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import tkinter as tk
from pandas import DataFrame
import matplotlib.pyplot as plt

# Hard-coded input: one total per contact method, in matching list order.
irsData = {
    'Contact Method': [
        'Phone Call: Landline', 'Phone', 'Phone Call: Mobile/Cell',
        'Internet/E-mail', 'Mobile: Text/Email/ IM', 'Mail',
        'I Initiated Contact', 'Unknown', 'Internet Web Site', 'In Person',
        'Internet (Other)', 'Wireless', 'Fax', 'Print', 'TV/Radio'
    ],
    'Total': [10044, 2769, 2543, 226, 58, 57, 36, 34, 30, 16, 11, 7, 3, 3, 1]
}
df1 = DataFrame(irsData, columns=['Contact Method', 'Total'])
# Sum the totals per contact method; the method becomes the index that the
# bar plot uses for its x axis.
df1 = df1[['Contact Method', 'Total']].groupby('Contact Method').sum()

root = tk.Tk()
root.title('Contact Method')
root.geometry("800x800")

figure1 = plt.Figure(figsize=(6, 4), dpi=100)
ax1 = figure1.add_subplot(111)
ax1.set_xlabel('Contact Methods')
# Embed the figure in the Tk window before drawing onto its axes.
bar1 = FigureCanvasTkAgg(figure1, root)
bar1.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH)
df1.plot(kind='bar', legend=True, ax=ax1)
ax1.set_title('Contact Method Vs. Total')

# Blocks until the window is closed.
root.mainloop()
def test_nonnumeric_exclude(self):
    """Only the numeric column of a mixed frame produces a line."""
    import matplotlib.pyplot as plt
    mixed = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]})
    axis = mixed.plot()
    plotted_lines = axis.get_lines()
    # A holds strings, so B is the only column drawn.
    self.assertEqual(len(plotted_lines), 1)
def start(self):
    """
    Run the strategy backtest.

    Walks every trading day between ``self.begin_date`` and ``self.end_date``
    (the method itself takes no date arguments), executes the sells and buys
    queued on the previous day at the open price, queues new signals, and
    records the daily net value, profit and CSI 300 (code '000300')
    benchmark return. When finished it prints the profit history, the
    annualized return, the maximum drawdown and the Sharpe ratio, and plots
    the profit curve against the benchmark.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; confirm against the original file, in particular the placement
    of the "cash after sell/buy" prints.
    """
    total_capital = 1E7
    cash = 1E7
    single_position = 2E5

    # Initialize the signal objects
    daily_k_break_ma10 = DailyKBreakMA10Signal()
    low_pe_stock_pool = LowPeStockPool(self.begin_date, self.end_date, 7)

    # Held volume keyed by "<code>_<date>"
    code_date_volume_dict = dict()

    # Net value, profit and same-period CSI 300 benchmark, keyed by date
    df_profit = DataFrame(columns=['net_value', 'profit', 'hs300'])

    # The Shanghai Composite index is never suspended, so it has no missing
    # days; use it as the trading calendar.
    szzz_hq_df = self.dm.get_k_data('000001',
                                    index=True,
                                    begin_date=self.begin_date,
                                    end_date=self.end_date)
    all_dates = list(szzz_hq_df['date'])

    # CSI 300 close on the first day of the period (benchmark base value)
    hs300_k = self.dm.get_k_data('000300',
                                 index=True,
                                 begin_date=all_dates[0],
                                 end_date=all_dates[0])
    hs300_begin_value = hs300_k.loc[hs300_k.index[0]]['close']

    # Fetch the stock pool data
    rebalance_dates, date_codes_dict = low_pe_stock_pool.get_option_stocks(
    )

    # Collect every stock code that appears in the pool during the period
    all_option_code_set = set()
    for rebalance_date in rebalance_dates:
        for code in date_codes_dict[rebalance_date]:
            all_option_code_set.add(code)

    # Cache the daily bars of each stock
    for code in all_option_code_set:
        dailies_df = self.dm.get_k_data(code,
                                        autype=None,
                                        begin_date=self.begin_date,
                                        end_date=self.end_date)
        dailies_hfq_df = self.dm.get_k_data(code,
                                            autype='hfq',
                                            begin_date=self.begin_date,
                                            end_date=self.end_date)
        # Adjustment factor: 'hfq' close divided by unadjusted close
        dailies_df[
            'au_factor'] = dailies_hfq_df['close'] / dailies_df['close']
        dailies_df.set_index(['date'], inplace=True)

        self.code_daily_cache[code] = dailies_df

    last_phase_codes = None
    this_phase_codes = None
    to_be_sold_codes = set()
    to_be_bought_codes = set()
    holding_code_dict = dict()
    last_date = None
    # Step through the backtest one day at a time
    for _date in all_dates:
        print('Backtest at %s.' % _date)

        # Codes held before today's sells
        before_sell_holding_codes = list(holding_code_dict.keys())

        # Price adjustment: rescale each held volume by the ratio of
        # today's adjustment factor to the previous day's.
        if last_date is not None and len(before_sell_holding_codes) > 0:
            for code in before_sell_holding_codes:
                try:
                    dailies = self.code_daily_cache[code]

                    current_au_factor = dailies.loc[_date]['au_factor']
                    before_volume = holding_code_dict[code]['volume']
                    last_au_factor = dailies.loc[last_date]['au_factor']

                    after_volume = int(
                        before_volume *
                        (current_au_factor / last_au_factor))
                    holding_code_dict[code]['volume'] = after_volume
                    print('持仓量调整:%s, %6d, %10.6f, %6d, %10.6f' %
                          (code, before_volume, last_au_factor,
                           after_volume, current_au_factor),
                          flush=True)
                # NOTE(review): bare except also hides KeyboardInterrupt and
                # programming errors; consider narrowing it.
                except:
                    print('持仓量调整时,发生错误:%s, %s' % (code, _date),
                          flush=True)

        # Sell (at today's open) everything queued on the previous day
        if len(to_be_sold_codes) > 0:
            # Iterate over a copy because the set is modified in the loop.
            code_set_tmp = set(to_be_sold_codes)
            for code in code_set_tmp:
                try:
                    if code in before_sell_holding_codes:
                        holding_stock = holding_code_dict[code]
                        holding_volume = holding_stock['volume']
                        sell_price = self.code_daily_cache[code].loc[
                            _date]['open']
                        sell_amount = holding_volume * sell_price
                        cash += sell_amount

                        cost = holding_stock['cost']
                        single_profit = (sell_amount - cost) * 100 / cost

                        print('卖出 %s, %6d, %6.2f, %8.2f, %4.2f' %
                              (code, holding_volume, sell_price,
                               sell_amount, single_profit))

                        del holding_code_dict[code]
                        to_be_sold_codes.remove(code)
                # NOTE(review): bare except; a failed sell leaves the code
                # queued for the next day.
                except:
                    print('卖出时,发生异常:%s, %s' % (code, _date),
                          flush=True)

        print('卖出后,现金: %10.2f' % cash)

        # Buy (at today's open) everything queued on the previous day
        if len(to_be_bought_codes) > 0:
            # Sorted so the buy order is deterministic.
            sorted_to_be_bought_list = list(to_be_bought_codes)
            sorted_to_be_bought_list.sort()
            for code in sorted_to_be_bought_list:
                try:
                    if cash > single_position:
                        buy_price = self.code_daily_cache[code].loc[_date][
                            'open']
                        # Round the volume down to a multiple of 100
                        volume = int(
                            int(single_position / buy_price) / 100) * 100
                        buy_amount = buy_price * volume
                        cash -= buy_amount
                        holding_code_dict[code] = {
                            'volume': volume,
                            'cost': buy_amount,
                            'last_value': buy_amount
                        }

                        print('买入 %s, %6d, %6.2f, %8.2f' %
                              (code, volume, buy_price, buy_amount),
                              flush=True)
                # NOTE(review): bare except; consider narrowing it.
                except:
                    print('买入时,发生错误:%s, %s' % (code, _date),
                          flush=True)

        print('买入后,现金: %10.2f' % cash)

        # Codes held after today's trades
        holding_codes = list(holding_code_dict.keys())
        # On a rebalance date, switch to the new phase's stock list
        if _date in rebalance_dates:
            # Remember the previous phase's codes
            if this_phase_codes is not None:
                last_phase_codes = this_phase_codes
            this_phase_codes = date_codes_dict[_date]

            # Queue every held stock that dropped out of the pool, to be
            # sold at the next day's open
            if last_phase_codes is not None:
                out_codes = self.find_out_stocks(last_phase_codes,
                                                 this_phase_codes)
                for out_code in out_codes:
                    if out_code in holding_code_dict:
                        to_be_sold_codes.add(out_code)

        # Date window for signal detection: starts 10 trading days back
        # once that many days exist, otherwise begin_date is None
        current_date_index = all_dates.index(_date)
        signal_begin_date = None
        if current_date_index >= 10:
            signal_begin_date = all_dates[current_date_index - 10]

        # Check for held stocks that must be sold the next day
        for holding_code in holding_codes:
            if daily_k_break_ma10.is_k_down_break_ma10(
                    holding_code,
                    begin_date=signal_begin_date,
                    end_date=_date):
                to_be_sold_codes.add(holding_code)

        # Check for pool stocks that must be bought the next day
        to_be_bought_codes.clear()
        if this_phase_codes is not None:
            for _code in this_phase_codes:
                if _code not in holding_codes and \
                        daily_k_break_ma10.is_k_up_break_ma10(_code, begin_date=signal_begin_date, end_date=_date):
                    to_be_bought_codes.add(_code)

        # Compute total assets
        total_value = 0
        for code in holding_codes:
            try:
                holding_stock = holding_code_dict[code]
                value = self.code_daily_cache[code].loc[_date][
                    'close'] * holding_stock['volume']
                total_value += value

                # Total profit (percent) relative to cost
                profit = (value - holding_stock['cost']
                          ) * 100 / holding_stock['cost']
                # One-day profit (percent) relative to the last stored value
                one_day_profit = (value - holding_stock['last_value']
                                  ) * 100 / holding_stock['last_value']
                # Remember today's market value
                holding_stock['last_value'] = value
                print('持仓: %s, %10.2f, %4.2f, %4.2f' %
                      (code, value, profit, one_day_profit))

                # Record the held volume for this stock on this day
                code_date_volume_dict[code + '_' +
                                      _date] = holding_stock['volume']
            # NOTE(review): bare except; a failure here leaves the stock out
            # of total_value for the day.
            except:
                print('计算收益时发生错误:%s, %s' % (code, _date),
                      flush=True)

        total_capital = total_value + cash

        hs300_k_current = self.dm.get_k_data('000300',
                                             index=True,
                                             begin_date=_date,
                                             end_date=_date)
        hs300_current_value = hs300_k_current.loc[
            hs300_k_current.index[0]]['close']

        print('收盘后,现金: %10.2f, 总资产: %10.2f' %
              (cash, total_capital))
        last_date = _date
        # Net value and profit are relative to the 1e7 starting capital
        df_profit.loc[_date] = {
            'net_value':
            round(total_capital / 1e7, 2),
            'profit':
            round(100 * (total_capital - 1e7) / 1e7, 2),
            'hs300':
            round(
                100 * (hs300_current_value - hs300_begin_value) /
                hs300_begin_value, 2)
        }

    # Print the profit-curve values
    print('Profit history start')
    for index_date in df_profit.index:
        print('%s, %6.2f, %6.2f' %
              (index_date, df_profit.loc[index_date]['profit'],
               df_profit.loc[index_date]['hs300']),
              flush=True)
    print('Profit history end')

    drawdown = self.compute_drawdown(df_profit['net_value'])
    annual_profit, sharpe_ratio = self.compute_sharpe_ratio(
        df_profit['net_value'])

    print('回测结果 %s - %s,年化收益: %7.3f, 最大回撤:%7.3f, 夏普比率:%4.2f' %
          (self.begin_date, self.end_date, annual_profit, drawdown,
           sharpe_ratio))

    df_profit.plot(title='Backtest Result',
                   y=['profit', 'hs300'],
                   kind='line')
    plt.show()
# Fare: one record in test_df has no Fare; fill it with the median.
# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: with pandas copy-on-write the chained in-place call no
# longer updates test_df, and the int conversion below would then fail on
# the remaining NaN.
test_df['Fare'] = test_df['Fare'].fillna(test_df['Fare'].median())

# Convert from float to int
train_df['Fare'] = train_df['Fare'].astype(int)
test_df['Fare'] = test_df['Fare'].astype(int)

# Fares of passengers who did not survive and of those who did
fare_not_survived = train_df['Fare'][train_df['Survived'] == 0]
fare_survived = train_df['Fare'][train_df['Survived'] == 1]

# Mean and standard deviation per group (row 0: not survived, row 1: survived)
average_fare = DataFrame([fare_not_survived.mean(), fare_survived.mean()])
std_fare = DataFrame([fare_not_survived.std(), fare_survived.std()])

# Plot
train_df['Fare'].plot(kind='hist', figsize=(15,3), bins=100, xlim=(0,50))
plt.savefig('Fare_1.png')

average_fare.index.names = std_fare.index.names = ['Survived']
average_fare.plot(yerr=std_fare, kind='bar', legend=False)
plt.savefig('Fare_2.png')
plt.close('all')

# Age
fig, (axis1, axis2) = plt.subplots(1, 2, figsize=(15,4))
axis1.set_title('Original Age Values - Titanic')
axis2.set_title('New Age Values - Titanic')

# Mean, standard deviation and number of nulls of Age in the training set
average_age_train = train_df['Age'].mean()
std_age_train = train_df['Age'].std()
count_nan_age_train = train_df['Age'].isnull().sum()

# Mean and standard deviation of Age in the test set
average_age_test = test_df['Age'].mean()
std_age_test = test_df['Age'].std()
def test_import_error_message():
    """Plotting without matplotlib raises ImportError with a helpful message."""
    # GH-19810
    expected_message = "matplotlib is required for plotting"
    frame = DataFrame({"A": [1, 2]})
    with pytest.raises(ImportError, match=expected_message):
        frame.plot()
# Script: loads Data.csv as a series and shows it as a line plot.
# NOTE(review): plotly.plotly, TimeGrouper and pandas.parser are not used
# below and are presumably unavailable in current plotly/pandas releases -
# confirm and drop them if nothing else in the file needs them.
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
from pandas import Series
from pandas import DataFrame
from pandas import TimeGrouper
from pandas import parser
from matplotlib import pyplot

# Series.from_csv was removed in pandas 1.0. This is what it did internally:
# read the file with the first column as a date-parsed index and take the
# first remaining column.
series = pd.read_csv('Data.csv', header=0, index_col=0,
                     parse_dates=True).iloc[:, 0]
df = DataFrame(series)
df.plot()
pyplot.show()
def test_bar_stacked_center(self):
    """The first x tick of a stacked bar plot sits at the first bar's centre."""
    # GH2157
    df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5))
    # Pass a real boolean: the string 'True' only worked because any
    # non-empty string is truthy.
    ax = df.plot(kind='bar', stacked=True, grid=True)
    first_bar = ax.patches[0]
    self.assertEqual(ax.xaxis.get_ticklocs()[0],
                     first_bar.get_x() + first_bar.get_width() / 2)
def test_df_series_secondary_legend(self):
    """Check legends when a Series is added on the secondary y axis of a frame plot.

    Each stanza draws the frame, adds the series with ``secondary_y=True``
    on the same axes, then checks the legend labels and which y axes are
    visible. The stanzas differ in whether the frame itself is on the
    secondary axis, in the keyword order of the ``Series.plot`` call, and in
    ``mark_right``.
    """
    # GH 9779
    df = DataFrame(np.random.randn(30, 3), columns=list("abc"))
    s = Series(np.random.randn(30), name="x")

    # primary -> secondary (ax passed as the last keyword)
    _, ax = self.plt.subplots()
    ax = df.plot(ax=ax)
    s.plot(legend=True, secondary_y=True, ax=ax)
    # both legends are drawn on left ax
    # left and right axis must be visible
    self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
    assert ax.get_yaxis().get_visible()
    assert ax.right_ax.get_yaxis().get_visible()
    tm.close()

    # primary -> secondary (ax passed as the first keyword)
    _, ax = self.plt.subplots()
    ax = df.plot(ax=ax)
    s.plot(ax=ax, legend=True, secondary_y=True)
    # both legends are drawn on left ax
    # left and right axis must be visible
    self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
    assert ax.get_yaxis().get_visible()
    assert ax.right_ax.get_yaxis().get_visible()
    tm.close()

    # secondary -> secondary (ax passed as the last keyword)
    _, ax = self.plt.subplots()
    ax = df.plot(secondary_y=True, ax=ax)
    s.plot(legend=True, secondary_y=True, ax=ax)
    # both legends are drawn on left ax
    # left axis must be invisible and right axis must be visible
    expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
    self._check_legend_labels(ax.left_ax, labels=expected)
    assert not ax.left_ax.get_yaxis().get_visible()
    assert ax.get_yaxis().get_visible()
    tm.close()

    # secondary -> secondary (ax passed as the first keyword)
    _, ax = self.plt.subplots()
    ax = df.plot(secondary_y=True, ax=ax)
    s.plot(ax=ax, legend=True, secondary_y=True)
    # both legends are drawn on left ax
    # left axis must be invisible and right axis must be visible
    expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
    self._check_legend_labels(ax.left_ax, expected)
    assert not ax.left_ax.get_yaxis().get_visible()
    assert ax.get_yaxis().get_visible()
    tm.close()

    # secondary -> secondary with mark_right=False on the frame plot:
    # the frame's labels carry no " (right)" suffix, the series' label does
    _, ax = self.plt.subplots()
    ax = df.plot(secondary_y=True, mark_right=False, ax=ax)
    s.plot(ax=ax, legend=True, secondary_y=True)
    # both legends are drawn on left ax
    # left axis must be invisible and right axis must be visible
    expected = ["a", "b", "c", "x (right)"]
    self._check_legend_labels(ax.left_ax, expected)
    assert not ax.left_ax.get_yaxis().get_visible()
    assert ax.get_yaxis().get_visible()
    tm.close()
def test_bar_center(self):
    """The first x tick equals the first bar's left edge plus one bar width."""
    positions = lrange(5)
    frame = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=positions)
    axis = frame.plot(kind='bar', grid=True)
    first_tick = axis.xaxis.get_ticklocs()[0]
    first_bar = axis.patches[0]
    self.assertEqual(first_tick, first_bar.get_x() + first_bar.get_width())
# Script fragment: walk-forward ARIMA forecast over ``test`` with a fixed
# bias correction, followed by a summary and plots of the residual errors.
# NOTE(review): ``train``, ``test`` and ``ARIMA`` are defined before this
# fragment; the fit(trend=..., disp=...) call presumably targets the older
# statsmodels ARIMA interface - confirm the installed version.
history = [x for x in train]
predictions = list()
# Constant added to every one-step forecast.
bias = -0.348572
for i in range(len(test)):
    # predict: refit on everything observed so far, forecast one step ahead
    model = ARIMA(history, order=(2, 1, 3))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = bias + float(model_fit.forecast()[0])
    predictions.append(yhat)
    # observation: the true value joins the history for the next step
    obs = test[i]
    history.append(obs)
# report performance
mse = mean_squared_error(test, predictions)
rmse = sqrt(mse)
print('RMSE: %.3f' % rmse)
# summarise residual errors (observed minus predicted)
residuals = [test[i] - predictions[i] for i in range(len(test))]
residuals = DataFrame(residuals)
print(residuals.describe())
# plot residual errors: histogram on top, density estimate below
pyplot.figure()
pyplot.subplot(211)
residuals.hist(ax=pyplot.gca())
pyplot.subplot(212)
residuals.plot(kind='kde', ax=pyplot.gca())
pyplot.show()