Example #1
0
 def test_mixed_freq_irreg_period(self):
     """Plot an irregular timestamp series, then a 'B' period series.

     Smoke test: both ``plot`` calls only have to finish without raising.
     """
     # Positions 0-10, 15-18 and 29: the gaps make the subset irregular.
     positions = list(range(11)) + [15, 16, 17, 18, 29]
     irregular = tm.makeTimeSeries()[positions]
     periods = period_range('1/3/2000', periods=30, freq='B')
     period_series = Series(np.random.randn(len(periods)), index=periods)
     irregular.plot()
     period_series.plot()
def slide_11():
    """Draw bar-chart demos from random data and from the tips CSV.

    Steps, in order:
      1. vertical and horizontal bar plots of a random 16-element Series on
         a 2x1 subplot grid;
      2. grouped and stacked-horizontal bar plots of a random 6x4 DataFrame;
      3. a stacked bar plot of the data read from ``TIPSCSVPATH``: a
         day-by-party-size cross tabulation, restricted to the columns
         labelled 2 through 5 and divided by the row totals.

    Intermediate tables are printed to stdout.  Nothing is returned.
    """
    _, axes = plt.subplots(2, 1)
    data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))

    data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
    data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

    df = DataFrame(np.random.rand(6, 4),
                   index=['one', 'two', 'three', 'four', 'five', 'six'],
                   columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
    # Single-argument print(...) is valid on both Python 2 and Python 3,
    # unlike the bare print statement.
    print(df)
    df.plot(kind='bar')
    df.plot(kind='barh', stacked=True, alpha=0.5)

    tips = pd.read_csv(TIPSCSVPATH)
    print(tips.head())
    party_counts = pd.crosstab(index=tips.day, columns=tips.sizes)
    print('曜日とパーティの大きさ別に仕分け')
    print(party_counts)
    # Label-based column slice (both ends included).  .loc replaces the
    # removed .ix indexer; assumes the party-size columns are integer
    # labels, for which .ix was label-based as well.
    party_counts = party_counts.loc[:, 2:5]
    print('サイズ1と6のパーティは少ないから除外')
    print(party_counts)
    print('正規化')
    # Divide every row by its own total so each row sums to 1.
    party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
    print(party_pcts)
    party_pcts.plot(kind='bar', stacked=True)
Example #3
0
    def test_ts_plot_format_coord(self):
        """Check ``format_coord`` text for annual and daily series plots.

        Exercised once through ``Series.plot`` and once through ``tsplot``.
        """
        def check_format_of_first_point(ax, expected_string):
            # Format the first plotted point and compare with the expected
            # text; a ValueError is turned into a skip (GH7664).
            line = ax.get_lines()[0]
            x0 = line.get_xdata()[0].ordinal
            y0 = line.get_ydata()[0]
            try:
                formatted = ax.format_coord(x0, y0)
                self.assertEqual(expected_string, formatted)
            except ValueError:
                raise nose.SkipTest("skipping test because issue forming "
                                    "test comparison GH7664")

        annual_index = date_range('2014-01-01', periods=3, freq='A-DEC')
        annual = Series(1, index=annual_index)
        check_format_of_first_point(annual.plot(), 't = 2014  y = 1.000000')

        # The daily series is added to the annual plot that already exists,
        # which changes that plot's freq field.
        daily_index = date_range('2014-01-01', periods=3, freq='D')
        daily = Series(1, index=daily_index)
        daily_ax = daily.plot()
        check_format_of_first_point(daily_ax, 't = 2014-01-01  y = 1.000000')
        tm.close()

        # Same two checks through tsplot.
        import matplotlib.pyplot as plt
        from pandas.tseries.plotting import tsplot
        tsplot(annual, plt.Axes.plot)
        check_format_of_first_point(plt.gca(), 't = 2014  y = 1.000000')
        tsplot(daily, plt.Axes.plot)
        check_format_of_first_point(plt.gca(), 't = 2014-01-01  y = 1.000000')
Example #4
0
    def test_from_weekly_resampling(self):
        """Plot weekly data on top of monthly data and check the x values.

        Every line must carry the weekly frequency; the 12-point (monthly)
        line must sit on the hard-coded weekly ordinals and the other line
        on the ordinals of the weekly index.  Checked for ``Series.plot``
        and for ``tsplot``.
        """
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = Series(np.random.randn(len(idxh)), idxh)
        low = Series(np.random.randn(len(idxl)), idxl)

        expected_h = idxh.to_period().asi8.astype(np.float64)
        expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544,
                               1549, 1553, 1558, 1562], dtype=np.float64)

        def check_lines(lines):
            for line in lines:
                # assertEqual, not assertTrue: assertTrue(a, b) uses b as the
                # failure message and never compares the two frequencies.
                self.assertEqual(PeriodIndex(data=line.get_xdata()).freq,
                                 idxh.freq)
                xdata = line.get_xdata(orig=False)
                # Only the low-frequency line has exactly 12 points.
                expected = expected_l if len(xdata) == 12 else expected_h
                self.assert_numpy_array_equal(xdata, expected)

        low.plot()
        ax = high.plot()
        check_lines(ax.get_lines())
        tm.close()

        # tsplot
        from pandas.tseries.plotting import tsplot
        import matplotlib.pyplot as plt

        tsplot(low, plt.Axes.plot)
        check_lines(tsplot(high, plt.Axes.plot))
Example #5
0
    def test_secondary_y_ts(self):
        """Secondary-y plotting for series indexed by dates."""
        import matplotlib.pyplot as plt
        index = date_range('1/1/2000', periods=10)
        primary = Series(np.random.randn(10), index)
        other = Series(np.random.randn(10), index)

        # A secondary-y plot returns the right-hand axes.
        sec_ax = primary.plot(secondary_y=True)
        self.assertTrue(hasattr(sec_ax, 'left_ax'))
        self.assertFalse(hasattr(sec_ax, 'right_ax'))
        figure = sec_ax.get_figure()
        figure_axes = figure.get_axes()
        line = sec_ax.get_lines()[0]
        # The plotted data must convert back to the original series.
        restored = Series(line.get_ydata(), line.get_xdata()).to_timestamp()
        assert_series_equal(primary, restored)
        self.assertEqual(sec_ax.get_yaxis().get_ticks_position(), 'right')
        self.assertFalse(figure_axes[0].get_yaxis().get_visible())
        plt.close(figure)

        # An ordinary plot keeps the default tick position.
        plain_ax = other.plot()
        self.assertEqual(plain_ax.get_yaxis().get_ticks_position(),
                         self.default_tick_position)
        plt.close(plain_ax.get_figure())

        # Primary plot first, secondary afterwards: the primary y axis
        # stays visible.
        first_ax = other.plot()
        primary.plot(secondary_y=True)
        self.assertTrue(first_ax.get_yaxis().get_visible())
Example #6
0
    def test_secondary_y(self):
        """Secondary-y plotting for series with a default integer index."""
        import matplotlib.pyplot as plt

        primary = Series(np.random.randn(10))
        other = Series(np.random.randn(10))

        # A secondary-y plot returns the right-hand axes.
        sec_ax = primary.plot(secondary_y=True)
        self.assertTrue(hasattr(sec_ax, 'left_ax'))
        self.assertFalse(hasattr(sec_ax, 'right_ax'))
        figure = sec_ax.get_figure()
        figure_axes = figure.get_axes()
        line = sec_ax.get_lines()[0]
        restored = Series(line.get_ydata(), line.get_xdata())
        assert_series_equal(primary, restored)
        self.assertEqual(sec_ax.get_yaxis().get_ticks_position(), 'right')
        self.assertFalse(figure_axes[0].get_yaxis().get_visible())
        plt.close(figure)

        # An ordinary plot keeps the default tick position.
        plain_ax = other.plot()
        self.assertEqual(plain_ax.get_yaxis().get_ticks_position(), 'default')
        plt.close(plain_ax.get_figure())

        # Primary first, secondary second: each axes knows only its partner.
        left = other.plot()
        right = primary.plot(secondary_y=True)
        self.assertTrue(left.get_yaxis().get_visible())
        self.assertFalse(hasattr(left, 'left_ax'))
        self.assertTrue(hasattr(left, 'right_ax'))
        self.assertTrue(hasattr(right, 'left_ax'))
        self.assertFalse(hasattr(right, 'right_ax'))
Example #7
0
def pd_plot():
    """Plot a random-walk Series, then a four-column random-walk DataFrame.

    The Series is printed before it is plotted.  Nothing is returned.
    """
    walk = Series(np.random.randn(10).cumsum(), index=range(0, 100, 10))
    print(walk)
    walk.plot()
    # Open question left by the author: why is nothing displayed unless
    # this runs under IPython?
    frame = pd.DataFrame(
        np.random.randn(10, 4).cumsum(0),
        index=np.arange(0, 100, 10),
        columns=['A', 'B', 'C', 'D'],
    )
    frame.plot()
Example #8
0
    def test_invalid_plot_data(self):
        """An all-string Series must raise TypeError for each listed kind."""
        letters = Series(list("abcd"))
        for kind in ("line", "bar", "barh", "kde", "density"):
            with tm.assertRaises(TypeError):
                letters.plot(kind=kind)
Example #9
0
    def test_partially_invalid_plot_data(self):
        """A Series mixing strings and numbers must raise TypeError."""
        mixed = Series(['a', 'b', 1.0, 2])
        for kind in ('line', 'bar', 'barh', 'kde', 'density'):
            with tm.assertRaises(TypeError):
                mixed.plot(kind=kind)
Example #10
0
    def test_invalid_plot_data(self):
        """Plotting a Series of letters must raise TypeError for every kind."""
        letters = Series(list('abcd'))
        for kind in ('line', 'bar', 'barh', 'kde', 'density'):
            with tm.assertRaises(TypeError):
                letters.plot(kind=kind)
Example #11
0
 def test_invalid_plot_data(self):
     """All-string data must raise TypeError for every usable common kind.

     Kinds rejected by ``_ok_for_gaussian_kde`` are not exercised.
     """
     letters = Series(list('abcd'))
     for kind in plotting._common_kinds:
         if _ok_for_gaussian_kde(kind):
             with tm.assertRaises(TypeError):
                 letters.plot(kind=kind)
Example #12
0
 def test_kind_both_ways(self):
     """Each kind works as ``plot(kind=...)`` and as ``plot.<kind>()``.

     Kinds rejected by ``_ok_for_gaussian_kde`` are not exercised.
     """
     numbers = Series(range(3))
     every_kind = plotting._common_kinds + plotting._series_kinds
     for kind in every_kind:
         if _ok_for_gaussian_kde(kind):
             numbers.plot(kind=kind)
             accessor_method = getattr(numbers.plot, kind)
             accessor_method()
Example #13
0
 def test_label(self):
     """Legend text comes from ``label``, else from the Series name."""
     s = Series([1, 2])

     def check_drawn_legend(expected, **plot_kwargs):
         # Plot on fresh axes with the legend drawn, verify its single
         # entry, then close the figure.
         _, fresh_ax = self.plt.subplots()
         plotted = s.plot(legend=True, ax=fresh_ax, **plot_kwargs)
         self._check_legend_labels(plotted, labels=[expected])
         self.plt.close()

     # Explicit label on an unnamed Series.
     check_drawn_legend('LABEL', label='LABEL')
     # No label and no name.
     check_drawn_legend('None')
     # The Series name is used when no label is passed.
     s.name = 'NAME'
     check_drawn_legend('NAME')
     # An explicit label overrides the name.
     check_drawn_legend('LABEL', label='LABEL')

     # Label supplied but legend not requested: nothing is drawn until
     # legend() is called explicitly.
     _, fresh_ax = self.plt.subplots()
     plotted = s.plot(legend=False, label='LABEL', ax=fresh_ax)
     assert plotted.get_legend() is None
     plotted.legend()
     self._check_legend_labels(plotted, labels=['LABEL'])
Example #14
0
 def test_partially_invalid_plot_data(self):
     """Mixed string/number data must raise TypeError for usable kinds.

     Kinds rejected by ``_ok_for_gaussian_kde`` are not exercised.
     """
     mixed = Series(['a', 'b', 1.0, 2])
     for kind in plotting._common_kinds:
         if _ok_for_gaussian_kde(kind):
             with tm.assertRaises(TypeError):
                 mixed.plot(kind=kind)
Example #15
0
    def test_partially_invalid_plot_data(self):
        """Mixed string/number data must raise TypeError for each kind."""
        mixed = Series(["a", "b", 1.0, 2])
        for kind in ("line", "bar", "barh", "kde", "density"):
            with tm.assertRaises(TypeError):
                mixed.plot(kind=kind)
Example #16
0
    def test_ts_plot_format_coord(self):
        """Check ``format_coord`` text for annual and daily series plots.

        Exercised once through ``Series.plot`` and once through ``tsplot``,
        each time drawing both series on one explicitly created axes.
        """
        def check_format_of_first_point(ax, expected_string):
            # Format the first plotted point and compare with the expected
            # text; a ValueError is turned into a skip (GH7664).
            line = ax.get_lines()[0]
            x0 = line.get_xdata()[0].ordinal
            y0 = line.get_ydata()[0]
            try:
                formatted = ax.format_coord(x0, y0)
                assert expected_string == formatted
            except ValueError:
                pytest.skip("skipping test because issue forming "
                            "test comparison GH7664")

        annual_index = date_range('2014-01-01', periods=3, freq='A-DEC')
        annual = Series(1, index=annual_index)
        _, shared_ax = self.plt.subplots()
        annual.plot(ax=shared_ax)
        check_format_of_first_point(shared_ax, 't = 2014  y = 1.000000')

        # The daily series goes onto the axes that already holds the annual
        # plot, which changes its freq field.
        daily_index = date_range('2014-01-01', periods=3, freq='D')
        daily = Series(1, index=daily_index)
        daily.plot(ax=shared_ax)
        check_format_of_first_point(shared_ax, 't = 2014-01-01  y = 1.000000')
        tm.close()

        # Same two checks through tsplot on a new axes.
        _, tsplot_ax = self.plt.subplots()
        from pandas.tseries.plotting import tsplot
        tsplot(annual, self.plt.Axes.plot, ax=tsplot_ax)
        check_format_of_first_point(tsplot_ax, 't = 2014  y = 1.000000')
        tsplot(daily, self.plt.Axes.plot, ax=tsplot_ax)
        check_format_of_first_point(tsplot_ax, 't = 2014-01-01  y = 1.000000')
Example #17
0
 def test_fake_inferred_business(self):
     """Plotting a daily range with two days removed leaves no ``freq``."""
     _, ax = self.plt.subplots()
     days = date_range('2001-1-1', '2001-1-10')
     complete = Series(lrange(len(days)), days)
     # Drop positions 3 and 4 to create the gap.
     head, tail = complete[:3], complete[5:]
     gapped = head.append(tail)
     gapped.plot(ax=ax)
     assert not hasattr(ax, 'freq')
Example #18
0
    def test_secondary_y_ts(self):
        """Secondary-y plotting for series indexed by dates."""
        index = date_range('1/1/2000', periods=10)
        primary = Series(np.random.randn(10), index)
        other = Series(np.random.randn(10), index)

        # A secondary-y plot returns the right-hand axes.
        fig, _ = self.plt.subplots()
        sec_ax = primary.plot(secondary_y=True)
        assert hasattr(sec_ax, 'left_ax')
        assert not hasattr(sec_ax, 'right_ax')
        figure_axes = fig.get_axes()
        line = sec_ax.get_lines()[0]
        # The plotted data must convert back to the original series.
        restored = Series(line.get_ydata(), line.get_xdata()).to_timestamp()
        assert_series_equal(primary, restored)
        assert sec_ax.get_yaxis().get_ticks_position() == 'right'
        assert not figure_axes[0].get_yaxis().get_visible()
        self.plt.close(fig)

        # An ordinary plot keeps the default tick position.
        _, plain_ax = self.plt.subplots()
        other.plot(ax=plain_ax)
        assert (plain_ax.get_yaxis().get_ticks_position() ==
                self.default_tick_position)
        self.plt.close(plain_ax.get_figure())

        # Primary first, secondary second: the primary y axis stays visible.
        first_ax = other.plot()
        primary.plot(secondary_y=True)
        assert first_ax.get_yaxis().get_visible()
Example #19
0
    def test_errorbar_plot(self):
        """Error bars accept Series, ndarray and list; bad input is rejected."""
        values = Series(np.arange(10))
        errors = np.random.randn(10)

        # Line and bar plots, with the errors in each supported container.
        for kind in ('line', 'bar'):
            for yerr in (Series(errors), errors, errors.tolist()):
                _check_plot_works(values.plot, yerr=yerr, kind=kind)

        _check_plot_works(values.plot, xerr=errors)

        # Time series plotting.
        month_ends = date_range('1/1/2000', '1/1/2001', freq='M')
        ts = Series(np.arange(12), index=month_ends)
        ts_errors = Series(np.random.randn(12), index=month_ends)
        _check_plot_works(ts.plot, yerr=ts_errors)

        # Wrong length.
        with tm.assertRaises(ValueError):
            values.plot(yerr=np.arange(11))

        # Wrong element type.
        text_errors = ['zzz'] * 10
        with tm.assertRaises(TypeError):
            values.plot(yerr=text_errors)
Example #20
0
    def test_invalid_plot_data(self):
        """All-string data must raise the "no numeric data" TypeError."""
        letters = Series(list('abcd'))
        _, ax = self.plt.subplots()
        expected_message = "no numeric data to plot"
        for kind in plotting._core._common_kinds:
            with pytest.raises(TypeError, match=expected_message):
                letters.plot(kind=kind, ax=ax)
Example #21
0
def plot_mortality_rate(curve, string=True):
    """Plot one curve record as a rate per 1000.

    curve: comma-separated text when ``string`` is true, otherwise an
        already split sequence.  Field 0 is the line label, field 1 the
        divisor, and the remaining fields the integer counts.
    Records with fewer than two fields are ignored.  Returns None.
    """
    fields = curve.split(',') if string else curve
    if len(fields) >= 2:
        from pandas import Series
        counts = Series(fields[2:]).astype('int')
        per_thousand = counts / int(fields[1]) * 1000
        per_thousand.plot(label=fields[0])
Example #22
0
 def test_invalid_plot_data(self):
     """All-string data must raise TypeError for every usable common kind.

     Kinds rejected by ``_ok_for_gaussian_kde`` are not exercised.
     """
     letters = Series(list('abcd'))
     _, ax = self.plt.subplots()
     for kind in plotting._core._common_kinds:
         if _ok_for_gaussian_kde(kind):
             with pytest.raises(TypeError):
                 letters.plot(kind=kind, ax=ax)
Example #23
0
    def test_partially_invalid_plot_data(self):
        """Mixed data must raise the "no numeric data" TypeError."""
        mixed = Series(['a', 'b', 1.0, 2])
        _, ax = self.plt.subplots()
        expected_message = "no numeric data to plot"
        for kind in plotting._core._common_kinds:
            with pytest.raises(TypeError, match=expected_message):
                mixed.plot(kind=kind, ax=ax)
Example #24
0
 def test_partially_invalid_plot_data(self):
     """Mixed string/number data must raise TypeError for usable kinds.

     Kinds rejected by ``_ok_for_gaussian_kde`` are not exercised.
     """
     mixed = Series(['a', 'b', 1.0, 2])
     _, ax = self.plt.subplots()
     for kind in plotting._core._common_kinds:
         if _ok_for_gaussian_kde(kind):
             with pytest.raises(TypeError):
                 mixed.plot(kind=kind, ax=ax)
Example #25
0
def plot_mortality_rate_avg(curve, string=True):
    """Plot the values of one curve record as they are.

    curve: comma-separated text when ``string`` is true, otherwise an
        already split sequence.  Field 0 is the line label, field 1 is
        skipped, and the remaining fields are the values to plot.
    Records with fewer than two fields are ignored.  Returns None.
    """
    if string:
        curve = curve.split(',')
    if len(curve) < 2:
        return
    from pandas import Series
    # Fields split from text are str, and pandas refuses to plot an
    # object-dtype Series of strings ("no numeric data to plot"), so coerce
    # to float first.  Already-numeric input is unaffected apart from dtype.
    series = Series(curve[2:]).astype('float')
    series.plot(label=curve[0])
Example #26
0
 def test_mixed_freq_hf_first(self):
     """Daily data plotted first: every line must report frequency 'D'."""
     daily_index = date_range('1/1/1999', periods=365, freq='D')
     monthly_index = date_range('1/1/1999', periods=12, freq='M')
     daily = Series(np.random.randn(len(daily_index)), daily_index)
     monthly = Series(np.random.randn(len(monthly_index)), monthly_index)
     daily.plot()
     shared_ax = monthly.plot()
     for line in shared_ax.get_lines():
         line_freq = PeriodIndex(data=line.get_xdata()).freq
         self.assertEqual(line_freq, 'D')
Example #27
0
    def test_kind_both_ways(self):
        """Each kind works as ``plot(kind=...)`` and as ``plot.<kind>()``."""
        numbers = Series(range(3))
        core = plotting._core
        every_kind = core._common_kinds + core._series_kinds
        _, ax = self.plt.subplots()
        for kind in every_kind:
            numbers.plot(kind=kind, ax=ax)
            accessor_method = getattr(numbers.plot, kind)
            accessor_method()
Example #28
0
    def test_pandas_plots_register(self):
        """Plotting a date-indexed Series must not emit any warning."""
        pytest.importorskip("matplotlib.pyplot")
        days = date_range('2017', periods=12)
        series = Series(range(12), index=days)
        # Put the converter back in its "warn" state in case an earlier
        # test already ran.
        converter._WARN = True
        with tm.assert_produces_warning(None) as caught:
            series.plot()

        assert len(caught) == 0
Example #29
0
 def test_to_weekly_resampling(self):
     """Weekly data plotted first: every line's freq must start with 'W'."""
     weekly_index = date_range('1/1/1999', periods=52, freq='W')
     monthly_index = date_range('1/1/1999', periods=12, freq='M')
     weekly = Series(np.random.randn(len(weekly_index)), weekly_index)
     monthly = Series(np.random.randn(len(monthly_index)), monthly_index)
     weekly.plot()
     shared_ax = monthly.plot()
     for line in shared_ax.get_lines():
         line_freq = PeriodIndex(data=line.get_xdata()).freq
         self.assertTrue(line_freq.startswith('W'))
Example #30
0
 def test_finder_monthly_long(self):
     """First major tick of a 24-year monthly plot is the '1989Q1' period."""
     months = period_range('1988Q1', periods=24 * 12, freq='M')
     series = Series(np.random.randn(len(months)), months)
     _, ax = self.plt.subplots()
     series.plot(ax=ax)
     first_major_tick = ax.get_xaxis().get_majorticklocs()[0]
     expected_ordinal = Period('1989Q1', 'M').ordinal
     assert first_major_tick == expected_ordinal
Example #31
0
 # Fragment of a larger loop body: `i`, `diff`, `full` and `energy1` are
 # defined outside the visible code.  Each section opens a figure, draws
 # one plot, titles it with energy1[i], saves it under a name prefixed
 # with the zero-padded index `i`, and closes the figure.
 # The bare string literals below are no-op statements used as headings.

 # Lag plot of `diff`.
 """lag plot stationary"""
 fig1 = plt.figure()
 lag_plot(diff, s=1, c="k")
 plt.title("lag_plot stationarized waveform, Energy={}keV".format(
     energy1[i]))
 plt.savefig("{0:03}_diff_lag.png".format(i))
 plt.close(fig1)
 # Lag plot of `full`.
 """lag plot waveform"""
 fig2 = plt.figure()
 lag_plot(full, s=1, c="k")
 plt.title("lag_plot, Energy={}keV".format(energy1[i]))
 plt.savefig("{0:03}_full_lag.png".format(i))
 plt.close(fig2)
 # Line plot of `diff`.
 """Simple stationary waveform"""
 fig3 = plt.figure()
 diff.plot()
 plt.title("stationarized waveform, Energy={}keV".format(energy1[i]))
 plt.savefig("{0:03}_diff_waveform.png".format(i))
 plt.close(fig3)
 # Line plot of `full`.
 # NOTE(review): unlike the other savefig calls, this file name carries no
 # ".png" suffix - confirm that is intended.
 """Simple waveform"""
 fig4 = plt.figure()
 full.plot()
 plt.title("Waveform, Energy={}keV".format(energy1[i]))
 plt.savefig("{0:03}_full_waveform".format(i))
 plt.close(fig4)
 # Partial autocorrelation of `diff` with 30 lags.
 # NOTE(review): the title says "Autocorrelation" although plot_pacf is
 # called - confirm the wording.  The rest of this section is not visible.
 """Partial Autocorrelation stationarized"""
 fig5 = plt.figure()
 # autocorrelation_plot(diff)
 sm.graphics.tsa.plot_pacf(diff, lags=30)
 plt.title("Autocorrelation stationarized waveform, Energy={}keV".format(
     energy1[i]))
Example #32
0
 def test_plot_fails_with_dupe_color_and_style(self):
     x = Series(randn(2))
     with pytest.raises(ValueError):
         _, ax = self.plt.subplots()
         x.plot(style="k--", color="k", ax=ax)
Example #33
0
    4.2, 2.3, 5.6, 4.5, 4.8, 3.9, 5.9, 2.4, 5.9, 6, 4, 3.7, 5, 5.2, 4.5, 3.6,
    5, 6, 2.8, 3.3, 5.5, 4.2, 4.9, 5.1
])
# Sorted view and median of the original grades.  Both results are
# discarded here, so they only show up in an interactive session.
noten.sort_values(ascending=True)
noten.median()

# a) change three values so that the median remains the same
noten2 = Series([
    4, 2, 5, 4.5, 4.8, 3.9, 5.9, 2.4, 5.9, 6, 4, 3.7, 5, 5.2, 4.5, 3.6, 5, 6,
    2.8, 3.3, 5.5, 4.2, 4.9, 5.1
])
noten2.median()

# b) create histogram and boxplot
# 2x2 grid: top row shows the original grades, bottom row the modified ones.
plt.subplot(221)
noten.plot(kind="hist", edgecolor="black")

plt.subplot(222)
noten.plot(kind="box")

plt.subplot(223)
noten2.plot(kind="hist", edgecolor="black")

plt.subplot(224)
noten2.plot(kind="box")

# -------------------------
# Exercise 2.2
# -------------------------
schlamm = pd.read_csv("./data/klaerschlamm.dat", sep=" ", index_col=0)
# Pass axis by keyword: the positional form drop("Labor", 1) was removed in
# pandas 2.0, while axis=1 works on every version.
schlamm = schlamm.drop("Labor", axis=1)
Example #34
0
    # Fragment of a function body: `cnn`, `ann` and `time_series` are
    # defined outside the visible code.
    # Both models are trained on the same windows and then asked to
    # predict those same training inputs.
    train_X, train_y = cnn.split_sequence(time_series)

    ann.train(train_X, train_y)
    cnn.train(train_X, train_y)

    pre_cnn = cnn.predict(train_X)
    pre_ann = ann.predict(train_X)

    # Predictions are aligned with the series from its fifth point onwards.
    # NOTE(review): presumably split_sequence uses a window of 4 - confirm.
    index = time_series.index[4:]

    pre_cnn = Series(pre_cnn, index=index)
    pre_ann = Series(pre_ann, index=index)
    plt.figure(figsize=(6, 6))
    # Upper panel: CNN predictions against the original series.
    plt.subplot(211)
    pre_cnn.plot(color='green', label='Predicts', legend=True)
    time_series.plot(color='blue', label='Original', legend=True)
    residual_cnn = time_series - pre_cnn
    # with open('./residual_pickle/cnn_vehicle_df.pkl', 'wb') as f:
    #     pk.dump(residual_cnn, f)
    plt.title('CNN')
    plt.grid(which='both')

    # Lower panel: ANN predictions against the original series.
    plt.subplot(212)
    pre_ann.plot(color='green', label='Predicts', legend=True)
    time_series.plot(color='blue', label='Original', legend=True)
    residual_ann = time_series - pre_ann
    # with open('./residual_pickle/fcnn_vehicle_df.pkl', 'wb') as f:
    #     pk.dump(residual_ann, f)
    plt.title('FCNN')
    plt.grid(which='both')
Example #35
0
# Indexing walkthrough on `myseries`, which is defined outside the visible
# code.
print(myseries)
print(myseries['대구'])
print(type(myseries['대구']))  # type: numpy scalar
print(myseries[['대구']])
print(type(myseries[['대구']]))  # type: Series
print(myseries['대구':'목포'])  # slicing with string labels includes both endpoints
print(myseries[[2]])  # indexing takes two pairs of brackets
print(myseries[0:5:2])  # slicing with colons takes one pair of brackets
print(myseries[[1, 3, 5]])  # comma-separated positions take two pairs of brackets
# Assignment through a slice, a label list, and a stepped slice.
myseries[2:5] = 33
print(myseries)
myseries[['서울', '대구']] = 44
print(myseries)
myseries[0::2] = 77
print(myseries)

# Bar chart of the series, one colour per bar.
colors = ['r', 'g', 'b', 'y', 'royalblue', 'c', 'm']
myseries.plot(kind='bar', rot=0, color=colors)
plt.xlabel('도시')
plt.ylabel('점수')
# Share of each value in the total, in percent.
ratio = 100 * myseries / myseries.sum()
# Bars sit at x = 0, 1, 2, ...  .iloc makes the positional lookup explicit;
# a bare integer key on a label-indexed Series relies on a deprecated
# positional fallback.
for idx in range(myseries.size):
    value = str(myseries.iloc[idx])
    ratioval = f'{ratio.iloc[idx]:.1f}%'
    # Raw value just above the bar, percentage at half the bar's height.
    plt.text(x=idx,
             y=myseries.iloc[idx] + 1,
             s=value,
             horizontalalignment='center')
    plt.text(x=idx,
             y=myseries.iloc[idx] / 2,
             s=ratioval,
             horizontalalignment='center')
graphfile = 'SeriesR&W_Graph.png'
plt.savefig(graphfile)
Example #36
0
# Fragment of a script: `mse`, `glm_ridge`, `dfTrain`, `lm`, `r2_score` and
# `mean_squared_error` are defined outside the visible code.
print('mse = {:12,.0f}'.format(mse))
# Pair each fitted coefficient with its feature name, sorted by value.
ridge_coef = Series(
    glm_ridge.coef_, dfTrain[[
        'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
    ]].columns).sort_values()
print('type of ridge_coef:', type(ridge_coef))
# print(ridge_coef.size)
print('beta coefficient of variables')
# for idx,value in enumerate(ridge_coef):
#     print('{} : {:10.8f}'.format(idx,value))
# One "name : coefficient" line per feature, in sorted order.
for i in range(ridge_coef.size):
    print('{:4s} : {:10.8f}'.format(ridge_coef.index.values[i],
                                    ridge_coef.values[i]))

# Bar chart of the sorted coefficients, saved to disk.
plt.figure(1)
ridge_coef.plot(kind='bar', grid=True)
plt.savefig("ridge_alpha0.png")

# Refit with alpha=100 on the same features and report the training scores.
print('[alpha=100]')
glm_ridge = lm.Ridge(alpha=100).fit(
    dfTrain[[
        'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
    ]], dfTrain['Y1'])
predLabelTrain = glm_ridge.predict(dfTrain[[
    'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
]])
r2 = r2_score(dfTrain['Y1'], predLabelTrain)
print('r-squared = ', r2)
mse = mean_squared_error(dfTrain['Y1'], predLabelTrain)
print('mse = {:12,.0f}'.format(mse))
ridge_coef = Series(
Example #37
0
 def test_plot_fails_with_dupe_color_and_style(self):
     """A colour given in both ``style`` and ``color`` raises ValueError."""
     two_points = Series(randn(2))
     conflicting = dict(style='k--', color='k')
     with tm.assertRaises(ValueError):
         two_points.plot(**conflicting)
Example #38
0
 def test_invalid_kind(self):
     """An unknown ``kind`` string must raise ValueError."""
     series = Series([1, 2])
     bogus_kind = 'aasdf'
     with tm.assertRaises(ValueError):
         series.plot(kind=bogus_kind)
Example #39
0
 def test_xticklabels(self):
     """Explicit ``xticks`` positions are labelled from the index (GH11529)."""
     index_labels = ['P%02d' % i for i in range(10)]
     expected = ['P%02d' % i for i in [0, 3, 5, 9]]
     series = Series(np.arange(10), index=index_labels)
     ax = series.plot(xticks=[0, 3, 5, 9])
     self._check_text_labels(ax.get_xticklabels(), expected)
Example #40
0
 def test_secondary_bar(self):
     """A secondary-y bar plot puts the second axes' ticks on the right."""
     series = Series(np.random.randn(10))
     bar_ax = series.plot(secondary_y=True, kind='bar')
     second_axes = bar_ax.get_figure().get_axes()[1]
     self.assertEqual(second_axes.get_yaxis().get_ticks_position(), 'right')
def _penalized_linear_regression_train(table,
                                       feature_cols,
                                       label_col,
                                       regression_type='ridge',
                                       alpha=1.0,
                                       l1_ratio=0.5,
                                       fit_intercept=True,
                                       max_iter=1000,
                                       tol=0.0001,
                                       random_state=None):
    """Fit a ridge, lasso or elastic-net model and build its report.

    Parameters
    ----------
    table : DataFrame holding the feature and label columns; it is copied,
        not modified.
    feature_cols : list of column names used as predictors.
    label_col : name of the target column.
    regression_type : 'ridge', 'lasso' or 'elastic_net'; any other value is
        handed to ``raise_runtime_error``.
    alpha : penalty weight passed to the estimator.
    l1_ratio : passed to ElasticNet only.
    fit_intercept, tol, random_state : passed to the estimator.
    max_iter : passed to Lasso and ElasticNet (see the NOTE on Ridge below).

    Returns
    -------
    dict
        ``{'model': model}``, where ``model`` carries the fitted estimator,
        the selected parameters, the coefficient table, the copy of the
        input with ``predict`` and ``residual`` columns added, and the
        rendered report.
    """
    out_table = table.copy()
    features = out_table[feature_cols]
    label = out_table[label_col]
    if regression_type == 'ridge':
        # NOTE(review): Ridge is built with max_iter=None, so the max_iter
        # argument is reported in the parameters but not used by this
        # estimator - confirm that is intended.
        regression_model = Ridge(alpha=alpha,
                                 fit_intercept=fit_intercept,
                                 max_iter=None,
                                 tol=tol,
                                 solver='auto',
                                 random_state=random_state)
    elif regression_type == 'lasso':
        regression_model = Lasso(alpha=alpha,
                                 fit_intercept=fit_intercept,
                                 max_iter=max_iter,
                                 tol=tol,
                                 random_state=random_state,
                                 selection='random')
    elif regression_type == 'elastic_net':
        regression_model = ElasticNet(alpha=alpha,
                                      l1_ratio=l1_ratio,
                                      fit_intercept=fit_intercept,
                                      max_iter=max_iter,
                                      tol=tol,
                                      random_state=random_state,
                                      selection='random')
    else:
        raise_runtime_error("Please check 'regression_type'.")

    # Fit exactly once.  With selection='random' and no fixed random_state a
    # refit can land on different coefficients, so the coefficient table,
    # the intercept and the predictions must all come from the same fit.
    regression_model.fit(features, label)

    out_table1 = pd.DataFrame([])
    out_table1['x_variable_name'] = list(feature_cols)
    out_table1['coefficient'] = regression_model.coef_
    if fit_intercept:
        intercept = pd.DataFrame(
            [['intercept', regression_model.intercept_]],
            columns=['x_variable_name', 'coefficient'])
        # pd.concat instead of DataFrame.append, which pandas 2.0 removed.
        out_table1 = pd.concat([out_table1, intercept], ignore_index=True)

    predict = regression_model.predict(features)
    residual = label - predict

    out_table['predict'] = predict
    out_table['residual'] = residual

    # Same keys, in the same order, for every regression type; only elastic
    # net additionally reports its L1 ratio.
    params = {
        'Feature Columns': feature_cols,
        'Label Column': label_col,
        'Regression Type': regression_type,
        'Regularization (Penalty Weight)': alpha,
    }
    if regression_type == 'elastic_net':
        params['L1 Ratio'] = l1_ratio
    params['Fit Intercept'] = fit_intercept
    params['Maximum Number of Iterations'] = max_iter
    params['Tolerance'] = tol

    score = {
        'MSE': mean_squared_error(label, predict),
        'R2': r2_score(label, predict)
    }

    # Predicted vs actual, with the y = x reference line.
    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)
    p1x = np.min(predict)
    p2x = np.max(predict)
    plt.plot([p1x, p2x], [p1x, p2x], 'r--')
    fig_actual_predict = plt2MD(plt)
    plt.clf()

    # Residuals against predictions.
    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)
    plt.clf()

    # Q-Q plot of the residuals.
    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)
    plt.clf()

    # Distribution of the residuals.
    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)
    plt.clf()

    # Magnitude of the coefficients, sorted.
    plt.figure()
    predictors = features.columns
    coef = Series(regression_model.coef_, predictors).sort_values()
    coef.plot(kind='bar', title='Model Coefficients')
    plt.tight_layout()
    fig_model_coefficients = plt2MD(plt)
    plt.clf()

    rb = BrtcReprBuilder()
    rb.addMD(
        strip_margin("""
    | # Penalized Linear Regression Result
    | ### Selected Parameters: 
    | {params}
    |
    | ## Results
    | ### Model Parameters
    | {out_table1}
    |
    | ### Prediction and Residual
    | {out_table2}
    |
    | ### Regression Score
    | {score}
    |
    """.format(params=dict2MD(params),
               out_table1=pandasDF2MD(out_table1),
               out_table2=pandasDF2MD(out_table, num_rows=len(out_table) + 1),
               score=dict2MD(score))))
    rb.addMD(
        strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    |
    | ### Magnitude of Coefficients
    | {image5}
    |
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3,
               image5=fig_model_coefficients)))

    model = _model_dict('penalized_linear_regression_model')
    model['feature_cols'] = feature_cols
    model['label_col'] = label_col
    model['regression_type'] = regression_type
    model['regression_model'] = regression_model
    model['parameters'] = params
    model['model_parameters'] = out_table1
    model['prediction_residual'] = out_table
    model['_repr_brtc_'] = rb.get()

    return {'model': model}
Example #42
0
import inline
import matplotlib.pyplot as plt
from pandas import Series

# Seven daily values, indexed by day label, drawn as a titled bar chart.
s3 = Series(
    [1.2, 2.5, -2.2, 3.1, -0.8, -3.2, 1.4],
    index=['Jan 1', 'Jan 2', 'Jan 3', 'Jan 4', 'Jan 5', 'Jan 6', 'Jan 7'])
s3.plot(kind='bar', title='Bar plot')
plt.show()
    # Fragment of a function body: `theta_19_pre`, `theta_01_pre`,
    # `theta_19`, `theta_01` and `time_series` are defined outside the
    # visible code.
    # The combined prediction is the mean of the two theta-line predictions.
    y_pre = (theta_19_pre + theta_01_pre)/2

    # Predictions are aligned with the series from its fifth point onwards.
    index = time_series.index[4:]
    y_pre = Series(y_pre, index)

    #test_y = time_series[-2:]

    # print(sMAPE(y_pre, test_y))
    # print(MSE(y_pre, test_y))

    theta_19_pre = Series(theta_19_pre, index=index)
    theta_01_pre = Series(theta_01_pre, index=index)
    # y_pre already carries this index from the assignment above.
    y_pre = Series(y_pre, index=index)

    # One figure with both theta lines, their predictions, the combined
    # prediction and the original series.
    plt.figure(figsize=(6, 6))
    theta_19_pre.plot(color='g', label='Theta=1.9 predicts', legend=True)
    theta_19.plot(color='r', label='Theta=1.9', legend=True)
    theta_01_pre.plot(color='c', label='Theta=0.1 predicts', legend=True)
    theta_01.plot(color='m', label='Theta=0.1', legend=True)
    y_pre.plot(color='black', label='Predicts', legend=True)
    time_series.plot(color='blue', label='Original', legend=True)
    residual = time_series - y_pre
    # with open('./residual_pickle/theta_vehicle_df.pkl', 'wb') as f:
    #     pk.dump(residual, f)
    plt.title('Theta')
    plt.grid(which='both')
    plt.savefig('./pre_plot/Theta.jpg')
    plt.show()
    # Second figure: the residual goes in the upper of two panels.
    plt.figure(figsize=(6, 6))
    plt.subplot(211)
    residual.plot(label='residual for Theta', legend=True)
Example #44
0
class GwSym:
    """Generate synthetic groundwater series with noise for simulations.

    The methods are meant to be called in sequence: ``generate_head`` and
    ``generate_noise`` build the synthetic series, ``pastas_model`` fits a
    Pastas model to their sum, and ``parameters`` / ``test_statistics``
    report on that fit.  ``detpar`` and ``noisepar`` hold the "true"
    parameters that were used for generation.
    """

    def __repr__(self):
        return 'GwSym object'

    def __init__(self):

        self.detpar = {}
        self.noisepar = {}
        self.rain = None
        self.head = None
        self.noise = None
        # Kept in a private attribute: an instance attribute called ``name``
        # would shadow the ``name()`` method and make it uncallable.
        self._name = None

    def name(self, name=None):
        """Return the name of this object.

        If ``name`` is given, the object is renamed first and the new name
        is returned.  The name is ``None`` until one has been set.
        """
        if name is not None:
            self._name = name
        return self._name

    def generate_head(self,
                      rain=None,
                      Atrue=800,
                      ntrue=1.1,
                      atrue=5,
                      dtrue=20):
        """Generate heads from rain with deterministic model parameters.

        Parameters
        ----------
        rain : pandas.Series
            Input series; its index is reused as the index of the heads.
        Atrue, ntrue, atrue :
            Parameters passed to ``ps.Gamma().block``.
        dtrue :
            Constant base level that the response to rain is added to.

        Returns
        -------
        pandas.Series
            The generated heads, named ``'head'``, with the first ten
            years dropped.  Also stored as ``self.head``.

        Raises
        ------
        TypeError
            If ``rain`` is None.
        """
        if rain is None:
            raise TypeError('rain must be a pandas Series, not None')

        self.rain = rain
        self.detpar['Atrue'] = Atrue
        self.detpar['ntrue'] = ntrue
        self.detpar['atrue'] = atrue
        self.detpar['dtrue'] = dtrue

        # from Pastas notebook 15
        step = ps.Gamma().block([Atrue, ntrue, atrue])
        h = dtrue * np.ones(len(rain) + step.size)
        # Iterate over the raw values so every element is taken by position,
        # whatever index ``rain`` carries.
        for i, amount in enumerate(rain.values):
            h[i:i + step.size] += amount * step
        head = pd.Series(index=rain.index, data=h[:len(rain)], name='head')

        # ignore first ten years
        year = str(head.first_valid_index().year + 10)
        head = head[f'{year}-01-01':].copy()

        self.head = head
        return head

    def generate_noise(self, alpha=0.7, beta=0.7, noise_perc=0.2, head=None):
        """Generate a noise series with the same index as the heads.

        Innovations are drawn from a normal distribution with a fixed seed
        (1234) and scaled by ``noise_perc`` times the standard deviation of
        the heads.  The noise then follows
        ``noise[i] = innovation[i] + beta * innovation[i-1] + alpha * noise[i-1]``
        with ``noise[0] = 0``.

        Parameters
        ----------
        alpha, beta :
            Coefficients of the recursion above; stored in ``noisepar``.
        noise_perc :
            Scale of the innovations relative to the std of the heads.
        head : pandas.Series, optional
            Series to take length, scale and index from; defaults to
            ``self.head``.

        Returns
        -------
        pandas.Series
            The noise series, also stored as ``self.noise``.
        """

        self.noisepar['alpha'] = alpha
        self.noisepar['beta'] = beta

        if head is None:
            head = self.head

        # generate samples using Numpy; the fixed seed makes runs repeatable
        rng = np.random.RandomState(1234)
        n = len(head)
        innovation = rng.normal(0, 1, n) * np.std(head.values) * \
                     noise_perc
        noise = np.zeros(n)

        for i in range(1, n):
            # beta = theta, alpha = phi
            noise[i] = innovation[i] + innovation[i - 1] * beta + \
                       noise[i - 1] * alpha

        self.noise = Series(noise, head.index)
        return self.noise

    def plot_series(self, figdir=None):
        """Plot head+noise, head and noise in three stacked panels.

        The figure title is ``model_name()``.  If ``figdir`` is given the
        figure is saved as ``f'{figdir}{model_name}.jpg'``; ``figdir`` is
        used as a plain string prefix, so it must end with a path
        separator to denote a directory.  The figure is closed afterwards.
        """

        fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, ncols=1, figsize=(15, 15))

        sr = self.head + self.noise
        sr.plot(ax=ax1, title='head+noise')
        self.head.plot(ax=ax2, title='head')
        self.noise.plot(ax=ax3, title='noise')

        title = self.model_name()
        fig.suptitle(title, fontsize=14)

        if figdir is not None:
            figname = self.model_name()
            figpath = f'{figdir}{figname}.jpg'
            fig.savefig(figpath)

        # Close this figure explicitly rather than whichever is "current".
        plt.close(fig)
        return

    def plot_noise_check(self, sr=None, figtitle=None, figdir=None):
        """Plot a series next to its histogram with a fitted normal curve.

        Parameters
        ----------
        sr : pandas.Series, optional
            Series to plot; defaults to ``self.noise``.
        figtitle : str, optional
            Title of both panels and, when saving, the file name stem.
            Defaults to a string built from ``noisepar`` alpha and beta.
        figdir : str, optional
            If given, the figure is saved as ``f'{figdir}{figtitle}.jpg'``
            (plain string prefix, no separator is inserted).
        """

        if sr is None:
            sr = self.noise

        if figtitle is None:
            alpha = self.noisepar['alpha']
            beta = self.noisepar['beta']
            figtitle = 'alfa=' + str(alpha) + ' beta=' + str(beta)

        fig, [ax1, ax2] = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

        sr.plot(ax=ax1, title=figtitle)
        sr.plot.hist(grid=False,
                     bins=20,
                     rwidth=0.9,
                     color='#607c8e',
                     ax=ax2,
                     density=True,
                     title=figtitle)

        # find minimum and maximum of xticks, so we know
        # where we should compute theoretical distribution
        xt = ax2.get_xticks()
        xmin, xmax = min(xt), max(xt)
        lnspc = np.linspace(xmin, xmax, len(sr))

        # plot normal distribution
        m, s = norm.fit(sr)  # get mean and standard deviation
        pdf_g = norm.pdf(lnspc, m,
                         s)  # now get theoretical values in our interval
        ax2.plot(lnspc, pdf_g)

        if figdir is not None:
            figpath = f'{figdir}{figtitle}.jpg'
            fig.savefig(figpath)

        # Close this figure explicitly rather than whichever is "current".
        plt.close(fig)
        return

    def pastas_model(self, figdir=None):
        """Create and solve a Pastas model using the generated head and noise.

        The model is built on ``self.head + self.noise`` with ``self.rain``
        as a stress (``ps.Gamma`` response, named ``'recharge'``) and an
        ``ps.ArmaModel`` noise model, then solved.  The model and stress
        model are stored as ``self.ml`` and ``self.sm``.

        If ``figdir`` is given, the Pastas results plot is saved as
        ``f'{figdir}Model {model_name}.jpg'`` with the true step response
        added to its last axes.

        Returns
        -------
        The solved Pastas model (``self.ml``).
        """

        # create Pastas model
        head_noise = self.head + self.noise
        self.ml = ps.Model(head_noise)
        self.sm = ps.StressModel(self.rain,
                                 ps.Gamma,
                                 name='recharge',
                                 settings='prec')
        self.ml.add_stressmodel(self.sm)
        self.ml.add_noisemodel(ps.ArmaModel())

        # solve Pastas model
        self.ml.solve(noise=True, report=False)

        if figdir is not None:
            # plot figure with model diagnostics

            axes = self.ml.plots.results(figsize=(10, 5))
            fig = axes[0].get_figure()

            # add real step function to plot
            Atrue = self.detpar['Atrue']
            ntrue = self.detpar['ntrue']
            atrue = self.detpar['atrue']
            axes[-1].plot(ps.Gamma().step([Atrue, ntrue, atrue]))

            figname = self.model_name()
            fig.suptitle(figname, fontsize=12)

            figpath = f'{figdir}Model {figname}.jpg'
            fig.savefig(figpath)
            # Close the results figure explicitly.
            plt.close(fig)

        return self.ml

    def model_name(self):
        """Return a name built from the true deterministic and noise parameters.

        Requires ``generate_head`` and ``generate_noise`` to have filled
        ``detpar`` and ``noisepar``; otherwise a ``KeyError`` is raised.
        """
        Atrue = self.detpar['Atrue']
        ntrue = self.detpar['ntrue']
        atrue = self.detpar['atrue']
        alpha = self.noisepar['alpha']
        beta = self.noisepar['beta']
        name = f'Atrue={Atrue} ntrue={ntrue} atrue={atrue} alpha={alpha} beta={beta}'
        return name

    def parameters(self):
        """Return a one-row table with true and estimated parameters.

        The estimates come from the ``'optimal'`` column of
        ``self.ml.parameters``, so ``pastas_model`` must have been run.
        The noise parameters are converted with ``exp(-1 / value)``; for
        beta the sign of the fitted value is kept and the conversion is
        applied to its absolute value.  The table is also stored as
        ``self.par``.
        """

        par = collections.OrderedDict()

        par['A_true'] = self.detpar['Atrue']
        par['n_true'] = self.detpar['ntrue']
        par['a_true'] = self.detpar['atrue']
        par['alpha'] = self.noisepar['alpha']
        par['beta'] = self.noisepar['beta']

        sr = self.ml.parameters['optimal']
        par['A_est'] = sr['recharge_A']
        par['n_est'] = sr['recharge_n']
        par['a_est'] = sr['recharge_a']
        par['alpha_est'] = np.exp(-1. / sr["noise_alpha"])
        # pm is +1 or -1: the sign of the fitted beta
        pm = sr["noise_beta"] / np.abs(sr["noise_beta"])
        par['beta_est'] = pm * np.exp(-1. / np.abs(sr["noise_beta"]))

        self.par = DataFrame([par])
        self.par.index.name = 'casename'

        return self.par

    def test_statistics(self):
        """Return test statistics for the innovations of the fitted model.

        Runs ``ps.stats.diagnostics`` on ``self.ml.noise()`` with
        ``nparam=2`` and stores the result as ``self.test_stats``.
        """
        self.test_stats = ps.stats.diagnostics(self.ml.noise(), nparam=2)
        self.test_stats.index.name = 'casename'
        return self.test_stats
Example #45
0
# Read the monthly AO index table into a 2-D numpy array.
ao = np.loadtxt('monthly.ao.index.b50.current.ascii')
print(ao[0:2])
print(ao.shape)

# Build one month-end timestamp per row, starting in January 1950.
dates = pd.date_range('1950-01', periods=ao.shape[0], freq='M')
print(dates)
print(dates.shape)

# Create the first time series: third column of the table, indexed by date.
AO = Series(ao[:, 2], index=dates)
print(AO)

# Plot the AO data: the whole series, then two sub-periods.
# The data is monthly (freq='M', file name 'monthly.*'), so the title says so.
# NOTE(review): "AO" presumably stands for "Arctic Oscillation" rather than
# "Atlantic Oscillation" - confirm against the data source before renaming.
AO.plot(title='Monthly Atlantic Oscillation')
plt.show()
AO['1980':'1990'].plot()
plt.show()
AO['1980-05':'1981-03'].plot()
plt.show()

# Print some data: by position, by month, by year, and by boolean mask.
# Positional access goes through .iloc; plain [] with an integer on a
# date-indexed Series is deprecated label/position guessing.
print(AO.iloc[120])
print(AO['1960-01'])
print(AO['1960'])
print(AO[AO > 0])

# Create another time series data set (NAO), same layout as the AO file.
nao = np.loadtxt('norm.nao.monthly.b5001.current.ascii')
dates_nao = pd.date_range('1950-01', periods=nao.shape[0], freq='M')
Example #46
0
"""

import pandas as pd
import numpy as np
from pandas import Series, DataFrame, Panel
import matplotlib.pyplot as plt

ao = np.loadtxt(
    'monthly.ao.index.b50.current.ascii')  # load the ASCII file as a numpy array

dates = pd.date_range(
    '1950-01', periods=ao.shape[0],
    freq='M')  # one monthly timestamp per row of ao, starting 1950-01
# Third column of the table, indexed by the dates built above.
AO = Series(ao[:, 2], index=dates)
# Plot the whole series, save the figure as a PNG, then display it.
AO.plot().get_figure().savefig('AtlanticOscillation.png')
plt.show()

# Disabled: plot of the 2001-2011 sub-period.
#plt.figure()
#AO['2001':'2011'].plot()
#plt.show()
#plt.close()
# Second table, handled the same way as ao above.
nao = np.loadtxt('norm.nao.monthly.b5001.current.ascii')
dates_nao = pd.date_range('1950-01', periods=nao.shape[0], freq='M')
NAO = Series(nao[:, 2], index=dates_nao)
# Combine both series into one frame with columns 'AO' and 'NAO'.
aonao = DataFrame({'AO': AO, 'NAO': NAO})

# Disabled: plots of the combined frame (the savefig call below is unfinished).
#aonao.plot(subplots=True).get_figure().savefig(')
#aonao.plot()
#plt.show()
#plt.close()
Example #47
0
def plotSeries(series: pd.Series, **kwargs):
    """Plot ``series`` through its own ``plot`` method and show the figure.

    Every keyword argument is forwarded unchanged to ``series.plot``.
    Nothing is returned.
    """
    draw = series.plot
    draw(**kwargs)
    plt.show()
    return None
Example #48
0
plt.title("Welcome to the ML World!")  # a title can be added
plt.show()

# Scatter plot of 5000 points whose x and y are both drawn from N(0, 1).
print("==============散点图scatter=================")
x = np.random.normal(0, 1, 5000)
y = np.random.normal(0, 1, 5000)
plt.scatter(x, y, alpha=0.5, marker="x")  # point transparency and marker shape can be specified
plt.show()  # draws a scatter plot of a standard 2-D normal distribution

# Histogram of 1000 samples.
print("==============直方图=================")
s = Series(np.random.randn(1000))  # follows the standard normal distribution
plt.hist(s, rwidth=0.9, bins=20)
plt.show()

# Kernel density estimate of the same samples.
print("==============密度图=================")
s.plot(kind='kde')
plt.show()

# 2x2 grid of axes created in one call (the printed banner says this is the
# form that is usually used).
# NOTE(review): siny and cosy are not defined in this part of the script;
# presumably they are computed earlier - confirm.
print("==============子图subplots(通常用这种)=================")
x = np.linspace(0, 10, 100)

figure, ax = plt.subplots(2, 2)
ax[0][0].plot(x, siny, color="red")
ax[0][1].plot(x, cosy, color="blue")
ax[1][0].plot(x, siny, color="green")
ax[1][1].plot(x, cosy, color="black")
plt.show()

# Same idea with plt.subplot, selecting one subplot at a time.
print("==============子图subplot=================")
plt.subplot(2, 1, 1)  # define a 2-row, 1-column subplot grid and switch to the first subplot
plt.plot(x, siny, color="red")
Example #49
0
def EV_Plot(dic_length_RNA, total_RNA, prefix, dic_YRNA_type):
    """Write length-distribution and composition plots and tables.

    Every output path is ``prefix`` plus a fixed suffix:

    * ``.length_RNA_counts.pdf`` / ``.length_RNA_counts.txt`` - stacked bar
      plot and tab-separated table of counts per length and RNA type,
    * ``.length_RNA_percent.pdf`` - the same counts divided by
      ``total_RNA`` and multiplied by 100,
    * ``.pie_RNA.pdf`` / ``.pie_RNA.txt`` - column totals per RNA type,
    * ``.pie_YRNA.pdf`` / ``.pie_YRNA.txt`` - YRNA sub-type counts.

    ``dic_length_RNA`` is used as a DataFrame (``fillna``, ``sort_index``,
    ``loc``); ``dic_YRNA_type`` is converted with ``Series(...)``.  Plot
    colors come from the module-level ``colors``.
    """
    rna_columns = [
        'miRNA', 'YRNA', 'tsRNA', 'rsRNA', 'snoRNA', 'lncRNA', 'mRNA'
    ]
    # Fixed x tick positions and the length labels shown at them.
    length_ticks = range(0, 35, 5)
    length_tick_labels = ("17", "22", "27", "32", "37", "42", "47")

    def save_and_close(target):
        # Write the current figure to ``target`` and close it.
        plt.savefig(target, bbox_inches='tight')
        plt.close()

    def stacked_length_bars(frame, y_label, target, **extra):
        # One stacked bar chart over lengths, saved to ``target``.
        frame.plot(kind='bar',
                   stacked=True,
                   fontsize=15,
                   color=colors,
                   width=1,
                   linewidth=0.01,
                   **extra)
        plt.xticks(length_ticks, length_tick_labels, fontsize=15, rotation=0)
        plt.xlabel("Length", fontsize=15)
        plt.ylabel(y_label, fontsize=15)
        save_and_close(target)

    def pie(values, size):
        # Pie chart of ``values`` on a new figure of the given size.
        values.plot(kind='pie',
                    figsize=size,
                    colors=colors,
                    autopct='%.1f',
                    fontsize=15)

    ### length distribution of different RNA types
    counts = dic_length_RNA.fillna(value=0).sort_index(axis=0, ascending=True)
    by_type = counts.loc[:, rna_columns]
    # Everything that is not one of the named types goes into "others".
    remainder = counts.sum(axis=1) - by_type.sum(axis=1)
    by_type = by_type.join(pd.DataFrame(remainder, columns=["others"]))

    stacked_length_bars(by_type, "Counts", prefix + ".length_RNA_counts.pdf")
    by_type.to_csv(prefix + ".length_RNA_counts.txt", header=False, sep='\t')

    ### length plot percent
    percent = by_type / total_RNA * 100
    stacked_length_bars(percent,
                        "Percent",
                        prefix + ".length_RNA_percent.pdf",
                        figsize=(5.5, 4))

    ### pie plot
    totals = by_type.sum(axis=0)
    totals.name = ''
    pie(totals, (6, 6))
    totals.fillna(value=0).to_csv(prefix + ".pie_RNA.txt",
                                  header=False,
                                  sep='\t')
    save_and_close(prefix + ".pie_RNA.pdf")

    ### pie plot of YRNA type
    y_types = Series(dic_YRNA_type).fillna(value=0)
    y_types = y_types.reindex(['Y5', 'Y4', 'Y3', 'Y1'], fill_value=0)
    y_types.name = ''
    pie(y_types, (4, 4))
    save_and_close(prefix + ".pie_YRNA.pdf")

    y_types.to_csv(prefix + ".pie_YRNA.txt", header=False, sep='\t')
Example #50
0
    for ax in axes:
        ax.plot(x, y, 'r')
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.set_title('title')

    fig.tight_layout()
    show()
    ##########################################################
    # The .plot() method of Series and DataFrame objects is just
    # a wrapper around plt.plot:

    ts = Series(randn(1000), index=date_range('1/1/2000', periods=1000))
    ts = ts.cumsum()
    ts.plot()

    df = DataFrame(randn(1000, 4), index=ts.index, columns=list('ABCD'))
    df = df.cumsum()

    plt.figure()
    df.plot()
    plt.legend(loc='best')

    show()

    ##########################################################
    # To switch to a logarithmic scale, set the logy parameter.
    #df.plot(logy = True)

    plt.figure()
Example #51
0
# Relabel the x ticks of `ax`.
# NOTE(review): `ax` is not created in this part of the script; presumably it
# is an Axes built earlier - confirm.
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30,
                            fontsize='small')
plt.yticks([-10, 0, 10], ['h', 'j', 'k'], rotation=0, color='r')
plt.ylabel('xx', color='g', fontsize=14, rotation=20)
plt.title('oh,you are so beautiful')

# Annotate the data point at xy; the text is offset from it by (10, 30)
# points and connected to it with an arrow.
ax.annotate('yes',
            xy=(667.88, -23.3617),
            xytext=(+10, +30),
            textcoords='offset points',
            fontsize=16,
            arrowprops=dict(arrowstyle='->'))

fig = plt.figure()
# NOTE(review): the result of plt.subplots is not used below; the two axes
# are obtained with plt.subplot instead.
axes = plt.subplots(2, 1)
data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
ax1 = plt.subplot(211)
ax2 = plt.subplot(212)
plt.sca(ax1)  # draw on the first subplot
data.plot(kind='bar', color='k', alpha=0.7)
plt.sca(ax2)  # draw on the second subplot
data.plot(kind='barh', color='g', alpha=0.7)

tips = pd.read_csv('names/yob1880.txt', names=['name', 'sex', 'birth'])

# pd.Index takes `name`, not `names`, for the label of the column index.
df = pd.DataFrame(np.random.randn(6, 4),
                  index=['one', 'two', 'three', 'four', 'five', 'six'],
                  columns=pd.Index(['A', 'B', 'C', 'D'], name='haah'))
# Shape of the loaded table.
ao.shape


# Create as many month-end timestamps as there are rows in the data,
# starting in January 1950.
dates = pd.date_range('1950-01', periods=ao.shape[0], freq='M')

# Shape of the date index.
dates.shape


# Create a Series that pairs the third data column with the dates.
AO = Series(ao[:, 2], index=dates)
AO

# Plot the entire time series.
AO.plot()
# Save the AO plot as a PDF.
plt.savefig('AO_plot.pdf')


# Other plots of just parts of the time series.
AO['1980':'1990'].plot()
AO['1980-05':'1981-03'].plot()


# Accessing values by position or by index label.
# Positional access goes through .iloc; plain [] with an integer on a
# date-indexed Series is deprecated label/position guessing.
AO.iloc[120]
AO['1960-01']
AO['1960']

# Import additional data.
# Finish the current plot: y axis limited to [0, 1], axis labels, title, and
# a legend placed outside the axes on the right-hand side.
# NOTE(review): `ax` is not created in this part of the script - confirm it
# refers to the axes of the current plot.
plt.ylim([0,1])
plt.xlabel('Date')
plt.ylabel('distribution')
plt.title('distribution vs. Time')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# plt.savefig('allocation.png')

# Plot stock prices (top panel) and shifted returns (bottom panel).
fig, axes = plt.subplots(nrows=2,ncols=1)
stock_price.plot(ax=axes[0])
shift_returns.plot(ax=axes[1])
axes[0].set_title('Stock Prices')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Price')
axes[0].legend(loc='center left', bbox_to_anchor=(1, 0.5))
# The shift length is embedded in the title and y label of the second panel.
axes[1].set_title(str(shift)+ ' Day Shift returns')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('returns ' + str(shift) + ' Days Apart')
axes[1].legend(loc='center left', bbox_to_anchor=(1, 0.5))
# plt.savefig('stocks.png', pad_inches=1)
fig.tight_layout()

# Plot portfolio returns vs. time on a new figure.
plt.figure()
returns.plot()
plt.xlabel('Date')
plt.ylabel('Portolio returns')
plt.title('Portfolio returns vs. Time')
# plt.savefig('returns.png')

# Display all figures created above.
plt.show()
Example #54
0
 def test_invalid_kind(self):
     """Plotting a Series with an unknown ``kind`` must raise ValueError."""
     series = Series([1, 2])
     bogus_kind = "aasdf"
     with pytest.raises(ValueError):
         series.plot(kind=bogus_kind)
# Four scores labelled by student name.
myindex = ['강감찬', '홍길동', '이순신', '최영']
members = Series(data=[20, 60, 80, 40], index=myindex)
print(members)

# Examples for each kind of plot

# kind: one of line, bar, barh, pie, kde (kernel density estimate)
# rot: tick label rotation
# ylim: lower and upper limits of the y axis
# color: colors to use
# legend: whether to show a legend; label: text shown in the legend
# stacked: stacked plot
members.plot(kind='bar',
             use_index=True,
             color=['r', 'g', 'b', 'y'],
             rot=0,
             ylim=[0, members.max() + 20])
# members.plot(kind='bar', use_index=False, color=['r','g','b','y'], rot=0, ylim=[0,members.max()+20])

plt.title('학생별 국어 시험')
plt.xlabel('학생 이름')
plt.ylabel('점수')
# plt.grid(True)

# Share of each member in the total, in percent.
ratio = 100 * members / members.sum()
print(ratio)

# Build the two label strings for each bar.  The Series are indexed by name,
# so positional access must go through .iloc; plain [] with an integer on a
# string-indexed Series is deprecated label/position guessing.
for idx in range(members.size):
    value = str(members.iloc[idx]) + '건'  # e.g. 60건
    ratioval = '%.1f%%' % (ratio.iloc[idx])  # e.g. 20.0%
Example #56
0
# Training-set fit quality for the lasso model fitted earlier in the script
# (glm_lasso, dfTrain and predLabelTrain are defined above this point).
r2 = r2_score(dfTrain['Y1'], predLabelTrain)
print('r-squared = ', r2)
mse = mean_squared_error(dfTrain['Y1'], predLabelTrain)
print('mse = {:12,.0f}'.format(mse))
# Coefficients labelled by feature name, sorted by value.
lasso_coef = Series(
    glm_lasso.coef_, dfTrain[[
        'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
    ]].columns).sort_values()
print('type of lasso_coef:', type(lasso_coef))
# print(lasso_coef.size)
print('beta coefficient of variables')
for i in range(lasso_coef.size):
    print('{:4s} : {:10.8f}'.format(lasso_coef.index.values[i],
                                    lasso_coef.values[i]))
# Bar chart of the coefficients, saved to disk.
plt.figure(1)
lasso_coef.plot(kind='bar', grid=True)
plt.savefig("lasso_alpha0.png")
# NOTE(review): the banner printed below says alpha=100 but the model is
# fitted with alpha=10 - confirm which value is intended.
print('[alpha=100]')
glm_lasso = lm.Lasso(alpha=10).fit(
    dfTrain[[
        'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
    ]], dfTrain['Y1'])
predLabelTrain = glm_lasso.predict(dfTrain[[
    'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
]])
# Same fit-quality report for the refitted model.
r2 = r2_score(dfTrain['Y1'], predLabelTrain)
print('r-squared = ', r2)
mse = mean_squared_error(dfTrain['Y1'], predLabelTrain)
print('mse = {:12,.0f}'.format(mse))
lasso_coef = Series(
    glm_lasso.coef_, dfTrain[[
Example #57
0
 def test_style_single_ok(self):
     """A ``style`` of "s" combined with ``color`` keeps that color on the line."""
     axes = Series([1, 2]).plot(style="s", color="C3")
     first_line = axes.lines[0]
     assert first_line.get_color() == "C3"
Example #58
0
    def test_df_series_secondary_legend(self):
        """Check legends when a Series is added on a secondary y axis.

        A DataFrame is plotted first, then a Series is drawn onto the same
        axes with ``secondary_y=True`` and ``legend=True``.  Each section
        checks the legend labels and which y axes are visible.
        """
        # GH 9779
        df = DataFrame(np.random.randn(30, 3), columns=list("abc"))
        s = Series(np.random.randn(30), name="x")

        # primary -> secondary (ax passed as the last keyword)
        _, ax = self.plt.subplots()
        ax = df.plot(ax=ax)
        s.plot(legend=True, secondary_y=True, ax=ax)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
        assert ax.get_yaxis().get_visible()
        assert ax.right_ax.get_yaxis().get_visible()
        tm.close()

        # primary -> secondary (ax passed as the first keyword)
        _, ax = self.plt.subplots()
        ax = df.plot(ax=ax)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
        assert ax.get_yaxis().get_visible()
        assert ax.right_ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary (ax passed as the last keyword)
        _, ax = self.plt.subplots()
        ax = df.plot(secondary_y=True, ax=ax)
        s.plot(legend=True, secondary_y=True, ax=ax)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
        self._check_legend_labels(ax.left_ax, labels=expected)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary (ax passed as the first keyword)
        _, ax = self.plt.subplots()
        ax = df.plot(secondary_y=True, ax=ax)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
        self._check_legend_labels(ax.left_ax, expected)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary, DataFrame plotted with mark_right=False:
        # its columns are expected without the " (right)" suffix
        _, ax = self.plt.subplots()
        ax = df.plot(secondary_y=True, mark_right=False, ax=ax)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ["a", "b", "c", "x (right)"]
        self._check_legend_labels(ax.left_ax, expected)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()
Example #59
0
# Split into training and cross-validation sets, using two predictor columns.
X = train.loc[:,['Outlet_Establishment_Year', 'Item_MRP']]
x_train, x_cv, y_train, y_cv = train_test_split(X, train.Item_Outlet_Sales)

# Lasso regression: fit on the training split, predict on the CV split.
lassoReg = Lasso(alpha=0.5, normalize=True)
lassoReg.fit(x_train,y_train)
pred = lassoReg.predict(x_cv)

# Calculate the mean squared error and the model score on the CV split.
mse = np.mean((pred - y_cv)**2)
print('Mean Squared Error:',mse)
print('Score:',lassoReg.score(x_cv,y_cv))

# Table of predictor names with their fitted coefficients.
coeff = DataFrame(x_train.columns)
coeff['Coefficient Estimate'] = Series(lassoReg.coef_)
print(coeff)

# Residual plot: residual (prediction minus actual) against prediction, with
# a horizontal reference line at zero.
x_plot = plt.scatter(pred, (pred - y_cv), c='b')
plt.hlines(y=0, xmin=-1000, xmax=5000)
plt.title('Residual Plot')
plt.show()

# Magnitude of the coefficients, sorted and shown as a bar chart.
predictors = x_train.columns
coef = Series(lassoReg.coef_,predictors).sort_values()
coef.plot(kind='bar', title='Modal Coefficients')
plt.show()

ao = np.loadtxt('monthly.ao.index.b50.current.ascii') #loads data 
ao[0:2]
ao.shape #displays number of rows and columns


#Time Series 
dates = pd.date_range('1950-01', periods=ao.shape[0], freq='M') #creates range
dates
dates.shape

#First Time Series 
AO = Series(ao[:,2], index = dates)
AO
AO.plot() #graph
AO['1980':'1990'].plot()
AO['1980-05':'1981-03'].plot()

AO[120] #individual value 
AO['1960-01'] #by index
AO['1960'] #by specified year
AO[AO > 0] #subset of values


#Data Frame 

#Download dataset (same procedure as begining of tutorial)
!wget http://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.nao.monthly.b5001.current.ascii
nao = np.loadtxt('norm.nao.monthly.b5001.current.ascii')
dates_nao = pd.date_range('1950-01', periods=nao.shape[0], freq='M')