def test_mixed_freq_irreg_period(self):
    """Plot a gappy slice of a time series, then a business-day period series."""
    full_ts = tm.makeTimeSeries()
    # Positions 0-10, 15-18 and 29: the gaps are what make the sample irregular.
    keep = list(range(11)) + [15, 16, 17, 18, 29]
    irregular = full_ts[keep]
    business_rng = period_range('1/3/2000', periods=30, freq='B')
    period_ser = Series(np.random.randn(len(business_rng)), business_rng)
    irregular.plot()
    period_ser.plot()
def slide_11():
    """Walk through the bar-plot examples: Series, DataFrame and tips data.

    Draws vertical and horizontal bars for a random Series, grouped and
    stacked bars for a random DataFrame, then reads the CSV at
    ``TIPSCSVPATH``, cross-tabulates day against party size, normalises each
    row to sum to 1 and draws the result as stacked bars. The intermediate
    tables are printed along the way.
    """
    fig, axes = plt.subplots(2, 1)
    data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
    data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
    data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

    df = DataFrame(np.random.rand(6, 4),
                   index=['one', 'two', 'three', 'four', 'five', 'six'],
                   columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
    # Single-argument ``print(...)`` is valid under both Python 2 and 3.
    print(df)
    df.plot(kind='bar')
    df.plot(kind='barh', stacked=True, alpha=0.5)

    tips = pd.read_csv(TIPSCSVPATH)
    print(tips.head())
    party_counts = pd.crosstab(index=tips.day, columns=tips.sizes)
    print('曜日とパーティの大きさ別に仕分け')
    print(party_counts)
    # ``.ix`` was removed from pandas; ``.loc`` selects the same label range
    # (assumes the party-size columns are integer labels - TODO confirm).
    party_counts = party_counts.loc[:, 2:5]
    print('サイズ1と6のパーティは少ないから除外')
    print(party_counts)
    print('正規化')
    # Divide every row by its own total so each day sums to 1.
    party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
    print(party_pcts)
    party_pcts.plot(kind='bar', stacked=True)
def test_ts_plot_format_coord(self):
    """Check ``format_coord`` for annual and daily data, via plot and tsplot."""

    def check_format_of_first_point(ax, expected_string):
        # Format the first point of the first line and compare with the
        # expected text; a ValueError while comparing skips the test.
        line = ax.get_lines()[0]
        x0 = line.get_xdata()[0].ordinal
        y0 = line.get_ydata()[0]
        try:
            self.assertEqual(expected_string, ax.format_coord(x0, y0))
        except (ValueError):
            raise nose.SkipTest("skipping test because issue forming "
                                "test comparison GH7664")

    annual_index = date_range('2014-01-01', periods=3, freq='A-DEC')
    annual = Series(1, index=annual_index)
    check_format_of_first_point(annual.plot(), 't = 2014 y = 1.000000')

    # note this is added to the annual plot already in existence, and
    # changes its freq field
    daily_index = date_range('2014-01-01', periods=3, freq='D')
    daily = Series(1, index=daily_index)
    check_format_of_first_point(daily.plot(), 't = 2014-01-01 y = 1.000000')
    tm.close()

    # tsplot
    import matplotlib.pyplot as plt
    from pandas.tseries.plotting import tsplot

    tsplot(annual, plt.Axes.plot)
    check_format_of_first_point(plt.gca(), 't = 2014 y = 1.000000')
    tsplot(daily, plt.Axes.plot)
    check_format_of_first_point(plt.gca(), 't = 2014-01-01 y = 1.000000')
def test_from_weekly_resampling(self):
    """Plot a monthly series first, then a weekly one, on the same axes.

    Every line must end up with the weekly frequency, and its converted
    x data must match the expected period ordinals. The same checks are
    repeated for the ``tsplot`` helper.
    """
    idxh = date_range('1/1/1999', periods=52, freq='W')
    idxl = date_range('1/1/1999', periods=12, freq='M')
    high = Series(np.random.randn(len(idxh)), idxh)
    low = Series(np.random.randn(len(idxl)), idxl)
    low.plot()
    ax = high.plot()

    expected_h = idxh.to_period().asi8.astype(np.float64)
    expected_l = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544,
                           1549, 1553, 1558, 1562], dtype=np.float64)

    def check_lines(lines):
        for line in lines:
            # assertTrue(a, b) would treat ``b`` as the failure message and
            # never compare the two frequencies; assertEqual really does.
            self.assertEqual(PeriodIndex(data=line.get_xdata()).freq,
                             idxh.freq)
            xdata = line.get_xdata(orig=False)
            if len(xdata) == 12:  # idxl lines
                self.assert_numpy_array_equal(xdata, expected_l)
            else:
                self.assert_numpy_array_equal(xdata, expected_h)

    check_lines(ax.get_lines())
    tm.close()

    # tsplot
    from pandas.tseries.plotting import tsplot
    import matplotlib.pyplot as plt

    tsplot(low, plt.Axes.plot)
    check_lines(tsplot(high, plt.Axes.plot))
def test_secondary_y_ts(self):
    """Secondary-y plotting of a date-indexed Series keeps data and axes right."""
    import matplotlib.pyplot as plt

    index = date_range('1/1/2000', periods=10)
    first = Series(np.random.randn(10), index)
    second = Series(np.random.randn(10), index)

    sec_ax = first.plot(secondary_y=True)
    self.assertTrue(hasattr(sec_ax, 'left_ax'))
    self.assertFalse(hasattr(sec_ax, 'right_ax'))
    figure = sec_ax.get_figure()
    all_axes = figure.get_axes()

    # Round-trip the plotted line back into a timestamp-indexed Series.
    line = sec_ax.get_lines()[0]
    round_trip = Series(line.get_ydata(), line.get_xdata()).to_timestamp()
    assert_series_equal(first, round_trip)
    self.assertEqual(sec_ax.get_yaxis().get_ticks_position(), 'right')
    self.assertFalse(all_axes[0].get_yaxis().get_visible())
    plt.close(figure)

    plain_ax = second.plot()
    self.assertEqual(plain_ax.get_yaxis().get_ticks_position(),
                     self.default_tick_position)
    plt.close(plain_ax.get_figure())

    # Primary plot first, secondary second: the primary y axis stays visible.
    primary_ax = second.plot()
    first.plot(secondary_y=True)
    self.assertTrue(primary_ax.get_yaxis().get_visible())
def test_secondary_y(self):
    """Secondary-y plotting of a plain Series wires up left/right axes."""
    import matplotlib.pyplot as plt

    first = Series(np.random.randn(10))
    second = Series(np.random.randn(10))

    sec_ax = first.plot(secondary_y=True)
    self.assertTrue(hasattr(sec_ax, 'left_ax'))
    self.assertFalse(hasattr(sec_ax, 'right_ax'))
    figure = sec_ax.get_figure()
    all_axes = figure.get_axes()

    # The plotted line must carry exactly the original data.
    line = sec_ax.get_lines()[0]
    round_trip = Series(line.get_ydata(), line.get_xdata())
    assert_series_equal(first, round_trip)
    self.assertEqual(sec_ax.get_yaxis().get_ticks_position(), 'right')
    self.assertFalse(all_axes[0].get_yaxis().get_visible())
    plt.close(figure)

    plain_ax = second.plot()
    self.assertEqual(plain_ax.get_yaxis().get_ticks_position(), 'default')
    plt.close(plain_ax.get_figure())

    # Primary plot first, then a secondary one on top of it.
    primary_ax = second.plot()
    overlay_ax = first.plot(secondary_y=True)
    self.assertTrue(primary_ax.get_yaxis().get_visible())
    self.assertFalse(hasattr(primary_ax, 'left_ax'))
    self.assertTrue(hasattr(primary_ax, 'right_ax'))
    self.assertTrue(hasattr(overlay_ax, 'left_ax'))
    self.assertFalse(hasattr(overlay_ax, 'right_ax'))
def pd_plot():
    """Plot a random-walk Series and a four-column random-walk DataFrame."""
    walk_index = range(0, 100, 10)
    walk = Series(np.random.randn(10).cumsum(), index=walk_index)
    print(walk)
    walk.plot()
    # Original author's open question (translated): why does the figure not
    # show up here, and only work in IPython?
    # NOTE(review): nothing in this function calls plt.show() - presumably
    # that is why; confirm before changing behaviour.
    frame = pd.DataFrame(np.random.randn(10, 4).cumsum(0),
                         index=np.arange(0, 100, 10),
                         columns=['A', 'B', 'C', 'D'])
    frame.plot()
def test_invalid_plot_data(self):
    """Each listed plot kind must reject an all-string Series with TypeError."""
    strings_only = Series(list("abcd"))
    for plot_kind in ("line", "bar", "barh", "kde", "density"):
        with tm.assertRaises(TypeError):
            strings_only.plot(kind=plot_kind)
def test_partially_invalid_plot_data(self):
    """A Series mixing strings and numbers must raise TypeError for each kind."""
    mixed = Series(['a', 'b', 1.0, 2])
    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        with tm.assertRaises(TypeError):
            mixed.plot(kind=plot_kind)
def test_invalid_plot_data(self):
    """Each listed plot kind must reject an all-string Series with TypeError."""
    strings_only = Series(list('abcd'))
    for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
        with tm.assertRaises(TypeError):
            strings_only.plot(kind=plot_kind)
def test_invalid_plot_data(self):
    """All-string data must raise TypeError for every usable common kind."""
    strings_only = Series(list('abcd'))
    for plot_kind in plotting._common_kinds:
        # Kinds rejected by _ok_for_gaussian_kde are not exercised.
        if _ok_for_gaussian_kde(plot_kind):
            with tm.assertRaises(TypeError):
                strings_only.plot(kind=plot_kind)
def test_kind_both_ways(self):
    """Each kind works both as ``plot(kind=...)`` and as ``plot.<kind>()``."""
    ser = Series(range(3))
    all_kinds = plotting._common_kinds + plotting._series_kinds
    for name in all_kinds:
        # Kinds rejected by _ok_for_gaussian_kde are not exercised.
        if _ok_for_gaussian_kde(name):
            ser.plot(kind=name)
            getattr(ser.plot, name)()
def test_label(self):
    """Legend text comes from ``label=``, else the Series name, else 'None'."""
    s = Series([1, 2])

    def plot_on_new_axes(**plot_kwargs):
        # Every scenario draws on its own freshly created axes.
        _, fresh_ax = self.plt.subplots()
        return s.plot(ax=fresh_ax, **plot_kwargs)

    ax = plot_on_new_axes(label='LABEL', legend=True)
    self._check_legend_labels(ax, labels=['LABEL'])
    self.plt.close()

    ax = plot_on_new_axes(legend=True)
    self._check_legend_labels(ax, labels=['None'])
    self.plt.close()

    # get name from the Series' ``name`` attribute
    s.name = 'NAME'
    ax = plot_on_new_axes(legend=True)
    self._check_legend_labels(ax, labels=['NAME'])
    self.plt.close()

    # override the default
    ax = plot_on_new_axes(legend=True, label='LABEL')
    self._check_legend_labels(ax, labels=['LABEL'])
    self.plt.close()

    # Add label info, but don't draw
    ax = plot_on_new_axes(legend=False, label='LABEL')
    assert ax.get_legend() is None  # Hasn't been drawn
    ax.legend()  # draw it
    self._check_legend_labels(ax, labels=['LABEL'])
def test_partially_invalid_plot_data(self):
    """Mixed string/number data must raise TypeError for every usable kind."""
    mixed = Series(['a', 'b', 1.0, 2])
    for plot_kind in plotting._common_kinds:
        # Kinds rejected by _ok_for_gaussian_kde are not exercised.
        if _ok_for_gaussian_kde(plot_kind):
            with tm.assertRaises(TypeError):
                mixed.plot(kind=plot_kind)
def test_partially_invalid_plot_data(self):
    """A Series mixing strings and numbers must raise TypeError for each kind."""
    mixed = Series(["a", "b", 1.0, 2])
    for plot_kind in ("line", "bar", "barh", "kde", "density"):
        with tm.assertRaises(TypeError):
            mixed.plot(kind=plot_kind)
def test_ts_plot_format_coord(self):
    """Check ``format_coord`` for annual and daily data, via plot and tsplot."""

    def check_format_of_first_point(ax, expected_string):
        # Format the first point of the first line and compare with the
        # expected text; a ValueError while comparing skips the test.
        line = ax.get_lines()[0]
        x0 = line.get_xdata()[0].ordinal
        y0 = line.get_ydata()[0]
        try:
            assert expected_string == ax.format_coord(x0, y0)
        except (ValueError):
            pytest.skip("skipping test because issue forming "
                        "test comparison GH7664")

    annual_index = date_range('2014-01-01', periods=3, freq='A-DEC')
    annual = Series(1, index=annual_index)
    _, shared_ax = self.plt.subplots()
    annual.plot(ax=shared_ax)
    check_format_of_first_point(shared_ax, 't = 2014 y = 1.000000')

    # note this is added to the annual plot already in existence, and
    # changes its freq field
    daily_index = date_range('2014-01-01', periods=3, freq='D')
    daily = Series(1, index=daily_index)
    daily.plot(ax=shared_ax)
    check_format_of_first_point(shared_ax, 't = 2014-01-01 y = 1.000000')
    tm.close()

    # tsplot
    _, tsplot_ax = self.plt.subplots()
    from pandas.tseries.plotting import tsplot

    tsplot(annual, self.plt.Axes.plot, ax=tsplot_ax)
    check_format_of_first_point(tsplot_ax, 't = 2014 y = 1.000000')
    tsplot(daily, self.plt.Axes.plot, ax=tsplot_ax)
    check_format_of_first_point(tsplot_ax, 't = 2014-01-01 y = 1.000000')
def test_fake_inferred_business(self):
    """A daily series with two days removed must not set ``freq`` on the axes."""
    _, ax = self.plt.subplots()
    days = date_range('2001-1-1', '2001-1-10')
    complete = Series(lrange(len(days)), days)
    # Drop positions 3 and 4 so the remaining dates have a gap.
    gappy = complete[:3].append(complete[5:])
    gappy.plot(ax=ax)
    assert not hasattr(ax, 'freq')
def test_secondary_y_ts(self):
    """Secondary-y plotting of a date-indexed Series keeps data and axes right."""
    index = date_range('1/1/2000', periods=10)
    first = Series(np.random.randn(10), index)
    second = Series(np.random.randn(10), index)

    figure, _ = self.plt.subplots()
    sec_ax = first.plot(secondary_y=True)
    assert hasattr(sec_ax, 'left_ax')
    assert not hasattr(sec_ax, 'right_ax')
    all_axes = figure.get_axes()

    # Round-trip the plotted line back into a timestamp-indexed Series.
    line = sec_ax.get_lines()[0]
    round_trip = Series(line.get_ydata(), line.get_xdata()).to_timestamp()
    assert_series_equal(first, round_trip)
    assert sec_ax.get_yaxis().get_ticks_position() == 'right'
    assert not all_axes[0].get_yaxis().get_visible()
    self.plt.close(figure)

    _, plain_ax = self.plt.subplots()
    second.plot(ax=plain_ax)
    assert (plain_ax.get_yaxis().get_ticks_position() ==
            self.default_tick_position)
    self.plt.close(plain_ax.get_figure())

    # Primary plot first, secondary second: the primary y axis stays visible.
    primary_ax = second.plot()
    first.plot(secondary_y=True)
    assert primary_ax.get_yaxis().get_visible()
def test_errorbar_plot(self):
    """Error bars accept Series/array/list input and reject bad length or type."""
    s = Series(np.arange(10))
    s_err = np.random.randn(10)

    # test line and bar plots
    for kind in ['line', 'bar']:
        for yerr in (Series(s_err), s_err, s_err.tolist()):
            _check_plot_works(s.plot, yerr=yerr, kind=kind)

    _check_plot_works(s.plot, xerr=s_err)

    # test time series plotting
    ix = date_range('1/1/2000', '1/1/2001', freq='M')
    ts = Series(np.arange(12), index=ix)
    ts_err = Series(np.random.randn(12), index=ix)
    _check_plot_works(ts.plot, yerr=ts_err)

    # check incorrect lengths and types
    with tm.assertRaises(ValueError):
        s.plot(yerr=np.arange(11))

    bad_type_err = ['zzz']*10
    with tm.assertRaises(TypeError):
        s.plot(yerr=bad_type_err)
def test_invalid_plot_data(self):
    """All-string data must raise the 'no numeric data' TypeError for each kind."""
    expected_msg = "no numeric data to plot"
    strings_only = Series(list('abcd'))
    _, ax = self.plt.subplots()
    for plot_kind in plotting._core._common_kinds:
        with pytest.raises(TypeError, match=expected_msg):
            strings_only.plot(kind=plot_kind, ax=ax)
def plot_mortality_rate(curve, string = True):
    """Plot ``curve[2:]`` divided by ``curve[1]`` and scaled by 1000.

    ``curve`` is either a comma-separated string (``string=True``) or an
    already split sequence. ``curve[0]`` is used as the line label. Nothing
    is plotted when fewer than two fields are present.
    """
    fields = curve.split(',') if string else curve
    if len(fields) < 2:
        return
    from pandas import Series
    label, divisor, counts = fields[0], fields[1], fields[2:]
    rate = Series(counts).astype('int') / int(divisor) * 1000
    rate.plot(label=label)
def test_invalid_plot_data(self):
    """All-string data must raise TypeError for every usable common kind."""
    strings_only = Series(list('abcd'))
    _, ax = self.plt.subplots()
    for plot_kind in plotting._core._common_kinds:
        # Kinds rejected by _ok_for_gaussian_kde are not exercised.
        if _ok_for_gaussian_kde(plot_kind):
            with pytest.raises(TypeError):
                strings_only.plot(kind=plot_kind, ax=ax)
def test_partially_invalid_plot_data(self):
    """Mixed data must raise the 'no numeric data' TypeError for each kind."""
    expected_msg = "no numeric data to plot"
    mixed = Series(['a', 'b', 1.0, 2])
    _, ax = self.plt.subplots()
    for plot_kind in plotting._core._common_kinds:
        with pytest.raises(TypeError, match=expected_msg):
            mixed.plot(kind=plot_kind, ax=ax)
def test_partially_invalid_plot_data(self):
    """Mixed string/number data must raise TypeError for every usable kind."""
    mixed = Series(['a', 'b', 1.0, 2])
    _, ax = self.plt.subplots()
    for plot_kind in plotting._core._common_kinds:
        # Kinds rejected by _ok_for_gaussian_kde are not exercised.
        if _ok_for_gaussian_kde(plot_kind):
            with pytest.raises(TypeError):
                mixed.plot(kind=plot_kind, ax=ax)
def plot_mortality_rate_avg(curve, string = True):
    """Plot the values in ``curve[2:]`` as a line labelled ``curve[0]``.

    ``curve`` is either a comma-separated string (``string=True``) or an
    already split sequence. ``curve[1]`` is not used by this function.
    Nothing is plotted when fewer than two fields are present.

    Raises:
        ValueError: if a value in ``curve[2:]`` cannot be read as a number.
    """
    if string:
        curve = curve.split(',')
    if len(curve) < 2:
        return
    from pandas import Series
    # Fields coming out of ``str.split`` are strings; left as-is they form an
    # object-dtype Series that ``Series.plot`` rejects ("no numeric data to
    # plot"), so convert to numbers before plotting.
    series = Series(curve[2:]).astype(float)
    series.plot(label=curve[0])
def test_mixed_freq_hf_first(self):
    """Daily data plotted first: every line on the axes reports freq 'D'."""
    daily_idx = date_range('1/1/1999', periods=365, freq='D')
    monthly_idx = date_range('1/1/1999', periods=12, freq='M')
    daily = Series(np.random.randn(len(daily_idx)), daily_idx)
    monthly = Series(np.random.randn(len(monthly_idx)), monthly_idx)
    daily.plot()
    ax = monthly.plot()
    for line in ax.get_lines():
        line_freq = PeriodIndex(data=line.get_xdata()).freq
        self.assertEqual(line_freq, 'D')
def test_kind_both_ways(self):
    """Each kind works both as ``plot(kind=...)`` and as ``plot.<kind>()``."""
    ser = Series(range(3))
    core = plotting._core
    all_kinds = core._common_kinds + core._series_kinds
    _, ax = self.plt.subplots()
    for name in all_kinds:
        ser.plot(kind=name, ax=ax)
        accessor_method = getattr(ser.plot, name)
        accessor_method()
def test_pandas_plots_register(self):
    """Plotting a date-indexed Series must not emit any warning."""
    pytest.importorskip("matplotlib.pyplot")
    dated = Series(range(12), index=date_range('2017', periods=12))
    # Set to the "warn" state, in case this isn't the first test run
    converter._WARN = True
    with tm.assert_produces_warning(None) as caught:
        dated.plot()

    assert len(caught) == 0
def test_to_weekly_resampling(self):
    """Weekly data plotted first: every line's freq string starts with 'W'."""
    weekly_idx = date_range('1/1/1999', periods=52, freq='W')
    monthly_idx = date_range('1/1/1999', periods=12, freq='M')
    weekly = Series(np.random.randn(len(weekly_idx)), weekly_idx)
    monthly = Series(np.random.randn(len(monthly_idx)), monthly_idx)
    weekly.plot()
    ax = monthly.plot()
    for line in ax.get_lines():
        line_freq = PeriodIndex(data=line.get_xdata()).freq
        self.assertTrue(line_freq.startswith('W'))
def test_finder_monthly_long(self):
    """On 24 years of monthly data the first major tick is the 1989Q1 period."""
    months = period_range('1988Q1', periods=24 * 12, freq='M')
    monthly = Series(np.random.randn(len(months)), months)
    _, ax = self.plt.subplots()
    monthly.plot(ax=ax)
    first_major_tick = ax.get_xaxis().get_majorticklocs()[0]
    expected_ordinal = Period('1989Q1', 'M').ordinal
    assert first_major_tick == expected_ordinal
"""lag plot stationary"""
# Each section below opens a new figure, draws one diagnostic for the
# waveform with index ``i``, writes it to "<i zero-padded to 3>_<name>" and
# closes the figure. ``diff``, ``full``, ``energy1`` and ``i`` come from
# code outside this fragment.
fig1 = plt.figure()
lag_plot(diff, s=1, c="k")
plt.title("lag_plot stationarized waveform, Energy={}keV".format(
    energy1[i]))
plt.savefig("{0:03}_diff_lag.png".format(i))
plt.close(fig1)
"""lag plot waveform"""
fig2 = plt.figure()
lag_plot(full, s=1, c="k")
plt.title("lag_plot, Energy={}keV".format(energy1[i]))
plt.savefig("{0:03}_full_lag.png".format(i))
plt.close(fig2)
"""Simple stationary waveform"""
fig3 = plt.figure()
diff.plot()
plt.title("stationarized waveform, Energy={}keV".format(energy1[i]))
plt.savefig("{0:03}_diff_waveform.png".format(i))
plt.close(fig3)
"""Simple waveform"""
fig4 = plt.figure()
full.plot()
plt.title("Waveform, Energy={}keV".format(energy1[i]))
# NOTE(review): unlike the other savefig calls this file name has no ".png"
# suffix - confirm whether that is intentional.
plt.savefig("{0:03}_full_waveform".format(i))
plt.close(fig4)
"""Partial Autocorrelation stationarized"""
fig5 = plt.figure()
# autocorrelation_plot(diff)
# NOTE(review): the call below is the *partial* autocorrelation plot while
# the title says "Autocorrelation" - confirm which wording is wanted.
sm.graphics.tsa.plot_pacf(diff, lags=30)
plt.title("Autocorrelation stationarized waveform, Energy={}keV".format(
    energy1[i]))
def test_plot_fails_with_dupe_color_and_style(self):
    """Giving the colour both in ``style`` and ``color`` must raise ValueError."""
    two_points = Series(randn(2))
    with pytest.raises(ValueError):
        _, target_ax = self.plt.subplots()
        two_points.plot(style="k--", color="k", ax=target_ax)
4.2, 2.3, 5.6, 4.5, 4.8, 3.9, 5.9, 2.4, 5.9, 6, 4, 3.7, 5, 5.2, 4.5, 3.6, 5, 6, 2.8, 3.3, 5.5, 4.2, 4.9, 5.1 ]) noten.sort_values(ascending=True) noten.median() # a) change three values that median remains the same noten2 = Series([ 4, 2, 5, 4.5, 4.8, 3.9, 5.9, 2.4, 5.9, 6, 4, 3.7, 5, 5.2, 4.5, 3.6, 5, 6, 2.8, 3.3, 5.5, 4.2, 4.9, 5.1 ]) noten2.median() # b) create histogram and boxplot plt.subplot(221) noten.plot(kind="hist", edgecolor="black") plt.subplot(222) noten.plot(kind="box") plt.subplot(223) noten2.plot(kind="hist", edgecolor="black") plt.subplot(224) noten2.plot(kind="box") # ------------------------- # Exercise 2.2 # ------------------------- schlamm = pd.read_csv("./data/klaerschlamm.dat", sep=" ", index_col=0) schlamm = schlamm.drop("Labor", 1)
# Train both networks on the same windows and predict the training inputs.
train_X, train_y = cnn.split_sequence(time_series)
ann.train(train_X, train_y)
cnn.train(train_X, train_y)
pre_cnn = cnn.predict(train_X)
pre_ann = ann.predict(train_X)

# The predictions are indexed by the series' index from position 4 onwards.
index = time_series.index[4:]
pre_cnn = Series(pre_cnn, index=index)
pre_ann = Series(pre_ann, index=index)

residual_cnn = time_series - pre_cnn
# with open('./residual_pickle/cnn_vehicle_df.pkl', 'wb') as f:
#     pk.dump(residual_cnn, f)
residual_ann = time_series - pre_ann
# with open('./residual_pickle/fcnn_vehicle_df.pkl', 'wb') as f:
#     pk.dump(residual_ann, f)

# One figure, two stacked panels: prediction against the original series.
plt.figure(figsize=(6, 6))
for panel_code, predicted, panel_title in ((211, pre_cnn, 'CNN'),
                                           (212, pre_ann, 'FCNN')):
    plt.subplot(panel_code)
    predicted.plot(color='green', label='Predicts', legend=True)
    time_series.plot(color='blue', label='Original', legend=True)
    plt.title(panel_title)
    plt.grid(which='both')
# Demonstrates reading from and writing to a Series, then draws a bar chart
# annotated with every value and its share of the total.
print(myseries)

print(myseries['대구'])
print(type(myseries['대구']))  # type : numpy
print(myseries[['대구']])
print(type(myseries[['대구']]))  # type : Series

print(myseries['대구':'목포'])  # slicing by string labels includes both endpoints
# NOTE(review): the integer-list lookups below rely on positional fallback
# for a non-integer index, which newer pandas deprecates - confirm.
print(myseries[[2]])  # indexing: two pairs of brackets
print(myseries[0:5:2])  # slicing with colons: one pair of brackets
print(myseries[[1, 3, 5]])  # comma-separated keys: two pairs of brackets

myseries[2:5] = 33
print(myseries)

myseries[['서울', '대구']] = 44
print(myseries)

myseries[0::2] = 77
print(myseries)

colors = ['r', 'g', 'b', 'y', 'royalblue', 'c', 'm']
myseries.plot(kind='bar', rot=0, color=colors)
plt.xlabel('도시')
plt.ylabel('점수')

ratio = 100 * myseries / myseries.sum()
# Iterate the values directly instead of ``myseries[idx]``: an integer key on
# a string-labelled Series is a deprecated positional fallback in pandas.
for idx, (score, share) in enumerate(zip(myseries, ratio)):
    value = str(score)
    ratioval = f'{share:.1f}%'
    # Raw value just above the bar, percentage in the middle of the bar.
    plt.text(x=idx, y=score + 1, s=value, horizontalalignment='center')
    plt.text(x=idx, y=score / 2, s=ratioval, horizontalalignment='center')

graphfile = 'SeriesR&W_Graph.png'
plt.savefig(graphfile)
print('mse = {:12,.0f}'.format(mse)) ridge_coef = Series( glm_ridge.coef_, dfTrain[[ 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11' ]].columns).sort_values() print('type of ridge_coef:', type(ridge_coef)) # print(ridge_coef.size) print('beta coefficient of variables') # for idx,value in enumerate(ridge_coef): # print('{} : {:10.8f}'.format(idx,value)) for i in range(ridge_coef.size): print('{:4s} : {:10.8f}'.format(ridge_coef.index.values[i], ridge_coef.values[i])) plt.figure(1) ridge_coef.plot(kind='bar', grid=True) plt.savefig("ridge_alpha0.png") print('[alpha=100]') glm_ridge = lm.Ridge(alpha=100).fit( dfTrain[[ 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11' ]], dfTrain['Y1']) predLabelTrain = glm_ridge.predict(dfTrain[[ 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11' ]]) r2 = r2_score(dfTrain['Y1'], predLabelTrain) print('r-squared = ', r2) mse = mean_squared_error(dfTrain['Y1'], predLabelTrain) print('mse = {:12,.0f}'.format(mse)) ridge_coef = Series(
def test_plot_fails_with_dupe_color_and_style(self):
    """Giving the colour both in ``style`` and ``color`` must raise ValueError."""
    two_points = Series(randn(2))
    with tm.assertRaises(ValueError):
        two_points.plot(style='k--', color='k')
def test_invalid_kind(self):
    """An unknown ``kind`` string must raise ValueError."""
    pair = Series([1, 2])
    with tm.assertRaises(ValueError):
        pair.plot(kind='aasdf')
def test_xticklabels(self):
    """Explicit ``xticks`` positions are labelled with the matching index labels."""
    # GH11529
    tick_positions = [0, 3, 5, 9]
    labels = ['P%02d' % i for i in range(10)]
    labelled = Series(np.arange(10), index=labels)
    ax = labelled.plot(xticks=[0, 3, 5, 9])
    expected = ['P%02d' % pos for pos in tick_positions]
    self._check_text_labels(ax.get_xticklabels(), expected)
def test_secondary_bar(self):
    """A secondary-y bar plot puts the second axes' y ticks on the right."""
    values = Series(np.random.randn(10))
    bar_ax = values.plot(secondary_y=True, kind='bar')
    figure_axes = bar_ax.get_figure().get_axes()
    tick_side = figure_axes[1].get_yaxis().get_ticks_position()
    self.assertEqual(tick_side, 'right')
def _penalized_linear_regression_train(table, feature_cols, label_col,
                                       regression_type='ridge', alpha=1.0,
                                       l1_ratio=0.5, fit_intercept=True,
                                       max_iter=1000, tol=0.0001,
                                       random_state=None):
    """Fit a ridge / lasso / elastic-net model and build its report.

    Args:
        table: DataFrame holding the feature and label columns.
        feature_cols: names of the predictor columns.
        label_col: name of the target column.
        regression_type: 'ridge', 'lasso' or 'elastic_net'; anything else is
            reported through ``raise_runtime_error``.
        alpha: penalty weight passed to the estimator.
        l1_ratio: L1/L2 mix, used by 'elastic_net' only.
        fit_intercept: whether the estimator fits an intercept; when true an
            'intercept' row is added to the coefficient table.
        max_iter: iteration limit passed to lasso and elastic net.
        tol: tolerance passed to the estimator.
        random_state: seed passed to the estimator.

    Returns:
        ``{'model': model}`` where ``model`` carries the fitted estimator,
        the parameters, the coefficient table, a copy of ``table`` with
        'predict' and 'residual' columns, and the rendered report.
    """
    out_table = table.copy()
    features = out_table[feature_cols]
    label = out_table[label_col]

    if regression_type == 'ridge':
        # NOTE(review): ridge is built with max_iter=None, so the caller's
        # ``max_iter`` is only reported, not applied - confirm intent.
        regression_model = Ridge(alpha=alpha, fit_intercept=fit_intercept,
                                 max_iter=None, tol=tol, solver='auto',
                                 random_state=random_state)
    elif regression_type == 'lasso':
        regression_model = Lasso(alpha=alpha, fit_intercept=fit_intercept,
                                 max_iter=max_iter, tol=tol,
                                 random_state=random_state,
                                 selection='random')
    elif regression_type == 'elastic_net':
        regression_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio,
                                      fit_intercept=fit_intercept,
                                      max_iter=max_iter, tol=tol,
                                      random_state=random_state,
                                      selection='random')
    else:
        raise_runtime_error("Please check 'regression_type'.")

    # Fit exactly once. Refitting for every attribute read is wasted work
    # and, with selection='random' and no seed, could make the reported
    # coefficients, intercept and predictions come from different fits.
    regression_model.fit(features, label)

    out_table1 = pd.DataFrame([])
    out_table1['x_variable_name'] = list(feature_cols)
    out_table1['coefficient'] = regression_model.coef_
    intercept = pd.DataFrame(
        [['intercept', regression_model.intercept_]],
        columns=['x_variable_name', 'coefficient'])
    if fit_intercept:
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        out_table1 = pd.concat([out_table1, intercept], ignore_index=True)

    predict = regression_model.predict(features)
    residual = label - predict
    out_table['predict'] = predict
    out_table['residual'] = residual

    if regression_type == 'elastic_net':
        params = {
            'Feature Columns': feature_cols,
            'Label Column': label_col,
            'Regression Type': regression_type,
            'Regularization (Penalty Weight)': alpha,
            'L1 Ratio': l1_ratio,
            'Fit Intercept': fit_intercept,
            'Maximum Number of Iterations': max_iter,
            'Tolerance': tol
        }
    else:
        # NOTE(review): the 'Maxium ...' key is misspelled but kept as-is,
        # because it is stored in the returned model and shown in the report.
        params = {
            'Feature Columns': feature_cols,
            'Label Column': label_col,
            'Regression Type': regression_type,
            'Regularization (Penalty Weight)': alpha,
            'Fit Intercept': fit_intercept,
            'Maxium Number of Iterations': max_iter,
            'Tolerance': tol
        }

    score = {
        'MSE': mean_squared_error(label, predict),
        'R2': r2_score(label, predict)
    }

    # Predicted vs actual, with the identity line as reference.
    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)
    x = predict
    p1x = np.min(x)
    p2x = np.max(x)
    plt.plot([p1x, p2x], [p1x, p2x], 'r--')
    fig_actual_predict = plt2MD(plt)
    plt.clf()

    # Residuals against predictions.
    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)
    plt.clf()

    # Q-Q plot of the residuals.
    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)
    plt.clf()

    # Distribution of the residuals.
    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)
    plt.clf()

    # checking the magnitude of coefficients
    plt.figure()
    predictors = features.columns
    coef = Series(regression_model.coef_, predictors).sort_values()
    coef.plot(kind='bar', title='Model Coefficients')
    plt.tight_layout()
    fig_model_coefficients = plt2MD(plt)
    plt.clf()

    rb = BrtcReprBuilder()
    rb.addMD(
        strip_margin("""
        | # Penalized Linear Regression Result
        | ### Selected Parameters:
        | {params}
        |
        | ## Results
        | ### Model Parameters
        | {out_table1}
        |
        | ### Prediction and Residual
        | {out_table2}
        |
        | ### Regression Score
        | {score}
        |
        """.format(params=dict2MD(params),
                   out_table1=pandasDF2MD(out_table1),
                   out_table2=pandasDF2MD(out_table,
                                          num_rows=len(out_table) + 1),
                   score=dict2MD(score))))
    rb.addMD(
        strip_margin("""
        |
        | ### Predicted vs Actual
        | {image1}
        |
        | ### Fit Diagnostics
        | {image2}
        | {image3}
        | {image4}
        |
        | ### Magnitude of Coefficients
        | {image5}
        |
        """.format(image1=fig_actual_predict,
                   image2=fig_residual_1,
                   image3=fig_residual_2,
                   image4=fig_residual_3,
                   image5=fig_model_coefficients)))

    model = _model_dict('penalized_linear_regression_model')
    model['feature_cols'] = feature_cols
    model['label_col'] = label_col
    model['regression_type'] = regression_type
    model['regression_model'] = regression_model
    model['parameters'] = params
    model['model_parameters'] = out_table1
    model['prediction_residual'] = out_table
    model['_repr_brtc_'] = rb.get()
    return {'model': model}
import inline
import matplotlib.pyplot as plt
from pandas import Series

# NOTE(review): ``import inline`` looks like a leftover of the notebook magic
# ``%matplotlib inline`` - confirm that a module of that name really exists.

# One value per day for the first week of January, drawn as a bar chart.
day_labels = ['Jan 1', 'Jan 2', 'Jan 3', 'Jan 4', 'Jan 5', 'Jan 6', 'Jan 7']
day_values = [1.2, 2.5, -2.2, 3.1, -0.8, -3.2, 1.4]
s3 = Series(day_values, index=day_labels)
s3.plot(kind='bar', title='Bar plot')
plt.show()
# The combined forecast is the mean of the two theta-line predictions.
y_pre = (theta_19_pre + theta_01_pre)/2
index = time_series.index[4:]
#test_y = time_series[-2:]
# print(sMAPE(y_pre, test_y))
# print(MSE(y_pre, test_y))

# Put all three predictions on the series' index from position 4 onwards.
theta_19_pre = Series(theta_19_pre, index=index)
theta_01_pre = Series(theta_01_pre, index=index)
y_pre = Series(y_pre, index=index)
residual = time_series - y_pre
# with open('./residual_pickle/theta_vehicle_df.pkl', 'wb') as f:
#     pk.dump(residual, f)

plt.figure(figsize=(6, 6))
theta_curves = (
    (theta_19_pre, 'g', 'Theta=1.9 predicts'),
    (theta_19, 'r', 'Theta=1.9'),
    (theta_01_pre, 'c', 'Theta=0.1 predicts'),
    (theta_01, 'm', 'Theta=0.1'),
    (y_pre, 'black', 'Predicts'),
    (time_series, 'blue', 'Original'),
)
for curve, curve_color, curve_label in theta_curves:
    curve.plot(color=curve_color, label=curve_label, legend=True)
plt.title('Theta')
plt.grid(which='both')
plt.savefig('./pre_plot/Theta.jpg')
plt.show()

plt.figure(figsize=(6, 6))
plt.subplot(211)
residual.plot(label='residual for Theta', legend=True)
class GwSym:
    """Generate synthetic groundwater series with noise for simulations"""

    def __repr__(self):
        return 'GwSym object'

    def __init__(self):
        self.detpar = {}    # deterministic parameters: Atrue, ntrue, atrue, dtrue
        self.noisepar = {}  # noise parameters: alpha, beta
        self.rain = None    # rain series given to generate_head()
        self.head = None    # series produced by generate_head()
        self.noise = None   # series produced by generate_noise()
        # Kept under a private name: an instance attribute called ``name``
        # would shadow the ``name()`` method and make it uncallable.
        self._name = None

    def name(self, name=None):
        """Return name of GwSym object. If name is given, object is
        renamed and new name returned"""
        if name is not None:
            self._name = name
        return self._name

    def generate_head(self, rain=None, Atrue=800, ntrue=1.1, atrue=5,
                      dtrue=20):
        """Generate the heads from rain with deterministic model parameters.

        The rain series is convolved with a Gamma block response built from
        ``Atrue``, ``ntrue`` and ``atrue`` on top of the constant level
        ``dtrue``. The first ten years are dropped from the result, which is
        stored in ``self.head`` and returned.
        """
        self.rain = rain
        self.detpar['Atrue'] = Atrue
        self.detpar['ntrue'] = ntrue
        self.detpar['atrue'] = atrue
        self.detpar['dtrue'] = dtrue

        # from Pastas notebook 15
        step = ps.Gamma().block([Atrue, ntrue, atrue])
        h = dtrue * np.ones(len(rain) + step.size)
        # Walk the raw values: an integer key on a date-indexed Series is a
        # deprecated positional fallback in recent pandas.
        for i, amount in enumerate(np.asarray(rain)):
            h[i:i + step.size] += amount * step
        head = pd.Series(index=rain.index, data=h[:len(rain)], name='head')
        ##head = head['1990':'2015']

        # ignore first ten years
        year = str(head.first_valid_index().year + 10)
        head = head[f'{year}-01-01':].copy()
        self.head = head
        return head

    def generate_noise(self, alpha=0.7, beta=0.7, noise_perc=0.2, head=None):
        """Generate series of random distributed noise.

        Innovations are normal draws from a generator with a fixed seed,
        scaled by ``noise_perc`` times the standard deviation of ``head``
        (``self.head`` when not given). They are combined recursively with
        weight ``beta`` on the previous innovation and ``alpha`` on the
        previous noise value. The result is stored in ``self.noise`` and
        returned.
        """
        self.noisepar['alpha'] = alpha
        self.noisepar['beta'] = beta
        if head is None:
            head = self.head

        # generate samples using Numpy (fixed seed, so runs are repeatable)
        random_seed = np.random.RandomState(1234)
        n = len(head)
        innovation = random_seed.normal(0, 1, n) * np.std(head.values) * \
            noise_perc
        noise = np.zeros(n)
        for i in range(1, n):
            # beta = theta, alpha = phi
            noise[i] = innovation[i] + innovation[i - 1] * beta + \
                noise[i - 1] * alpha
        # head_noise = head[0] + noise
        self.noise = Series(noise, head.index)
        return self.noise

    def plot_series(self, figdir=None):
        """Plot head+noise, head and noise; save as jpg when figdir is given"""
        fig, [ax1, ax2, ax3] = plt.subplots(nrows=3, ncols=1,
                                            figsize=(15, 15))
        sr = self.head + self.noise
        sr.plot(ax=ax1, title='head+noise')
        self.head.plot(ax=ax2, title='head')
        self.noise.plot(ax=ax3, title='noise')
        title = self.model_name()
        fig.suptitle(title, fontsize=14)
        if figdir is not None:
            figname = self.model_name()
            figpath = f'{figdir}{figname}.jpg'
            fig.savefig(figpath)
            plt.close(fig)
        return

    def plot_noise_check(self, sr=None, figtitle=None, figdir=None):
        """Plot noise and noise histogram with a fitted normal density"""
        if sr is None:
            sr = self.noise
        if figtitle is None:
            alpha = self.noisepar['alpha']
            beta = self.noisepar['beta']
            figtitle = 'alfa=' + str(alpha) + ' beta=' + str(beta)
        fig, [ax1, ax2] = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
        sr.plot(ax=ax1, title=figtitle)
        sr.plot.hist(grid=False, bins=20, rwidth=0.9, color='#607c8e',
                     ax=ax2, density=True, title=figtitle)

        # find minimum and maximum of xticks, so we know
        # where we should compute theoretical distribution
        xt = ax2.get_xticks()
        xmin, xmax = min(xt), max(xt)
        lnspc = np.linspace(xmin, xmax, len(sr))

        # plot normal distribution
        m, s = norm.fit(sr)  # get mean and standard deviation
        pdf_g = norm.pdf(lnspc, m, s)  # now get theoretical values in our interval
        ax2.plot(lnspc, pdf_g)
        if figdir is not None:
            figpath = f'{figdir}{figtitle}.jpg'
            fig.savefig(figpath)
            plt.close(fig)
        return

    def pastas_model(self, figdir=None):
        """Create and solve Pastas model using generated head and noise"""
        # create Pastas model
        head_noise = self.head + self.noise
        self.ml = ps.Model(head_noise)
        self.sm = ps.StressModel(self.rain, ps.Gamma, name='recharge',
                                 settings='prec')
        self.ml.add_stressmodel(self.sm)
        self.ml.add_noisemodel(ps.ArmaModel())

        # solve Pastas model
        self.ml.solve(noise=True, report=False)

        if figdir is not None:
            # plot figure with model diagnostics
            axes = self.ml.plots.results(figsize=(10, 5))
            fig = axes[0].get_figure()

            # add real step function to plot
            Atrue = self.detpar['Atrue']
            ntrue = self.detpar['ntrue']
            atrue = self.detpar['atrue']
            axes[-1].plot(ps.Gamma().step([Atrue, ntrue, atrue]))
            # figname = f'Atrue={Atrue} ntrue={ntrue} atrue={atrue}'
            figname = self.model_name()
            fig.suptitle(figname, fontsize=12)
            figpath = f'{figdir}Model {figname}.jpg'
            fig.savefig(figpath)
            plt.close(fig)
        return self.ml

    def model_name(self):
        """Return model name built from the stored parameter values"""
        Atrue = self.detpar['Atrue']
        ntrue = self.detpar['ntrue']
        atrue = self.detpar['atrue']
        alpha = self.noisepar['alpha']
        beta = self.noisepar['beta']
        name = f'Atrue={Atrue} ntrue={ntrue} atrue={atrue} alpha={alpha} beta={beta}'
        return name

    def parameters(self):
        """Return table with true and estimated parameters.

        Requires ``pastas_model()`` to have been run, because the estimates
        are read from ``self.ml.parameters['optimal']``.
        """
        par = collections.OrderedDict()
        par['A_true'] = self.detpar['Atrue']
        par['n_true'] = self.detpar['ntrue']
        par['a_true'] = self.detpar['atrue']
        par['alpha'] = self.noisepar['alpha']
        par['beta'] = self.noisepar['beta']
        sr = self.ml.parameters['optimal']
        par['A_est'] = sr['recharge_A']
        par['n_est'] = sr['recharge_n']
        par['a_est'] = sr['recharge_a']
        par['alpha_est'] = np.exp(-1. / sr["noise_alpha"])
        # pm carries the sign of noise_beta (+1 or -1).
        pm = sr["noise_beta"] / np.abs(sr["noise_beta"])
        par['beta_est'] = pm * np.exp(-1. / np.abs(sr["noise_beta"]))
        self.par = DataFrame([par])
        self.par.index.name = 'casename'
        return self.par

    def test_statistics(self):
        """Return test statistics for innovations"""
        self.test_stats = ps.stats.diagnostics(self.ml.noise(), nparam=2)
        self.test_stats.index.name = 'casename'
        return self.test_stats
# Read data
ao = np.loadtxt('monthly.ao.index.b50.current.ascii')
print(ao[0:2])
print(ao.shape)

# Convert to time series data
dates = pd.date_range('1950-01', periods=ao.shape[0], freq='M')
print(dates)
print(dates.shape)

# Create first time series data (third column of the file, indexed by month)
AO = Series(ao[:, 2], index=dates)
print(AO)

# Plot AO data
# NOTE(review): the title says "Daily" although the index built above is
# monthly - confirm the wording.
AO.plot(title='Daily Atlantic Oscillation')
plt.show()
AO['1980':'1990'].plot()
plt.show()
AO['1980-05':'1981-03'].plot()
plt.show()

# Print some data
# ``.iloc`` states the positional lookup explicitly: an integer key on a
# date-indexed Series is a deprecated positional fallback in recent pandas.
print(AO.iloc[120])
print(AO['1960-01'])
print(AO['1960'])
print(AO[AO > 0])

# Create another time series data
nao = np.loadtxt('norm.nao.monthly.b5001.current.ascii')
dates_nao = pd.date_range('1950-01', periods=nao.shape[0], freq='M')
""" import pandas as pd import numpy as np from pandas import Series, DataFrame, Panel import matplotlib.pyplot as plt ao = np.loadtxt( 'monthly.ao.index.b50.current.ascii') # load ascii file as ao numpy array dates = pd.date_range( '1950-01', periods=ao.shape[0], freq='M') #define date starting 1950-01 and monthly frequency AO = Series(ao[:, 2], index=dates) # AO.plot().get_figure().savefig('AtlanticOscillation.png') plt.show() #plt.figure() #AO['2001':'2011'].plot() #plt.show() #plt.close() nao = np.loadtxt('norm.nao.monthly.b5001.current.ascii') dates_nao = pd.date_range('1950-01', periods=nao.shape[0], freq='M') NAO = Series(nao[:, 2], index=dates_nao) aonao = DataFrame({'AO': AO, 'NAO': NAO}) #aonao.plot(subplots=True).get_figure().savefig(') #aonao.plot() #plt.show() #plt.close()
def plotSeries(series: pd.Series, **kwargs) -> None:
    """Plot ``series`` and show the figure.

    All keyword arguments are forwarded unchanged to ``series.plot``.
    """
    plot_options = kwargs
    series.plot(**plot_options)
    plt.show()
plt.title("Welcome to the ML World!")  # a title can be added
plt.show()

print("==============散点图scatter=================")
x = np.random.normal(0, 1, 5000)
y = np.random.normal(0, 1, 5000)
# The transparency and the marker shape of the points can be specified.
plt.scatter(x, y, alpha=0.5, marker="x")
plt.show()  # draws a scatter plot of a standard 2-D normal distribution

print("==============直方图=================")
s = Series(np.random.randn(1000))  # follows the standard normal distribution
plt.hist(s, rwidth=0.9, bins=20)
plt.show()

print("==============密度图=================")
s.plot(kind='kde')
plt.show()

print("==============子图subplots(通常用这种)=================")
x = np.linspace(0, 10, 100)
figure, ax = plt.subplots(2, 2)
# One curve per cell of the 2x2 grid, row by row.
grid_cells = (
    (ax[0][0], siny, "red"),
    (ax[0][1], cosy, "blue"),
    (ax[1][0], siny, "green"),
    (ax[1][1], cosy, "black"),
)
for cell, curve, curve_color in grid_cells:
    cell.plot(x, curve, color=curve_color)
plt.show()

print("==============子图subplot=================")
# Define a two-row, one-column grid and switch to its first subplot.
plt.subplot(2, 1, 1)
plt.plot(x, siny, color="red")
def EV_Plot(dic_length_RNA, total_RNA, prefix, dic_YRNA_type):
    """Write the RNA length-distribution and composition plots and tables.

    Files written (each path is ``prefix`` plus the suffix):
    ``.length_RNA_counts.pdf`` / ``.length_RNA_counts.txt``,
    ``.length_RNA_percent.pdf``, ``.pie_RNA.pdf`` / ``.pie_RNA.txt`` and
    ``.pie_YRNA.pdf`` / ``.pie_YRNA.txt``.

    Args:
        dic_length_RNA: table with one column per RNA type; it is used
            through DataFrame methods (``fillna``, ``sort_index``, ``loc``).
            The rows are presumably read lengths -- TODO confirm with caller.
        total_RNA: divisor used to turn counts into percentages.
        prefix: string prepended to every output file name.
        dic_YRNA_type: data passed to ``Series``; the entries labelled
            Y5, Y4, Y3 and Y1 are plotted.
    """
    named_types = ['miRNA', 'YRNA', 'tsRNA', 'rsRNA', 'snoRNA', 'lncRNA', 'mRNA']
    length_ticks = ("17", "22", "27", "32", "37", "42", "47")

    def finish_length_plot(y_label, pdf_path):
        # Shared finishing steps of both stacked bar charts: fixed tick
        # labels, axis labels, then save and close the current figure.
        plt.xticks(range(0, 35, 5), length_ticks, fontsize=15, rotation=0)
        plt.xlabel("Length", fontsize=15)
        plt.ylabel(y_label, fontsize=15)
        plt.savefig(pdf_path, bbox_inches='tight')
        plt.close()

    ### length distribution of different RNA types
    table = dic_length_RNA.fillna(value=0).sort_index(axis=0, ascending=True)
    counts = table.loc[:, named_types]
    # Everything not covered by the named columns is collected as "others".
    leftover = pd.DataFrame(table.sum(axis=1) - counts.sum(axis=1),
                            columns=["others"])
    counts = counts.join(leftover)

    counts_pdf = prefix + ".length_RNA_counts.pdf"
    counts.plot(kind='bar', stacked=True, fontsize=15, color=colors,
                width=1, linewidth=0.01)
    finish_length_plot("Counts", counts_pdf)
    counts.to_csv(prefix + ".length_RNA_counts.txt", header=False, sep='\t')

    ### length plot percent
    percent = counts / total_RNA * 100
    percent_pdf = prefix + ".length_RNA_percent.pdf"
    percent.plot(kind='bar', stacked=True, figsize=(5.5, 4), fontsize=15,
                 color=colors, width=1, linewidth=0.01)
    finish_length_plot("Percent", percent_pdf)

    ### pie plot
    pie_pdf = prefix + ".pie_RNA.pdf"
    type_totals = counts.sum(axis=0)
    # The empty name presumably keeps the pie chart free of an axis label.
    type_totals.name = ''
    type_totals.plot(kind='pie', figsize=(6, 6), colors=colors,
                     autopct='%.1f', fontsize=15)
    type_totals = type_totals.fillna(value=0)
    type_totals.to_csv(prefix + ".pie_RNA.txt", header=False, sep='\t')
    plt.savefig(pie_pdf, bbox_inches='tight')
    plt.close()

    ### pie plot of YRNA type
    yrna = Series(dic_YRNA_type).fillna(value=0)
    # Fixed slice order; labels missing from the input are filled with 0.
    yrna = yrna.reindex(['Y5', 'Y4', 'Y3', 'Y1'], fill_value=0)
    yrna_pdf = prefix + ".pie_YRNA.pdf"
    yrna.name = ''
    yrna.plot(kind='pie', figsize=(4, 4), colors=colors, autopct='%.1f',
              fontsize=15)
    plt.savefig(yrna_pdf, bbox_inches='tight')
    plt.close()
    yrna.to_csv(prefix + ".pie_YRNA.txt", header=False, sep='\t')
# Draw the same red curve, axis labels and title on every axes.
for ax in axes:
    ax.plot(x, y, 'r')
    ax.set(xlabel='x', ylabel='y', title='title')
fig.tight_layout()
show()

##########################################################
# The .plot() method of Series and DataFrame objects is just a
# wrapper around plt.plot:
ts = Series(randn(1000), index=date_range('1/1/2000', periods=1000)).cumsum()
ts.plot()

# Four cumulative random walks sharing the index of the series above.
df = DataFrame(randn(1000, 4), index=ts.index, columns=list('ABCD')).cumsum()
plt.figure()
df.plot()
plt.legend(loc='best')
show()

##########################################################
# To switch to a logarithmic scale, pass the logy parameter.
#df.plot(logy = True)
plt.figure()
# Customise tick labels, axis label and title of the current plot.
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30, fontsize='small')
plt.yticks([-10, 0, 10], ['h', 'j', 'k'], rotation=0, color='r')
plt.ylabel('xx', color='g', fontsize=14, rotation=20)
plt.title('oh,you are so beautiful')
# Annotation: text offset by (10, 30) points from the data point, with an arrow.
ax.annotate('yes',
            xy=(667.88, -23.3617),
            xytext=(+10, +30),
            textcoords='offset points',
            fontsize=16,
            arrowprops=dict(arrowstyle='->'))

# plt.subplots creates the figure itself and returns (figure, axes array).
# Unpacking it avoids a leftover empty figure from a separate plt.figure()
# call and makes axes[0] / axes[1] real Axes objects usable as ax=...
fig, axes = plt.subplots(2, 1)
data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
ax1, ax2 = axes
plt.sca(ax1)  # draw on the first (upper) axes
data.plot(kind='bar', color='k', alpha=0.7)
plt.sca(ax2)  # draw on the second (lower) axes
data.plot(kind='barh', color='g', alpha=0.7)

tips = pd.read_csv('names/yob1880.txt', names=['name', 'sex', 'birth'])
# pd.Index takes `name` (singular); `names` is not an Index keyword.
df = pd.DataFrame(np.random.randn(6, 4),
                  index=['one', 'two', 'three', 'four', 'five', 'six'],
                  columns=pd.Index(['A', 'B', 'C', 'D'], name='haah'))
ao.shape

# Create as many monthly timestamps as there are rows in the data,
# starting in January 1950.
dates = pd.date_range('1950-01', periods=ao.shape[0], freq='M')

# shape of the date index
dates.shape

# Create a Series that pairs the third data column with the dates.
AO = Series(ao[:,2], index=dates)
AO

# plot the entire time series
AO.plot()

# save the current figure as a PDF
plt.savefig('AO_plot.pdf')

# other plots: just parts of the time series
AO['1980':'1990'].plot()
AO['1980-05':'1981-03'].plot()

# accessing values by position (.iloc) or by index label ([]);
# integer keys passed to [] rely on a deprecated positional fallback
AO.iloc[120]
AO['1960-01']
AO['1960']

#import additional data
# Finish the current plot: y range 0..1, labels, and a legend placed
# to the right of the axes.
plt.ylim([0,1])
plt.xlabel('Date')
plt.ylabel('distribution')
plt.title('distribution vs. Time')
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# plt.savefig('allocation.png')

# Plot stock prices and shifted returns, one above the other.
fig, axes = plt.subplots(nrows=2,ncols=1)
stock_price.plot(ax=axes[0])
shift_returns.plot(ax=axes[1])

axes[0].set_title('Stock Prices')
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Price')
axes[0].legend(loc='center left', bbox_to_anchor=(1, 0.5))

axes[1].set_title(str(shift)+ ' Day Shift returns')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('returns ' + str(shift) + ' Days Apart')
axes[1].legend(loc='center left', bbox_to_anchor=(1, 0.5))
# plt.savefig('stocks.png', pad_inches=1)
fig.tight_layout()

# Plot portfolio returns vs. time
plt.figure()
returns.plot()
plt.xlabel('Date')
plt.ylabel('Portfolio returns')  # label spelling now matches the title below
plt.title('Portfolio returns vs. Time')
# plt.savefig('returns.png')
plt.show()
def test_invalid_kind(self):
    """Plotting a Series with an unknown ``kind`` must raise ValueError."""
    series = Series([1, 2])
    bogus_kind = "aasdf"
    with pytest.raises(ValueError):
        series.plot(kind=bogus_kind)
myindex = ['강감찬', '홍길동', '이순신', '최영']
members = Series(data=[20, 60, 80, 40], index=myindex)
print(members)

# Examples by chart type
# kind: line, bar, barh, pie, kde (kernel density estimation)
# rot: rotation of the tick labels
# ylim: lower and upper limits of the y axis
# color: colours to use
# legend: the legend; label: text that goes into the legend
# stacked: stacked chart
members.plot(kind='bar',
             use_index=True,
             color=['r', 'g', 'b', 'y'],
             rot=0,
             ylim=[0, members.max() + 20])
# members.plot(kind='bar', use_index=False, color=['r','g','b','y'], rot=0, ylim=[0,members.max()+20])

plt.title('학생별 국어 시험')
plt.xlabel('학생 이름')
plt.ylabel('점수')
# plt.grid(True)

# Share of each member in the total, in percent.
ratio = 100 * members / members.sum()
print(ratio)

# The Series is labelled with strings, so positions are read with .iloc;
# integer keys passed to [] rely on a deprecated positional fallback.
for idx in range(members.size):
    value = str(members.iloc[idx]) + '건'  # e.g. 60건
    ratioval = '%.1f%%' % (ratio.iloc[idx])  # e.g. 20.0%
# Report R^2 and mean squared error of the predictions against dfTrain['Y1'].
r2 = r2_score(dfTrain['Y1'], predLabelTrain)
print('r-squared = ', r2)
mse = mean_squared_error(dfTrain['Y1'], predLabelTrain)
print('mse = {:12,.0f}'.format(mse))
# Pair each fitted coefficient with its column name (X1..X11) and sort by value.
lasso_coef = Series(
    glm_lasso.coef_, dfTrain[[
        'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
    ]].columns).sort_values()
print('type of lasso_coef:', type(lasso_coef))
# print(lasso_coef.size)
print('beta coefficient of variables')
# One line per variable: name and coefficient, in sorted order.
for i in range(lasso_coef.size):
    print('{:4s} : {:10.8f}'.format(lasso_coef.index.values[i],
                                    lasso_coef.values[i]))
# Bar chart of the sorted coefficients, saved to disk.
plt.figure(1)
lasso_coef.plot(kind='bar', grid=True)
plt.savefig("lasso_alpha0.png")
# NOTE(review): the banner printed here says alpha=100 but the model below is
# fitted with alpha=10 -- confirm which value is intended.
print('[alpha=100]')
# Refit on the same eleven columns and repeat the report.
glm_lasso = lm.Lasso(alpha=10).fit(
    dfTrain[[
        'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
    ]], dfTrain['Y1'])
predLabelTrain = glm_lasso.predict(dfTrain[[
    'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9', 'X10', 'X11'
]])
r2 = r2_score(dfTrain['Y1'], predLabelTrain)
print('r-squared = ', r2)
mse = mean_squared_error(dfTrain['Y1'], predLabelTrain)
print('mse = {:12,.0f}'.format(mse))
lasso_coef = Series(
    glm_lasso.coef_, dfTrain[[
def test_style_single_ok(self):
    """Plot with style "s" and color "C3"; the first line must report "C3"."""
    series = Series([1, 2])
    axes = series.plot(style="s", color="C3")
    first_line = axes.lines[0]
    assert first_line.get_color() == "C3"
def test_df_series_secondary_legend(self):
    """Check legend labels and y-axis visibility for DataFrame + Series plots.

    A three-column DataFrame is drawn on explicitly created axes, then a
    Series is added to the same axes with ``secondary_y=True`` and a legend.
    """
    # GH 9779
    frame = DataFrame(np.random.randn(30, 3), columns=list("abc"))
    series = Series(np.random.randn(30), name="x")

    def draw(**frame_options):
        # Fresh figure: DataFrame first, then the Series on the secondary
        # y-axis of the axes returned by the DataFrame plot.
        _, target = self.plt.subplots()
        target = frame.plot(ax=target, **frame_options)
        series.plot(ax=target, legend=True, secondary_y=True)
        return target

    # DataFrame on the primary axis, Series on the secondary axis.
    # The scenario is run twice, each time on a fresh figure.
    for _ in range(2):
        ax = draw()
        # the legend is checked on the left axes;
        # left and right axis must both be visible
        self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
        assert ax.get_yaxis().get_visible()
        assert ax.right_ax.get_yaxis().get_visible()
        tm.close()

    # DataFrame and Series both on the secondary axis, again run twice.
    all_right = ["a (right)", "b (right)", "c (right)", "x (right)"]
    for _ in range(2):
        ax = draw(secondary_y=True)
        # the legend is checked on the left axes;
        # left axis must be invisible and right axis must be visible
        self._check_legend_labels(ax.left_ax, labels=all_right)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

    # Same as above but with mark_right=False for the DataFrame, so only
    # the Series label carries the "(right)" suffix.
    ax = draw(secondary_y=True, mark_right=False)
    self._check_legend_labels(ax.left_ax, ["a", "b", "c", "x (right)"])
    assert not ax.left_ax.get_yaxis().get_visible()
    assert ax.get_yaxis().get_visible()
    tm.close()
#Splitting into Training and CV for Cross Validation
X = train.loc[:,['Outlet_Establishment_Year', 'Item_MRP']]
x_train, x_cv, y_train, y_cv = train_test_split(X, train.Item_Outlet_Sales)

#Lasso Regression
# NOTE(review): the `normalize` argument was removed from Lasso in
# scikit-learn 1.2 -- confirm the scikit-learn version this script targets.
lassoReg = Lasso(alpha=0.5, normalize=True)
lassoReg.fit(x_train,y_train)
pred = lassoReg.predict(x_cv)

#Calculating the mean squared error
mse = np.mean((pred - y_cv)**2)
print('Mean Squared Error:',mse)
print('Score:',lassoReg.score(x_cv,y_cv))

#Calculation of coefficients: one row per predictor column
coeff = DataFrame(x_train.columns)
coeff['Coefficient Estimate'] = Series(lassoReg.coef_)
print(coeff)

#Plotting Analysis through a Residual Plot (prediction vs. prediction error)
x_plot = plt.scatter(pred, (pred - y_cv), c='b')
plt.hlines(y=0, xmin=-1000, xmax=5000)
plt.title('Residual Plot')
plt.show()

#Magnitude of Coefficients, sorted and labelled by predictor name
predictors = x_train.columns
coef = Series(lassoReg.coef_,predictors).sort_values()
coef.plot(kind='bar', title='Model Coefficients')
plt.show()
ao = np.loadtxt('monthly.ao.index.b50.current.ascii') #loads data ao[0:2] ao.shape #displays number of rows and columns #Time Series dates = pd.date_range('1950-01', periods=ao.shape[0], freq='M') #creates range dates dates.shape #First Time Series AO = Series(ao[:,2], index = dates) AO AO.plot() #graph AO['1980':'1990'].plot() AO['1980-05':'1981-03'].plot() AO[120] #individual value AO['1960-01'] #by index AO['1960'] #by specified year AO[AO > 0] #subset of values #Data Frame #Download dataset (same procedure as begining of tutorial) !wget http://www.cpc.ncep.noaa.gov/products/precip/CWlink/pna/norm.nao.monthly.b5001.current.ascii nao = np.loadtxt('norm.nao.monthly.b5001.current.ascii') dates_nao = pd.date_range('1950-01', periods=nao.shape[0], freq='M')