def plot_call_summary(self):
		"""Plot a three-panel summary for the calls behind the slowest requests.

		The 1000 rows of ``self.df`` with the largest ``elapsed`` are selected
		and the distinct ``call`` values among them are collected.  For each
		such call, mean/count/max/min of ``elapsed`` are computed over every
		row of ``self.df`` for that call (not only the selected rows).

		The figure has three stacked subplots: rows with ``elapsed >= 5``
		plotted against ``timestamp``, per-call min/max/mean as horizontal
		bars, and per-call counts as horizontal bars.  Afterwards
		``self.plot_calls_distribution`` is called with the collected calls.
		"""
		# ``sort_index(by=...)`` is no longer accepted by pandas; ordering rows
		# by a column is done with ``sort_values``.
		top_n_expensive_calls = self.df.sort_values(by='elapsed', ascending=False)[:1000]
		calls = [call for call, _ in top_n_expensive_calls.groupby('call')]

		data = []
		for call in calls:
			# Statistics use all rows for this call, not just the top 1000.
			call_info = self.df[self.df['call'] == call]['elapsed']
			data.append({
				'call': call,
				'mean': call_info.mean(),
				'count': call_info.count(),
				'max': call_info.max(),
				'min': call_info.min(),
			})

		call_data_df = DataFrame(data)

		fig = plt.figure()
		fig.suptitle('{0}   -   {1}'.format(self.min_timestamp, self.max_timestamp), fontsize=16)

		# Panel 1: slow requests over time (the filter is inclusive of 5).
		ax = fig.add_subplot(3, 1, 1)
		slow_calls = self.df[self.df['elapsed'] >= 5].sort_values(by='timestamp')
		slow_calls.plot(title='Response time > 5 seconds', ax=ax, x='timestamp', y='elapsed')

		# Panel 2: min/max/mean response time per call.
		ax = fig.add_subplot(3, 1, 2)
		call_data_df[['call', 'min', 'max', 'mean']].set_index('call').plot(title='Response Time', ax=ax, kind='barh')

		# Panel 3: number of occurrences per call.
		ax = fig.add_subplot(3, 1, 3)
		call_data_df.plot(title='Call Count', ax=ax, x='call', y='count', kind='barh')

		fig.show()

		self.plot_calls_distribution(calls)
    def test_time(self):
        """Plot a frame indexed by ``datetime.time`` and verify x tick labels.

        Every non-empty tick label must equal the tick value (interpreted as
        seconds since midnight) formatted as ``HH:MM:SS``, both right after
        plotting and after the x limits are changed.
        """
        base = datetime(1, 1, 1, 3, 30, 0)
        offsets = np.random.randint(1, 20, 3).cumsum()
        ts = np.array([(base + timedelta(minutes=int(off))).time()
                       for off in offsets])
        df = DataFrame({'a': np.random.randn(len(ts)),
                        'b': np.random.randn(len(ts))},
                       index=ts)
        _, ax = self.plt.subplots()
        df.plot(ax=ax)

        def check_tick_labels():
            # Compare each rendered label with the formatted tick position.
            positions = ax.get_xticks()
            rendered = ax.get_xticklabels()
            for position, label in zip(positions, rendered):
                minutes, seconds = divmod(int(position), 60)
                hours, minutes = divmod(minutes, 60)
                text = label.get_text()
                if len(text) > 0:
                    expected = time(hours, minutes, seconds).strftime('%H:%M:%S')
                    assert text == expected

        # verify tick labels
        check_tick_labels()

        # change xlim
        ax.set_xlim('1:30', '5:00')

        # check tick labels again
        check_tick_labels()
 def test_fontsize_set_correctly(self):
     """The ``fontsize`` passed to ``plot`` is applied to x and y tick labels."""
     # For issue #8765
     frame = DataFrame(np.random.randn(10, 9), index=range(10))
     fig, ax = self.plt.subplots()
     frame.plot(fontsize=2, ax=ax)
     tick_labels = ax.get_xticklabels() + ax.get_yticklabels()
     for tick_label in tick_labels:
         assert tick_label.get_fontsize() == 2
Beispiel #4
0
    def test_bar_colors(self):
        """Bar face colours follow the rcParams cycle, or ``color`` when given."""
        import matplotlib.pyplot as plt
        import matplotlib.colors as colors

        cycle = plt.rcParams.get('axes.color_cycle')
        custom_colors = 'rgcby'

        plt.close('all')
        df = DataFrame(np.random.randn(5, 5))

        # Default colours: sample every 5th patch of the 5x5 bar plot.
        ax = df.plot(kind='bar')
        to_rgba = colors.colorConverter.to_rgba
        for idx, bar in enumerate(ax.patches[::5]):
            expected = to_rgba(cycle[idx % len(cycle)])
            actual = bar.get_facecolor()
            self.assert_(expected == actual)

        plt.close('all')

        # Explicit colours passed as a string of single-letter codes.
        ax = df.plot(kind='bar', color=custom_colors)
        to_rgba = colors.colorConverter.to_rgba
        for idx, bar in enumerate(ax.patches[::5]):
            expected = to_rgba(custom_colors[idx])
            actual = bar.get_facecolor()
            self.assert_(expected == actual)

        # A single-column frame with a named colour must plot without error.
        plt.close('all')
        df.ix[:, [0]].plot(kind='bar', color='DodgerBlue')
Beispiel #5
0
    def test_line_colors(self):
        """Line colours honour ``color`` and the ``colors`` keyword alike.

        stderr is swapped for an in-memory buffer around the ``colors=`` call
        and always restored afterwards.
        """
        import matplotlib.pyplot as plt
        import sys
        try:
            from StringIO import StringIO  # Python 2
        except ImportError:
            from io import StringIO  # Python 3

        custom_colors = 'rgcby'

        plt.close('all')
        df = DataFrame(np.random.randn(5, 5))

        ax = df.plot(color=custom_colors)

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = custom_colors[i]
            rs = l.get_color()
            self.assert_(xp == rs)

        tmp = sys.stderr
        sys.stderr = StringIO()
        try:
            plt.close('all')
            ax2 = df.plot(colors=custom_colors)
            lines2 = ax2.get_lines()
            for l1, l2 in zip(lines, lines2):
                # ``assert_(a, b)`` would use ``b`` as the failure message and
                # never compare the two colours; compare them explicitly.
                self.assertEqual(l1.get_color(), l2.get_color())
        finally:
            sys.stderr = tmp

        # make color a list if plotting one column frame
        # handles cases like df.plot(color='DodgerBlue')
        plt.close('all')
        df.ix[:, [0]].plot(color='DodgerBlue')
Beispiel #6
0
    def test_bar_colors(self):
        """Bar face colours match the default and the custom colour strings.

        One patch per column is sampled (every 5th rectangle of the 5x5 bar
        plot) and its face colour compared with the expected RGBA value.
        """
        import matplotlib.pyplot as plt
        import matplotlib.colors as colors

        default_colors = 'brgyk'
        custom_colors = 'rgcby'

        plt.close('all')
        df = DataFrame(np.random.randn(5, 5))
        ax = df.plot(kind='bar')

        rects = ax.patches

        conv = colors.colorConverter
        # ``rects[:5]`` would pick five bars of the same column; step by 5 to
        # get one bar per column.
        for i, rect in enumerate(rects[::5]):
            xp = conv.to_rgba(default_colors[i])
            rs = rect.get_facecolor()
            # ``assert_(xp, rs)`` only checked that ``xp`` is truthy.
            self.assert_(xp == rs)

        plt.close('all')

        ax = df.plot(kind='bar', color=custom_colors)

        rects = ax.patches

        conv = colors.colorConverter
        for i, rect in enumerate(rects[::5]):
            xp = conv.to_rgba(custom_colors[i])
            rs = rect.get_facecolor()
            self.assert_(xp == rs)
Beispiel #7
0
    def test_bar_colors(self):
        """Bar face colours match the default and the custom colour strings."""
        import matplotlib.pyplot as plt
        import matplotlib.colors as colors

        default_colors = "brgyk"
        custom_colors = "rgcby"

        plt.close("all")
        df = DataFrame(np.random.randn(5, 5))

        # Default palette: sample every 5th patch of the 5x5 bar plot.
        ax = df.plot(kind="bar")
        to_rgba = colors.colorConverter.to_rgba
        for idx, bar in enumerate(ax.patches[::5]):
            expected = to_rgba(default_colors[idx])
            actual = bar.get_facecolor()
            self.assert_(expected == actual)

        plt.close("all")

        # Explicit palette.
        ax = df.plot(kind="bar", color=custom_colors)
        to_rgba = colors.colorConverter.to_rgba
        for idx, bar in enumerate(ax.patches[::5]):
            expected = to_rgba(custom_colors[idx])
            actual = bar.get_facecolor()
            self.assert_(expected == actual)

        # A single-column frame with a named colour must plot without error.
        plt.close("all")
        df.ix[:, [0]].plot(kind="bar", color="DodgerBlue")
def fare_analyze(is_plot=True):
    """Print and plot fare statistics of ``titanic_df`` split by survival.

    Prints the head of the ``Fare`` column, then the mean and the standard
    deviation of ``Fare`` for rows with ``Survived == 0`` and
    ``Survived == 1``.  A histogram of all fares is always drawn; the bar
    chart of mean fare with standard-deviation error bars is drawn only
    when ``is_plot`` is true.

    Parameters
    ----------
    is_plot : bool
        Whether to draw the mean-fare bar chart.
    """
    fare_test = titanic_df['Fare']
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print('+' * 40)
    print(fare_test.head())

    fare_not_survived = titanic_df['Fare'][titanic_df['Survived'] == 0]
    fare_survived = titanic_df['Fare'][titanic_df['Survived'] == 1]

    # Mean and standard deviation of the fare for non-survivors / survivors
    # (row 0 = not survived, row 1 = survived).
    avgerage_fare = DataFrame([fare_not_survived.mean(), fare_survived.mean()])
    std_fare = DataFrame([fare_not_survived.std(), fare_survived.std()])

    print('-')
    print(avgerage_fare)
    print(std_fare)
    # plot
    titanic_df['Fare'].plot(kind='hist', figsize=(15, 3), bins=100, xlim=(0, 50))

    avgerage_fare.index.names = std_fare.index.names = ["Survived"]

    if is_plot:
        avgerage_fare.plot(yerr=std_fare, kind='bar', legend=False)
Beispiel #9
0
 def test_partially_invalid_plot_data(self):
     """Plotting an object frame with some string rows raises ``TypeError``."""
     frame = DataFrame(randn(10, 2), dtype=object)
     # Overwrite a random subset of rows with a non-numeric value.
     row_mask = np.random.rand(frame.shape[0]) > 0.5
     frame[row_mask] = "a"
     for plot_kind in ("line", "bar", "barh", "kde", "density"):
         with tm.assertRaises(TypeError):
             frame.plot(kind=plot_kind)
Beispiel #10
0
def plot_class_distribution(target, ax=None):
    """ Plot the distribution of the classes.

        Parameters
        ----------
        target : array
            The target column of the dataset.

        ax : Matplotlib Axes object, optional
            A matplotlib Axes instance. When ``None`` (the default) the
            current axes, as returned by ``plt.gca()``, are used.

        Returns
        -------
        ax : Matplotlib Axes object
            The matplotlib Axes instance where the figure is drawn.
    """

    # Test for None explicitly rather than relying on the truthiness of an
    # Axes object.
    if ax is None:
        ax = plt.gca()

    counts = DataFrame(target).apply(pd.value_counts)
    counts.plot(ax=ax, kind="bar", fontsize=12, legend=False)
    ax.set_xticklabels(labels=counts.index, rotation=0)

    def format_thousands(x, pos):
        # Tick formatter: integer value with thousands separators.
        return format(int(x), ',')

    ax.get_yaxis().set_major_formatter(FuncFormatter(format_thousands))
    ax.grid(False)

    return ax
Beispiel #11
0
def plot_scores(scores, title, x_label, classifier_names):
    """ Draw a bar chart of one performance measure per classifier and show it.

        Parameters
        ----------
        scores : dict
            Maps each classifier name to its score.

        title : str
            Title of the plot.

        x_label : str
            Used as the single row label of the score table; the x tick
            labels themselves are cleared before the figure is shown.

        classifier_names : array
            Classifier names in the order the bars should appear.
    """

    def as_percent(value, pos):
        # Tick formatter: fraction rendered as a whole-number percentage.
        return "{:.0f}%".format(value * 100)

    table = DataFrame(scores, index=[x_label])
    table = table.reindex(columns=classifier_names)

    fig, ax = plt.subplots(figsize=(9, 5))
    table.plot(ax=ax, kind="bar", title=title, fontsize=12)
    ax.legend(bbox_to_anchor=(1.5, 0.6))
    ax.set_xticklabels([], rotation=0)
    y_axis = ax.get_yaxis()
    y_axis.set_major_formatter(FuncFormatter(as_percent))

    plt.show()
Beispiel #12
0
    def test_line_colors(self):
        """Line colours honour ``color`` and the ``colors`` keyword alike.

        stderr is swapped for an in-memory buffer around the ``colors=`` call
        and always restored afterwards.
        """
        import matplotlib.pyplot as plt
        import sys
        try:
            from StringIO import StringIO  # Python 2
        except ImportError:
            from io import StringIO  # Python 3

        custom_colors = 'rgcby'

        plt.close('all')
        df = DataFrame(np.random.randn(5, 5))

        ax = df.plot(color=custom_colors)

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = custom_colors[i]
            rs = l.get_color()
            self.assert_(xp == rs)

        tmp = sys.stderr
        sys.stderr = StringIO()
        try:
            plt.close('all')
            ax2 = df.plot(colors=custom_colors)
            lines2 = ax2.get_lines()
            for l1, l2 in zip(lines, lines2):
                # ``assert_(a, b)`` would use ``b`` as the failure message and
                # never compare the two colours; compare them explicitly.
                self.assertEqual(l1.get_color(), l2.get_color())
        finally:
            sys.stderr = tmp
    def test_from_resampling_area_line_mixed(self):
        """Mix line and area plots of weekly and monthly frames on one axes.

        A weekly ("high") and a monthly ("low") frame of three columns each
        are plotted stacked onto the same axes, in both orders and with both
        line/area combinations.  For every resulting line the frequency of
        the x data, the x positions and the cumulative (stacked) y values
        are checked.
        """
        idxh = date_range("1/1/1999", periods=52, freq="W")
        idxl = date_range("1/1/1999", periods=12, freq="M")
        high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2])
        low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2])

        # low to high
        for kind1, kind2 in [("line", "area"), ("area", "line")]:
            ax = low.plot(kind=kind1, stacked=True)
            ax = high.plot(kind=kind2, stacked=True, ax=ax)

            # check low dataframe result
            # NOTE(review): presumably the monthly dates expressed as weekly
            # period ordinals -- confirm against the plotting code.
            expected_x = np.array(
                [1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], dtype=np.float64
            )
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                l = ax.lines[i]
                self.assertEqual(PeriodIndex(l.get_xdata()).freq, idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                # check stacked values are correct
                expected_y += low[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)

            # check high dataframe result
            expected_x = idxh.to_period().asi8.astype(np.float64)
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                # lines 3..5 belong to the frame plotted second
                l = ax.lines[3 + i]
                self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                expected_y += high[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)

        # high to low
        for kind1, kind2 in [("line", "area"), ("area", "line")]:
            ax = high.plot(kind=kind1, stacked=True)
            ax = low.plot(kind=kind2, stacked=True, ax=ax)

            # check high dataframe result
            expected_x = idxh.to_period().asi8.astype(np.float64)
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                l = ax.lines[i]
                self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                expected_y += high[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)

            # check low dataframe result
            expected_x = np.array(
                [1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], dtype=np.float64
            )
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                # lines 3..5 belong to the frame plotted second
                l = ax.lines[3 + i]
                self.assertEqual(PeriodIndex(data=l.get_xdata()).freq, idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                expected_y += low[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)
def slide_11():
    """Demonstrate bar plots for a Series, a DataFrame and a crosstab.

    Draws vertical and horizontal bar plots of a random Series, grouped and
    stacked bar plots of a random DataFrame, then reads the CSV at
    ``TIPSCSVPATH``, cross-tabulates ``day`` against ``sizes`` and plots the
    row-normalised counts as stacked bars.  Intermediate tables are printed.
    """
    fig, axes = plt.subplots(2, 1)
    data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))

    # Same data as vertical bars (top) and horizontal bars (bottom).
    data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7)
    data.plot(kind='barh', ax=axes[1], color='k', alpha=0.7)

    df = DataFrame(np.random.rand(6, 4),
                   index=['one', 'two', 'three', 'four', 'five', 'six'],
                   columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
    print df
    df.plot(kind='bar')
    df.plot(kind='barh', stacked=True, alpha=0.5)

    tips = pd.read_csv(TIPSCSVPATH)
    print tips.head()
    # Count rows per (day, party size) combination.
    party_counts = pd.crosstab(index=tips.day, columns=tips.sizes)
    print '曜日とパーティの大きさ別に仕分け'
    print party_counts
    # Keep only the columns selected by the 2:5 slice
    # (the printed message says sizes 1 and 6 are dropped as too rare).
    party_counts = party_counts.ix[:, 2: 5]
    print 'サイズ1と6のパーティは少ないから除外'
    print party_counts
    print '正規化'
    # Divide each row by its total so that every row sums to 1.
    party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
    print party_pcts
    party_pcts.plot(kind='bar', stacked=True)
class BJWorksAverageWage():
    """Fetch a table of yearly wage figures from a fixed URL and plot it.

    ``wagelist`` is a DataFrame with a single ``wage`` column; ``crawl``
    adds one row per table row, indexed by the parsed year.
    """

    def __init__(self):
        """Set the source URL and create the empty ``wage`` table."""
        self.url = 'http://www.bjrbj.gov.cn/bmfw/ywml/201601/t20160112_55858.html'
        self.wagelist = DataFrame(columns=['wage'])

    def crawl(self):
        """Download the page and fill ``wagelist`` from its first ``tbody``."""
        response = requests.get(self.url)
        document = etree.HTML(response.text)
        table_body = document.xpath('//*/tbody')[0]
        for row_no, row in enumerate(table_body):
            # Row 0 is skipped.
            if row_no == 0:
                continue
            # Row 1 keeps its text one element level higher than later rows.
            extra_level = row_no != 1
            year_node = row[0][0][0] if extra_level else row[0][0]
            year = datetime.strptime(year_node.text, '%Y')
            wage_node = row[1][0][0] if extra_level else row[1][0]
            wage = float(wage_node.text)
            self.wagelist.loc[year] = [wage]

    def save(self):
        """Placeholder; does nothing."""
        pass

    def plot(self):
        """Open a new figure and plot ``wagelist``."""
        import matplotlib.pyplot as plt
        plt.figure()
        self.wagelist.plot()
Beispiel #16
0
    def test_subplots(self):
        """Check legends and tick-label visibility of ``subplots=True`` plots."""
        df = DataFrame(np.random.rand(10, 3),
                       index=list(string.ascii_letters[:10]))

        def assert_all_visible(labels):
            for label in labels:
                self.assert_(label.get_visible())

        # With legend=True every subplot carries a legend.
        axes = df.plot(subplots=True, sharex=True, legend=True)
        for ax in axes:
            self.assert_(ax.get_legend() is not None)

        # Shared x axis: the leading axes (all but the last two) hide their
        # x tick labels but keep the y tick labels.
        axes = df.plot(subplots=True, sharex=True)
        for ax in axes[:-2]:
            for label in ax.get_xticklabels():
                self.assert_(not label.get_visible())
            assert_all_visible(ax.get_yticklabels())

        # The last axes shows both sets of tick labels.
        bottom = axes[-1]
        assert_all_visible(bottom.get_xticklabels())
        assert_all_visible(bottom.get_yticklabels())

        # Without sharing, every axes shows all tick labels.
        axes = df.plot(subplots=True, sharex=False)
        for ax in axes:
            assert_all_visible(ax.get_xticklabels())
            assert_all_visible(ax.get_yticklabels())
Beispiel #17
0
    def test_errorbar_plot(self):
        """Exercise the ``yerr``/``xerr`` options of ``DataFrame.plot``.

        Errors are supplied as DataFrame, Series, dict, raw array and column
        name; a wrong-length array must raise ``ValueError`` and
        non-numeric errors must raise ``TypeError``.
        """
        values = {'x': np.arange(12), 'y': np.arange(12, 0, -1)}
        df = DataFrame(values)
        d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4}
        df_err = DataFrame(d_err)

        # check line plots
        _check_plot_works(df.plot, yerr=df_err, logy=True)
        _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True)

        for plot_kind in ('line', 'bar', 'barh'):
            _check_plot_works(df.plot, yerr=df_err['x'], kind=plot_kind)
            _check_plot_works(df.plot, yerr=d_err, kind=plot_kind)
            _check_plot_works(df.plot, yerr=df_err, xerr=df_err,
                              kind=plot_kind)
            _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'],
                              kind=plot_kind)
            _check_plot_works(df.plot, yerr=df_err, xerr=df_err,
                              subplots=True, kind=plot_kind)

        shifted = df + 1
        _check_plot_works(shifted.plot, yerr=df_err, xerr=df_err,
                          kind='bar', log=True)

        # yerr is raw error values
        _check_plot_works(df['y'].plot, yerr=np.ones(12)*0.4)
        _check_plot_works(df.plot, yerr=np.ones((2, 12))*0.4)

        # yerr is column name
        df['yerr'] = np.ones(12)*0.2
        _check_plot_works(df.plot, y='y', x='x', yerr='yerr')

        # wrong number of error values
        with tm.assertRaises(ValueError):
            df.plot(yerr=np.random.randn(11))

        # non-numeric error values
        text_err = ['zzz']*12
        df_err = DataFrame({'x': text_err, 'y': ['zzz']*12})
        with tm.assertRaises(TypeError):
            df.plot(yerr=df_err)
Beispiel #18
0
    def test_from_resampling_area_line_mixed(self):
        """Mix line and area plots of weekly and monthly frames on one axes.

        A weekly ("high") and a monthly ("low") frame of three columns each
        are plotted stacked onto the same axes, in both orders and with both
        line/area combinations.  For every resulting line the frequency of
        the x data (must start with 'W'), the x positions and the cumulative
        (stacked) y values are checked.
        """
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = DataFrame(np.random.rand(len(idxh), 3),
                         index=idxh, columns=[0, 1, 2])
        low = DataFrame(np.random.rand(len(idxl), 3),
                     index=idxl, columns=[0, 1, 2])

        # low to high
        for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
            ax = low.plot(kind=kind1, stacked=True)
            ax = high.plot(kind=kind2, stacked=True, ax=ax)

            # check low dataframe result
            # NOTE(review): presumably the monthly dates expressed as weekly
            # period ordinals -- confirm against the plotting code.
            expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549,
                                   1553, 1558, 1562])
            expected_y = np.zeros(len(expected_x))
            for i in range(3):
                l = ax.lines[i]
                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                # check stacked values are correct
                expected_y += low[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)

            # check high dataframe result
            expected_x = idxh.to_period().asi8
            expected_y = np.zeros(len(expected_x))
            for i in range(3):
                # lines 3..5 belong to the frame plotted second
                l = ax.lines[3 + i]
                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                expected_y += high[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)

        # high to low
        for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
            ax = high.plot(kind=kind1, stacked=True)
            ax = low.plot(kind=kind2, stacked=True, ax=ax)

            # check high dataframe result
            expected_x = idxh.to_period().asi8
            expected_y = np.zeros(len(expected_x))
            for i in range(3):
                l = ax.lines[i]
                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                expected_y += high[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)

            # check low dataframe result
            expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549,
                                   1553, 1558, 1562])
            expected_y = np.zeros(len(expected_x))
            for i in range(3):
                # lines 3..5 belong to the frame plotted second
                l = ax.lines[3 + i]
                self.assertTrue(PeriodIndex(data=l.get_xdata()).freq.startswith('W'))
                self.assert_numpy_array_equal(l.get_xdata(orig=False), expected_x)
                expected_y += low[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False), expected_y)
Beispiel #19
0
 def test_partially_invalid_plot_data(self):
     """Plotting an object frame with some string rows raises ``TypeError``."""
     frame = DataFrame(randn(10, 2), dtype=object)
     # Overwrite a random subset of rows with a non-numeric value.
     row_mask = np.random.rand(frame.shape[0]) > 0.5
     frame[row_mask] = 'a'
     for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
         with tm.assertRaises(TypeError):
             frame.plot(kind=plot_kind)
Beispiel #20
0
    def test_bar_colors(self):
        """Bar face colours follow the default cycle, ``color`` and ``colormap``.

        For each variant, every 5th patch of the 5x5 bar plot is sampled and
        its face colour compared with the expected RGBA value.
        """
        import matplotlib.pyplot as plt
        import matplotlib.colors as colors

        default_colors = plt.rcParams.get('axes.color_cycle')
        custom_colors = 'rgcby'

        df = DataFrame(randn(5, 5))
        ax = df.plot(kind='bar')

        rects = ax.patches

        conv = colors.colorConverter
        for i, rect in enumerate(rects[::5]):
            # wrap around in case the cycle is shorter than the column count
            xp = conv.to_rgba(default_colors[i % len(default_colors)])
            rs = rect.get_facecolor()
            self.assertEqual(xp, rs)

        tm.close()

        # Explicit colours given as a string of single-letter codes
        ax = df.plot(kind='bar', color=custom_colors)

        rects = ax.patches

        conv = colors.colorConverter
        for i, rect in enumerate(rects[::5]):
            xp = conv.to_rgba(custom_colors[i])
            rs = rect.get_facecolor()
            self.assertEqual(xp, rs)

        tm.close()
        from matplotlib import cm

        # Test str -> colormap functionality
        ax = df.plot(kind='bar', colormap='jet')

        rects = ax.patches

        # expected colours: the colormap sampled at 5 evenly spaced points
        rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
        for i, rect in enumerate(rects[::5]):
            xp = rgba_colors[i]
            rs = rect.get_facecolor()
            self.assertEqual(xp, rs)

        tm.close()

        # Test colormap functionality
        ax = df.plot(kind='bar', colormap=cm.jet)

        rects = ax.patches

        rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
        for i, rect in enumerate(rects[::5]):
            xp = rgba_colors[i]
            rs = rect.get_facecolor()
            self.assertEqual(xp, rs)

        # A single-column frame with a named colour must plot without error
        tm.close()
        df.ix[:, [0]].plot(kind='bar', color='DodgerBlue')
Beispiel #21
0
    def test_rotation(self):
        """x tick labels are unrotated by default and follow ``rot`` when set."""
        frame = DataFrame(randn(5, 5))

        # Default rot 0
        default_axes = frame.plot()
        self._check_ticks_props(default_axes, xrot=0)

        rotated_axes = frame.plot(rot=30)
        self._check_ticks_props(rotated_axes, xrot=30)
Beispiel #22
0
    def test_line_colors(self):
        """Line colours honour ``color``, ``colors`` and ``colormap``.

        stderr is swapped for an in-memory buffer around the ``colors=`` call
        and always restored afterwards.
        """
        import matplotlib.pyplot as plt
        import sys
        try:
            from StringIO import StringIO  # Python 2
        except ImportError:
            from io import StringIO  # Python 3
        from matplotlib import cm

        custom_colors = "rgcby"

        plt.close("all")
        df = DataFrame(np.random.randn(5, 5))

        ax = df.plot(color=custom_colors)

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = custom_colors[i]
            rs = l.get_color()
            self.assert_(xp == rs)

        tmp = sys.stderr
        sys.stderr = StringIO()
        try:
            plt.close("all")
            ax2 = df.plot(colors=custom_colors)
            lines2 = ax2.get_lines()
            for l1, l2 in zip(lines, lines2):
                # ``assert_(a, b)`` would use ``b`` as the failure message and
                # never compare the two colours; compare them explicitly.
                self.assertEqual(l1.get_color(), l2.get_color())
        finally:
            sys.stderr = tmp

        plt.close("all")

        # Colormap given by name.
        ax = df.plot(colormap="jet")

        # ``list(...)`` keeps the result indexable where ``map`` is lazy.
        rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df))))

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = rgba_colors[i]
            rs = l.get_color()
            self.assert_(xp == rs)

        plt.close("all")

        # Colormap given as an object.
        ax = df.plot(colormap=cm.jet)

        rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df))))

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = rgba_colors[i]
            rs = l.get_color()
            self.assert_(xp == rs)

        # make color a list if plotting one column frame
        # handles cases like df.plot(color='DodgerBlue')
        plt.close("all")
        df.ix[:, [0]].plot(color="DodgerBlue")
Beispiel #23
0
    def test_df_series_secondary_legend(self):
        """Legend labels and y-axis visibility when a Series joins a frame plot.

        A three-column frame is plotted on the primary or secondary y axis,
        then a Series is added on the secondary axis, with and without
        passing ``ax`` explicitly.  Each case checks the legend labels and
        which y axes are visible.
        """
        # GH 9779
        df = DataFrame(np.random.randn(30, 3), columns=list('abc'))
        s = Series(np.random.randn(30), name='x')

        # primary -> secondary (without passing ax)
        ax = df.plot()
        s.plot(legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)'])
        self.assertTrue(ax.get_yaxis().get_visible())
        self.assertTrue(ax.right_ax.get_yaxis().get_visible())
        tm.close()

        # primary -> secondary (with passing ax)
        ax = df.plot()
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=['a', 'b', 'c', 'x (right)'])
        self.assertTrue(ax.get_yaxis().get_visible())
        self.assertTrue(ax.right_ax.get_yaxis().get_visible())
        tm.close()

        # secondary -> secondary (without passing ax)
        ax = df.plot(secondary_y=True)
        s.plot(legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)']
        self._check_legend_labels(ax.left_ax, labels=expected)
        self.assertFalse(ax.left_ax.get_yaxis().get_visible())
        self.assertTrue(ax.get_yaxis().get_visible())
        tm.close()

        # secondary -> secondary (with passing ax)
        ax = df.plot(secondary_y=True)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ['a (right)', 'b (right)', 'c (right)', 'x (right)']
        self._check_legend_labels(ax.left_ax, expected)
        self.assertFalse(ax.left_ax.get_yaxis().get_visible())
        self.assertTrue(ax.get_yaxis().get_visible())
        tm.close()

        # secondary -> secondary (with passing ax), frame plotted with
        # mark_right=False so its labels carry no "(right)" suffix
        ax = df.plot(secondary_y=True, mark_right=False)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ['a', 'b', 'c', 'x (right)']
        self._check_legend_labels(ax.left_ax, expected)
        self.assertFalse(ax.left_ax.get_yaxis().get_visible())
        self.assertTrue(ax.get_yaxis().get_visible())
        tm.close()
def slide_10():
    """Plot a cumulative-sum Series and a four-column cumulative-sum frame.

    Both use the index 0, 10, ..., 90; the Series is printed before plotting.
    """
    s = Series(randn(10).cumsum(), index=np.arange(0, 100, 10))
    print s
    s.plot()

    # cumsum(0) accumulates down the rows, independently per column.
    df = DataFrame(randn(10, 4).cumsum(0),
                   columns=['A', 'B', 'C', 'D'],
                   index=np.arange(0, 100, 10))
    df.plot()
Beispiel #25
0
 def test_nonzero_base(self):
     """Hourly data offset by 30 minutes must not plot as normalized x data."""
     # GH2571
     half_hour = timedelta(minutes=30)
     idx = date_range('2012-12-20', periods=24, freq='H') + half_hour
     frame = DataFrame(np.arange(24), index=idx)
     _, ax = self.plt.subplots()
     frame.plot(ax=ax)
     first_line = ax.get_lines()[0]
     x_values = first_line.get_xdata()
     assert not Index(x_values).is_normalized
Beispiel #26
0
    def test_line_colors(self):
        """Line colours honour ``color``, ``colors`` and ``colormap``.

        stderr is swapped for an in-memory buffer around the ``colors=`` call
        and always restored afterwards.
        """
        import matplotlib.pyplot as plt
        import sys
        from matplotlib import cm

        custom_colors = 'rgcby'

        df = DataFrame(randn(5, 5))

        ax = df.plot(color=custom_colors)

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = custom_colors[i]
            rs = l.get_color()
            self.assertEqual(xp, rs)

        # Replace stderr while the ``colors=`` keyword is exercised
        tmp = sys.stderr
        sys.stderr = StringIO()
        try:
            tm.close()
            ax2 = df.plot(colors=custom_colors)
            lines2 = ax2.get_lines()
            # ``colors=`` must yield the same line colours as ``color=``
            for l1, l2 in zip(lines, lines2):
                self.assertEqual(l1.get_color(), l2.get_color())
        finally:
            sys.stderr = tmp

        tm.close()

        # Colormap given by name
        ax = df.plot(colormap='jet')

        # expected colours: the colormap sampled at len(df) evenly spaced points
        rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = rgba_colors[i]
            rs = l.get_color()
            self.assertEqual(xp, rs)

        tm.close()

        # Colormap given as an object
        ax = df.plot(colormap=cm.jet)

        rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))

        lines = ax.get_lines()
        for i, l in enumerate(lines):
            xp = rgba_colors[i]
            rs = l.get_color()
            self.assertEqual(xp, rs)

        # make color a list if plotting one column frame
        # handles cases like df.plot(color='DodgerBlue')
        tm.close()
        df.ix[:, [0]].plot(color='DodgerBlue')
Beispiel #27
0
    def test_nonnumeric_exclude(self):
        """Non-numeric columns are skipped by frame plots; alone they raise."""
        years = date_range('1/1/1987', freq='A', periods=3)
        frame = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}, years)

        fig, ax = self.plt.subplots()
        frame.plot(ax=ax)  # it works
        plotted_lines = ax.get_lines()
        assert len(plotted_lines) == 1  # B was plotted
        self.plt.close(fig)

        # Plotting the string column on its own is a TypeError.
        string_plot = frame['A'].plot
        pytest.raises(TypeError, string_plot)
Beispiel #28
0
 def test_kde(self):
     """KDE plots work, carry a legend, and honour ``logy`` per subplot."""
     _skip_if_no_scipy()
     frame = DataFrame(randn(100, 4))
     _check_plot_works(frame.plot, kind='kde')
     _check_plot_works(frame.plot, kind='kde', subplots=True)

     single_ax = frame.plot(kind='kde')
     self.assert_(single_ax.get_legend() is not None)

     for log_ax in frame.plot(kind='kde', logy=True, subplots=True):
         self.assert_(log_ax.get_yscale() == 'log')
Beispiel #29
0
    def test_rotation(self):
        """x tick labels are unrotated by default and follow ``rot`` when set."""
        frame = DataFrame(randn(5, 5))

        # Default rot 0
        _, default_ax = self.plt.subplots()
        default_axes = frame.plot(ax=default_ax)
        self._check_ticks_props(default_axes, xrot=0)

        _, rotated_ax = self.plt.subplots()
        rotated_axes = frame.plot(rot=30, ax=rotated_ax)
        self._check_ticks_props(rotated_axes, xrot=30)
Beispiel #30
0
    def test_label(self):
        """The x-axis label is the ``x`` column name, or ``label`` when given."""
        import matplotlib.pyplot as plt

        def xlabel_text(axes):
            return axes.xaxis.get_label().get_text()

        plt.close('all')
        frame = DataFrame(np.random.randn(10, 3), columns=['a', 'b', 'c'])
        default_axes = frame.plot(x='a', y='b')
        self.assert_(xlabel_text(default_axes) == 'a')

        plt.close('all')
        labelled_axes = frame.plot(x='a', y='b', label='LABEL')
        self.assert_(xlabel_text(labelled_axes) == 'LABEL')
        # report performance
        rmse = sqrt(mean_squared_error(raw_values[-12:], predictions))
        print('%d) Test RMSE: %.3f' % (r + 1, rmse))
        error_scores.append(rmse)
    return error_scores, raw_values[-12:], predictions


# load the dataset
series = read_csv('sales-of-shampoo-over-a-three-ye.csv',
                  header=0,
                  parse_dates=[0],
                  index_col=0,
                  squeeze=True,
                  date_parser=parser)
print(series)
# configure the experiment
n_lag = 1
n_repeats = 30
n_epochs = 1000
n_batch = 4
n_neurons = 3
results = DataFrame()
results['results'], raw_values1, predictions = experiment(
    series, n_lag, n_repeats, n_epochs, n_batch, n_neurons)

# summarize results
print(results.describe())
results.plot(title="LSTM RMSE Iteration")
# Save before show(): once the shown window is closed the figure is gone,
# so a later savefig() would typically write an empty image.
pyplot.savefig('plot_lstm_rmse.png')
pyplot.show()
Beispiel #32
0
    def test_invalid_colormap(self):
        """An unknown colormap name makes ``plot`` raise ``ValueError``."""
        frame = DataFrame(randn(3, 2), columns=list('AB'))

        with tm.assertRaises(ValueError):
            frame.plot(colormap='invalid_colormap')
def demo2():
    """Plot four cumulative-sum random columns with the 'o--' style and show them."""
    cumulative = np.random.randn(10, 4).cumsum(0)
    column_names = ['a', 'b', 'c', 'd']
    row_labels = np.arange(0, 100, 10)
    frame = DataFrame(cumulative, columns=column_names, index=row_labels)
    frame.plot(style='o--')
    plt.show()
Beispiel #34
0
 def test_explicit_label(self):
     """Passing ``label`` to ``plot`` sets the x-axis label text."""
     frame = DataFrame(randn(10, 3), columns=['a', 'b', 'c'])
     axis = frame.plot(x='a', y='b', label='LABEL')
     label_text = axis.xaxis.get_label().get_text()
     self.assertEqual(label_text, 'LABEL')
    Selected = pd.DataFrame(S)
    Features = New_File.iloc[:,1:New_File.shape[1]-1]
    names = pd.DataFrame(Features.columns)
    list = pd.concat([names, Selected], axis=1)
    list.columns=['Feature', 'Boruta_ranking']
    #call transform() on X to filter it down to selected features
    X_filtered = boruta.transform(Xs.values)
    X_filtered = list.loc[(list['Boruta_ranking'] <= 15)] 
    X_filtered.to_csv(filenamee+'_Filtered_boruta_FS.csv')
    df = pd.DataFrame(New_File)
    df = df.loc[:, X_filtered['Feature']]
    df = pd.concat([study_1,df,outc_1], axis=1)
    df.to_csv(filenamee+'_Feature_selected.csv')

    X_filtered1 = DataFrame(X_filtered,columns=['Feature','Boruta_ranking'])
    X_filtered1.plot(x ='Feature', y='Boruta_ranking', kind = 'line')
    plt.xticks(rotation=30, ha='right')
    plt.ylabel('Boruta_Ranking')
    plt.xlabel('Feature selected')
    plt.tight_layout()
    plt.savefig(filenamee+'_Filtered_boruta_FS.pdf')
    s3_resource.meta.client.upload_file( 
        Filename=filenamee+'_Filtered_boruta_FS.csv',Bucket='superlearner',Key=filenamee+'_Filtered_boruta_FS.csv')
    s3_resource.meta.client.upload_file(
        Filename=filenamee+'_Feature_selected.csv',Bucket='superlearner',Key=filenamee+'_Feature_selected.csv')
    
    key= filenamee+'_Filtered_boruta_FS.csv'
    key_two =filenamee+'_Feature_selected.csv'
    bucket = 'superlearner'
    New_url =  f"https://{bucket}.s3.eu-west-2.amazonaws.com/{key}"
    New_url_two =  f"https://{bucket}.s3.eu-west-2.amazonaws.com/{key_two}"
Beispiel #36
0
    # 5.2 Measure the variance between the data and the values predicted by the model
    # You can measure whether the results of your model fit the underlying data by using the residual
    # sum of squares (RSS) metric. A small RSS indicates that the model fits tightly to the data.
    #
    # Yet another approach to validate the ARIMA model appropriateness is by performing residual analysis.
    #
    # Print the results of the ARIMA model and plot the residuals. A density plot of the residual error values
    # indicates a normal distribution centered around zero mean. Also, the residuals do not violate the assumptions of
    # constant location and scale with most values in the range (-1,1).



# Print the fitted model summary.
print(results_ARIMA.summary())
# Plot the residual errors as a kernel density estimate and print their
# summary statistics.
residuals = DataFrame(results_ARIMA.resid)
residuals.plot(kind='kde')
print(residuals.describe())


# 5.3 Scale predictions
# Now that the model is returning the results you want to see, you
# can scale the model predictions back to the original scale. For this, you will remove the first-order
# differencing and take the exponent to restore the predictions to their original scale.
#
# The lower the root mean square error (RMSE) and the closer it is to 0, the closer the model
# predictions are to the actual values.

# Copy the fitted values into a standalone Series and show its first rows.
euro_predictions_ARIMA_diff = pd.Series(results_ARIMA.fittedvalues, copy=True)
print (euro_predictions_ARIMA_diff.head())

Beispiel #37
0
         'Temperature': [data[0][2], data[1][2], data[2][2], data[3][2], data[4][2], data[5][2], data[6][2], data[7][2],
                         data[8][2], data[9][2], data[10][2], data[11][2], data[12][2], data[13][2], data[14][2]]
         }
# Frame holding the 'Machine' / 'Temperature' columns of data2.
df2 = DataFrame(data2, columns=['Machine', 'Temperature'])

# Main Tk window that hosts both embedded matplotlib figures.
root = tk.Tk()
root.title("Machine's Temperatures")
root.grid()
# root.geometry("1200x1200")

# Bar chart: summed temperature per machine, embedded at grid cell (14, 10).
figure1 = plt.Figure(figsize=(4, 4), dpi=100)
ax1 = figure1.add_subplot(111)
bar1 = FigureCanvasTkAgg(figure1, root)
bar1.get_tk_widget().grid(row=14, column=10)
# NOTE(review): df1 is not created in this part of the script; presumably it
# is built earlier from a similar dict — confirm.
df1 = df1[['Machine', 'Temperature']].groupby('Machine').sum()
df1.plot(kind='bar', legend=True, ax=ax1)
ax1.set_title('Machine Vs. Temperature')

# Line chart: summed temperature per machine, embedded at grid cell (14, 12).
figure2 = plt.Figure(figsize=(4, 4), dpi=100)
ax2 = figure2.add_subplot(111)
line2 = FigureCanvasTkAgg(figure2, root)
line2.get_tk_widget().grid(row=14, column=12)
df2 = df2[['Machine', 'Temperature']].groupby('Machine').sum()
df2.plot(kind='line', legend=True, ax=ax2, color='r', marker='o', fontsize=10)
ax2.set_title('Machine Vs. Temperature')

# Menu bar with a 'File' cascade; 'New' has no command callback attached here.
menu = Menu(root)
root.config(menu=menu)
filemenu = Menu(menu)
menu.add_cascade(label='File', menu=filemenu)
filemenu.add_command(label='New')
def plot_scatter(minsize, r):
    """Scatter '#Positive' against '#Negative' for every category group.

    Loads the category dictionary, splits the categories into four groups via
    ``get_strats(minsize, r, cd)`` and draws each group, the categories that
    have a URL pattern, and a dashed diagonal reference line on one shared
    axes, then shows the figure.

    ``minsize`` and ``r`` are passed through to ``get_strats``; ``r`` also
    appears in the plot title.
    """
    cd = load_catdict()
    cat_strats, article_strats = get_strats(minsize, r, cd)
    lonelies_cats, tps_cats, tns_cats, mixed_cats = cat_strats

    positive_url_cats = [
        name for name in cd
        if cd[name]["URLPattern"] or cd[name]["URLBracesPattern"]
    ]

    def counts_frame(categories):
        # One row per category holding its '#Positive' / '#Negative' counts.
        frame = DataFrame(columns=['#Positive', '#Negative'],
                          index=categories)
        for name in categories:
            entry = cd[name]
            frame.loc[name] = Series({
                '#Positive': entry["#Positive"],
                '#Negative': entry["#Negative"]
            })
        return frame

    lonelies = counts_frame(lonelies_cats)
    tps = counts_frame(tps_cats)
    tns = counts_frame(tns_cats)
    mixed = counts_frame(mixed_cats)
    positive_url = counts_frame(positive_url_cats)

    # Diagonal reference line: the i-th key (1-based) gets the point (i, i)
    # for i below 2000; rows for later keys stay empty.
    linedf = DataFrame(columns=['#Positive', '#Negative'], index=cd.keys())
    for position, name in enumerate(cd, start=1):
        if position >= 2000:
            break
        linedf.loc[name] = Series({
            '#Positive': position,
            '#Negative': position
        })

    fig, ax = plt.subplots(nrows=1, ncols=1)
    scatter_base = dict(x="#Positive", y="#Negative", kind="scatter", ax=ax)
    mixed.plot(color="blue", loglog=False, **scatter_base)
    lonelies.plot(color="grey", marker="x", **scatter_base)
    tps.plot(color="green", **scatter_base)
    tns.plot(color="black", **scatter_base)
    linedf.plot(x="#Positive", y="#Negative", ls="--", ax=ax, color="grey")
    positive_url.plot(marker="x", color="pink", **scatter_base)

    ax.set_title('Assessing Categories - ' + str(r))
    # NOTE(review): five labels are given for six plotted artists and the line
    # is not drawn first — confirm the labels land on the intended artists.
    legend_labels = [
        "Regression Line", "Mixed", "Too Small", "Positive", "Negative"
    ]
    ax.legend(legend_labels)
    plt.gca().set_aspect('equal', adjustable='box')

    plt.show()
Beispiel #39
0
 def test_secondary_bar_frame(self):
     """Bar subplots for columns listed in ``secondary_y`` tick on the right."""
     frame = DataFrame(np.random.randn(5, 3), columns=['a', 'b', 'c'])
     axes = frame.plot(kind='bar', secondary_y=['a', 'c'], subplots=True)
     # Expected y tick position of each subplot, in column order a, b, c.
     expected_positions = ['right', 'default', 'right']
     for position, expected in enumerate(expected_positions):
         y_axis = axes[position].get_yaxis()
         self.assertEqual(y_axis.get_ticks_position(), expected)
Beispiel #40
0
 def test_dataframe(self):
     """The first plotted line's x data equals the frame index as periods."""
     frame = DataFrame({'a': tm.makeTimeSeries()})
     first_line = frame.plot().get_lines()[0]
     plotted_x = first_line.get_xdata()
     assert_array_equal(frame.index.to_period(), plotted_x)
Beispiel #41
0
 def test_invalid_kind(self):
     """An unknown ``kind`` value makes ``plot`` raise ``ValueError``."""
     frame = DataFrame(randn(10, 2))
     bogus_kind = 'aasdf'
     with tm.assertRaises(ValueError):
         frame.plot(kind=bogus_kind)
Beispiel #42
0
 def test_all_invalid_plot_data(self):
     """Each listed plot kind raises ``TypeError`` for an all-string frame."""
     text_only = DataFrame(list('abcd'))
     for plot_kind in ('line', 'bar', 'barh', 'kde', 'density'):
         with tm.assertRaises(TypeError):
             text_only.plot(kind=plot_kind)
Beispiel #43
0
 def test_rotation(self):
     """Every x tick label reports the rotation passed as ``rot``."""
     frame = DataFrame(randn(5, 5))
     tick_labels = frame.plot(rot=30).get_xticklabels()
     for tick_label in tick_labels:
         self.assertEqual(tick_label.get_rotation(), 30)
Beispiel #44
0
	    arr1 = np.array(data);    
	    indexs = [timeformat(x) for x in arr1[:,0:1].ravel()];
	    values = [x for x in arr1[:,1:2].ravel()]
	    count = 0
	    for index in indexs:
	    	if((fundCode in all_data)==False):
	    		all_data[fundCode]={}
	    	all_data[fundCode][index]=values[count];
	    	count=count+1


# Save the data
# Two stacked axes: basic info on top, the time series underneath.
fig,axes = plt.subplots(2, 1)
# Process the basic information
df2 = DataFrame(all_data_base)
print(df2)
# The spreadsheet name embeds the current timestamp.
df2.stack().unstack(0).to_excel(f'result_{time.time()}.xlsx',sheet_name='out')
# Horizontal bars for rows 1-4 of the basic-info frame.
df2.iloc[1:5,:].plot.barh(ax=axes[0],grid=True,fontsize=25)

# Process the returns
# Sort by index and forward-fill gaps.
df=DataFrame(all_data).sort_index().fillna(method='ffill')
print(df)
df.plot(ax=axes[1],grid=True,fontsize=25)


fig.set_size_inches(20, 20)
# time.time() is evaluated again here, so the PNG name differs from the
# spreadsheet name written above.
fig.savefig(f'result_{time.time()}.png')


# https://www.zhihu.com/question/25404709 garbled Chinese characters in matplotlib legends
Beispiel #45
0
    def test_subplots_multiple_axes(self):
        """Check ``plot(subplots=True, ax=...)`` when caller-supplied axes are passed.

        Covers drawing onto single rows of a 2x3 grid, the error raised when
        the number of axes does not match, 2-D axes combined with a ``layout``
        argument, and a single-column frame on a single axes.
        """
        # GH 5353, 6970, GH 7069
        fig, axes = self.plt.subplots(2, 3)
        df = DataFrame(np.random.rand(10, 3),
                       index=list(string.ascii_letters[:10]))

        # draw on first row
        returned = df.plot(subplots=True,
                           ax=axes[0],
                           sharex=False,
                           sharey=False)
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        assert returned.shape == (3, )
        assert returned[0].figure is fig
        # draw on second row
        returned = df.plot(subplots=True,
                           ax=axes[1],
                           sharex=False,
                           sharey=False)
        self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
        assert returned.shape == (3, )
        assert returned[0].figure is fig
        # the original 2x3 grid must be untouched
        self._check_axes_shape(axes, axes_num=6, layout=(2, 3))
        tm.close()

        with pytest.raises(ValueError):
            fig, axes = self.plt.subplots(2, 3)
            # pass different number of axes from required
            df.plot(subplots=True, ax=axes)

        # pass 2-dim axes and invalid layout
        # invalid layout should not affect the input or the return value
        # (the warning itself is tested in
        # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes)
        fig, axes = self.plt.subplots(2, 2)
        with warnings.catch_warnings():
            # silence UserWarning while plotting with a layout argument
            warnings.simplefilter("ignore", UserWarning)
            df = DataFrame(np.random.rand(10, 4),
                           index=list(string.ascii_letters[:10]))

            returned = df.plot(subplots=True,
                               ax=axes,
                               layout=(2, 1),
                               sharex=False,
                               sharey=False)
            self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
            assert returned.shape == (4, )

            returned = df.plot(subplots=True,
                               ax=axes,
                               layout=(2, -1),
                               sharex=False,
                               sharey=False)
            self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
            assert returned.shape == (4, )

            returned = df.plot(subplots=True,
                               ax=axes,
                               layout=(-1, 2),
                               sharex=False,
                               sharey=False)
        # these two checks run after the warnings filter has been restored
        self._check_axes_shape(returned, axes_num=4, layout=(2, 2))
        assert returned.shape == (4, )

        # single column
        fig, axes = self.plt.subplots(1, 1)
        df = DataFrame(np.random.rand(10, 1),
                       index=list(string.ascii_letters[:10]))

        axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False)
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
        assert axes.shape == (1, )
def plot(prefix, header, row):
    """Plot the per-segment metrics of one job as time series and histograms.

    ``header`` and ``row`` are zipped into a mapping.  Its ``jobid`` entry
    names the job; every other entry is a ``:``-separated list of numbers,
    one value per segment.  Metrics whose values sum to zero are skipped.

    Files written (``fileformat`` is a module-level setting):

    * ``<prefix>timeseries<jobid><fileformat>``
    * ``<prefix>hist<jobid><fileformat>``
    * ``<prefix>timeseries<jobid>-30<fileformat>``, limited to x in (0, 30),
      only when more than 50 segments were plotted.

    Prints a message and returns without plotting when no metric has data.
    Raises ``KeyError`` when ``header`` has no ``jobid`` column.
    """
    fields = dict(zip(header, row))
    jobid = fields.pop("jobid")

    # Long-format rows: [metric name, segment index, value].
    result = []
    for metric, raw in fields.items():
        values = [float(token) for token in raw.split(":")]
        if sum(values) == 0:
            continue
        result.extend(
            [metric, segment, value] for segment, value in enumerate(values))

    if not result:
        print("Empty job! Cannot plot!")
        return

    data = DataFrame(result, columns=["metrics", "segment", "value"])
    # Shorter display names for two of the metrics.
    display_names = {
        "md_file_delete": "file_delete",
        "md_file_create": "file_create",
    }
    metrics = DataFrame()
    labels = []
    colors = []
    style = []
    # Group by the bare column name: grouping by a one-element list makes
    # recent pandas yield 1-tuples as keys, which would miss the style maps.
    for name, group in data.groupby("metrics"):
        style.append(linestyleMap[name] + markerMap[name])
        colors.append(colorMap[name])
        label = display_names.get(name, name)
        metrics[label] = group["value"].tolist()
        labels.append(label)

    fsize = (8, 1 + 1.1 * len(labels))
    fsizeFixed = (8, 2)
    fsizeHist = (8, 4)

    def save_timeseries(suffix, **extra):
        # Draw all metrics (one shared axes for fewer than four metrics,
        # one subplot per metric otherwise) and save the figure.
        if len(labels) < 4:
            ax = metrics.plot(legend=True,
                              sharex=True,
                              grid=True,
                              sharey=True,
                              markersize=10,
                              figsize=fsizeFixed,
                              color=colors,
                              style=style,
                              **extra)
            ax.set_ylabel("Value")
        else:
            axes = metrics.plot(subplots=True,
                                legend=False,
                                sharex=True,
                                grid=True,
                                sharey=True,
                                markersize=10,
                                figsize=fsize,
                                color=colors,
                                style=style,
                                **extra)
            for subplot_ax, label in zip(axes, labels):
                subplot_ax.set_ylabel(label)

        pyplot.xlabel("Segment number")
        pyplot.savefig(prefix + "timeseries" + jobid + suffix + fileformat,
                       bbox_inches='tight',
                       dpi=150)

    pyplot.close('all')

    save_timeseries("")

    metrics.hist(sharex=True,
                 grid=True,
                 sharey=True,
                 figsize=fsizeHist,
                 bins=10)
    pyplot.savefig(prefix + "hist" + jobid + fileformat,
                   bbox_inches='tight',
                   dpi=150)

    # Plot first 30 segments, only for jobs with more than 50 segments.
    # len(metrics) is the number of plotted segments; the leaked loop
    # variable used before could refer to a metric that was skipped.
    if len(metrics) <= 50:
        return

    save_timeseries("-30", xlim=(0, 30))
Beispiel #47
0
# Show the first and last two rows of each raw array and their lengths.
print(ao[:2])
print(ao[-2:])
print("Dates:nao")
print(nao[:2])
print(nao[-2:])
print("Length")
print(len(ao))
print(len(nao))

## Create a series
# One 'M'-frequency timestamp per row of nao, starting at 1950-01; the
# series values come from column 2 of nao.
dates_nao=pd.date_range('1950-01',periods=nao.shape[0],freq='M')
NAO=Series(nao[:,2],index=dates_nao)
print(NAO)
print(NAO.index)
# Combine both series into one frame (AO is defined earlier in the script).
aonao=DataFrame({'AO':AO,'NAO':NAO})
aonao.plot(subplots=True)
# NOTE(review): the figure is closed right after plotting, without a visible
# show()/savefig() — confirm this is intended.
plt.close()
print(aonao.head())
print(aonao.tail())
# Two equivalent ways of selecting a column.
print(aonao['NAO'])
print(aonao.NAO)
# Add a derived column, inspect it, then drop it again.
aonao['Diff']=aonao['AO']-aonao['NAO']
print(aonao.head())
del aonao['Diff']
print(aonao.tail())

## Crazy Combination
# NAO values for rows with AO > 0 and NAO < 0, strictly between 1980-10-01
# and 1989-01-01, drawn as horizontal bars.
aonao.loc[(aonao.AO>0)&(aonao.NAO<0) &\
           (aonao.index>datetime.datetime(1980,10,1)) &\
           (aonao.index<datetime.datetime(1989,1,1)),\
           'NAO'].plot(kind='barh')
Beispiel #48
0
# Corn: mean price per date divided by 100, then sorted by date and the price
# column renamed to the corn label.
df_data_corn_price_ = df_data_corn_price.groupby(['日期'])[['价格']].mean()/100
df_data_corn_price = df_data_corn_price_.reset_index()
df_data_corn_price = df_data_corn_price.sort_values(by='日期')
df_data_corn_price.rename(columns={'价格': '玉米'}, inplace=True)

# Pork
# Fetch the 'baby_data' item for the first 18 cities and stack the rows; the
# first row of each result supplies the column names.
df_data_baby_data = DataFrame()
for i in range(18):
    temp1 = get_data_by_item_name('baby_data', cities[i])
    df_temp1 = DataFrame(temp1[1: ], columns=temp1[0])
    df_data_baby_data = df_data_baby_data.append(df_temp1, ignore_index=True)
# Drop the region, variety and category columns.
del df_data_baby_data['地区']
del df_data_baby_data['品种']
del df_data_baby_data['分类']
df_data_baby_data['价格'] = df_data_baby_data['价格'].apply(float)
df_data_baby_data = df_data_baby_data.groupby(['日期'])[['价格']].mean()
# NOTE(review): this overwrites the per-date mean computed just above with
# twice the corn price — looks like a placeholder; confirm it is intended.
df_data_baby_data = df_data_corn_price_ * 2
df_data_baby_data = df_data_baby_data.reset_index()
df_data_baby_data = df_data_baby_data.sort_values(by='日期')
df_data_baby_data.rename(columns={'价格': '猪肉'}, inplace=True)

# Font settings (SimHei sans-serif, unicode minus disabled); presumably so
# the Chinese labels render correctly — confirm.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Draw the three price series on one shared axes.
fig,axes = plt.subplots(1)
df_data_corn_price.plot(ax=axes, kind='line', x='日期', color='green', title='全国价格走势图')
df_data_baby_data.plot(ax=axes, x='日期', kind='line', color='red')
df_data_bean_price.plot(ax=axes, kind='line', x='日期', color='blue')

# Pork: yuan/kg
# Soybean meal, corn: yuan/10kg
plt.show()
Beispiel #49
0
    def test_from_resampling_area_line_mixed(self):
        """Check stacked line/area plots of weekly and monthly frames sharing an axes.

        Both plotting orders (monthly first, weekly first) and both kind
        combinations are exercised; for every drawn line the x frequency, the
        x ordinals and the cumulative (stacked) y values are compared with
        the expected arrays.
        """
        idxh = date_range('1/1/1999', periods=52, freq='W')
        idxl = date_range('1/1/1999', periods=12, freq='M')
        high = DataFrame(np.random.rand(len(idxh), 3),
                         index=idxh, columns=[0, 1, 2])
        low = DataFrame(np.random.rand(len(idxl), 3),
                        index=idxl, columns=[0, 1, 2])

        # low to high
        for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
            ax = low.plot(kind=kind1, stacked=True)
            ax = high.plot(kind=kind2, stacked=True, ax=ax)

            # check low dataframe result
            # expected x positions of the 12 monthly points
            expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540,
                                   1544, 1549, 1553, 1558, 1562],
                                  dtype=np.float64)
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                # the first three lines belong to the frame plotted first
                l = ax.lines[i]
                self.assertEqual(PeriodIndex(l.get_xdata()).freq, idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False),
                                              expected_x)
                # check stacked values are correct
                expected_y += low[i].values
                self.assert_numpy_array_equal(
                    l.get_ydata(orig=False), expected_y)

            # check high dataframe result
            expected_x = idxh.to_period().asi8.astype(np.float64)
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                # lines 3..5 belong to the frame plotted second
                l = ax.lines[3 + i]
                self.assertEqual(PeriodIndex(data=l.get_xdata()).freq,
                                 idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False),
                                              expected_x)
                # stacked values accumulate column by column
                expected_y += high[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False),
                                              expected_y)

        # high to low
        for kind1, kind2 in [('line', 'area'), ('area', 'line')]:
            ax = high.plot(kind=kind1, stacked=True)
            ax = low.plot(kind=kind2, stacked=True, ax=ax)

            # check high dataframe result
            expected_x = idxh.to_period().asi8.astype(np.float64)
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                l = ax.lines[i]
                self.assertEqual(PeriodIndex(data=l.get_xdata()).freq,
                                 idxh.freq)
                self.assert_numpy_array_equal(
                    l.get_xdata(orig=False), expected_x)
                expected_y += high[i].values
                self.assert_numpy_array_equal(
                    l.get_ydata(orig=False), expected_y)

            # check low dataframe result
            # same monthly x positions as in the low-to-high case
            expected_x = np.array([1514, 1519, 1523, 1527, 1531, 1536, 1540,
                                   1544, 1549, 1553, 1558, 1562],
                                  dtype=np.float64)
            expected_y = np.zeros(len(expected_x), dtype=np.float64)
            for i in range(3):
                l = ax.lines[3 + i]
                self.assertEqual(PeriodIndex(data=l.get_xdata()).freq,
                                 idxh.freq)
                self.assert_numpy_array_equal(l.get_xdata(orig=False),
                                              expected_x)
                expected_y += low[i].values
                self.assert_numpy_array_equal(l.get_ydata(orig=False),
                                              expected_y)
Beispiel #50
0
 def test_rotation(self):
     """Every x tick label reports the rotation passed as ``rot``."""
     df = DataFrame(np.random.randn(5, 5))
     ax = df.plot(rot=30)
     for l in ax.get_xticklabels():
         # assertEqual replaces the ``assert_`` alias (removed in Python 3.12)
         # and reports the actual rotation when the check fails.
         self.assertEqual(l.get_rotation(), 30)
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import tkinter as tk
from pandas import DataFrame
import matplotlib.pyplot as plt

# Parallel lists: the i-th contact method goes with the i-th total.
irsData = {
    'Contact Method': [
        'Phone Call: Landline', 'Phone', 'Phone Call: Mobile/Cell',
        'Internet/E-mail', 'Mobile: Text/Email/ IM', 'Mail',
        'I Initiated Contact', 'Unknown', 'Internet Web Site', 'In Person',
        'Internet (Other)', 'Wireless', 'Fax', 'Print', 'TV/Radio'
    ],
    'Total': [10044, 2769, 2543, 226, 58, 57, 36, 34, 30, 16, 11, 7, 3, 3, 1]
}
df1 = DataFrame(irsData, columns=['Contact Method', 'Total'])
# Sum the totals per contact method; the method becomes the index.
df1 = df1[['Contact Method', 'Total']].groupby('Contact Method').sum()
# Tk window that hosts the embedded matplotlib figure.
root = tk.Tk()
root.title('Contact Method')
root.geometry("800x800")
figure1 = plt.Figure(figsize=(6, 4), dpi=100)
ax1 = figure1.add_subplot(111)
# NOTE(review): this label is set before df1.plot() draws on ax1; confirm the
# plot call does not replace it.
ax1.set_xlabel('Contact Methods')
# Embed the figure in the window, docked left and filling the space.
bar1 = FigureCanvasTkAgg(figure1, root)
bar1.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH)
df1.plot(kind='bar', legend=True, ax=ax1)
ax1.set_title('Contact Method Vs. Total')

# Enter the Tk event loop; blocks until the window is closed.
root.mainloop()
Beispiel #52
0
 def test_nonnumeric_exclude(self):
     """Only the numeric column of a mixed frame is drawn by ``plot``."""
     import matplotlib.pyplot as plt
     mixed = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]})
     drawn_lines = mixed.plot().get_lines()
     # exactly one line: column B was plotted, the string column A was not
     self.assertEqual(len(drawn_lines), 1)
Beispiel #53
0
    def start(self):
        """
        Run the strategy backtest.

        Walks every trading day between ``self.begin_date`` and
        ``self.end_date``.  On each day it adjusts held volumes by the
        price-adjustment factor, executes the sells and buys queued on the
        previous day at the open price, refreshes the stock pool on rebalance
        dates, evaluates the MA10 break signals to queue the next day's
        trades, and records net value, profit and the CSI 300 benchmark.

        When finished it prints the profit history, annualised return,
        maximum drawdown and Sharpe ratio, and plots the profit curve against
        the CSI 300 benchmark.
        """
        # Starting capital, available cash and the target amount per position.
        total_capital = 1E7
        cash = 1E7
        single_position = 2E5

        # Initialise the signal object
        daily_k_break_ma10 = DailyKBreakMA10Signal()

        low_pe_stock_pool = LowPeStockPool(self.begin_date, self.end_date, 7)

        # Held volume per stock and date, keyed by '<code>_<date>'
        code_date_volume_dict = dict()

        # Net value, profit and same-period CSI 300 benchmark, keyed by date
        df_profit = DataFrame(columns=['net_value', 'profit', 'hs300'])

        # The SSE Composite index is never suspended, so it has no missing
        # days; use it as the trading calendar.
        szzz_hq_df = self.dm.get_k_data('000001',
                                        index=True,
                                        begin_date=self.begin_date,
                                        end_date=self.end_date)
        all_dates = list(szzz_hq_df['date'])

        # Get the CSI 300 value on the first day of the period
        hs300_k = self.dm.get_k_data('000300',
                                     index=True,
                                     begin_date=all_dates[0],
                                     end_date=all_dates[0])
        hs300_begin_value = hs300_k.loc[hs300_k.index[0]]['close']

        # Get the stock pool data
        rebalance_dates, date_codes_dict = low_pe_stock_pool.get_option_stocks(
        )

        # Collect every stock code that appears in the pool during the
        # backtest period
        all_option_code_set = set()
        for rebalance_date in rebalance_dates:
            for code in date_codes_dict[rebalance_date]:
                all_option_code_set.add(code)

        # Cache the daily bars of each stock
        for code in all_option_code_set:
            dailies_df = self.dm.get_k_data(code,
                                            autype=None,
                                            begin_date=self.begin_date,
                                            end_date=self.end_date)
            dailies_hfq_df = self.dm.get_k_data(code,
                                                autype='hfq',
                                                begin_date=self.begin_date,
                                                end_date=self.end_date)
            # Compute the price-adjustment factor ('hfq' close / raw close)
            dailies_df[
                'au_factor'] = dailies_hfq_df['close'] / dailies_df['close']
            dailies_df.set_index(['date'], inplace=True)

            self.code_daily_cache[code] = dailies_df

        last_phase_codes = None
        this_phase_codes = None
        to_be_sold_codes = set()
        to_be_bought_codes = set()
        holding_code_dict = dict()
        last_date = None
        # Step through the backtest day by day
        for _date in all_dates:
            print('Backtest at %s.' % _date)

            # Codes held before today's sells
            before_sell_holding_codes = list(holding_code_dict.keys())

            # Handle price adjustment: rescale held volumes by the change in
            # the adjustment factor since the previous trading day
            if last_date is not None and len(before_sell_holding_codes) > 0:

                for code in before_sell_holding_codes:
                    try:
                        dailies = self.code_daily_cache[code]

                        current_au_factor = dailies.loc[_date]['au_factor']
                        before_volume = holding_code_dict[code]['volume']
                        last_au_factor = dailies.loc[last_date]['au_factor']

                        after_volume = int(
                            before_volume *
                            (current_au_factor / last_au_factor))
                        holding_code_dict[code]['volume'] = after_volume
                        print('持仓量调整:%s, %6d, %10.6f, %6d, %10.6f' %
                              (code, before_volume, last_au_factor,
                               after_volume, current_au_factor),
                              flush=True)
                    # NOTE(review): bare except hides every error (presumably
                    # a missing bar for this date) — consider narrowing it.
                    except:
                        print('持仓量调整时,发生错误:%s, %s' % (code, _date), flush=True)

            # Sell: execute the sells queued earlier at today's open price
            if len(to_be_sold_codes) > 0:
                # iterate over a copy because codes are removed while looping
                code_set_tmp = set(to_be_sold_codes)
                for code in code_set_tmp:
                    try:
                        if code in before_sell_holding_codes:
                            holding_stock = holding_code_dict[code]
                            holding_volume = holding_stock['volume']
                            sell_price = self.code_daily_cache[code].loc[
                                _date]['open']
                            sell_amount = holding_volume * sell_price
                            cash += sell_amount

                            cost = holding_stock['cost']
                            single_profit = (sell_amount - cost) * 100 / cost
                            print('卖出 %s, %6d, %6.2f, %8.2f, %4.2f' %
                                  (code, holding_volume, sell_price,
                                   sell_amount, single_profit))

                            del holding_code_dict[code]
                            to_be_sold_codes.remove(code)
                    # NOTE(review): bare except; a failed sell leaves the code
                    # queued in to_be_sold_codes for a later day.
                    except:
                        print('卖出时,发生异常:%s, %s' % (code, _date), flush=True)

            print('卖出后,现金: %10.2f' % cash)

            # Buy: execute the buys queued on the previous day at today's
            # open price, in sorted code order, while cash allows
            if len(to_be_bought_codes) > 0:
                sorted_to_be_bought_list = list(to_be_bought_codes)
                sorted_to_be_bought_list.sort()
                for code in sorted_to_be_bought_list:
                    try:
                        if cash > single_position:
                            buy_price = self.code_daily_cache[code].loc[_date][
                                'open']
                            # round the volume down to a multiple of 100
                            volume = int(
                                int(single_position / buy_price) / 100) * 100
                            buy_amount = buy_price * volume
                            cash -= buy_amount
                            holding_code_dict[code] = {
                                'volume': volume,
                                'cost': buy_amount,
                                'last_value': buy_amount
                            }

                            print('买入 %s, %6d, %6.2f, %8.2f' %
                                  (code, volume, buy_price, buy_amount),
                                  flush=True)
                    # NOTE(review): bare except hides every error raised
                    # while buying — consider narrowing it.
                    except:
                        print('买入时,发生错误:%s, %s' % (code, _date), flush=True)

            print('买入后,现金: %10.2f' % cash)

            # Codes held after today's trades
            holding_codes = list(holding_code_dict.keys())
            # On a rebalance date, fetch the new period's stock list
            if _date in rebalance_dates:
                # Keep the current list as the previous period's list
                if this_phase_codes is not None:
                    last_phase_codes = this_phase_codes
                this_phase_codes = date_codes_dict[_date]

                # Find all codes dropped from the pool; they are sold at the
                # next day's open
                if last_phase_codes is not None:
                    out_codes = self.find_out_stocks(last_phase_codes,
                                                     this_phase_codes)
                    for out_code in out_codes:
                        if out_code in holding_code_dict:
                            to_be_sold_codes.add(out_code)

            # Determine the start and end dates for signal detection: the
            # window starts 10 trading days back once that many days exist
            current_date_index = all_dates.index(_date)
            signal_begin_date = None
            if current_date_index >= 10:
                signal_begin_date = all_dates[current_date_index - 10]

            # Check for held stocks that must be sold the next day
            for holding_code in holding_codes:
                if daily_k_break_ma10.is_k_down_break_ma10(
                        holding_code, begin_date=signal_begin_date,
                        end_date=_date):
                    to_be_sold_codes.add(holding_code)

            # Check for stocks that must be bought the next day
            to_be_bought_codes.clear()
            if this_phase_codes is not None:
                for _code in this_phase_codes:
                    if _code not in holding_codes and \
                            daily_k_break_ma10.is_k_up_break_ma10(_code, begin_date=signal_begin_date, end_date=_date):
                        to_be_bought_codes.add(_code)

            # Compute total assets
            total_value = 0
            for code in holding_codes:
                try:
                    holding_stock = holding_code_dict[code]
                    value = self.code_daily_cache[code].loc[_date][
                        'close'] * holding_stock['volume']
                    total_value += value

                    # Total profit of the position, in percent of its cost
                    profit = (value - holding_stock['cost']
                              ) * 100 / holding_stock['cost']
                    # Single-day profit, in percent of the last stored value
                    one_day_profit = (value - holding_stock['last_value']
                                      ) * 100 / holding_stock['last_value']
                    # Store today's market value for the next comparison
                    holding_stock['last_value'] = value

                    print('持仓: %s, %10.2f, %4.2f, %4.2f' %
                          (code, value, profit, one_day_profit))

                    # Record the held volume of the stock for this day
                    code_date_volume_dict[code + '_' +
                                          _date] = holding_stock['volume']
                # NOTE(review): bare except; a position that fails here is
                # left out of total_value for the day.
                except:
                    print('计算收益时发生错误:%s, %s' % (code, _date), flush=True)

            total_capital = total_value + cash

            hs300_k_current = self.dm.get_k_data('000300',
                                                 index=True,
                                                 begin_date=_date,
                                                 end_date=_date)
            hs300_current_value = hs300_k_current.loc[
                hs300_k_current.index[0]]['close']

            print('收盘后,现金: %10.2f, 总资产: %10.2f' % (cash, total_capital))
            last_date = _date
            # Net value and profit are relative to the 1e7 starting capital;
            # hs300 is the benchmark's percentage change since the first day.
            df_profit.loc[_date] = {
                'net_value':
                round(total_capital / 1e7, 2),
                'profit':
                round(100 * (total_capital - 1e7) / 1e7, 2),
                'hs300':
                round(
                    100 * (hs300_current_value - hs300_begin_value) /
                    hs300_begin_value, 2)
            }

        # Print the backtest profit curve values
        print('Profit history start')
        for index_date in df_profit.index:
            print('%s, %6.2f, %6.2f' %
                  (index_date, df_profit.loc[index_date]['profit'],
                   df_profit.loc[index_date]['hs300']),
                  flush=True)
        print('Profit history end')

        drawdown = self.compute_drawdown(df_profit['net_value'])
        annual_profit, sharpe_ratio = self.compute_sharpe_ratio(
            df_profit['net_value'])

        print('回测结果 %s - %s,年化收益: %7.3f, 最大回撤:%7.3f, 夏普比率:%4.2f' %
              (self.begin_date, self.end_date, annual_profit, drawdown,
               sharpe_ratio))

        # Plot strategy profit against the CSI 300 benchmark
        df_profit.plot(title='Backtest Result',
                       y=['profit', 'hs300'],
                       kind='line')
        plt.show()
Beispiel #54
0
# Fare: fill the missing value in test_df (one record has no Fare) with the median
test_df['Fare'].fillna(test_df['Fare'].median(), inplace=True)
# Convert from float to int
train_df['Fare'] = train_df['Fare'].astype(int)
test_df['Fare'] = test_df['Fare'].astype(int)
# Get the Fare of non-survivors and survivors separately
fare_not_survived = train_df['Fare'][train_df['Survived'] == 0]
fare_survived = train_df['Fare'][train_df['Survived'] == 1]
# Compute the mean and standard deviation (row 0: not survived, row 1: survived)
average_fare = DataFrame([fare_not_survived.mean(), fare_survived.mean()])
std_fare = DataFrame([fare_not_survived.std(), fare_survived.std()])
# Plot
train_df['Fare'].plot(kind='hist', figsize=(15,3), bins=100, xlim=(0,50))
plt.savefig('Fare_1.png')
average_fare.index.names = std_fare.index.names = ['Survived']
# Mean fare per group as bars, with the standard deviation as error bars
average_fare.plot(yerr=std_fare, kind='bar', legend=False)
plt.savefig('Fare_2.png')
plt.close('all')


# Age
fig, (axis1, axis2) = plt.subplots(1, 2, figsize=(15,4))
axis1.set_title('Original Age Values - Titanic')
axis2.set_title('New Age Values - Titanic')
# Mean, standard deviation and number of nulls of Age in the training set
average_age_train = train_df['Age'].mean()
std_age_train = train_df['Age'].std()
count_nan_age_train = train_df['Age'].isnull().sum()
# Mean and standard deviation of Age in the test set
average_age_test = test_df['Age'].mean()
std_age_test = test_df['Age'].std()
Beispiel #55
0
def test_import_error_message():
    """Expect ``DataFrame.plot()`` to raise a descriptive ImportError (GH-19810).

    The test passes only if ``plot()`` raises ``ImportError`` with a message
    matching the pattern below.
    NOTE(review): presumably meant to run only where matplotlib is not
    installed -- confirm how the suite selects/skips this test.
    """
    expected_message = "matplotlib is required for plotting"
    frame = DataFrame({"A": [1, 2]})

    with pytest.raises(ImportError, match=expected_message):
        frame.plot()
Beispiel #56
0
import plotly.plotly as py
import plotly.graph_objs as go

import pandas as pd
from pandas import Series
from pandas import DataFrame
from pandas import TimeGrouper
from pandas import parser
from matplotlib import pyplot

# Load Data.csv as a Series: the first file column becomes the (date-parsed)
# index and the first remaining column becomes the values.
# Series.from_csv was deprecated in pandas 0.21 and removed in 1.0; this
# read_csv call spells out the defaults from_csv used (index_col=0,
# parse_dates=True) together with the header row the script passed explicitly.
series = pd.read_csv('Data.csv', header=0, index_col=0,
                     parse_dates=True).iloc[:, 0]
df = DataFrame(series)
# Draw the single-column frame as a line plot and display it.
df.plot()
pyplot.show()
Beispiel #57
0
 def test_bar_stacked_center(self):
     """Check that the first x tick sits at the middle of the first stacked bar.

     Regression test for GH2157.
     """
     df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5))
     # ``stacked`` is a boolean flag. The string 'True' used previously is a
     # non-empty string, so it would only have acted as "on" by truthiness.
     ax = df.plot(kind='bar', stacked=True, grid=True)
     first_bar = ax.patches[0]
     # Tick location must equal the horizontal center of the first patch.
     self.assertEqual(ax.xaxis.get_ticklocs()[0],
                      first_bar.get_x() + first_bar.get_width() / 2)
Beispiel #58
0
    def test_df_series_secondary_legend(self):
        """Overlay a secondary-y Series on a DataFrame plot and check the legend.

        Regression test for GH 9779. Five scenarios are run, each on a fresh
        axes that is closed afterwards:

        * frame on the primary axis (twice),
        * frame on the secondary axis (twice),
        * frame on the secondary axis with ``mark_right=False`` (once).

        The repeated scenarios mirror call pairs that differed only in
        keyword-argument order.
        """
        frame = DataFrame(np.random.randn(30, 3), columns=list("abc"))
        series = Series(np.random.randn(30), name="x")

        def overlay(**frame_kwargs):
            # Plot the frame on a new axes, then the series (with legend, on
            # the secondary y-axis) on the axes the frame plot returned.
            _, fresh_ax = self.plt.subplots()
            plotted_ax = frame.plot(ax=fresh_ax, **frame_kwargs)
            series.plot(ax=plotted_ax, legend=True, secondary_y=True)
            return plotted_ax

        def check_frame_on_primary():
            # Legend is checked on the returned (left) axes; both the left
            # and the right y-axis must be visible.
            left = overlay()
            self._check_legend_labels(left, labels=["a", "b", "c", "x (right)"])
            assert left.get_yaxis().get_visible()
            assert left.right_ax.get_yaxis().get_visible()
            tm.close()

        def check_frame_on_secondary(expected_labels, **frame_kwargs):
            # Legend is checked on ``left_ax`` of the returned axes; the left
            # y-axis must be hidden and the returned axes' y-axis visible.
            right = overlay(secondary_y=True, **frame_kwargs)
            self._check_legend_labels(right.left_ax, labels=expected_labels)
            assert not right.left_ax.get_yaxis().get_visible()
            assert right.get_yaxis().get_visible()
            tm.close()

        # primary frame -> secondary series
        for _ in range(2):
            check_frame_on_primary()

        # secondary frame -> secondary series, frame labels marked "(right)"
        for _ in range(2):
            check_frame_on_secondary(
                ["a (right)", "b (right)", "c (right)", "x (right)"])

        # secondary frame -> secondary series, frame labels left unmarked
        check_frame_on_secondary(["a", "b", "c", "x (right)"],
                                 mark_right=False)
Beispiel #59
0
 def test_bar_center(self):
     """Check the first x tick against the right edge of the first bar patch.

     NOTE(review): the expectation is ``x + width`` (not ``x + width / 2``);
     presumably the tick is centered on the two-column group, whose first
     bar ends at the group's middle -- confirm against the plotting code.
     """
     frame = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5))
     axes = frame.plot(kind='bar', grid=True)
     first_tick = axes.xaxis.get_ticklocs()[0]
     first_bar = axes.patches[0]
     right_edge = first_bar.get_x() + first_bar.get_width()
     self.assertEqual(first_tick, right_edge)
Beispiel #60
0
# Rolling one-step forecast: for every test observation, fit the model on
# everything seen so far, forecast the next value, then append the true
# observation to the history.
history = list(train)
predictions = []
# Constant added to every forecast.
# NOTE(review): presumably the mean residual of an earlier, uncorrected run
# -- confirm where this value comes from.
bias = -0.348572

for obs in test:
    # predict
    model = ARIMA(history, order=(2, 1, 3))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = bias + float(model_fit.forecast()[0])
    predictions.append(yhat)
    # observation: make the true value available to the next fit
    history.append(obs)

# report performance
mse = mean_squared_error(test, predictions)
rmse = sqrt(mse)
print('RMSE: %.3f' % rmse)

# summarise residual errors (actual minus predicted, one row per test point)
residuals = DataFrame(
    [actual - predicted for actual, predicted in zip(test, predictions)])
print(residuals.describe())

# plot residual errors: histogram on top, kernel density estimate below
pyplot.figure()
pyplot.subplot(211)
residuals.hist(ax=pyplot.gca())
pyplot.subplot(212)
residuals.plot(kind='kde', ax=pyplot.gca())
pyplot.show()