Example #1
0
    def test_axis_shared(self):
        """Grouped ``hist`` must link exactly the axes requested via sharex/sharey."""
        # GH4089
        import matplotlib.pyplot as plt

        n = 100
        df = DataFrame(
            {
                "gender": np.array(["Male", "Female"])[random.randint(2, size=n)],
                "height": random.normal(66, 4, size=n),
                "weight": random.normal(161, 32, size=n),
            }
        )

        # (keyword arguments, x axes expected joined, y axes expected joined)
        cases = (
            ({"sharex": True}, True, False),
            ({"sharey": True}, False, True),
            ({"sharex": True, "sharey": True}, True, True),
        )
        for kwargs, x_joined, y_joined in cases:
            ax1, ax2 = df.hist(column="height", by=df.gender, **kwargs)
            check_x = self.assertTrue if x_joined else self.assertFalse
            check_y = self.assertTrue if y_joined else self.assertFalse
            # The link is queried through both axes, as each carries its own
            # reference to the shared-axes grouper.
            for ax in (ax1, ax2):
                check_x(ax._shared_x_axes.joined(ax1, ax2))
                check_y(ax._shared_y_axes.joined(ax1, ax2))
            # Close the figure so it does not leak into later tests.
            plt.close("all")
Example #2
0
    def test_hist_layout(self):
        """``DataFrame.hist`` returns a grid shaped like ``layout``; bad layouts raise."""
        import matplotlib.pyplot as plt

        frame = DataFrame(randn(100, 4))

        # (requested layout, expected (rows, cols)); None means the 2x2 default
        cases = [
            (None, (2, 2)),
            ((2, 2), (2, 2)),
            ((4, 1), (4, 1)),
            ((1, 4), (1, 4)),
            ((3, 3), (3, 3)),
        ]
        for requested, (n_rows, n_cols) in cases:
            grid = frame.hist(layout=requested)
            self.assertEqual(len(grid), n_rows)
            self.assertEqual(len(grid[0]), n_cols)

        # layout too small for all 4 plots
        with tm.assertRaises(ValueError):
            frame.hist(layout=(1, 1))

        # invalid format for layout
        with tm.assertRaises(ValueError):
            frame.hist(layout=(1,))
Example #3
0
    def test_hist(self):
        """Exercise ``DataFrame.hist``/``Series.hist``: layouts, tick options, kwargs."""
        import matplotlib.pyplot as plt

        def check_first_ticks(ax, xf, xrot, yf, yrot):
            # Only the first tick label of each axis is inspected.
            ytick = ax.get_yticklabels()[0]
            xtick = ax.get_xticklabels()[0]
            self.assertAlmostEqual(ytick.get_fontsize(), yf)
            self.assertAlmostEqual(ytick.get_rotation(), yrot)
            self.assertAlmostEqual(xtick.get_fontsize(), xf)
            self.assertAlmostEqual(xtick.get_rotation(), xrot)

        df = DataFrame(np.random.randn(100, 4))
        _check_plot_works(df.hist)
        _check_plot_works(df.hist, grid=False)

        # make sure layout is handled
        df = DataFrame(np.random.randn(100, 3))
        _check_plot_works(df.hist)
        axes = df.hist(grid=False)
        # with three columns the grid cell at [1, 1] has no data and must be hidden
        self.assertFalse(axes[1, 1].get_visible())

        df = DataFrame(np.random.randn(100, 1))
        _check_plot_works(df.hist)

        # make sure layout is handled
        df = DataFrame(np.random.randn(100, 6))
        _check_plot_works(df.hist)

        # make sure sharex, sharey is handled
        _check_plot_works(df.hist, sharex=True, sharey=True)

        # make sure xlabelsize and xrot are handled
        ser = df[0]
        xf, yf = 20, 20
        xrot, yrot = 30, 30
        ax = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        check_first_ticks(ax, xf, xrot, yf, yrot)

        axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        # only the first len(df.columns) cells of the grid are checked
        for ax in axes.ravel()[:len(df.columns)]:
            check_first_ticks(ax, xf, xrot, yf, yrot)

        plt.close('all')
        # make sure kwargs to hist are handled
        # NOTE(review): newer matplotlib spells ``normed`` as ``density`` --
        # confirm which versions this test must support.
        ax = ser.hist(normed=True, cumulative=True, bins=4)
        # The last bar of a cumulative, normalised histogram must reach 1.0.
        # ax.patches holds the bars, so no fixed position inside
        # get_children() (whose ordering varies between matplotlib
        # versions) is needed.
        self.assertAlmostEqual(ax.patches[-1].get_height(), 1.0)

        plt.close('all')
        ax = ser.hist(log=True)
        # scale of y must be 'log'
        self.assertEqual(ax.get_yscale(), 'log')

        plt.close('all')
        # propagate attr exception from matplotlib.Axes.hist
        self.assertRaises(AttributeError, ser.hist, foo='bar')
Example #4
0
 def test_grouped_hist_layout(self):
     """Grouped ``hist`` rejects unusable layouts and shapes its result by ``layout``."""
     import matplotlib.pyplot as plt
     n = 100
     genders = np.array(['Male', 'Female'])[random.randint(2, size=n)]
     df = DataFrame({'gender': genders,
                     'height': random.normal(66, 4, size=n),
                     'weight': random.normal(161, 32, size=n),
                     'category': random.randint(4, size=n)})

     # (column, grouping key, layout) combinations that must raise ValueError
     rejected = (
         ('weight', df.gender, (1, 1)),
         ('weight', df.gender, (1,)),
         ('height', df.category, (1, 3)),
         ('height', df.category, (2, 1)),
     )
     for column, by, layout in rejected:
         self.assertRaises(ValueError, df.hist, column=column, by=by,
                           layout=layout)

     # accepted layouts: the shape of the returned axes array is checked
     axes = df.hist(column='height', by=df.gender, layout=(2, 1))
     self.assertEqual(axes.shape, (2,))
     plt.close('all')

     axes = df.hist(column='height', by=df.category, layout=(4, 1))
     self.assertEqual(axes.shape, (4,))
     plt.close('all')

     axes = df.hist(column='height', by=df.category, layout=(4, 2))
     self.assertEqual(axes.shape, (4, 2))
Example #5
0
    def test_axis_shared(self):
        """Grouped ``hist`` must link exactly the axes requested via sharex/sharey."""
        # GH4089
        import matplotlib.pyplot as plt

        n = 100
        df = DataFrame({'gender': np.array(['Male', 'Female'])[random.randint(2, size=n)],
                        'height': random.normal(66, 4, size=n),
                        'weight': random.normal(161, 32, size=n)})

        # (keyword arguments, x axes expected joined, y axes expected joined)
        cases = (
            ({'sharex': True}, True, False),
            ({'sharey': True}, False, True),
            ({'sharex': True, 'sharey': True}, True, True),
        )
        for kwargs, x_joined, y_joined in cases:
            ax1, ax2 = df.hist(column='height', by=df.gender, **kwargs)
            check_x = self.assertTrue if x_joined else self.assertFalse
            check_y = self.assertTrue if y_joined else self.assertFalse
            # The link is queried through both axes objects.
            for ax in (ax1, ax2):
                check_x(ax._shared_x_axes.joined(ax1, ax2))
                check_y(ax._shared_y_axes.joined(ax1, ax2))
            # Close the figure so it does not leak into later tests.
            plt.close('all')
Example #6
0
    def test_grouped_hist_legacy(self):
        """Check grouped histograms: axes layout, tick properties and hist kwargs."""
        df = DataFrame(randn(500, 2), columns=['A', 'B'])
        df['C'] = np.random.randint(0, 4, 500)
        df['D'] = ['X'] * 500

        axes = grouped_hist(df.A, by=df.C)
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        tm.close()
        axes = df.hist(by=df.C)
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        tm.close()
        # group by a key with single value
        axes = df.hist(by='D', rot=30)
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
        self._check_ticks_props(axes, xrot=30)

        tm.close()
        # make sure kwargs to hist are handled
        xf, yf = 20, 18
        xrot, yrot = 30, 40

        # the normalisation keyword depends on the matplotlib version
        if _mpl_ge_2_2_0():
            kwargs = {"density": True}
        else:
            kwargs = {"normed": True}

        axes = grouped_hist(df.A, by=df.C, cumulative=True,
                            bins=4, xlabelsize=xf, xrot=xrot,
                            ylabelsize=yf, yrot=yrot, **kwargs)
        # The last bar of a cumulative, normalised histogram must reach 1.0.
        # Read it from ax.patches (the bars).  Filtering get_children() for
        # Rectangle instances also picks up the axes background patch, which
        # comes last and has height 1.0, so that check could never fail.
        for ax in axes.ravel():
            height = ax.patches[-1].get_height()
            tm.assert_almost_equal(height, 1.0)
        self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
                                ylabelsize=yf, yrot=yrot)

        tm.close()
        axes = grouped_hist(df.A, by=df.C, log=True)
        # scale of y must be 'log'
        self._check_ax_scales(axes, yaxis='log')

        tm.close()
        # propagate attr exception from matplotlib.Axes.hist
        with pytest.raises(AttributeError):
            grouped_hist(df.A, by=df.C, foo='bar')

        # a non-tuple figsize is expected to emit a FutureWarning
        with tm.assert_produces_warning(FutureWarning):
            df.hist(by='C', figsize='default')
Example #7
0
    def test_grouped_hist(self):
        """Check grouped histograms: one axes per group, and hist kwargs are honoured."""
        import matplotlib.pyplot as plt
        df = DataFrame(np.random.randn(500, 2), columns=['A', 'B'])
        df['C'] = np.random.randint(0, 4, 500)
        axes = plotting.grouped_hist(df.A, by=df.C)
        self.assertEqual(len(axes.ravel()), 4)

        plt.close('all')
        axes = df.hist(by=df.C)
        self.assertEqual(axes.ndim, 2)
        self.assertEqual(len(axes.ravel()), 4)

        # every axes must actually contain bars
        for ax in axes.ravel():
            self.assertTrue(len(ax.patches) > 0)

        plt.close('all')
        # make sure kwargs to hist are handled
        # NOTE(review): newer matplotlib spells ``normed`` as ``density`` --
        # confirm which versions this test must support.
        axes = plotting.grouped_hist(df.A, by=df.C, normed=True,
                                     cumulative=True, bins=4)

        # The last bar of a cumulative, normalised histogram must reach 1.0.
        # ax.patches holds the bars; a fixed index into get_children()
        # depends on matplotlib's child ordering.
        for ax in axes.ravel():
            height = ax.patches[-1].get_height()
            self.assertAlmostEqual(height, 1.0)

        plt.close('all')
        axes = plotting.grouped_hist(df.A, by=df.C, log=True)
        # scale of y must be 'log'
        for ax in axes.ravel():
            self.assertEqual(ax.get_yscale(), 'log')

        plt.close('all')
        # propagate attr exception from matplotlib.Axes.hist
        self.assertRaises(AttributeError, plotting.grouped_hist, df.A,
                          by=df.C, foo='bar')
Example #8
0
    def test_grouped_hist(self):
        """Check grouped histograms: one axes per group, and hist kwargs are honoured."""
        import matplotlib.pyplot as plt

        df = DataFrame(randn(500, 2), columns=["A", "B"])
        df["C"] = np.random.randint(0, 4, 500)
        axes = plotting.grouped_hist(df.A, by=df.C)
        self.assertEqual(len(axes.ravel()), 4)

        tm.close()
        axes = df.hist(by=df.C)
        self.assertEqual(axes.ndim, 2)
        self.assertEqual(len(axes.ravel()), 4)

        # every axes must actually contain bars
        for ax in axes.ravel():
            self.assertTrue(len(ax.patches) > 0)

        tm.close()
        # make sure kwargs to hist are handled
        # NOTE(review): newer matplotlib spells ``normed`` as ``density`` --
        # confirm which versions this test must support.
        axes = plotting.grouped_hist(df.A, by=df.C, normed=True, cumulative=True, bins=4)

        # The last bar of a cumulative, normalised histogram must reach 1.0.
        # ax.patches holds the bars; a fixed index into get_children()
        # depends on matplotlib's child ordering.
        for ax in axes.ravel():
            height = ax.patches[-1].get_height()
            self.assertAlmostEqual(height, 1.0)

        tm.close()
        axes = plotting.grouped_hist(df.A, by=df.C, log=True)
        # scale of y must be 'log'
        for ax in axes.ravel():
            self.assertEqual(ax.get_yscale(), "log")

        tm.close()
        # propagate attr exception from matplotlib.Axes.hist
        with tm.assertRaises(AttributeError):
            plotting.grouped_hist(df.A, by=df.C, foo="bar")
    def test_hist_df_legacy(self):
        """Check ``DataFrame.hist``/``Series.hist``: layout, tick properties, kwargs."""
        _check_plot_works(self.hist_df.hist)

        # make sure layout is handled
        df = DataFrame(randn(100, 3))
        axes = _check_plot_works(df.hist, grid=False)
        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
        # the grid cell at [1, 1] has no column to show and must be hidden
        self.assertFalse(axes[1, 1].get_visible())

        df = DataFrame(randn(100, 1))
        _check_plot_works(df.hist)

        # make sure layout is handled
        df = DataFrame(randn(100, 6))
        axes = _check_plot_works(df.hist, layout=(4, 2))
        self._check_axes_shape(axes, axes_num=6, layout=(4, 2))

        # make sure sharex, sharey is handled
        _check_plot_works(df.hist, sharex=True, sharey=True)

        # handle figsize arg
        _check_plot_works(df.hist, figsize=(8, 10))

        # check bins argument
        _check_plot_works(df.hist, bins=5)

        # make sure xlabelsize and xrot are handled
        ser = df[0]
        xf, yf = 20, 18
        xrot, yrot = 30, 40
        axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
                                ylabelsize=yf, yrot=yrot)

        axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
                                ylabelsize=yf, yrot=yrot)

        tm.close()
        # make sure kwargs to hist are handled
        # NOTE(review): newer matplotlib spells ``normed`` as ``density`` --
        # confirm which versions this test must support.
        ax = ser.hist(normed=True, cumulative=True, bins=4)
        # The last bar of a cumulative, normalised histogram must reach 1.0.
        # Read it from ax.patches (the bars).  Filtering get_children() for
        # Rectangle instances also picks up the axes background patch, which
        # comes last and has height 1.0, so that check could never fail.
        self.assertAlmostEqual(ax.patches[-1].get_height(), 1.0)

        tm.close()
        ax = ser.hist(log=True)
        # scale of y must be 'log'
        self._check_ax_scales(ax, yaxis='log')

        tm.close()

        # propagate attr exception from matplotlib.Axes.hist
        with tm.assertRaises(AttributeError):
            ser.hist(foo='bar')
    def test_hist_layout(self):
        """``DataFrame.hist`` resolves ``layout`` (including -1 entries) or raises."""
        frame = DataFrame(randn(100, 3))

        # (requested layout, expected (rows, cols)); a -1 entry is expected
        # to be filled in from the other dimension
        cases = [
            (None, (2, 2)),  # default is 2x2
            ((2, 2), (2, 2)),
            ((4, 1), (4, 1)),
            ((1, 4), (1, 4)),
            ((3, 3), (3, 3)),
            ((-1, 4), (1, 4)),
            ((4, -1), (4, 1)),
            ((-1, 2), (2, 2)),
            ((2, -1), (2, 2)),
        ]
        for requested, expected in cases:
            grid = frame.hist(layout=requested)
            self._check_axes_shape(grid, axes_num=3, layout=expected)

        # layout too small for all 3 plots
        with tm.assertRaises(ValueError):
            frame.hist(layout=(1, 1))

        # invalid format for layout
        for malformed in ((1,), (-1, -1)):
            with tm.assertRaises(ValueError):
                frame.hist(layout=malformed)
Example #11
0
    def test_hist(self):
        """Exercise ``DataFrame.hist``/``Series.hist``: layouts and tick options."""

        def check_first_ticks(ax, xf, xrot, yf, yrot):
            # Only the first tick label of each axis is inspected.
            ytick = ax.get_yticklabels()[0]
            xtick = ax.get_xticklabels()[0]
            self.assertAlmostEqual(ytick.get_fontsize(), yf)
            self.assertAlmostEqual(ytick.get_rotation(), yrot)
            self.assertAlmostEqual(xtick.get_fontsize(), xf)
            self.assertAlmostEqual(xtick.get_rotation(), xrot)

        df = DataFrame(np.random.randn(100, 4))
        _check_plot_works(df.hist)
        _check_plot_works(df.hist, grid=False)

        # make sure layout is handled
        df = DataFrame(np.random.randn(100, 3))
        _check_plot_works(df.hist)
        axes = df.hist(grid=False)
        # with three columns the grid cell at [1, 1] has no data and must be hidden
        self.assertFalse(axes[1, 1].get_visible())

        df = DataFrame(np.random.randn(100, 1))
        _check_plot_works(df.hist)

        # make sure layout is handled
        df = DataFrame(np.random.randn(100, 6))
        _check_plot_works(df.hist)

        # make sure sharex, sharey is handled
        _check_plot_works(df.hist, sharex=True, sharey=True)

        # make sure kwargs are handled
        ser = df[0]
        xf, yf = 20, 20
        xrot, yrot = 30, 30
        ax = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        check_first_ticks(ax, xf, xrot, yf, yrot)

        axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        # only the first len(df.columns) cells of the grid are checked
        for ax in axes.ravel()[:len(df.columns)]:
            check_first_ticks(ax, xf, xrot, yf, yrot)
Example #12
0
    def test_grouped_hist_legacy(self):
        """Check grouped histograms: axes layout, tick properties and hist kwargs."""
        df = DataFrame(randn(500, 2), columns=['A', 'B'])
        df['C'] = np.random.randint(0, 4, 500)
        df['D'] = ['X'] * 500

        axes = plotting.grouped_hist(df.A, by=df.C)
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        tm.close()
        axes = df.hist(by=df.C)
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        tm.close()
        # group by a key with single value
        axes = df.hist(by='D', rot=30)
        self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
        self._check_ticks_props(axes, xrot=30)

        tm.close()
        # make sure kwargs to hist are handled
        xf, yf = 20, 18
        xrot, yrot = 30, 40
        # NOTE(review): newer matplotlib spells ``normed`` as ``density`` --
        # confirm which versions this test must support.
        axes = plotting.grouped_hist(df.A, by=df.C, normed=True,
                                     cumulative=True, bins=4,
                                     xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
        # The last bar of a cumulative, normalised histogram must reach 1.0.
        # ax.patches holds the bars; a fixed index into get_children()
        # depends on matplotlib's child ordering.
        for ax in axes.ravel():
            height = ax.patches[-1].get_height()
            self.assertAlmostEqual(height, 1.0)
        self._check_ticks_props(axes, xlabelsize=xf, xrot=xrot,
                                ylabelsize=yf, yrot=yrot)

        tm.close()
        axes = plotting.grouped_hist(df.A, by=df.C, log=True)
        # scale of y must be 'log'
        self._check_ax_scales(axes, yaxis='log')

        tm.close()
        # propagate attr exception from matplotlib.Axes.hist
        with tm.assertRaises(AttributeError):
            plotting.grouped_hist(df.A, by=df.C, foo='bar')

        # a non-tuple figsize is expected to emit a FutureWarning
        with tm.assert_produces_warning(FutureWarning):
            df.hist(by='C', figsize='default')
Example #13
0
    def test_grouped_hist_layout(self):
        """Grouped ``hist`` rejects unusable layouts and shapes its result by ``layout``."""
        import matplotlib.pyplot as plt

        n = 100
        gender = np.array(["Male", "Female"])[random.randint(2, size=n)]
        height = random.normal(66, 4, size=n)
        weight = random.normal(161, 32, size=n)
        category = random.randint(4, size=n)
        df = DataFrame(
            {"gender": gender, "height": height, "weight": weight, "category": category}
        )

        # keyword sets for which df.hist must raise ValueError
        rejected = [
            dict(column="weight", by=df.gender, layout=(1, 1)),
            dict(column="weight", by=df.gender, layout=(1,)),
            dict(column="height", by=df.category, layout=(1, 3)),
            dict(column="height", by=df.category, layout=(2, 1)),
        ]
        for kwargs in rejected:
            self.assertRaises(ValueError, df.hist, **kwargs)

        # accepted layouts: the shape of the returned axes array is checked
        result = df.hist(column="height", by=df.gender, layout=(2, 1))
        self.assertEqual(result.shape, (2,))
        plt.close("all")

        result = df.hist(column="height", by=df.category, layout=(4, 1))
        self.assertEqual(result.shape, (4,))
        plt.close("all")

        result = df.hist(column="height", by=df.category, layout=(4, 2))
        self.assertEqual(result.shape, (4, 2))
Example #14
0
    def test_axis_share_xy(self):
        """With ``sharex=True, sharey=True`` both grouped axes share x and y."""
        n = 100
        data = {
            'gender': tm.choice(['Male', 'Female'], size=n),
            'height': random.normal(66, 4, size=n),
            'weight': random.normal(161, 32, size=n),
        }
        df = DataFrame(data)
        first, second = df.hist(column='height', by=df.gender, sharex=True,
                                sharey=True)

        # share both x and y: the x links are checked on each axes first,
        # then the y links
        for ax in (first, second):
            self.assertTrue(ax._shared_x_axes.joined(first, second))
        for ax in (first, second):
            self.assertTrue(ax._shared_y_axes.joined(first, second))
    def test_grouped_hist(self):
        """Grouped histograms produce one populated axes per group."""
        import matplotlib.pyplot as plt
        df = DataFrame(np.random.randn(500, 2), columns=['A', 'B'])
        df['C'] = np.random.randint(0, 4, 500)
        axes = plotting.grouped_hist(df.A, by=df.C)
        self.assertEqual(len(axes.ravel()), 4)

        plt.close('all')
        axes = df.hist(by=df.C)
        self.assertEqual(axes.ndim, 2)
        self.assertEqual(len(axes.ravel()), 4)

        # every axes must actually contain bars
        for ax in axes.ravel():
            self.assertTrue(len(ax.patches) > 0)

        # Close the figure so it does not leak into later tests.
        plt.close('all')
Example #16
0
def gs(str,list):
    """Show quick-look plots for selected columns of a CSV file.

    Parameters
    ----------
    str : path or buffer handed to ``pd.read_csv``
    list : column names passed as ``usecols``

    The scatter, histogram and box plots are best effort: a failure in one
    of them is reported and the function carries on.  Afterwards the
    element-wise ``np.isreal`` mask is printed and, if the column named by
    the concatenation of ``list`` holds any non-real value, a bar chart of
    that column's value counts is shown.

    NOTE(review): the parameter names shadow the ``str`` and ``list``
    builtins; they are kept because callers may pass them by keyword.
    NOTE(review): ``''.join(list)`` is only an existing column name when a
    single column is requested -- confirm the intended behaviour for
    several columns.
    """
    columns = list
    frame = DataFrame(pd.read_csv(str, usecols=columns))

    # Scatter of the first two requested columns.
    try:
        plt.scatter(frame[columns[0]], frame[columns[1]], color='red')
        plt.show()
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C still works.
        print("gs: scatter plot skipped: %s" % exc)

    # Histograms followed by box plots.
    try:
        frame.hist()
        plt.show()

        frame.plot(kind='box', by=columns)
        plt.show()
    except Exception as exc:
        print("gs: histogram/box plot skipped: %s" % exc)

    is_real = frame.applymap(np.isreal)
    # Call form of print works as a statement (Python 2) and a function (3).
    print(is_real)

    key = ''.join(columns)
    # One non-real entry is enough to show the value-count bar chart once.
    if not is_real[key].all():
        frame[key].value_counts().plot(kind='bar')
        plt.show()
Example #17
0
    def test_axis_share_xy(self):
        """With ``sharex=True, sharey=True`` both grouped axes share x and y."""
        n = 100
        gender = tm.choice(["Male", "Female"], size=n)
        height = random.normal(66, 4, size=n)
        weight = random.normal(161, 32, size=n)
        df = DataFrame({"gender": gender, "height": height, "weight": weight})

        left, right = df.hist(column="height", by=df.gender, sharex=True, sharey=True)

        # share both x and y: the x links are checked on each axes first,
        # then the y links
        for ax in (left, right):
            self.assertTrue(ax._shared_x_axes.joined(left, right))
        for ax in (left, right):
            self.assertTrue(ax._shared_y_axes.joined(left, right))
Example #18
0
    def test_hist_layout(self):
        """``DataFrame.hist`` returns a grid shaped like ``layout``; bad layouts raise."""
        import matplotlib.pyplot as plt
        plt.close('all')
        df = DataFrame(randn(100, 4))

        layout_to_expected_size = (
            {'layout': None, 'expected_size': (2, 2)},  # default is 2x2
            {'layout': (2, 2), 'expected_size': (2, 2)},
            {'layout': (4, 1), 'expected_size': (4, 1)},
            {'layout': (1, 4), 'expected_size': (1, 4)},
            {'layout': (3, 3), 'expected_size': (3, 3)},
        )

        for layout_test in layout_to_expected_size:
            ax = df.hist(layout=layout_test['layout'])
            n_rows, n_cols = layout_test['expected_size']
            # assertEqual reports both values on failure and, unlike the
            # deprecated assert_ alias, exists in current unittest
            self.assertEqual(len(ax), n_rows)
            self.assertEqual(len(ax[0]), n_cols)

        # layout too small for all 4 plots
        self.assertRaises(ValueError, df.hist, layout=(1, 1))

        # invalid format for layout
        self.assertRaises(ValueError, df.hist, layout=(1,))
Example #19
0
    def test_hist_layout(self):
        """``DataFrame.hist`` returns a grid shaped like ``layout``; bad layouts raise."""
        import matplotlib.pyplot as plt

        plt.close("all")
        df = DataFrame(np.random.randn(100, 4))

        layout_to_expected_size = (
            {"layout": None, "expected_size": (2, 2)},  # default is 2x2
            {"layout": (2, 2), "expected_size": (2, 2)},
            {"layout": (4, 1), "expected_size": (4, 1)},
            {"layout": (1, 4), "expected_size": (1, 4)},
            {"layout": (3, 3), "expected_size": (3, 3)},
        )

        for layout_test in layout_to_expected_size:
            ax = df.hist(layout=layout_test["layout"])
            n_rows, n_cols = layout_test["expected_size"]
            # assertEqual reports both values on failure and, unlike the
            # deprecated assert_ alias, exists in current unittest
            self.assertEqual(len(ax), n_rows)
            self.assertEqual(len(ax[0]), n_cols)

        # layout too small for all 4 plots
        self.assertRaises(ValueError, df.hist, layout=(1, 1))

        # invalid format for layout
        self.assertRaises(ValueError, df.hist, layout=(1,))
Example #20
0
    dd_qtr_std=df_drawdowns.groupby(df_drawdowns.index.quarter).std()
     
    #Look at drawdowns on a monthly basis
    mth_mean=df_drawdowns.resample('M', how='mean',kind='period')
    dd_monthly_mean=df_drawdowns.groupby(df_drawdowns.index.month).mean()
    dd_monthly_std=df_drawdowns.groupby(df_drawdowns.index.month).std()
    
    #Look at one year-2014
    dd_2014=df_drawdowns['2014-01-01':'2014-12-31']
    dd_2014_ri=dd_2014.mean().reset_index(name='Average Drawdown in 2014')
    
    #Creates histograms based on drawdown magnitudes
    bins_dd = np.linspace(0,30,61)
    dd_hist=df_drawdowns
    dd_hist.hist(bins=bins_dd, alpha=0.75,color='green',normed=True)
    dd_hist.plot(kind='kde',style='k--')

    '''
    Drawdown analysis-This code plots a histogram of a stocks drawdown length
    characteristics.  Ensure that stock ticker used in this function has already
    been placed in the ticker list. 
    '''

    stock_dd_length=calc_drawdown_local('WAT',63)
    dd_len_hist=DataFrame(stock_dd_length)
    bins_len_dd=np.linspace(0,30,31)
    dd_len_hist.hist(bins=bins_len_dd, alpha=0.55, color='purple',normed=True)
    plt.title('Drawdown lengths - WAT') 
    dd_len_hist.describe()
    
    
Example #21
0
print(df.corr())

### 8. Merge and Join ###

print(df)
# Small lookup table that is joined to df on the "str_col" column.
other = DataFrame({"str_col": ["a", "b"], "some_val": [1, 2]})
print(other)
# The same merge on "str_col", once per join type.
print(pd.merge(df, other, on="str_col", how="inner"))
print(pd.merge(df, other, on="str_col", how="outer"))
print(pd.merge(df, other, on="str_col", how="left"))
print(pd.merge(df, other, on="str_col", how="right"))

### 9. Plot ###

# 1000 rows of random "x"/"y" values; every "y" value is then shifted by +1.
plot_df = DataFrame(np.random.randn(1000, 2), columns=["x", "y"])
plot_df["y"] = plot_df["y"].map(lambda x: x + 1)

# NOTE(review): no call that displays the figures (e.g. matplotlib's show())
# is visible in this section -- presumably why no plot appears; confirm.
plot_df.plot()  ### plot not working???? ###

plot_df.hist()  ### plot not working???? ###

### 10. Scikit-learn conversion ###

print(df)

# All rows, every column except the last, as a plain array.
print(df.values[:, :-1])

# NOTE(review): in the disabled line below ``float32`` is a bare name;
# presumably ``np.float32`` was intended -- confirm before re-enabling.
# print(df.values[:,:-1].astype(float32)) not working?

# Wait for Enter before the script exits.
input()
Example #22
0
'''

import pandas
from pandas import Series, DataFrame
import code
import numpy as np
import matplotlib.pyplot as plt
import sys
import csv

if __name__ == '__main__':
	# Read (peak, iqr) float pairs from the CSV file named on the command
	# line, falling back to "stats.csv" when no argument is given.
	with open("stats.csv" if len(sys.argv) < 2 else sys.argv[1]) as f:
		reader = csv.reader(f)
		data = [(float(peak), float(iqr)) for peak, iqr in reader]

	# Split the pairs into two columns.  Subscripting zip(*data) only works
	# where zip() returns a list and raises on an empty file, so the
	# columns are built directly instead.
	md = Series([peak for peak, _ in data])
	iqrd = Series([iqr for _, iqr in data])

	df = DataFrame(data=dict(max_deltas=md, iqr_deltas=iqrd))

	#log_df = np.log(df) / np.log(2)
	#log_df.columns = ["lg {}".format(foo) for foo in log_df.columns]
	#log_df.hist(normed=True)

	# NOTE(review): newer matplotlib spells ``normed`` as ``density`` --
	# confirm which versions this script must support.
	df.hist(normed=True)

	plt.show()

	# Drop into an interactive console with the variables above in scope.
	code.interact(local=locals())

# Demo: derive columns, print summary statistics, then plot random data.
# Every print uses the single-argument call form, which behaves the same as
# a Python 2 print statement and as the Python 3 print function.
# NOTE(review): two messages below contain mis-encoded characters
# (presumably "añadimos" and "correlación"); the literals are left
# untouched here -- confirm the file encoding before repairing them.
print(df)
print("aƱadimos columnas combinando las actuales")
df["C"] = df["A"]+df["B"]
df["D"] = df["A"]*3
df["E"] = np.sqrt(df["A"])
print(df)
print("*"*15)
print("Datos disponibles de un dataframe")
print(" descripcion del dataframe")
print(df.describe())
print(" covarianza ")
print(df.cov())
print(" correlaciĆ³n ")
print(df.corr())
print("*"*15)

print(" Creamos otro dataframe con valores aleatorios (1000 filas y 2 columnas ")
print(" DataFrame(np.random.randn(1000,2),columns=['x','y'])")
plot_df = DataFrame(np.random.randn(1000,2),columns=['x','y'])
print(plot_df)
print("Mostramos las graficas")
plot_df.plot()
plot_df.hist()







    
 def test_hist_bins_legacy(self):
     """``bins=2`` must give exactly two bars in the first histogram."""
     frame = DataFrame(np.random.randn(10, 2))
     grid = frame.hist(bins=2)
     first_ax = grid[0][0]
     self.assertEqual(len(first_ax.patches), 2)
                df.loc[l[i], 'Growth'] = (df.loc[l[i-1], 'Growth'] + trade_value) * (1 + df.loc[l[i], 'IVWReturn'])
                df.loc[l[i], 'Value'] = (df.loc[l[i-1], 'Value'] - trade_value) * (1 + df.loc[l[i], 'IVEReturn'])
                df.loc[l[i], 'InvestmentTotal'] = df.loc[l[i], 'Value'] + df.loc[l[i], 'Growth'] 
                df.loc[l[i], 'Growth%'] = df.loc[l[i],'Growth']/df.loc[l[i],'InvestmentTotal']
                df.loc[l[i], 'Value%'] = df.loc[l[i],'Value']/df.loc[l[i], 'InvestmentTotal']
                df.loc[l[i], 'Total%'] = df.loc[l[i], 'Growth%'] + df.loc[l[i], 'Value%']

    final_value = df.loc[l[len(l)-1], 'InvestmentTotal'] - df.loc[l[len(l)-1],'SP500Total']
    if math.isnan(final_value) == True:
        print("result removed it was nan")
        test = test + 1 
    else:
        results.append(final_value)
    print(results)
    test = test - 1 

# Collect the per-run results in a one-column frame and show their histogram.
dg = pd.Series(results, name='Results')
dff = DataFrame(dg)
print(dff)
dff.hist()
plt.show()

# Write the data to the "Data" sheet.  The with-block saves and closes the
# workbook even if to_excel raises.
with ExcelWriter('ValueGrowth.xlsx') as writer:
    df.to_excel(writer, sheet_name='Data')

# os.startfile only exists on Windows; elsewhere skip opening the workbook
# rather than aborting before the final plot.
if hasattr(os, 'startfile'):
    os.startfile('ValueGrowth.xlsx')

df.plot(y=['SP500Total', 'InvestmentTotal'])
plt.show()


# In[12]:

# Wrap Y in a DataFrame.
Y=DataFrame(Y)


# In[13]:

# Display the first row of Y.
Y.head(1)


### Univariate analysis

# In[14]:

X.hist()


##### These histograms depict the distribution of the 4 independent variables

##### The same analysis can also be done on a single variable

# In[15]:

X[0].hist()


##### we can also get the stats of that variable

# In[16]:
Example #27
0
# One aggregation group per distinct '$user' value; count the groups.
user_groups = dbclient.db_restT.user_tweets.aggregate(
    [{'$group': {'_id': '$user'}}])
print(len(list(user_groups)), "users with tweets")


# And finally we plot the lexical diversity:

# In[38]:

# Keep only the name and the lexical diversity (renamed to 'lex_div').
projection = {
    '$project': {
        '_id': 0,
        'name': '$name',
        'lex_div': '$lexical_diversity'}}
cursor=dbclient.db_restT.diversity.aggregate([projection])
lex_div=DataFrame(list(cursor))
lex_div.hist('lex_div', bins=50)


# 2.3: Track unfollows
# --
# Write a python program to create a db called db_followers that stores all the followers for all the users that you find in task 2.1. Then, write a program to find the un-followed friends after a week for the top 10 users( users that have the highest number of followers in task 2.1) since the time that you extracted the tweets. In other words, you need to look for the people following the top 10 users at time X (the time that you extracted the tweets) and then look at the people following the same top 10 users at a later time Y (one-week after X) to see who stopped following the top 10 users.

# First, make a db/table of top RT'ed users in which to store follower stats

# In[39]:

dbclient.drop_database('db_followers')
rows=[]
for row in dbclient.db_tweets.top_retweets.aggregate([
        {'$group': {
                '_id': '$user.id',