Example 1
    def test_panel_concat_other_axes(self):
        panel = tm.makePanel()

        p1 = panel.ix[:, :5, :]
        p2 = panel.ix[:, 5:, :]

        result = concat([p1, p2], axis=1)
        tm.assert_panel_equal(result, panel)

        p1 = panel.ix[:, :, :2]
        p2 = panel.ix[:, :, 2:]

        result = concat([p1, p2], axis=2)
        tm.assert_panel_equal(result, panel)

        # if things are a bit misbehaved
        p1 = panel.ix[:2, :, :2]
        p2 = panel.ix[:, :, 2:]
        p1['ItemC'] = 'baz'

        result = concat([p1, p2], axis=2)

        expected = panel.copy()
        expected['ItemC'] = expected['ItemC'].astype('O')
        expected.ix['ItemC', :, :2] = 'baz'
        tm.assert_panel_equal(result, expected)
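Note: Panel and the .ix indexer were removed in later pandas releases (Panel in 0.25, .ix in 1.0), so this example only runs against old pandas. A minimal sketch of the same idea on current pandas, reassembling an object from slices along the column axis with a plain DataFrame (assumed modern imports, not part of the original suite):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(6, 4), columns=list('abcd'))

    # split along the column axis, then reassemble with axis=1
    left, right = df.iloc[:, :2], df.iloc[:, 2:]
    result = pd.concat([left, right], axis=1)
    pd.testing.assert_frame_equal(result, df)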
Example 2
    def test_concat_keys_and_levels(self):
        df = DataFrame(np.random.randn(1, 3))
        df2 = DataFrame(np.random.randn(1, 4))

        levels = [['foo', 'baz'], ['one', 'two']]
        names = ['first', 'second']
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'),
                              ('baz', 'two')],
                        levels=levels,
                        names=names)
        expected = concat([df, df2, df, df2])
        exp_index = MultiIndex(levels=levels + [[0]],
                               labels=[[0, 0, 1, 1], [0, 1, 0, 1],
                                       [0, 0, 0, 0]],
                               names=names + [None])
        expected.index = exp_index

        assert_frame_equal(result, expected)

        # no names

        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'),
                              ('baz', 'two')],
                        levels=levels)
        self.assertEqual(result.index.names, [None] * 3)

        # no levels
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'), ('baz', 'one'),
                              ('baz', 'two')],
                        names=['first', 'second'])
        self.assertEqual(result.index.names, ['first', 'second'] + [None])
        self.assert_(np.array_equal(result.index.levels[0], ['baz', 'foo']))
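The keys/levels/names combination is still current API (only MultiIndex's labels argument was later renamed to codes). A small standalone sketch of the tuple-key behavior, assuming a modern pandas import style:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(1, 3))

    # tuple keys add two outer index levels above the original index
    result = pd.concat([df, df], keys=[('foo', 'one'), ('foo', 'two')],
                       names=['first', 'second', None])
    print(result.index.nlevels)   # 3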
Example 3
    def test_concat_with_group_keys(self):
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        # axis=0
        df = DataFrame(np.random.randn(3, 4))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1])
        exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1],
                                            [0, 1, 2, 0, 1, 2, 3]])
        expected = DataFrame(np.r_[df.values, df2.values], index=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1])
        exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                             [0, 1, 2, 0, 1, 2]])
        expected = DataFrame(np.r_[df.values, df.values], index=exp_index2)
        tm.assert_frame_equal(result, expected)

        # axis=1
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2)
        tm.assert_frame_equal(result, expected)
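The same keys mechanism works on both axes: with axis=0 the keys become the outer level of the row index, with axis=1 the outer level of the columns. A compact sketch (modern pandas assumed):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(3, 2), columns=['a', 'b'])

    stacked = pd.concat([df, df], keys=[0, 1])                # MultiIndex rows
    side_by_side = pd.concat([df, df], keys=[0, 1], axis=1)   # MultiIndex columns

    print(stacked.index.nlevels)           # 2
    print(side_by_side.columns.tolist())   # [(0, 'a'), (0, 'b'), (1, 'a'), (1, 'b')]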
Example 4
    def test_concat_keys_and_levels(self):
        df = DataFrame(np.random.randn(1, 3))
        df2 = DataFrame(np.random.randn(1, 4))

        levels = [["foo", "baz"], ["one", "two"]]
        names = ["first", "second"]
        result = concat(
            [df, df2, df, df2],
            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
            levels=levels,
            names=names,
        )
        expected = concat([df, df2, df, df2])
        exp_index = MultiIndex(
            levels=levels + [[0]], labels=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]], names=names + [None]
        )
        expected.index = exp_index

        assert_frame_equal(result, expected)

        # no names

        result = concat(
            [df, df2, df, df2], keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], levels=levels
        )
        self.assertEqual(result.index.names, [None] * 3)

        # no levels
        result = concat(
            [df, df2, df, df2],
            keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
            names=["first", "second"],
        )
        self.assertEqual(result.index.names, ["first", "second"] + [None])
        self.assert_(np.array_equal(result.index.levels[0], ["baz", "foo"]))
Example 5
    def test_join_dups(self):

        # joining dups
        df = concat([
            DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']),
            DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
                      columns=['A', 'C'])
        ],
                    axis=1)

        expected = concat([df, df], axis=1)
        result = df.join(df, rsuffix='_2')
        result.columns = expected.columns
        assert_frame_equal(result, expected)

        # GH 4975, invalid join on dups
        w = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
        x = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
        y = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
        z = DataFrame(np.random.randn(4, 2), columns=["x", "y"])

        dta = x.merge(y, left_index=True,
                      right_index=True).merge(z,
                                              left_index=True,
                                              right_index=True,
                                              how="outer")
        dta = dta.merge(w, left_index=True, right_index=True)
        expected = concat([x, y, z, w], axis=1)
        expected.columns = [
            'x_x', 'y_x', 'x_y', 'y_y', 'x_x', 'y_x', 'x_y', 'y_y'
        ]
        assert_frame_equal(dta, expected)
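As the test shows, DataFrame.join tolerates duplicate column labels as long as suffixes disambiguate them, and an index-aligned concat yields the same values without renaming. A hedged sketch of the suffixing behavior (modern pandas assumed):

    import numpy as np
    import pandas as pd

    x = pd.DataFrame(np.random.randn(4, 2), columns=['x', 'y'])
    y = pd.DataFrame(np.random.randn(4, 2), columns=['x', 'y'])

    # overlapping labels must be disambiguated with suffixes
    joined = x.join(y, lsuffix='_left', rsuffix='_right')
    print(joined.columns.tolist())   # ['x_left', 'y_left', 'x_right', 'y_right']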
Example 6
    def test_concat_keys_and_levels(self):
        df = DataFrame(np.random.randn(1, 3))
        df2 = DataFrame(np.random.randn(1, 4))

        levels = [['foo', 'baz'], ['one', 'two']]
        names = ['first', 'second']
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'),
                              ('baz', 'one'), ('baz', 'two')],
                        levels=levels,
                        names=names)
        expected = concat([df, df2, df, df2])
        exp_index = MultiIndex(levels=levels + [[0]],
                               labels=[[0, 0, 1, 1], [0, 1, 0, 1],
                                       [0, 0, 0, 0]],
                               names=names + [None])
        expected.index = exp_index

        assert_frame_equal(result, expected)

        # no names

        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'),
                              ('baz', 'one'), ('baz', 'two')],
                        levels=levels)
        self.assertEqual(result.index.names, [None] * 3)

        # no levels
        result = concat([df, df2, df, df2],
                        keys=[('foo', 'one'), ('foo', 'two'),
                              ('baz', 'one'), ('baz', 'two')],
                        names=['first', 'second'])
        self.assertEqual(result.index.names, ['first', 'second'] + [None])
        self.assert_(np.array_equal(result.index.levels[0], ['baz', 'foo']))
Example 7
    def test_join_dups(self):

        # joining dups
        df = concat([DataFrame(np.random.randn(10, 4),
                               columns=['A', 'A', 'B', 'B']),
                     DataFrame(np.random.randint(0, 10, size=20)
                               .reshape(10, 2),
                               columns=['A', 'C'])],
                    axis=1)

        expected = concat([df, df], axis=1)
        result = df.join(df, rsuffix='_2')
        result.columns = expected.columns
        assert_frame_equal(result, expected)

        # GH 4975, invalid join on dups
        w = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
        x = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
        y = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
        z = DataFrame(np.random.randn(4, 2), columns=["x", "y"])

        dta = x.merge(y, left_index=True, right_index=True).merge(
            z, left_index=True, right_index=True, how="outer")
        dta = dta.merge(w, left_index=True, right_index=True)
        expected = concat([x, y, z, w], axis=1)
        expected.columns = ['x_x', 'y_x', 'x_y',
                            'y_y', 'x_x', 'y_x', 'x_y', 'y_y']
        assert_frame_equal(dta, expected)
Example 8
    def test_concat_with_group_keys(self):
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        # axis=0
        df = DataFrame(np.random.randn(3, 4))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1])
        exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1],
                                            [0, 1, 2, 0, 1, 2, 3]])
        expected = DataFrame(np.r_[df.values, df2.values],
                             index=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1])
        exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
                                            [0, 1, 2, 0, 1, 2]])
        expected = DataFrame(np.r_[df.values, df.values],
                             index=exp_index2)
        tm.assert_frame_equal(result, expected)

        # axis=1
        df = DataFrame(np.random.randn(4, 3))
        df2 = DataFrame(np.random.randn(4, 4))

        result = concat([df, df2], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df2.values],
                             columns=exp_index)
        tm.assert_frame_equal(result, expected)

        result = concat([df, df], keys=[0, 1], axis=1)
        expected = DataFrame(np.c_[df.values, df.values],
                             columns=exp_index2)
        tm.assert_frame_equal(result, expected)
Example 9
    def test_concat_series_axis1(self):
        ts = tm.makeTimeSeries()

        pieces = [ts[:-2], ts[2:], ts[2:-2]]

        result = concat(pieces, axis=1)
        expected = DataFrame(pieces).T
        assert_frame_equal(result, expected)

        result = concat(pieces, keys=['A', 'B', 'C'], axis=1)
        expected = DataFrame(pieces, index=['A', 'B', 'C']).T
        assert_frame_equal(result, expected)
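Concatenating Series along axis=1 produces a DataFrame indexed by the union of the inputs' indexes, which is exactly what the DataFrame(pieces).T construction reproduces. A minimal sketch (modern pandas assumed):

    import pandas as pd

    s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
    s2 = pd.Series([4, 5], index=['a', 'b'])

    # keys name the resulting columns; missing rows are filled with NaN
    result = pd.concat([s1, s2], keys=['A', 'B'], axis=1)
    print(result)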
Example 10
def extend_contour_features(contour_data, extra_features):
    '''
    Extend the SALOMON features with added extra features.
    The extra features are inserted after the SALOMON features and before
    the remaining contour data starting at 'first_time' (e.g. contour bin data).

    Parameters
    ----------
    extra_features : DataFrame with the features to insert
    '''
    if extra_features is not None:
        dfFeatures = concat([contour_data.ix[:, 0:12], extra_features], axis=1)
        startIdx = contour_data.columns.get_loc('first_time')
        contour_data = concat([dfFeatures, contour_data.ix[:, startIdx:]],
                              axis=1)
    return contour_data
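The function splices the extra feature columns between two column slices of the frame using two concats. The .ix indexer only exists in old pandas; a hedged sketch of the same splice pattern with .iloc (column names here are illustrative, not from SALOMON):

    import pandas as pd

    df = pd.DataFrame({'f1': [1, 2], 'f2': [3, 4], 'first_time': [0.1, 0.2]})
    extra = pd.DataFrame({'extra1': [9, 9]})

    start = df.columns.get_loc('first_time')
    spliced = pd.concat([df.iloc[:, :start], extra, df.iloc[:, start:]], axis=1)
    print(spliced.columns.tolist())   # ['f1', 'f2', 'extra1', 'first_time']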
Example 11
    def _wrap_frames(self, keys, values, not_indexed_same=False):
        from pandas.tools.merge import concat

        if not_indexed_same:
            group_keys = keys
            group_levels = [ping.group_index for ping in self.groupings]
            group_names = [ping.name for ping in self.groupings]
            result = concat(values, axis=self.axis, keys=group_keys, levels=group_levels, names=group_names)
        else:
            result = concat(values, axis=self.axis)
            ax = self.obj._get_axis(self.axis)
            result = result.reindex_axis(ax, axis=self.axis)

        return result
Example 12
    def _wrap_frames(self, keys, values, not_indexed_same=False):
        from pandas.tools.merge import concat, _concat_frames_hierarchical

        if not_indexed_same:
            group_keys = keys
            group_levels = [ping.group_index for ping in self.groupings]
            group_names = [ping.name for ping in self.groupings]
            result = concat(values, axis=self.axis, keys=group_keys,
                            levels=group_levels, names=group_names)
        else:
            result = concat(values, axis=self.axis)
            ax = self.obj._get_axis(self.axis)
            result = result.reindex_axis(ax, axis=self.axis)

        return result
Example 13
    def get_pandas_df(self, bql, parameters=None, dialect='legacy'):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        :param parameters: The parameters to render the SQL query with (not
            used; present only to override the superclass method)
        :type parameters: mapping or iterable
        :param dialect: Dialect of BigQuery SQL – legacy SQL or standard SQL
        :type dialect: string in {'legacy', 'standard'}, default 'legacy'
        """
        service = self.get_service()
        project = self._get_field('project')
        connector = BigQueryPandasConnector(project, service, dialect=dialect)
        schema, pages = connector.run_query(bql)
        dataframe_list = []

        while len(pages) > 0:
            page = pages.pop()
            dataframe_list.append(gbq_parse_data(schema, page))

        if len(dataframe_list) > 0:
            return concat(dataframe_list, ignore_index=True)
        else:
            return gbq_parse_data(schema, [])
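The pattern at the end, accumulating one DataFrame per result page and then calling concat with ignore_index=True, yields a single frame with a fresh RangeIndex. A minimal sketch of just that pattern (illustrative data, not the BigQuery hook):

    import pandas as pd

    pages = [pd.DataFrame({'v': [1, 2]}), pd.DataFrame({'v': [3]})]
    combined = pd.concat(pages, ignore_index=True)
    print(combined.index.tolist())   # [0, 1, 2]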
Example 14
    def get_pandas_df(self, bql, parameters=None):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        """
        service = self.get_service()
        connection_extras = self._extras_dejson()
        project = connection_extras['project']
        connector = BigQueryPandasConnector(project, service)
        schema, pages = connector.run_query(bql, verbose=False)
        dataframe_list = []

        while len(pages) > 0:
            page = pages.pop()
            dataframe_list.append(gbq_parse_data(schema, page))

        if len(dataframe_list) > 0:
            return concat(dataframe_list, ignore_index=True)
        else:
            return gbq_parse_data(schema, [])
Example 15
    def get_pandas_df(self, bql, parameters=None, dialect='legacy'):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        :param parameters: The parameters to render the SQL query with (not used; present only to override the superclass method)
        :type parameters: mapping or iterable
        :param dialect: Dialect of BigQuery SQL – legacy SQL or standard SQL
        :type dialect: string in {'legacy', 'standard'}, default 'legacy'
        """
        service = self.get_service()
        project = self._get_field('project')
        connector = BigQueryPandasConnector(project, service, dialect=dialect)
        schema, pages = connector.run_query(bql)
        dataframe_list = []

        while len(pages) > 0:
            page = pages.pop()
            dataframe_list.append(gbq_parse_data(schema, page))

        if len(dataframe_list) > 0:
            return concat(dataframe_list, ignore_index=True)
        else:
            return gbq_parse_data(schema, [])
Example 16
    def test_concat_exclude_none(self):
        df = DataFrame(np.random.randn(10, 4))

        pieces = [df[:5], None, None, df[5:]]
        result = concat(pieces)
        tm.assert_frame_equal(result, df)
        self.assertRaises(Exception, concat, [None, None])
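concat silently drops None entries but raises if nothing is left to concatenate; modern pandas raises ValueError for that case. A quick sketch:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(4, 2))
    result = pd.concat([df[:2], None, df[2:]])   # the None piece is ignored
    pd.testing.assert_frame_equal(result, df)

    try:
        pd.concat([None, None])
    except ValueError as err:
        print(err)   # all objects passed were None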
Example 17
    def get_pandas_df(self, bql, parameters=None):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        """
        service = self.get_service()
        project = self._get_field('project')
        connector = BigQueryPandasConnector(project, service)
        schema, pages = connector.run_query(bql)
        dataframe_list = []

        while len(pages) > 0:
            page = pages.pop()
            dataframe_list.append(gbq_parse_data(schema, page))

        if len(dataframe_list) > 0:
            return concat(dataframe_list, ignore_index=True)
        else:
            return gbq_parse_data(schema, [])
Example 18
    def get_series(self, data, column=None, column_label=None):
        '''
        Get Quandl series.

        column : list
            the list of columns to get from Quandl
        column_label : list
            the corresponding labels for each column retrieved from Quandl
        '''
        # validate inputs before fetching anything
        if not column or len(column) != len(column_label):
            raise ValueError('column and column_label must be non-empty '
                             'lists of the same length')
        all_data = []
        for i, item in enumerate(column):
            URL = "%sdatasets/%s.json?column=%d&auth_token=otf6VxzVxjm5ZGLztqbG" % (self.Root, data, item)
            try:
                response = urlopen(URL)
                results = json.loads(response.read())
                points = {}
                for point in results['data']:
                    date = to_datetime(point[0], format='%Y-%m-%d')
                    if hasattr(date, 'to_datetime'):
                        date = date.to_datetime()
                    points[date] = point[1]
                data_label = column_label[i]
                all_data.append(DataFrame({data_label: Series(points)}))
            except HTTPError as exc:
                # read the error body only once; a second read() returns ''
                message = json.loads(exc.read())
                raise ValueError("For %s, %s" % (data, message['error']))

        return concat(all_data, axis=1, join='outer')
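The final concat with join='outer' aligns the per-column Series on the union of their dates, filling gaps with NaN. A small sketch of that alignment (illustrative data, not Quandl's):

    import pandas as pd

    a = pd.Series([1.0, 2.0], index=pd.to_datetime(['2020-01-01', '2020-01-02']))
    b = pd.Series([3.0], index=pd.to_datetime(['2020-01-02']))

    aligned = pd.concat([pd.DataFrame({'A': a}), pd.DataFrame({'B': b})],
                        axis=1, join='outer')
    print(aligned)   # 'B' is NaN on 2020-01-01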
Example 19
    def concat(self, frame, axis=0):
        if self._pandas:
            from pandas.tools.merge import concat

            return concat((self, frame), axis=axis)
        else:
            if axis == 0:
                if self._columns != frame._columns:
                    raise ValueError(
                        'Cannot concat two frames of different columns')

                return ResultFrame(self._values + frame._values,
                                   columns=self._columns,
                                   index=self._index + frame._index,
                                   pandas=self._pandas)
            else:
                if self._index != frame._index:
                    raise ValueError(
                        'Cannot concat two frames of different indexes')

                values = [
                    val + other
                    for val, other in zip(self._values, frame._values)
                ]
                return ResultFrame(values,
                                   self._columns + frame._columns,
                                   index=self._index,
                                   pandas=self._pandas)
Example 20
    def test_concat_dataframe_keys_bug(self):
        t1 = DataFrame({"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))})
        t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))})

        # it works
        result = concat([t1, t2], axis=1, keys=["t1", "t2"])
        self.assertEqual(list(result.columns), [("t1", "value"), ("t2", "value")])
Example 21
def _generate_marginal_results(table, data, values, rows, cols, aggfunc,
                               grand_margin,
                               margins_name='All'):
    if len(cols) > 0:
        # need to "interleave" the margins
        table_pieces = []
        margin_keys = []

        def _all_key(key):
            return (key, margins_name) + ('',) * (len(cols) - 1)

        if len(rows) > 0:
            margin = data[rows + values].groupby(rows).agg(aggfunc)
            cat_axis = 1

            for key, piece in table.groupby(level=0, axis=cat_axis):
                all_key = _all_key(key)

                # we are going to mutate this, so need to copy!
                piece = piece.copy()
                try:
                    piece[all_key] = margin[key]
                except TypeError:

                    # we cannot reshape, so coerce the axis
                    piece.set_axis(cat_axis, piece._get_axis(
                        cat_axis)._to_safe_for_reshape())
                    piece[all_key] = margin[key]

                table_pieces.append(piece)
                margin_keys.append(all_key)
        else:
            margin = grand_margin
            cat_axis = 0
            for key, piece in table.groupby(level=0, axis=cat_axis):
                all_key = _all_key(key)
                table_pieces.append(piece)
                table_pieces.append(Series(margin[key], index=[all_key]))
                margin_keys.append(all_key)

        result = concat(table_pieces, axis=cat_axis)

        if len(rows) == 0:
            return result
    else:
        result = table
        margin_keys = table.columns

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(cols).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        new_order = [len(cols)] + lrange(len(cols))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    return result, margin_keys, row_margin
Example 22
    def test_pivot_multi_functions(self):
        f = lambda func: pivot_table(self.data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func)
        result = f([np.mean, np.std])
        means = f(np.mean)
        stds = f(np.std)
        expected = concat([means, stds], keys=["mean", "std"], axis=1)
        tm.assert_frame_equal(result, expected)

        # margins not supported??
        f = lambda func: pivot_table(
            self.data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func, margins=True
        )
        result = f([np.mean, np.std])
        means = f(np.mean)
        stds = f(np.std)
        expected = concat([means, stds], keys=["mean", "std"], axis=1)
        tm.assert_frame_equal(result, expected)
Example 23
    def test_concat_keys_specific_levels(self):
        df = DataFrame(np.random.randn(10, 4))
        pieces = [df.ix[:, [0, 1]], df.ix[:, [2]], df.ix[:, [3]]]
        level = ["three", "two", "one", "zero"]
        result = concat(pieces, axis=1, keys=["one", "two", "three"], levels=[level], names=["group_key"])

        self.assert_(np.array_equal(result.columns.levels[0], level))
        self.assertEqual(result.columns.names[0], "group_key")
Example 24
    def _wrap_frames(self, keys, values, not_indexed_same=False):
        from pandas.tools.merge import concat

        if not_indexed_same:
            group_keys = keys
            group_levels = self.grouper.levels
            group_names = self.grouper.names
            result = concat(values,
                            axis=self.axis,
                            keys=group_keys,
                            levels=group_levels,
                            names=group_names)
        else:
            result = concat(values, axis=self.axis)
            ax = self.obj._get_axis(self.axis)
            result = result.reindex_axis(ax, axis=self.axis)

        return result
Example 25
    def test_pivot_multi_functions(self):
        f = lambda func: pivot_table(self.data, values=['D', 'E'],
                                     rows=['A', 'B'], cols='C',
                                     aggfunc=func)
        result = f([np.mean, np.std])
        means = f(np.mean)
        stds = f(np.std)
        expected = concat([means, stds], keys=['mean', 'std'], axis=1)
        tm.assert_frame_equal(result, expected)

        # margins not supported??
        f = lambda func: pivot_table(self.data, values=['D', 'E'],
                                     rows=['A', 'B'], cols='C',
                                     aggfunc=func, margins=True)
        result = f([np.mean, np.std])
        means = f(np.mean)
        stds = f(np.std)
        expected = concat([means, stds], keys=['mean', 'std'], axis=1)
        tm.assert_frame_equal(result, expected)
Example 26
    def test_concat_series(self):
        ts = tm.makeTimeSeries()
        ts.name = 'foo'

        pieces = [ts[:5], ts[5:15], ts[15:]]

        result = concat(pieces)
        tm.assert_series_equal(result, ts)
        self.assertEqual(result.name, ts.name)

        result = concat(pieces, keys=[0, 1, 2])
        expected = ts.copy()

        exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]),
                      np.arange(len(ts))]
        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index],
                               labels=exp_labels)
        expected.index = exp_index
        tm.assert_series_equal(result, expected)
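With Series inputs and scalar keys, concat builds a two-level index: the key first, then the original index (the MultiIndex labels argument used in the test is called codes in modern pandas). A compact sketch:

    import pandas as pd

    s = pd.Series([1.0, 2.0, 3.0, 4.0])
    pieces = [s[:2], s[2:]]

    result = pd.concat(pieces, keys=[0, 1])
    print(result.index.tolist())   # [(0, 0), (0, 1), (1, 2), (1, 3)]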
Example 27
    def test_concat_keys_specific_levels(self):
        df = DataFrame(np.random.randn(10, 4))
        pieces = [df.ix[:, [0, 1]], df.ix[:, [2]], df.ix[:, [3]]]
        level = ['three', 'two', 'one', 'zero']
        result = concat(pieces, axis=1, keys=['one', 'two', 'three'],
                        levels=[level],
                        names=['group_key'])

        self.assert_(np.array_equal(result.columns.levels[0], level))
        self.assertEqual(result.columns.names[0], 'group_key')
Example 28
    def test_concat_dataframe_keys_bug(self):
        t1 = DataFrame({'value': Series([1,2,3],
                       index=Index(['a', 'b', 'c'], name='id'))})
        t2 = DataFrame({'value': Series([7, 8],
                       index=Index(['a', 'b'], name = 'id'))})

        # it works
        result = concat([t1, t2], axis=1, keys=['t1', 't2'])
        self.assertEqual(list(result.columns), [('t1', 'value'),
                                                ('t2', 'value')])
Example 29
    def test_concat_series(self):
        ts = tm.makeTimeSeries()
        ts.name = "foo"

        pieces = [ts[:5], ts[5:15], ts[15:]]

        result = concat(pieces)
        tm.assert_series_equal(result, ts)
        self.assertEqual(result.name, ts.name)

        result = concat(pieces, keys=[0, 1, 2])
        expected = ts.copy()

        ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[us]"))

        exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]), np.arange(len(ts))]
        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], labels=exp_labels)
        expected.index = exp_index
        tm.assert_series_equal(result, expected)
Example 30
    def test_pivot_multi_functions(self):
        f = lambda func: pivot_table(self.data, values=['D', 'E'],
                                     index=['A', 'B'], columns='C',
                                     aggfunc=func)
        result = f([np.mean, np.std])
        means = f(np.mean)
        stds = f(np.std)
        expected = concat([means, stds], keys=['mean', 'std'], axis=1)
        tm.assert_frame_equal(result, expected)

        # margins not supported??
        f = lambda func: pivot_table(self.data, values=['D', 'E'],
                                     index=['A', 'B'], columns='C',
                                     aggfunc=func, margins=True)
        result = f([np.mean, np.std])
        means = f(np.mean)
        stds = f(np.std)
        expected = concat([means, stds], keys=['mean', 'std'], axis=1)
        tm.assert_frame_equal(result, expected)
Example 31
    def _wrap_frames(self, keys, values, not_indexed_same=False):
        from pandas.tools.merge import concat, _concat_frames_hierarchical

        if not_indexed_same:
            result = _concat_frames_hierarchical(values, keys,
                                                 self.groupings,
                                                 axis=self.axis)
        else:
            result = concat(values, axis=0).reindex(self.obj.index)

        return result
Example 32
    def test_concat_series(self):
        ts = tm.makeTimeSeries()
        ts.name = 'foo'

        pieces = [ts[:5], ts[5:15], ts[15:]]

        result = concat(pieces)
        tm.assert_series_equal(result, ts)
        self.assertEqual(result.name, ts.name)

        result = concat(pieces, keys=[0, 1, 2])
        expected = ts.copy()

        exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]),
                      np.arange(len(ts))]
        exp_index = MultiIndex(levels=[[0, 1, 2], ts.index],
                               labels=exp_labels)
        expected.index = exp_index
        tm.assert_series_equal(result, expected)

        self.assertRaises(Exception, concat, pieces, axis=1)
Example 33
    def _wrap_frames(self, keys, values, not_indexed_same=False):
        from pandas.tools.merge import concat, _concat_frames_hierarchical

        if not_indexed_same:
            result = _concat_frames_hierarchical(values,
                                                 keys,
                                                 self.groupings,
                                                 axis=self.axis)
        else:
            result = concat(values, axis=0).reindex(self.obj.index)

        return result
Example 34
    def transform(self, func, *args, **kwargs):
        """
        Call function producing a like-indexed DataFrame on each group and
        return a DataFrame having the same indexes as the original object
        filled with the transformed values

        Parameters
        ----------
        f : function
            Function to apply to each subframe

        Note
        ----
        Each subframe is endowed with the attribute 'name' in case you need to know
        which group you are working on.

        Example
        --------
        >>> grouped = df.groupby(lambda x: mapping[x])
        >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
        """
        from pandas.tools.merge import concat

        applied = []

        obj = self._obj_with_exclusions
        for name, group in self:
            group.name = name

            try:
                wrapper = lambda x: func(x, *args, **kwargs)
                res = group.apply(wrapper, axis=self.axis)
            except Exception:  # pragma: no cover
                res = func(group, *args, **kwargs)

            # broadcasting
            if isinstance(res, Series):
                if res.index is obj.index:
                    group.T.values[:] = res
                else:
                    group.values[:] = res

                applied.append(group)
            else:
                applied.append(res)

        concat_index = obj.columns if self.axis == 0 else obj.index
        concatenated = concat(applied,
                              join_axes=[concat_index],
                              axis=self.axis,
                              verify_integrity=False)
        return concatenated.reindex_like(obj)
Example 35
    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join items with other Panel on the major and minor axes.

        Parameters
        ----------
        other : Panel or list of Panels
            Index should be similar to one of the columns in this one
        how : {'left', 'right', 'outer', 'inner'}
            How to handle indexes of the two objects. Default: 'left'
            for joining on index, None otherwise
            * left: use calling frame's index
            * right: use input frame's index
            * outer: form union of indexes
            * inner: use intersection of indexes
        lsuffix : string
            Suffix to use from left frame's overlapping columns
        rsuffix : string
            Suffix to use from right frame's overlapping columns

        Returns
        -------
        joined : Panel
        """
        from pandas.tools.merge import concat

        if isinstance(other, Panel):
            join_major, join_minor = self._get_join_index(other, how)
            this = self.reindex(major=join_major, minor=join_minor)
            other = other.reindex(major=join_major, minor=join_minor)
            merged_data = this._data.merge(other._data, lsuffix, rsuffix)
            return self._constructor(merged_data)
        else:
            if lsuffix or rsuffix:
                raise ValueError(
                    'Suffixes not supported when passing multiple '
                    'panels')

            if how == 'left':
                how = 'outer'
                join_axes = [self.major_axis, self.minor_axis]
            elif how == 'right':
                raise ValueError('Right join not supported with multiple '
                                 'panels')
            else:
                join_axes = None

            return concat([self] + list(other),
                          axis=0,
                          join=how,
                          join_axes=join_axes,
                          verify_integrity=True)
Example 36
File: ma.py Project: martin1/thesis
    def forecast(self, forecast_start_str, forecast_period_in_days, periods_of_data_to_use):
        '''Perform the forecast and return it as a pandas Series object.'''
        # create the forecast index
        forecast_index = date_range(forecast_start_str, periods=forecast_period_in_days)
        # extract only the data needed for the first moving-average calculation
        data_series = self.training_ts.tail(periods_of_data_to_use)
        forecast = Series()
        for time in forecast_index:
            # the forecast value is the last entry of the rolling mean; all
            # earlier entries are NaN because of the window length
            if self.forecast_method == 'ma':
                # forecast using the simple moving average
                forecast_value = rolling_mean(data_series, periods_of_data_to_use).iloc[-1]
            elif self.forecast_method == 'ewma':
                # forecast using the exponentially weighted moving average
                forecast_value = ewma(data_series, span=periods_of_data_to_use).iloc[-1]
            # drop the first value; it is not needed for the next forecast
            data_series = data_series[1:]
            # append the forecast value so it feeds the next iteration's MA
            data_series = concat([data_series, Series(forecast_value, index=[time])])
            forecast = concat([forecast, Series(forecast_value, index=[time])])
        return forecast
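Growing a Series by repeated concat inside a loop is quadratic in the number of appends; the usual advice is to collect the pieces in a list and concat once. A sketch of the cheaper pattern (illustrative, not the thesis code):

    import pandas as pd

    pieces = []
    for time, value in [('2020-01-01', 1.0), ('2020-01-02', 2.0)]:
        pieces.append(pd.Series([value], index=pd.to_datetime([time])))
    forecast = pd.concat(pieces)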
Example 37
    def test_concat_multiindex_with_keys(self):
        index = MultiIndex(
            levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
            labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
            names=["first", "second"],
        )
        frame = DataFrame(np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp"))
        result = concat([frame, frame], keys=[0, 1], names=["iteration"])

        self.assertEqual(result.index.names, ["iteration"] + index.names)
        tm.assert_frame_equal(result.ix[0], frame)
        tm.assert_frame_equal(result.ix[1], frame)
        self.assertEqual(result.index.nlevels, 3)
Example 38
    def transform(self, func, *args, **kwargs):
        """
        Call function producing a like-indexed DataFrame on each group and
        return a DataFrame having the same indexes as the original object
        filled with the transformed values

        Parameters
        ----------
        f : function
            Function to apply to each subframe

        Note
        ----
        Each subframe is endowed with the attribute 'name' in case you need to know
        which group you are working on.

        Example
        --------
        >>> grouped = df.groupby(lambda x: mapping[x])
        >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
        """
        from pandas.tools.merge import concat

        applied = []

        obj = self._obj_with_exclusions
        for name, group in self:
            group.name = name

            try:
                wrapper = lambda x: func(x, *args, **kwargs)
                res = group.apply(wrapper, axis=self.axis)
            except Exception: # pragma: no cover
                res = func(group, *args, **kwargs)

            # broadcasting
            if isinstance(res, Series):
                if res.index is obj.index:
                    group.T.values[:] = res
                else:
                    group.values[:] = res

                applied.append(group)
            else:
                applied.append(res)

        concat_index = obj.columns if self.axis == 0 else obj.index
        concatenated = concat(applied, join_axes=[concat_index],
                              axis=self.axis, verify_integrity=False)
        return concatenated.reindex_like(obj)
Example 39
    def test_concat_multiindex_with_keys(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['first', 'second'])
        frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=Index(['A', 'B', 'C'], name='exp'))
        result = concat([frame, frame], keys=[0, 1], names=['iteration'])

        self.assertEqual(result.index.names, ['iteration'] + index.names)
        tm.assert_frame_equal(result.ix[0], frame)
        tm.assert_frame_equal(result.ix[1], frame)
        self.assertEqual(result.index.nlevels, 3)
Example 40
def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean):
    if len(cols) > 0:
        col_margin = data[rows + values].groupby(rows).agg(aggfunc)

        # need to "interleave" the margins
        table_pieces = []
        margin_keys = []
        for key, piece in table.groupby(level=0, axis=1):
            all_key = (key, 'All') + ('',) * (len(cols) - 1)
            piece[all_key] = col_margin[key]
            table_pieces.append(piece)
            margin_keys.append(all_key)

        result = concat(table_pieces, axis=1)
    else:
        result = table
        margin_keys = table.columns

    grand_margin = {}
    for k, v in data[values].iteritems():
        try:
            grand_margin[k] = aggfunc(v)
        except TypeError:
            pass

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(cols).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        new_order = [len(cols)] + range(len(cols))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    key = ('All',) + ('',) * (len(rows) - 1)

    row_margin = row_margin.reindex(result.columns)
    # populate grand margin
    for k in margin_keys:
        if len(cols) > 0:
            row_margin[k] = grand_margin[k[0]]
        else:
            row_margin[k] = grand_margin[k]

    margin_dummy = DataFrame(row_margin, columns=[key]).T
    result = result.append(margin_dummy)

    return result
Example 41
def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean):
    if len(cols) > 0:
        col_margin = data[rows + values].groupby(rows).agg(aggfunc)

        # need to "interleave" the margins
        table_pieces = []
        margin_keys = []
        for key, piece in table.groupby(level=0, axis=1):
            all_key = (key, 'All') + ('', ) * (len(cols) - 1)
            piece[all_key] = col_margin[key]
            table_pieces.append(piece)
            margin_keys.append(all_key)

        result = concat(table_pieces, axis=1)
    else:
        result = table
        margin_keys = table.columns

    grand_margin = {}
    for k, v in data[values].iteritems():
        try:
            grand_margin[k] = aggfunc(v)
        except TypeError:
            pass

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(cols).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        new_order = [len(cols)] + range(len(cols))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    key = ('All', ) + ('', ) * (len(rows) - 1)

    row_margin = row_margin.reindex(result.columns)
    # populate grand margin
    for k in margin_keys:
        if len(cols) > 0:
            row_margin[k] = grand_margin[k[0]]
        else:
            row_margin[k] = grand_margin[k]

    margin_dummy = DataFrame(row_margin, columns=[key]).T
    result = result.append(margin_dummy)

    return result
Example 42
    def test_handle_empty_objects(self):
        df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))

        baz = df[:5]
        baz['foo'] = 'bar'
        empty = df[5:5]

        frames = [baz, empty, empty, df[5:]]
        concatted = concat(frames, axis=0)

        expected = df.ix[:, ['a', 'b', 'c', 'd', 'foo']]
        expected['foo'] = expected['foo'].astype('O')
        expected['foo'][:5] = 'bar'

        tm.assert_frame_equal(concatted, expected)
Example 43
    def test_concat_ignore_index(self):
        frame1 = DataFrame({"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]})
        frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
        frame1.index = Index(["x", "y", "z"])
        frame2.index = Index(["x", "y", "q"])

        v1 = concat([frame1, frame2], axis=1, ignore_index=True)

        nan = np.nan
        expected = DataFrame(
            [[nan, nan, nan, 4.3], ["a", 1, 4.5, 5.2], ["b", 2, 3.2, 2.2], ["c", 3, 1.2, nan]],
            index=Index(["q", "x", "y", "z"]),
        )

        tm.assert_frame_equal(v1, expected)
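Note that ignore_index with axis=1 renumbers the columns, not the rows; the rows are still aligned on the union of the indexes, which is why the expected frame above is ordered by the combined index. A minimal sketch (modern pandas assumed):

    import pandas as pd

    f1 = pd.DataFrame({'a': [1, 2]}, index=['x', 'y'])
    f2 = pd.DataFrame({'b': [3, 4]}, index=['y', 'q'])

    v = pd.concat([f1, f2], axis=1, ignore_index=True)
    print(v.columns.tolist())   # [0, 1]
    print(sorted(v.index))      # ['q', 'x', 'y']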
Example 44
    def test_handle_empty_objects(self):
        df = DataFrame(np.random.randn(10, 4), columns=list("abcd"))

        baz = df[:5]
        baz["foo"] = "bar"
        empty = df[5:5]

        frames = [baz, empty, empty, df[5:]]
        concatted = concat(frames, axis=0)

        expected = df.ix[:, ["a", "b", "c", "d", "foo"]]
        expected["foo"] = expected["foo"].astype("O")
        expected["foo"][:5] = "bar"

        tm.assert_frame_equal(concatted, expected)
Example 45
    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join items with other Panel on the major and minor axes.

        Parameters
        ----------
        other : Panel or list of Panels
            Index should be similar to one of the columns in this one
        how : {'left', 'right', 'outer', 'inner'}
            How to handle indexes of the two objects. Default: 'left'
            for joining on index, None otherwise
            * left: use calling frame's index
            * right: use input frame's index
            * outer: form union of indexes
            * inner: use intersection of indexes
        lsuffix : string
            Suffix to use from left frame's overlapping columns
        rsuffix : string
            Suffix to use from right frame's overlapping columns

        Returns
        -------
        joined : Panel
        """
        from pandas.tools.merge import concat

        if isinstance(other, Panel):
            join_major, join_minor = self._get_join_index(other, how)
            this = self.reindex(major=join_major, minor=join_minor)
            other = other.reindex(major=join_major, minor=join_minor)
            merged_data = this._data.merge(other._data, lsuffix, rsuffix)
            return self._constructor(merged_data)
        else:
            if lsuffix or rsuffix:
                raise ValueError('Suffixes not supported when passing '
                                 'multiple panels')

            if how == 'left':
                how = 'outer'
                join_axes = [self.major_axis, self.minor_axis]
            elif how == 'right':
                raise ValueError('Right join not supported with multiple '
                                 'panels')
            else:
                join_axes = None

            return concat([self] + list(other), axis=0, join=how,
                          join_axes=join_axes, verify_integrity=True)
Example 46
def _generate_marginal_results(table, data, values, rows, cols, aggfunc,
                               grand_margin):
    if len(cols) > 0:
        # need to "interleave" the margins
        table_pieces = []
        margin_keys = []

        def _all_key(key):
            return (key, 'All') + ('', ) * (len(cols) - 1)

        if len(rows) > 0:
            margin = data[rows + values].groupby(rows).agg(aggfunc)
            cat_axis = 1
            for key, piece in table.groupby(level=0, axis=cat_axis):
                all_key = _all_key(key)
                piece[all_key] = margin[key]
                table_pieces.append(piece)
                margin_keys.append(all_key)
        else:
            margin = grand_margin
            cat_axis = 0
            for key, piece in table.groupby(level=0, axis=cat_axis):
                all_key = _all_key(key)
                table_pieces.append(piece)
                table_pieces.append(Series(margin[key], index=[all_key]))
                margin_keys.append(all_key)

        result = concat(table_pieces, axis=cat_axis)

        if len(rows) == 0:
            return result
    else:
        result = table
        margin_keys = table.columns

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(cols).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        new_order = [len(cols)] + lrange(len(cols))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    return result, margin_keys, row_margin
Example 47
    def _aggregate_multiple_funcs(self, arg, _level):
        from pandas.tools.merge import concat

        if self.axis != 0:
            raise NotImplementedError("axis other than 0 is not supported")

        if self._selected_obj.ndim == 1:
            obj = self._selected_obj
        else:
            obj = self._obj_with_exclusions

        results = []
        keys = []

        # degenerate case
        if obj.ndim == 1:
            for a in arg:
                try:
                    colg = self._gotitem(obj.name, ndim=1, subset=obj)
                    results.append(colg.aggregate(a))

                    # make sure we find a good name
                    name = com._get_callable_name(a) or a
                    keys.append(name)
                except (TypeError, DataError):
                    pass
                except SpecificationError:
                    raise

        # multiples
        else:
            for col in obj:
                try:
                    colg = self._gotitem(col, ndim=1, subset=obj[col])
                    results.append(colg.aggregate(arg))
                    keys.append(col)
                except (TypeError, DataError):
                    pass
                except SpecificationError:
                    raise

        if _level:
            keys = None
        result = concat(results, keys=keys, axis=1)

        return result
Example 48
    def test_concat_ignore_index(self):
        frame1 = DataFrame({
            "test1": ["a", "b", "c"],
            "test2": [1, 2, 3],
            "test3": [4.5, 3.2, 1.2]
        })
        frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
        frame1.index = Index(["x", "y", "z"])
        frame2.index = Index(["x", "y", "q"])

        v1 = concat([frame1, frame2], axis=1, ignore_index=True)

        nan = np.nan
        expected = DataFrame([[nan, nan, nan, 4.3], ['a', 1, 4.5, 5.2],
                              ['b', 2, 3.2, 2.2], ['c', 3, 1.2, nan]],
                             index=Index(["q", "x", "y", "z"]))

        tm.assert_frame_equal(v1, expected)
Example 49
    def get_pandas_df(self, bql, parameters=None):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery query.
        """
        service = self.get_conn()
        connection_info = self.get_connection(self.bigquery_conn_id)
        connection_extras = connection_info.extra_dejson
        project = connection_extras['project']
        connector = BigQueryPandasConnector(project, service)
        schema, pages = connector.run_query(bql, verbose=False)
        dataframe_list = []

        while len(pages) > 0:
            page = pages.pop()
            dataframe_list.append(gbq_parse_data(schema, page))

        if len(dataframe_list) > 0:
            return concat(dataframe_list, ignore_index=True)
        else:
            return gbq_parse_data(schema, [])
Example 50
    def describe(self):
        """ Describes this Categorical

        Returns
        -------
        description: `DataFrame`
            A dataframe with frequency and counts by level.
        """
        # Hack?
        from pandas.core.frame import DataFrame
        counts = DataFrame({
            'codes': self._codes,
            'values': self._codes
        }).groupby('codes').count()

        freqs = counts / float(counts.sum())

        from pandas.tools.merge import concat
        result = concat([counts, freqs], axis=1)
        result.columns = ['counts', 'freqs']

        # fill in the real levels
        check = result.index == -1
        if check.any():
            # Sort -1 (=NaN) to the last position
            index = np.arange(0, len(self.levels) + 1, dtype='int64')
            index[-1] = -1
            result = result.reindex(index)
            # build new index
            levels = np.arange(0, len(self.levels) + 1, dtype=object)
            levels[:-1] = self.levels
            levels[-1] = np.nan
            result.index = levels.take(com._ensure_platform_int(result.index))
        else:
            result.index = self.levels.take(
                com._ensure_platform_int(result.index))
            result = result.reindex(self.levels)
        result.index.name = 'levels'

        return result
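The core of this describe is a concat along the columns: counts and frequencies share the code index, so they line up column by column. A hedged modern equivalent using only public API (value_counts gives the same numbers as the internal groupby):

    import pandas as pd

    cat = pd.Categorical(['a', 'b', 'a', 'a'])
    counts = pd.Series(cat).value_counts(sort=False)
    freqs = counts / counts.sum()

    result = pd.concat([counts, freqs], axis=1)
    result.columns = ['counts', 'freqs']
    print(result)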
Example 51
def main():
    try:
        print "Attempting to read pickle-d DataFrames."
        df = {}
        df['survival'] = pd.DataFrame()
        df['mortality'] = pd.DataFrame()

        df['survival'] = pd.read_pickle('data/survival.pkl')
        df['mortality'] = pd.read_pickle('data/mortality.pkl')
        print "DataFrames imported!"
    except Exception:
        print "Reading failed! Creating DataFrames."
        df = create_df()
    finally:

        # print "Creating mortality scatter plot matrix."
        # plt.figure()
        # pd.tools.plotting.scatter_matrix(df['mortality'])
        # F = plt.gcf()
        # F.set_size_inches((50, 50))
        # F.savefig('graphs/scatter_mortality.png',
        #           bbox_inches='tight',
        #           dpi=150)
        #
        # print "Creating survival scatter plot matrix."
        # plt.figure()
        # pd.tools.plotting.scatter_matrix(df['survival'])
        # F = plt.gcf()
        # F.set_size_inches((50, 50))
        # F.savefig('graphs/scatter_survival.png',
        #           bbox_inches='tight',
        #           dpi=150)

        print "Creating Andrew plot."
        plt.figure()
        pd.tools.plotting.andrews_curves(
            concat([df['mortality'][0::10], df['survival'][0::10]]), 'death')
        F = plt.gcf()
        F.set_size_inches((10, 10))
        F.savefig('graphs/andrews_curves.png', bbox_inches='tight', dpi=150)
Example 52
    def _aggregate_multiple_funcs(self, arg):
        from pandas.tools.merge import concat

        if self.axis != 0:
            raise NotImplementedError

        obj = self._obj_with_exclusions

        results = []
        keys = []
        for col in obj:
            try:
                colg = SeriesGroupBy(obj[col], column=col,
                                     groupings=self.groupings)
                results.append(colg.agg(arg))
                keys.append(col)
            except TypeError:
                pass

        result = concat(results, keys=keys, axis=1)

        return result