コード例 #1
0
    def sheet_to_df(self, index=1, header_rows=1, start_row=1, sheet=None):
        """Read the currently open worksheet into a DataFrame.

        Parameters
        ----------
        index : int
            col number of index column, 0 or None for no index (default 1)
        header_rows : int
            number of rows that represent headers (default 1)
        start_row : int
            row number for first row of headers or data (default 1)
        sheet : str,int
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
            (default None)

        Returns
        -------
        DataFrame
            DataFrame with the data from the Worksheet

        Raises
        ------
        NoWorksheetException
            if no worksheet is open once the optional ``sheet`` was handled
        MissMatchException
            if there is data and the number of parsed headers differs from
            the number of data columns

        """
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise NoWorksheetException("No open worksheet")

        raw_values = self._retry_func(self.sheet.get_all_values)
        # Drop everything above the first row of interest (start_row is 1-based
        # relative to the slice: row 1 keeps all rows).
        rows = self._fix_merge_values(raw_values)[start_row - 1 :]

        headers = parse_sheet_headers(rows, header_rows)
        data_rows = rows[header_rows or 0 :]

        # Blank strings become NaN so fully blank rows can be dropped; the
        # blanks that remain are then restored to ''.
        frame = pd.DataFrame(data_rows).replace("", np.nan)
        frame = frame.dropna(how="all")
        frame = frame.fillna("")

        if headers is not None:
            if len(frame.columns) != len(headers):
                if len(frame) != 0:
                    raise MissMatchException(
                        "Column headers don't match number of data columns"
                    )
                # Headers but no data: put the headers on the empty frame.
                frame = frame.reindex(columns=headers)
            else:
                frame.columns = headers

        return parse_sheet_index(frame, index)
コード例 #2
0
    def sheet_to_df(self,
                    index=1,
                    headers=1,
                    header_rows=1,
                    start_row=1,
                    sheet=None):
        """
        Pull a worksheet into a DataFrame.

        :param int index: col number of index column, 0 or None for no index (default 1)
        :param int headers: (DEPRECATED - use `header_rows`) number of rows that represent
            headers (default 1)
        :param int header_rows: number of rows that represent headers (default 1)
        :param int start_row: row number for first row of headers or data (default 1)
        :param str,int sheet: optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>` (default None)

        :returns: a DataFrame with the data from the Worksheet
        :raises Exception: if no worksheet is open, or if there is data and the number
            of parsed headers differs from the number of data columns
        """
        # Compare against None rather than truthiness so that a falsy but
        # explicit selector such as sheet=0 is still passed to open_sheet.
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise Exception("No open worksheet")

        # Any non-default value of the deprecated argument overrides header_rows.
        if headers != 1:
            deprecate("headers has been deprecated, use header_rows instead")
            header_rows = headers

        vals = self._retry_get_all_values()
        vals = self._fix_merge_values(vals)[start_row - 1:]

        col_names = parse_sheet_headers(vals, header_rows)

        # remove rows where everything is null, then replace nulls with ''
        df = (
            pd.DataFrame(vals[header_rows or 0:])
            .replace('', np.nan)
            .dropna(how='all')
            .fillna('')
        )

        if col_names is not None:
            if len(df.columns) == len(col_names):
                df.columns = col_names
            elif len(df) == 0:
                # if we have headers but no data, set column headers on empty DF
                df = df.reindex(columns=col_names)
            else:
                raise Exception(
                    "Column headers don't match number of data columns")

        return parse_sheet_index(df, index)
コード例 #3
0
ファイル: util_test.py プロジェクト: graingert/gspread-pandas
 def test_multiheader_blank_bottom(self, df_multiheader_blank_bottom):
     """Indexing the blank-bottom multiheader fixture on column 1 names the index 'col1'."""
     indexed = util.parse_sheet_index(df_multiheader_blank_bottom, 1)
     assert indexed.index.name == "col1"
コード例 #4
0
ファイル: util_test.py プロジェクト: graingert/gspread-pandas
 def test_multiheader_blank_top(self, df_multiheader_blank_top):
     """Indexing the blank-top multiheader fixture on column 1 names the index 'subcol1'."""
     indexed = util.parse_sheet_index(df_multiheader_blank_top, 1)
     assert indexed.index.name == "subcol1"
コード例 #5
0
ファイル: util_test.py プロジェクト: graingert/gspread-pandas
 def test_multiheader2(self, df_multiheader):
     """Indexing the multiheader fixture on column 2 names the index 'subcol2'."""
     indexed = util.parse_sheet_index(df_multiheader, 2)
     assert indexed.index.name == "subcol2"
コード例 #6
0
ファイル: util_test.py プロジェクト: graingert/gspread-pandas
 def test_noop(self, df):
     """With index 0 the resulting index is still named 'test_index'."""
     result = util.parse_sheet_index(df, 0)
     assert result.index.name == "test_index"
コード例 #7
0
ファイル: util_test.py プロジェクト: graingert/gspread-pandas
 def test_normal(self, df):
     """With index 1 the resulting index is named 'col1'."""
     result = util.parse_sheet_index(df, 1)
     assert result.index.name == "col1"
コード例 #8
0
    def sheet_to_df(
        self,
        index=1,
        header_rows=1,
        start_row=1,
        unformatted_columns=None,
        formula_columns=None,
        sheet=None,
    ):
        """
        Pull a worksheet into a DataFrame.

        Parameters
        ----------
        index : int
            col number of index column, 0 or None for no index (default 1)
        header_rows : int
            number of rows that represent headers, 0 or None for no headers
            (default 1)
        start_row : int
            row number for first row of headers or data (default 1)
        unformatted_columns : list
            column numbers or names for columns you'd like to pull in as
            unformatted values (default None)
        formula_columns : list
            column numbers or names for columns you'd like to pull in as
            actual formulas (default None)
        sheet : str,int
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.spread.Spread.open_sheet>`
            (default None)

        Returns
        -------
        DataFrame
            DataFrame with the data from the Worksheet
        """
        self._ensure_sheet(sheet)

        vals = self.sheet.get_all_values()
        vals = self._fix_merge_values(vals)[start_row - 1:]

        col_names = parse_sheet_headers(vals, header_rows)

        # header_rows may be None or 0 ("no headers"); normalise it once so the
        # slice below and the row arithmetic further down treat it the same way
        # instead of raising TypeError on ``None + int``.
        num_header_rows = header_rows or 0

        # remove rows where everything is null, then replace nulls with ''
        df = (
            pd.DataFrame(vals[num_header_rows:])
            .replace("", np.nan)
            .dropna(how="all")
            .fillna("")
        )

        # Number of sheet rows that precede the data: the rows skipped by
        # start_row plus the header rows.
        rows_before_data = num_header_rows + start_row - 1

        # replace values with a different value render option before the column
        # names and index are applied below
        if unformatted_columns:
            self._fix_value_render(
                df,
                rows_before_data,
                col_names,
                unformatted_columns,
                "UNFORMATTED_VALUE",
            )

        if formula_columns:
            self._fix_value_render(
                df,
                rows_before_data,
                col_names,
                formula_columns,
                "FORMULA",
            )

        df = set_col_names(df, col_names)

        return parse_sheet_index(df, index)
コード例 #9
0
def test_parse_sheet_index_multiheader2(df_multiheader):
    """With multiple header rows, the lower heading becomes the index name."""
    indexed = util.parse_sheet_index(df_multiheader, 2)
    assert indexed.index.name == 'subcol2'
コード例 #10
0
def test_parse_sheet_index_noop(df):
    """With index 0 the resulting index is still named 'test_index'."""
    result = util.parse_sheet_index(df, 0)
    assert result.index.name == 'test_index'
コード例 #11
0
def test_parse_sheet_index(df):
    """With index 1 the resulting index is named 'col1'."""
    result = util.parse_sheet_index(df, 1)
    assert result.index.name == 'col1'