コード例 #1
0
ファイル: spread.py プロジェクト: pyrish/scrapers
    def _get_update_chunks(self, start, end, vals):
        """Split a rectangular range and its values into row-aligned pieces.

        ``start`` and ``end`` are normalised with ``get_cell_as_tuple``. The
        range is then walked top to bottom in groups of whole rows, where a
        group holds at most ``self._max_range_chunk_size`` cells.

        Yields
        ------
        tuple
            ``(start_cell, end_cell, values)`` where both cells are
            ``(row, col)`` tuples and ``values`` is the matching slice of
            ``vals`` as produced by ``chunks``.

        Raises
        ------
        MissMatchException
            If ``len(vals)`` differs from the number of cells in the range.
        """
        first = get_cell_as_tuple(start)
        last = get_cell_as_tuple(end)

        width = last[COL] - first[COL] + 1
        height = last[ROW] - first[ROW] + 1

        if len(vals) != width * height:
            raise MissMatchException(
                "Number of values needs to match number of cells")

        # A chunk always consists of complete rows, so the cell budget is
        # rounded down to a multiple of the range width.
        rows_per_chunk = self._max_range_chunk_size // width
        bottom_row = first[ROW] + height - 1
        next_row = first[ROW]

        for piece in chunks(vals, int(rows_per_chunk * width)):
            # The final chunk may be shorter, so never go past the range end.
            chunk_bottom = min(next_row + rows_per_chunk - 1, bottom_row)
            yield (next_row, first[COL]), (chunk_bottom, last[COL]), piece
            next_row = chunk_bottom + 1
コード例 #2
0
ファイル: client.py プロジェクト: rohit366/gspread-pandas
    def update_cells(self, start, end, vals, sheet=None):
        """
        Update the values in a given range. The values should be listed in order
        from left to right across rows.

        Nothing is written when ``start`` and ``end`` compare equal.

        :param tuple,str start: tuple indicating (row, col) or string like 'A1'
        :param tuple,str end: tuple indicating (row, col) or string like 'Z20'
        :param list vals: array of values to populate
        :param str,int,Worksheet sheet: optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>` (default None)
        :raises NoWorksheetException: if there is no open worksheet
        :raises MissMatchException: if a chunk of values and the cells fetched
            for its range differ in length
        """
        # Compare against None rather than relying on truthiness: a falsy
        # selector such as the integer 0 must still be passed to open_sheet.
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise NoWorksheetException("No open worksheet")

        # NOTE(review): a range whose start equals its end is skipped
        # entirely; confirm this is intended for single-cell updates.
        if start == end:
            return

        for start_cell, end_cell, val_chunks in self._get_update_chunks(
                start, end, vals):
            rng = get_range(start_cell, end_cell)

            cells = self._retry_range(rng)

            if len(val_chunks) != len(cells):
                raise MissMatchException(
                    "Number of chunked values doesn't match number of cells")

            # Values and cells are paired positionally.
            for val, cell in zip(val_chunks, cells):
                cell.value = val

            self._retry_update(cells)
コード例 #3
0
    def sheet_to_df(self, index=1, header_rows=1, start_row=1, sheet=None):
        """Read the open worksheet and return its contents as a DataFrame.

        Parameters
        ----------
        index : int
            col number of index column, 0 or None for no index (default 1)
        header_rows : int
            number of rows that represent headers (default 1)
        start_row : int
            row number for first row of headers or data (default 1)
        sheet : str,int
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
            (default None)

        Returns
        -------
        DataFrame
            DataFrame with the data from the Worksheet

        Raises
        ------
        NoWorksheetException
            If no worksheet is open.
        MissMatchException
            If there is data and the number of parsed headers differs from
            the number of data columns.

        """
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise NoWorksheetException("No open worksheet")

        all_rows = self._retry_func(self.sheet.get_all_values)
        rows = self._fix_merge_values(all_rows)[start_row - 1 :]

        headers = parse_sheet_headers(rows, header_rows)

        # Blank strings become NaN so that fully blank rows can be dropped;
        # the remaining NaNs are then turned back into blank strings.
        frame = pd.DataFrame(rows[header_rows or 0 :])
        frame = frame.replace("", np.nan)
        frame = frame.dropna(how="all")
        df = frame.fillna("")

        if headers is None:
            return parse_sheet_index(df, index)

        if len(headers) == len(df.columns):
            df.columns = headers
        elif df.shape[0] == 0:
            # Headers without any data rows: build the columns from the
            # headers alone.
            df = df.reindex(columns=headers)
        else:
            raise MissMatchException(
                "Column headers don't match number of data columns"
            )

        return parse_sheet_index(df, index)
コード例 #4
0
ファイル: spread.py プロジェクト: admariner/gspread-pandas
    def update_cells(self, start, end, vals, sheet=None, raw_columns=None):
        """
        Update the values in a given range. The values should be listed in order from
        left to right across rows.

        Parameters
        ----------
        start : tuple,str
            tuple indicating (row, col) or string like 'A1'
        end : tuple,str
            tuple indicating (row, col) or string like 'Z20'
        vals : list
            array of values to populate
        sheet : str,int,Worksheet
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.spread.Spread.open_sheet>`
            (default None)
        raw_columns : list, int
            optional, list of column numbers in the google sheet that should be
            interpreted as "RAW" input

        Returns
        -------
        None

        Raises
        ------
        MissMatchException
            If a chunk of values and the cells fetched for its range differ
            in length.
        AssertionError
            If ``raw_columns`` is truthy but not a list.
        """
        self._ensure_sheet(sheet)

        for start_cell, end_cell, val_chunks in self._get_update_chunks(
                start, end, vals):
            rng = get_range(start_cell, end_cell)

            cells = self.sheet.range(rng)

            if len(val_chunks) != len(cells):
                raise MissMatchException(
                    "Number of chunked values doesn't match number of cells")

            # Values and cells are paired positionally.
            for val, cell in zip(val_chunks, cells):
                cell.value = val

            if raw_columns:
                assert isinstance(raw_columns,
                                  list), "raw_columns must be a list of ints"
                # Split the chunk in one pass on the cell's column instead of
                # testing every cell for membership in the raw list.
                raw_cells = []
                user_cells = []
                for cell in cells:
                    if cell.col in raw_columns:
                        raw_cells.append(cell)
                    else:
                        user_cells.append(cell)
            else:
                raw_cells = []
                user_cells = cells

            # Only send a batch that actually contains cells; the chunk may
            # not include any of the requested raw columns.
            if raw_cells:
                self.sheet.update_cells(raw_cells, ValueInputOption.raw)
            if user_cells:
                self.sheet.update_cells(user_cells,
                                        ValueInputOption.user_entered)
コード例 #5
0
def set_col_names(df, col_names):
    """Apply ``col_names`` as the column labels of ``df`` and return the frame.

    When ``col_names`` is None the frame is returned untouched. When the
    counts match, the labels are assigned in place. A frame without rows is
    reindexed so that it carries the given columns. Any other count mismatch
    raises ``MissMatchException``.
    """
    if col_names is None:
        return df

    if len(col_names) == len(df.columns):
        df.columns = col_names
        return df

    if len(df) != 0:
        raise MissMatchException(
            "Column headers don't match number of data columns")

    # Headers but no data rows: build the columns from the headers alone.
    return df.reindex(columns=col_names)
コード例 #6
0
    def update_cells(self, start, end, vals, sheet=None):
        """Write ``vals`` into the cells of a range, one chunk at a time.

        The values should be listed in order from left to right across rows.

        Parameters
        ----------
        start : tuple,str
            tuple indicating (row, col) or string like 'A1'
        end : tuple,str
            tuple indicating (row, col) or string like 'Z20'
        vals : list
            array of values to populate
        sheet : str,int,Worksheet
            optional, if you want to open a different sheet first,
            see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
            (default None)

        Returns
        -------
        None

        Raises
        ------
        NoWorksheetException
            If no worksheet is open.
        MissMatchException
            If a chunk of values and the cells fetched for its range differ
            in length.

        """
        if sheet is not None:
            self.open_sheet(sheet)

        if not self.sheet:
            raise NoWorksheetException("No open worksheet")

        pieces = self._get_update_chunks(start, end, vals)

        for first_cell, last_cell, chunk_vals in pieces:
            rng = get_range(first_cell, last_cell)

            fetch_cells = partial(self.sheet.range, rng)
            cells = self._retry_func(fetch_cells)

            if len(cells) != len(chunk_vals):
                raise MissMatchException(
                    "Number of chunked values doesn't match number of cells"
                )

            # Values and cells are paired positionally.
            for cell, new_value in zip(cells, chunk_vals):
                cell.value = new_value

            push_cells = partial(self.sheet.update_cells, cells, "USER_ENTERED")
            self._retry_func(push_cells)