Example no. 1
0
    def run_cleanup(self,
                    df: pandas.DataFrame) -> Tuple[bool, pandas.DataFrame]:
        """Optionally preview, clean up, and confirm the dataframe with the user.

        In interactive mode the raw dataframe is previewed first and the user
        decides whether cleaning should happen at all. After cleaning, a second
        preview is shown (dry-run or interactive mode) and the user may abort
        the whole run.

        Args:
            df (pandas.DataFrame): dataframe to clean up.

        Returns:
            Tuple[bool, pandas.DataFrame]: always True, plus the cleaned (or,
                when the user declined cleaning, the original) dataframe.
        """
        perform_cleanup = True
        # check for interactive mode
        if self.flags.interactive:
            logger.info(
                yellow(
                    "PRE-CLEANING PREVIEW: The DataFrame you would push to the database would look like this:"
                ))
            self._show_dry_run_preview(df)
            perform_cleanup = self._collect_and_check_answer()

        if perform_cleanup is not True:
            return True, df

        logger.debug("Performing clean ups")
        camel_to_snake = bool(
            self.config.sheet_config.get("snake_case_camel", False))
        clean_df = SheetCleaner(df, camel_to_snake).cleanup()
        if self.flags.dry_run or self.flags.interactive:
            logger.info(yellow("\nPOST-CLEANING PREVIEW:"))
            self._show_dry_run_preview(clean_df)
            if not self._collect_and_check_answer(post_cleanup=True):
                logger.info(timed_message(red("User Aborted.")))
                sys.exit(1)
        return True, clean_df
Example no. 2
0
    def override_object_creation_from_flags(self) -> None:
        """Override object-creation settings with values from the CLI flags.

        Each `create_*` flag, when set, forces the corresponding entry of
        `object_creation_dct` to True; `destructive_create_table` flips the
        destructive-creation switch. The summary debug line is logged after
        ALL overrides so it reflects the final state (it previously ran before
        the destructive override).
        """
        if self.flags.create_table:
            logger.debug(yellow("going to create table"))
            self.object_creation_dct.update({"create_table": True})

        if self.flags.create_schema:
            logger.debug(yellow("going to create schema"))
            self.object_creation_dct.update({"create_schema": True})

        if self.flags.destructive_create_table:
            # fixed typo: "destuctive" -> "destructive"
            logger.debug(yellow("going to perform destructive table creation"))
            self.destructive_create_table = True
        logger.debug(yellow(f"Object creation dict after override\n {self.object_creation_dct}"))
Example no. 3
0
def check_and_compare_version(external_version: Optional[str] = str()
                              ) -> Tuple[bool, str]:
    """Checks what the currently installed version of sheetwork is and compares it to the one on PyPI.

    This requires an internet connection. In the case where this doesn't happen a URLError will
    probably be thrown and in that case we just return False not to cause annoying user experience.

    Args:
        external_version (Optional[str], optional): Mainly for testing purposes. Defaults to str().

    Returns:
        Tuple[bool, str]: True when sheetwork needs an update (False when up to date or when
            PyPI is unreachable), and the latest PyPI version (empty string when the lookup
            failed).
    """
    try:
        pypi_version: str = luddite.get_version_pypi("sheetwork")
        # an explicitly passed version (tests) wins over the installed one.
        installed_version = external_version if external_version else __version__

        needs_update = semver_parse(pypi_version) > semver_parse(installed_version)
        if needs_update:
            logger.warning(
                yellow(
                    f"Looks like you're a bit behind. A newer version of Sheetwork v{pypi_version} is available."
                ))
        return needs_update, pypi_version

    except URLError:
        # no/blocked internet: silently report "no update needed".
        return False, str()
Example no. 4
0
 def run(self):
     """Load the sheet, then push it to the database unless in dry-run mode."""
     self.load_sheet()
     if not self.push_anyway:
         logger.info(
             yellow("Nothing pushed since you were in --dry_run mode."))
         return
     self.push_sheet()
     self.check_table()
Example no. 5
0
 def decide_object_creation(self) -> None:
     """Decide which database objects (table/schema) sheetwork may create.

     Reads the project dict: `always_create_objects` turns everything on;
     otherwise each object type is enabled when any of its (possibly
     deprecated) project flags is explicitly True. Also resolves the
     `destructive_create_table` switch, keeping the current value as default.
     """
     self.handle_deprecations()
     create_everything_label = "always_create_objects"
     object_creation_mapping = {
         # ! DEPRECATE "always_create"
         "create_table": ["always_create_table", "always_create"],
         "create_schema": ["always_create_schema"],
     }
     create_all = bool(self.project_dict.get(create_everything_label))
     for object_type, rules in object_creation_mapping.items():
         # `always_create_objects` wins; otherwise any rule flag set to True enables creation.
         create = create_all or any(
             self.project_dict.get(rule) is True for rule in rules)
         self.object_creation_dct.update({object_type: create})
     # `x is True` already yields a bool; the old `True if ... else False` was redundant.
     self.destructive_create_table = (
         self.project_dict.get(
             "destructive_create_table", self.destructive_create_table) is True)
     logger.debug(yellow(f"Object creation dict:\n {self.object_creation_dct}"))
     logger.debug(yellow(str(self.project_dict)))
Example no. 6
0
def deprecate(message: str, colour: str = "yellow") -> None:
    """Handles deprecation messages using proper DeprecationWarnings.

    It also makes sure deprecation warnings are enabled globally as certain shells might have them
    turned off by default.

    Args:
        message (str): Deprecation message to print.
        colour (str, optional): Colour name to wrap the deprecation message. For now only "yellow",
            "red" or None are supported. Defaults to "yellow".
    """
    global DEPRECATION_WARNINGS_ENABLED, _WARNINGS_ALREADY_ENABLED

    if colour == "yellow":
        _message = yellow(message)
    elif colour == "red":
        _message = red(message)
    elif colour is None:
        _message = message
    else:
        logger.error(
            f"{colour} is not supported, painting error message 'yellow'")
        # BUG FIX: previously wrapped `colour` instead of the actual message,
        # losing the deprecation text entirely.
        _message = yellow(message)

    if DEPRECATION_WARNINGS_ENABLED and not _WARNINGS_ALREADY_ENABLED:
        _WARNINGS_ALREADY_ENABLED = True
        warnings.filterwarnings("default",
                                ".*",
                                category=DeprecationWarning,
                                module="gspread_pandas")
    if _WARNINGS_ALREADY_ENABLED and not DEPRECATION_WARNINGS_ENABLED:
        warnings.filterwarnings("ignore",
                                ".*",
                                category=DeprecationWarning,
                                module="gspread_pandas")
    warnings.warn(_message, DeprecationWarning, stacklevel=2)
Example no. 7
0
 def _create_schema(self) -> None:
     """Create the target schema in the database when allowed and missing.

     Requires an acquired connection. Does nothing unless the project's
     `create_schema` switch is on and the schema does not already exist.

     Raises:
         NoAcquiredConnectionError: when `acquire_connection` was not called first.
         DatabaseError: wraps any error raised while talking to the database.
     """
     if self._has_connection is False:
         raise NoAcquiredConnectionError(
             f"No acquired connection for {type(self).__name__}. Make sure you call "
             "`acquire_connection` before.")
     try:
         if self.config.project.object_creation_dct["create_schema"]:
             # `x in y` already yields a bool; the old `True if ... else False` was redundant.
             schema_exists = (self.config.target_schema
                              in self.con.dialect.get_schema_names(self.con))
             if schema_exists is False:
                 logger.debug(
                     yellow(
                         f"Creating schema: {self.config.target_schema} in {self._database}"
                     ))
                 self.con.execute(CreateSchema(self.config.target_schema))
     except Exception as e:
         # chain the original exception so the root cause is preserved.
         raise DatabaseError(str(e)) from e
Example no. 8
0
def check_columns_in_df(
    df: pandas.DataFrame,
    columns: Union[List[str], str],
    warn_only: bool = False,
    suppress_warning: bool = False,
) -> Tuple[bool, List[str]]:
    """Checks if a bunch of columns are present in a dataframe.

    Args:
        df (pandas.DataFrame): df to check.
        columns (Union[List[str], str]): column names to check for.
        warn_only (bool, optional): When True will only warn otherwise raises. Defaults to False.
        suppress_warning (bool, optional): When True neither warning nor exception is emitted;
            only the return value reports the problem. Defaults to False.

    Raises:
        ColumnNotFoundInDataFrame: If warn_only and suppress_warning are both False, this error
            will be raised when any of the columns to check for are not present in the dataframe.

    Returns:
        Tuple[bool, List[str]]: Boolean if all columns are present in df, and the list of
            requested columns that ARE present (all of them when the check passes).
    """
    if isinstance(columns, str):
        columns = [columns]
    # build the column set once: O(1) membership instead of repeated list scans.
    df_columns = set(df.columns)
    if set(columns).issubset(df_columns):
        return True, columns
    # else reduce columns, provide filtered list, set bool to False and warn or raise
    cols_not_in_df = [x for x in columns if x not in df_columns]
    reduced_cols = [x for x in columns if x in df_columns]
    message = f"The following columns were not found in the sheet: {cols_not_in_df} "
    if warn_only and not suppress_warning:
        logger.warning(
            yellow(
                message +
                "they were ignored. Consider cleaning your sheets.yml file"))
    elif not warn_only and not suppress_warning:
        raise ColumnNotFoundInDataFrame(
            message + "Google Sheet or sheets.yml needs to be cleaned")
    return False, reduced_cols
Example no. 9
0
 def make_df_from_worksheet(self,
                            worksheet_name: str = str(),
                            grab_header: bool = True) -> pandas.DataFrame:
     """Convert a worksheet of the loaded workbook into a pandas DataFrame.

     Args:
         worksheet_name (str, optional): name of the worksheet to read. When empty,
             the first worksheet of the workbook is used. Defaults to str().
         grab_header (bool, optional): when True the first row becomes the column
             names (after a duplicate-column check). Defaults to True.

     Returns:
         pandas.DataFrame: raw dataframe built from the worksheet values.

     Raises:
         NoWorkbookLoadedError: when no workbook has been loaded beforehand.
         SheetLoadingError: wraps any error raised while reading the sheet.
     """
     if not self.workbook:
         raise NoWorkbookLoadedError(
             "Workbook object seems empty, cannot turn a None object into a dataframe"
         )
     try:
         if worksheet_name:
             worksheet = self.workbook.worksheet(worksheet_name)
         else:
             worksheet_name = "default sheet"
             worksheet = self.workbook.get_worksheet(0)
         logger.debug(green("Sheet loaded successfully"))
         if grab_header:
             values: List[Any] = worksheet.get_all_values()
             # fail early on duplicate header cells before they become column names.
             check_dupe_cols(values[0])
             df = pandas.DataFrame(values[1:], columns=values[0])
         else:
             df = pandas.DataFrame(worksheet.get_all_values())
         logger.debug(yellow(f"Raw obtained google sheet: \n {df.head()}"))
         return df
     except Exception as e:
         # chain the original exception so the root cause is preserved.
         raise SheetLoadingError(f"Error loading sheet: \n {e}") from e
Example no. 10
0
    def upload(self, df: pandas.DataFrame,
               override_schema: str = str()) -> None:
        """Upload a dataframe to the database via a temporary stage.

        The dataframe is dumped to a pipe-delimited temp CSV, the target table
        is (optionally) created from the dataframe's header, and the data is
        then loaded through a temporary stage (put + copy into), which is
        faster than pandas' row-by-row inserts.

        Args:
            df (pandas.DataFrame): data to push.
            override_schema (str, optional): when given, overrides the target
                schema from the config. Defaults to str().

        Raises:
            DatabaseError: wraps any error raised while talking to the database.
        """
        # cast columns
        # !: note integer conversion doesn't actually happen it is left as a str see #204, #205
        df = cast_pandas_dtypes(df, overwrite_dict=self.config.sheet_columns)
        dtypes_dict = self.sqlalchemy_dtypes(self.config.sheet_columns)

        # potentially override target schema from config.
        schema = override_schema if override_schema else self.config.target_schema

        # write to csv and try to talk to db
        temp = tempfile.NamedTemporaryFile()
        df.to_csv(temp.name, index=False, header=False, sep="|")

        self.acquire_connection()

        # set up schema creation
        self._create_schema()

        try:
            # set the table creation behaviour
            _if_exists = "fail"
            if self.config.project.object_creation_dct["create_table"] is True:
                if self.config.project.destructive_create_table:
                    _if_exists = "replace"

                # perform the create ops
                try:
                    df.head(0).to_sql(
                        name=self.config.target_table,
                        schema=schema,
                        con=self.con,
                        if_exists=_if_exists,
                        index=False,
                        dtype=dtypes_dict,
                    )

                # if _if_exists is fail pandas will throw a ValueError which we want to escape when
                # destructive_create_table is set to False (or not provided) and throw a warning instead.
                except ValueError as e:
                    if _if_exists == "fail":
                        logger.warning(
                            yellow(
                                f"{self._database}"
                                f".{schema}.{self.config.target_table} already exists and was not\n"
                                "recreated because 'destructive_create_table' is set to False in your profile \n"
                                "APPENDING instead."))
                    else:
                        raise DatabaseError(str(e))

            # Now push the actual data --the pandas create above is only for creation the logic below
            # is actually faster as pandas does it row by row
            # BUG FIX: use the (possibly overridden) `schema` instead of
            # self.config.target_schema so the copy targets the same table that
            # was created above when override_schema is passed.
            qualified_table = (
                f"{self._database}.{schema}.{self.config.target_table}"
            )
            self.con.execute(f"""
                create or replace temporary stage {self.config.target_table}_stg
                file_format = (type = 'CSV' field_delimiter = '|'
                skip_header = 0 field_optionally_enclosed_by = '"')
                """)
            self.con.execute(
                f"put file://{temp.name} @{self.config.target_table}_stg")
            self.con.execute(
                f"copy into {qualified_table} from @{self.config.target_table}_stg"
            )
            self.con.execute(f"drop stage {self.config.target_table}_stg")
        except Exception as e:
            # chain the original exception so the root cause is preserved.
            raise DatabaseError(str(e)) from e
        finally:
            logger.debug("CLOSING CONNECTION & CLEANING TMP FILE")
            temp.close()
            self.close_connection()