def check_and_compare_version(external_version: Optional[str] = str()) -> Tuple[bool, str]:
    """Checks the currently installed version of sheetwork and compares it to the one on PyPI.

    This requires an internet connection. When one isn't available a URLError will most likely
    be thrown; in that case we just return False so we don't degrade the user experience.

    Args:
        external_version (Optional[str], optional): Mainly for testing purposes. Defaults to str().

    Returns:
        Tuple[bool, str]: True and the PyPI version when sheetwork needs an update,
            False and an empty string otherwise.
    """
    try:
        pypi_version: str = luddite.get_version_pypi("sheetwork")
        if external_version:
            installed_version = external_version
        else:
            installed_version = __version__

        needs_update = semver_parse(pypi_version) > semver_parse(installed_version)
        if needs_update:
            logger.warning(
                yellow(
                    f"Looks like you're a bit behind. A newer version of Sheetwork "
                    f"v{pypi_version} is available."
                )
            )
        return needs_update, pypi_version

    except URLError:
        return False, str()
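
# Illustrative usage sketch (not part of the original source): how a caller might act on the
# (needs_update, pypi_version) tuple returned above. The surrounding wiring is hypothetical.
#
#     needs_update, latest = check_and_compare_version()
#     if needs_update:
#         logger.info(f"Consider upgrading sheetwork to v{latest}.")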
def read_profile(self):
    logger.debug(f"Profile Name: {self.profile_name}")
    filename = Path(self.profile_dir, "profiles.yml")
    if filename.exists():
        yaml_dict = open_yaml(filename)
        is_valid_yaml = validate_yaml(yaml_dict, profiles_schema)
        profile = yaml_dict["profiles"].get(self.profile_name)
        if profile:
            # set target name from profile unless one was given at init from flags parse.
            if not self.target_name:
                self.target_name = profile.get("target")
            if profile.get("outputs"):
                target_profile = profile["outputs"].get(self.target_name)
                if target_profile and is_valid_yaml:
                    is_valid_profile = self._validate_profile(target_profile)
                    if is_valid_profile:
                        self.profile_dict = target_profile
                else:
                    raise ProfileParserError(
                        f"Error finding an entry for target: {self.target_name}, "
                        f"under the {self.profile_name} profile."
                    )
        else:
            raise ProfileParserError(
                f"Could not find an entry for {self.profile_name} in your profiles.yml"
            )
    else:
        raise FileNotFoundError(
            f"Could not open or find {filename.resolve()}, check that it exists"
        )
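
# Illustrative sketch (not part of the original source): the minimal profiles.yml shape this
# parser walks. The profile name and values below are hypothetical; the `db_type`/`guser`
# keys mirror the required keys referenced elsewhere in this codebase.
#
#     profiles:
#       my_project:
#         target: dev
#         outputs:
#           dev:
#             db_type: snowflake
#             guser: user@example.com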
def _override_gspread_default_creds(self) -> None:
    """Temporary workaround to allow `gspread.oauth()` to look for credentials in another location.

    For more info: https://github.com/burnash/gspread/issues/826
    This will likely be removed if work on gspread #826 gets carried out.
    """
    logger.debug(
        "Overriding `gspread`'s DEFAULT_AUTHORISED_USER_FILENAME and stuff. "
        "This is temporary (hopefully) see `GoogleSpreadsheet._override_gspread_default_creds()` "
        "docstring for more info."
    )
    logger.debug(
        f"Overriding to: {self._profile.google_credentials_dir}/{self._profile.profile_name}"
    )
    gspread.auth.DEFAULT_CONFIG_DIR = Path(self._profile.google_credentials_dir)
    gspread.auth.DEFAULT_CREDENTIALS_FILENAME = gspread.auth.DEFAULT_CONFIG_DIR.joinpath(
        self._profile.profile_name
    ).with_suffix(self.CREDS_EXT)
    gspread.auth.DEFAULT_AUTHORIZED_USER_FILENAME = gspread.auth.DEFAULT_CONFIG_DIR.joinpath(
        f"{self._profile.profile_name}_authorised_user"
    ).with_suffix(self.CREDS_EXT)
    gspread.auth.DEFAULT_SERVICE_ACCOUNT_FILENAME = gspread.auth.DEFAULT_CONFIG_DIR.joinpath(
        f"{self._profile.profile_name}_service_account"
    ).with_suffix(self.CREDS_EXT)

    # doing this skipping for when I'm testing this function
    gspread.auth.load_credentials.__defaults__ = (
        gspread.auth.DEFAULT_AUTHORIZED_USER_FILENAME,
    )
    gspread.auth.store_credentials.__defaults__ = (
        gspread.auth.DEFAULT_AUTHORIZED_USER_FILENAME,
        "token",
    )
def show_complete(self):
    credentials_message = TO_DO_CREDENTIALS.format(
        to_do_credentials=TO_DO_CREDENTIALS,
        open_cmd=open_dir_cmd(),
        profiles_path=self.profiles_path,
        profiles_doc_url=PROFILE_DOC_URL,
        google_creds_doc_url=GOOGLE_CREDS_DOC_URL,
        project_name=self.project_name,
    )
    if self.project_dir_is_created:
        done_message = INIT_DONE.format(
            project_name=self.project_name,
            project_path=self.project_path,
            profiles_path=self.profiles_path,
            google_path=self.google_path,
            profiles_doc_url=PROFILE_DOC_URL,
            google_creds_doc_url=GOOGLE_CREDS_DOC_URL,
            project_doc_url=PROJECT_DOC_URL,
            sheets_config_doc_url=SHEETS_CONFIG_DOC_URL,
            to_do_credentials=credentials_message,
            open_cmd=open_dir_cmd(),
        )
    else:
        done_message = CREDENTIALS_ONLY_SUCCESS_MESSAGE.format(
            to_do_credentials=credentials_message
        )
    logger.info(green(done_message))
def find_nearest_dir_and_file(
    self, yaml_file: str, current: Path = Path.cwd()
) -> Tuple[Path, Path]:
    """Looks for the yaml_file you ask for.

    Starts from the current directory and walks up the tree while the iteration number is
    still within the max allowed.

    Args:
        yaml_file (str): Name and extension of the file to find.
        current (Path, optional): Path object from which to start. Defaults to Path.cwd().

    Raises:
        NearestFileNotFound: When no file that matches the required name can be found.

    Returns:
        Tuple[Path, Path]: The directory up to the file name, and the full path to the
            filename, respectively. Maybe we'll end up deprecating one of these returns
            down the line but for now it's handy.
    """
    filename = Path(current, yaml_file)
    while self.iteration < self.max_iter:
        logger.debug(f"Looking for {filename}")
        if filename.exists():
            project_dir = filename.parent
            logger.debug(f"{filename} exists and was returned")
            return project_dir, filename
        current = current.parent
        filename = Path(current, yaml_file)
        self.iteration += 1
    raise NearestFileNotFound(
        f"Unable to find {yaml_file} in the nearby directories after {self.max_iter} "
        "iterations upwards."
    )
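
# Illustrative sketch (not part of the original source): assuming this method lives on a
# finder object that carries `iteration`/`max_iter` counters, a caller could locate the
# nearest project file like so. The object and file names below are hypothetical.
#
#     project_dir, project_file = finder.find_nearest_dir_and_file("sheetwork_project.yml")
#     print(f"Found {project_file} under {project_dir}")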
def make_dir(path: "Path"):
    """Creates a directory.

    Args:
        path (Path): Where you want it to be.
    """
    logger.debug(f"Making folder: {path}")
    path.mkdir()
def run(self):
    self.load_sheet()
    if self.push_anyway:
        self.push_sheet()
        self.check_table()
    else:
        logger.info(yellow("Nothing pushed since you were in --dry_run mode."))
def authenticate(self) -> None:
    if self.is_service_account:
        logger.debug("Using SERVICE_ACCOUNT auth")
        self.google_client = gspread.service_account(self.creds_path)
    else:
        logger.debug("Using END_USER auth")
        # ! This override should be temporary ideally we'll have a more long term solution in:
        # ! https://github.com/burnash/gspread/issues/826
        self._override_gspread_default_creds()
        self.google_client = gspread.oauth()
    self.is_authenticated = True
def make_file(path: "Path", contents: str = str()):
    """Creates a text file with potential things in it. WOW!

    Args:
        path (Path): Where you want it to be.
        contents (str, optional): What you want to put in that text file. Defaults to str().
    """
    logger.debug(f"Making file: {path}")
    path.touch()
    if contents:
        with path.open("w", encoding="utf-8") as f:
            f.write(contents)
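
# Illustrative sketch (not part of the original source): scaffolding a hypothetical folder
# and config file with the two helpers above. The path and contents are made up.
#
#     target = Path("example_project")
#     make_dir(target)
#     make_file(target / "sheets.yml", contents="sheets:\n")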
def create_google_dir_and_file(self):
    self.google_path = self.profiles_path / "google"
    google_file = self.google_path / f"{self.project_name}.json"
    if not self.google_path.exists():
        make_dir(self.google_path)
    else:
        logger.debug(f"{self.google_path} already exists.")
    if not google_file.exists():
        make_file(google_file)
    else:
        logger.debug(f"{google_file} already exists.")
def run_cleanup(self, df: pandas.DataFrame) -> Tuple[bool, pandas.DataFrame]:
    clean_up = True
    # check for interactive mode
    if self.flags.interactive:
        logger.info(
            yellow(
                "PRE-CLEANING PREVIEW: The DataFrame you would push to the database "
                "would look like this:"
            )
        )
        self._show_dry_run_preview(df)
        clean_up = self._collect_and_check_answer()

    if clean_up is True:
        logger.debug("Performing clean ups")
        clean_df = SheetCleaner(
            df, bool(self.config.sheet_config.get("snake_case_camel", False))
        ).cleanup()
        if self.flags.dry_run or self.flags.interactive:
            logger.info(yellow("\nPOST-CLEANING PREVIEW:"))
            self._show_dry_run_preview(clean_df)
            carry_on = self._collect_and_check_answer(post_cleanup=True)
            if not carry_on:
                logger.info(timed_message(red("User Aborted.")))
                sys.exit(1)
        return True, clean_df
    return True, df
def push_sheet(self):
    logger.info(timed_message("Pushing sheet to database..."))
    logger.debug(f"Column override dict is a {type(self.config.sheet_columns)}")
    logger.debug(f"Sheet columns: {self.config.sheet_columns}")
    logger.debug(f"Columns in final df: {self.sheet_df.columns.tolist()}")
    self.sql_adapter.upload(self.sheet_df, self.target_schema)
def create_project_dir(self):
    project_dir = self.project_path / f"{self.project_name}"
    if not project_dir.exists():
        make_dir(project_dir)
        self.project_dir_is_created = True
    elif self.flags.force_credentials:
        logger.warning(f"{self.project_name} already exists, moving on to credential files.")
    else:
        raise ProjectIsAlreadyCreated(
            f"""\n
            {self.project_name} already exists, so we'll stop.
            If you created it by mistake, delete it and run this again.
            If you want to generate the profiles and credentials files only, use the
            --force-credentials-folders CLI argument (see help for more info).
            """
        )
def _create_schema(self) -> None:
    if self._has_connection is False:
        raise NoAcquiredConnectionError(
            f"No acquired connection for {type(self).__name__}. Make sure you call "
            "`acquire_connection` before."
        )
    try:
        if self.config.project.object_creation_dct["create_schema"]:
            schema_exists = (
                self.config.target_schema in self.con.dialect.get_schema_names(self.con)
            )
            if schema_exists is False:
                logger.debug(
                    yellow(f"Creating schema: {self.config.target_schema} in {self._database}")
                )
                self.con.execute(CreateSchema(self.config.target_schema))
    except Exception as e:
        raise DatabaseError(str(e))
@staticmethod
def _collect_and_check_answer(post_cleanup: bool = False):
    acceptable_answers = ["y", "n", "a"]
    user_input = None
    while user_input not in acceptable_answers:
        if user_input is not None:
            logger.info("Choose 'y':yes, 'n':no, 'a':abort")
        if post_cleanup:
            user_input = input("Would you like to push to db? (y/n):")
        else:
            user_input = input("Would you like to perform cleanup? (y/n/a): ")
        if user_input.lower() == "y":
            return True
        if user_input.lower() == "n":
            return False
        if user_input.lower() == "a":
            logger.info(red("User aborted."))
            sys.exit(1)
def __init__(self, project: Project, target_name: str = str()):
    """Profile constructor. Mainly just needs an initted Project object.

    Args:
        project (Project): initted project object.
        target_name (str, optional): Mainly used in unit testing if you want to override the
            project name. Pretty useless in all other practice cases I think. Defaults to str().
    """
    self.profile_name = project.project_name
    self.target_name = target_name
    self.profile_dict: Dict[str, str] = dict()
    self.cannot_be_none = {"db_type", "guser"}
    self.profile_dir: Path = project.profile_dir
    self.google_credentials_dir = Path(project.profile_dir, "google").resolve()
    self.read_profile()
    logger.debug(f"PROFILE_DIR {self.profile_dir}")
    logger.debug(f"PROFILE_NAME: {self.profile_name}")
def override_object_creation_from_flags(self) -> None:
    if self.flags.create_table:
        logger.debug(yellow("going to create table"))
        self.object_creation_dct.update({"create_table": True})

    if self.flags.create_schema:
        logger.debug(yellow("going to create schema"))
        self.object_creation_dct.update({"create_schema": True})
    logger.debug(yellow(f"Object creation dict after override\n {self.object_creation_dct}"))

    if self.flags.destructive_create_table:
        logger.debug(yellow("going to perform destructive table creation"))
        self.destructive_create_table = True
def cast_pandas_dtypes(
    df: pandas.DataFrame, overwrite_dict: dict = dict()
) -> pandas.DataFrame:
    """Converts a dataframe's columns along a provided dictionary of {col: dtype}.

    Args:
        df (pandas.DataFrame): dataframe to cast.
        overwrite_dict (dict, optional): Dict of shape {column: dtype}. Defaults to dict().

    Raises:
        UnsupportedDataTypeError: When a dtype isn't currently supported
            (see dtypes_map inside function).
        ColumnNotFoundInDataFrame: When a column that is required for casting isn't found.

    Returns:
        pandas.DataFrame: df with converted dtypes.
    """
    overwrite_dict = overwrite_dict.copy()
    dtypes_map = dict(
        varchar="object",
        # this is intentional in case of nulls. Currently pandas doesn't play well with
        # converting mixed types, see https://github.com/bastienboutonnet/sheetwork/issues/204
        # for more details.
        int="object",
        numeric="float64",
        # ! HOT_FIX
        # the nullable pandas dtype is intentional,
        # see https://github.com/bastienboutonnet/sheetwork/issues/288
        boolean="boolean",
        timestamp_ntz="datetime64[ns]",
        # this is intentional: pandas doesn't really have just dates.
        date="datetime64[ns]",
    )

    # Check for type support
    unsupported_dtypes = set(overwrite_dict.values()).difference(dtypes_map.keys())
    if unsupported_dtypes:
        raise UnsupportedDataTypeError(f"{unsupported_dtypes} are currently not supported")

    # check overwrite col is in df
    invalid_columns = set(overwrite_dict.keys()).difference(set(df.columns.tolist()))
    if invalid_columns:
        raise ColumnNotFoundInDataFrame(f"{invalid_columns} not in DataFrame. Check spelling?")

    # recode dict in pandas terms
    for col, data_type in overwrite_dict.items():
        overwrite_dict.update({col: dtypes_map[data_type]})

    # cast
    logger.debug(f"DF BEFORE CASTING: {df.head()}")
    logger.debug(f"DF BEFORE CASTING DTYPES: {df.dtypes}")

    # handle booleans "manually" because .astype(bool) leads to everything being True if not null.
    df = handle_booleans(df, overwrite_dict=overwrite_dict)

    # use pandas' native function for all other data types as they are not problematic and we
    # have already handled booleans specifically.
    df = df.astype(overwrite_dict)
    logger.debug(f"Head of cast dataframe:\n {df.head()}")
    return df
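
# Illustrative sketch (not part of the original source): casting a tiny frame with an
# overwrite dict that uses the dtype labels supported by dtypes_map above. The column
# names and values are made up; note `user_id` stays a string per the "int" mapping note.
#
#     sample = pandas.DataFrame({"signed_up_at": ["2021-01-01"], "user_id": ["42"]})
#     cast = cast_pandas_dtypes(sample, overwrite_dict={"signed_up_at": "date", "user_id": "int"})
#     print(cast.dtypes)  # signed_up_at -> datetime64[ns], user_id -> object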
def decide_object_creation(self) -> None:
    self.handle_deprecations()
    create_everything_label = "always_create_objects"
    object_creation_mapping = {
        # ! DEPRECATE "always_create"
        "create_table": ["always_create_table", "always_create"],
        "create_schema": ["always_create_schema"],
    }
    for object_type, rule in object_creation_mapping.items():
        if self.project_dict.get(create_everything_label):
            create = [True]
        else:
            create = [True for x in rule if self.project_dict.get(x) is True]
        self.object_creation_dct.update({object_type: True in create})
    self.destructive_create_table = (
        True
        if self.project_dict.get("destructive_create_table", self.destructive_create_table)
        is True
        else False
    )
    logger.debug(yellow(f"Object creation dict:\n {self.object_creation_dct}"))
    logger.debug(yellow(str(self.project_dict)))
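
# Illustrative sketch (not part of the original source): with a project_dict loaded as below,
# the loop above flips both creation flags on while destructive_create_table stays False.
#
#     self.project_dict = {"always_create_objects": True}
#     self.decide_object_creation()
#     # self.object_creation_dct == {"create_table": True, "create_schema": True}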
def check_columns_in_df(
    df: pandas.DataFrame,
    columns: Union[List[str], str],
    warn_only: bool = False,
    suppress_warning: bool = False,
) -> Tuple[bool, List[str]]:
    """Checks if a bunch of columns are present in a dataframe.

    Args:
        df (pandas.DataFrame): df to check.
        columns (Union[List[str], str]): column names to check for.
        warn_only (bool, optional): When True will only warn, otherwise raises. Defaults to False.
        suppress_warning (bool, optional): When True the warning isn't shown, only returned.
            Defaults to False.

    Raises:
        ColumnNotFoundInDataFrame: If warn_only is False, this error will be raised when any of
            the columns to check for are not present in the dataframe.

    Returns:
        Tuple[bool, List[str]]: Boolean for whether all columns are present in df, and the list
            of requested columns that were found in df.
    """
    if isinstance(columns, str):
        columns = [columns]
    is_subset = set(columns).issubset(df.columns)
    if is_subset:
        return True, columns

    # else reduce columns, provide filtered list, set bool to False and warn or raise.
    cols_not_in_df = [x for x in columns if x not in df.columns.tolist()]
    reduced_cols = [x for x in columns if x in df.columns.tolist()]
    message = f"The following columns were not found in the sheet: {cols_not_in_df} "
    if warn_only and not suppress_warning:
        logger.warning(
            yellow(message + "they were ignored. Consider cleaning your sheets.yml file")
        )
    elif not warn_only and not suppress_warning:
        raise ColumnNotFoundInDataFrame(message + "Google Sheet or sheets.yml needs to be cleaned")
    return False, reduced_cols
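
# Illustrative sketch (not part of the original source): checking for a column that is
# missing, with warn_only=True, returns False plus the columns that were actually found.
#
#     frame = pandas.DataFrame({"a": [1], "b": [2]})
#     ok, found = check_columns_in_df(frame, ["a", "c"], warn_only=True, suppress_warning=True)
#     # ok is False, found == ["a"]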
def run(self):
    # print something cos it's fun!
    print(
        r"""
   ______ __ __
  / __/ / ___ ___ / /__ _____ ____/ /__
 _\ \/ _ \/ -_) -_) __/ |/|/ / _ \/ __/ '_/
/___/_//_/\__/\__/\__/|__,__/\___/_/ /_/\_\\
        """
    )
    logger.info("Alright let's get to work")
    logger.info("❤️ Taking peanut butter and jelly out of the cupboard 🍇")
    time.sleep(3)

    # do the actual work people cared about in the first place.
    self.assert_project_name()
    self.override_paths()
    self.create_project_dir()
    self.create_project_file()
    self.create_profiles_dir()
    self.create_profiles_file()
    self.create_google_dir_and_file()
    self.show_complete()
def check_table(self, target_schema: str, target_table: str) -> None:
    columns_query = f"""
        select count(*)
        from {self._database}.information_schema.columns
        where table_catalog = '{self._database.upper()}'
        and table_schema = '{target_schema.upper()}'
        and table_name = '{target_table.upper()}'
        ;
    """
    rows_query = f"select count(*) from {target_schema}.{target_table}"
    columns = self.excecute_query(columns_query, return_results=True)
    rows = self.excecute_query(rows_query, return_results=True)
    if columns and rows:
        logger.info(
            timed_message(
                green(
                    f"Push successful for "
                    f"{self._database}.{target_schema}.{target_table} \n"
                    f"Found {columns[0][0]} columns and {rows[0][0]} rows."
                )
            )
        )
    else:
        raise TableDoesNotExist(
            f"Table {self._database}.{target_schema}.{target_table} seems empty"
        )
def __init__(self, flags: FlagParser) -> None:
    """Constructs project object.

    Args:
        flags (FlagParser): Inited flags object.
    """
    self.project_dict: Dict[str, Union[str, bool]] = dict()
    self.target_schema: str = str()
    self.object_creation_dct: Dict[str, bool] = dict()
    self.destructive_create_table: bool = False
    self.flags = flags

    # directories (first overwritten by flags, then by project). This may not always be able
    # to stay this way; we might want to give priority to the CLI, but for now it removes
    # some complication.
    self.project_file_fullpath: Path = Path("dumpy_path")
    self.profile_dir: Path = Path("~/.sheetwork/").expanduser()
    self.sheet_config_dir: Path = Path.cwd()

    # override defaults
    self.override_paths_from_flags()
    self.load_project_from_yaml()
    self.decide_object_creation()
    self.override_object_creation_from_flags()
    logger.debug(f"Project name: {self.project_name}")
def deprecate(message: str, colour: str = "yellow") -> None:
    """Handles deprecation messages using proper DeprecationWarnings.

    It also makes sure deprecation warnings are enabled globally as certain shells might have
    them turned off by default.

    Args:
        message (str): Deprecation message to print.
        colour (str, optional): Colour name to wrap the deprecation message. For now only
            "yellow", "red" or None are supported. Defaults to "yellow".
    """
    global DEPRECATION_WARNINGS_ENABLED, _WARNINGS_ALREADY_ENABLED
    if colour == "yellow":
        _message = yellow(message)
    elif colour == "red":
        _message = red(message)
    elif colour is None:
        _message = message
    else:
        logger.error(f"{colour} is not supported, painting error message 'yellow'")
        _message = yellow(message)

    if DEPRECATION_WARNINGS_ENABLED and not _WARNINGS_ALREADY_ENABLED:
        _WARNINGS_ALREADY_ENABLED = True
        warnings.filterwarnings(
            "default", ".*", category=DeprecationWarning, module="gspread_pandas"
        )
    if _WARNINGS_ALREADY_ENABLED and not DEPRECATION_WARNINGS_ENABLED:
        warnings.filterwarnings(
            "ignore", ".*", category=DeprecationWarning, module="gspread_pandas"
        )
    warnings.warn(_message, DeprecationWarning, stacklevel=2)
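
# Illustrative sketch (not part of the original source): flagging the deprecated
# "always_create" project key referenced elsewhere in this codebase. The exact wording
# of the message is hypothetical.
#
#     deprecate(
#         "'always_create' is deprecated, use 'always_create_table' instead.",
#         colour="yellow",
#     )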
def make_df_from_worksheet(
    self, worksheet_name: str = str(), grab_header: bool = True
) -> pandas.DataFrame:
    if not self.workbook:
        raise NoWorkbookLoadedError(
            "Workbook object seems empty, cannot turn a None object into a dataframe"
        )
    try:
        if worksheet_name:
            worksheet = self.workbook.worksheet(worksheet_name)
        else:
            worksheet_name = "default sheet"
            worksheet = self.workbook.get_worksheet(0)
        logger.debug(green("Sheet loaded successfully"))
        if grab_header:
            values: List[Any] = worksheet.get_all_values()
            check_dupe_cols(values[0])
            df = pandas.DataFrame(values[1:], columns=values[0])
        else:
            df = pandas.DataFrame(worksheet.get_all_values())
        logger.debug(yellow(f"Raw obtained google sheet: \n {df.head()}"))
        return df
    except Exception as e:
        raise SheetLoadingError(f"Error loading sheet: \n {e}")
def upload(self, df: pandas.DataFrame, override_schema: str = str()) -> None:
    # cast columns
    # !: note integer conversion doesn't actually happen it is left as a str see #204, #205
    df = cast_pandas_dtypes(df, overwrite_dict=self.config.sheet_columns)
    dtypes_dict = self.sqlalchemy_dtypes(self.config.sheet_columns)

    # potentially override target schema from config.
    if override_schema:
        schema = override_schema
    else:
        schema = self.config.target_schema

    # write to csv and try to talk to db
    temp = tempfile.NamedTemporaryFile()
    df.to_csv(temp.name, index=False, header=False, sep="|")

    self.acquire_connection()

    # set up schema creation
    self._create_schema()

    try:
        # set the table creation behaviour
        _if_exists = "fail"
        if self.config.project.object_creation_dct["create_table"] is True:
            if self.config.project.destructive_create_table:
                _if_exists = "replace"

            # perform the create ops
            try:
                df.head(0).to_sql(
                    name=self.config.target_table,
                    schema=schema,
                    con=self.con,
                    if_exists=_if_exists,
                    index=False,
                    dtype=dtypes_dict,
                )
            # if _if_exists is "fail" pandas will throw a ValueError, which we want to escape
            # when destructive_create_table is set to False (or not provided) and throw a
            # warning instead.
            except ValueError as e:
                if _if_exists == "fail":
                    logger.warning(
                        yellow(
                            f"{self._database}"
                            f".{schema}.{self.config.target_table} already exists and was not\n"
                            "recreated because 'destructive_create_table' is set to False in "
                            "your profile \n"
                            "APPENDING instead."
                        )
                    )
                else:
                    raise DatabaseError(str(e))

        # Now push the actual data -- the pandas create above is only for table creation; the
        # staging logic below is faster than pandas, which inserts row by row.
        qualified_table = (
            f"{self._database}.{self.config.target_schema}.{self.config.target_table}"
        )
        self.con.execute(
            f"""
            create or replace temporary stage {self.config.target_table}_stg
            file_format = (type = 'CSV' field_delimiter = '|'
            skip_header = 0 field_optionally_enclosed_by = '"')
            """
        )
        self.con.execute(f"put file://{temp.name} @{self.config.target_table}_stg")
        self.con.execute(f"copy into {qualified_table} from @{self.config.target_table}_stg")
        self.con.execute(f"drop stage {self.config.target_table}_stg")

    except Exception as e:
        raise DatabaseError(str(e))

    finally:
        logger.debug("CLOSING CONNECTION & CLEANING TMP FILE")
        temp.close()
        self.close_connection()
def create_profiles_dir(self):
    if not self.profiles_path.exists():
        make_dir(self.profiles_path)
    else:
        logger.debug(f"{self.profiles_path} already exists.")
def load_sheet(self):
    """Loads a google sheet, and calls clean up steps if applicable.

    Sheet must have been shared with account admin email address used in storage.

    Raises:
        TypeError: When loader does not return results that can be converted into a pandas
            DataFrame a type error will be raised.
    """
    if self.flags.sheet_name:
        logger.info(timed_message(f"Importing: {self.flags.sheet_name}"))
        logger.debug(f"Importing data from: {self.config.sheet_config['sheet_key']}")
    else:
        logger.info(
            timed_message(f"Importing data from: {self.config.sheet_config.get('sheet_key')}")
        )
    df = self._obtain_googlesheet()
    if not isinstance(df, pandas.DataFrame):
        raise TypeError("import_sheet did not return a pandas DataFrame")
    logger.debug(f"Columns imported from sheet: {df.columns.tolist()}")

    # Perform exclusions, renamings and cleanups before releasing the sheet.
    df = self.exclude_columns(df)
    df = self.rename_columns(df)
    self.push_anyway, df = self.run_cleanup(df)
    logger.debug(f"Columns after cleanups and exclusions: {df.columns}")
    logger.debug(f"Loaded SHEET HEAD: {df}")
    self.sheet_df = df
def create_profiles_file(self):
    profile_file = Path(self.profiles_path, "profiles").with_suffix(".yml")
    if not profile_file.exists():
        make_file(profile_file)
    else:
        logger.debug(f"{profile_file} already exists.")