Ejemplo n.º 1
0
    def load_sheet(self):
        """Load the configured Google sheet and store the result in ``self.sheet_df``.

        The sheet is fetched through ``self._obtain_googlesheet()``, then passed
        through ``exclude_columns``, ``rename_columns`` and ``run_cleanup`` in
        that order. The boolean returned by ``run_cleanup`` is stored on
        ``self.push_anyway``.

        Sheet must have been shared with account admin email address used in storage.

        Raises:
            TypeError: When the loader does not return a pandas DataFrame.
        """
        # Read the key once, with .get() in both branches, so that a missing
        # key is handled the same way whichever log line ends up being emitted.
        sheet_key = self.config.sheet_config.get("sheet_key")
        if self.flags.sheet_name:
            logger.info(timed_message(f"Importing: {self.flags.sheet_name}"))
            logger.debug(f"Importing data from: {sheet_key}")
        else:
            logger.info(timed_message(f"Importing data from: {sheet_key}"))

        df = self._obtain_googlesheet()
        if not isinstance(df, pandas.DataFrame):
            raise TypeError("import_sheet did not return a pandas DataFrame")
        logger.debug(f"Columns imported from sheet: {df.columns.tolist()}")

        # Perform exclusions, renamings and cleanups before releasing the sheet.
        df = self.exclude_columns(df)
        df = self.rename_columns(df)
        self.push_anyway, df = self.run_cleanup(df)
        logger.debug(
            f"Columns after cleanups and exclusions: {df.columns.tolist()}")
        # Only the first rows are logged: the message advertises the HEAD, and
        # dumping a whole sheet into the debug log is rarely useful.
        logger.debug(f"Loaded SHEET HEAD: {df.head()}")
        self.sheet_df = df
Ejemplo n.º 2
0
 def show_complete(self):
     """Log the final success message, rendered in green.

     The credentials reminder is always rendered first. It is then embedded
     either in the full ``INIT_DONE`` message (when
     ``self.project_dir_is_created`` is truthy) or in the shorter
     ``CREDENTIALS_ONLY_SUCCESS_MESSAGE``.
     """
     # NOTE(review): the template is handed to itself as `to_do_credentials`.
     # str.format ignores unused keyword arguments, so this only matters if
     # the template contains that placeholder -- confirm the intent.
     credentials_fields = {
         "to_do_credentials": TO_DO_CREDENTIALS,
         "open_cmd": open_dir_cmd(),
         "profiles_path": self.profiles_path,
         "profiles_doc_url": PROFILE_DOC_URL,
         "google_creds_doc_url": GOOGLE_CREDS_DOC_URL,
         "project_name": self.project_name,
     }
     credentials_message = TO_DO_CREDENTIALS.format(**credentials_fields)

     if self.project_dir_is_created:
         template = INIT_DONE
         fields = {
             "project_name": self.project_name,
             "project_path": self.project_path,
             "profiles_path": self.profiles_path,
             "google_path": self.google_path,
             "profiles_doc_url": PROFILE_DOC_URL,
             "google_creds_doc_url": GOOGLE_CREDS_DOC_URL,
             "project_doc_url": PROJECT_DOC_URL,
             "sheets_config_doc_url": SHEETS_CONFIG_DOC_URL,
             "to_do_credentials": credentials_message,
             "open_cmd": open_dir_cmd(),
         }
     else:
         template = CREDENTIALS_ONLY_SUCCESS_MESSAGE
         fields = {"to_do_credentials": credentials_message}

     logger.info(green(template.format(**fields)))
Ejemplo n.º 3
0
 def push_sheet(self):
     """Upload ``self.sheet_df`` to ``self.target_schema`` through the SQL adapter.

     Before uploading, the configured column overrides and the final
     DataFrame columns are written to the debug log.
     """
     logger.info(timed_message("Pushing sheet to database..."))

     column_overrides = self.config.sheet_columns
     logger.debug(f"Column override dict is a {type(column_overrides)}")
     logger.debug(f"Sheet columns: {column_overrides}")

     frame = self.sheet_df
     logger.debug(f"Columns in final df: {frame.columns.tolist()}")
     self.sql_adapter.upload(frame, self.target_schema)
Ejemplo n.º 4
0
 def run(self):
     """Load the sheet, then push it and check the table unless pushing is disabled.

     ``self.push_anyway`` is read after ``load_sheet()`` has run; when it is
     falsy nothing is pushed and a notice is logged instead.
     """
     self.load_sheet()

     # Guard clause: bail out early when there is nothing to push.
     if not self.push_anyway:
         notice = yellow("Nothing pushed since you were in --dry_run mode.")
         logger.info(notice)
         return

     self.push_sheet()
     self.check_table()
Ejemplo n.º 5
0
 def _collect_and_check_answer(post_cleanup: bool = False):
     """Prompt on stdin until the user answers yes, no or abort.

     The answer is stripped of surrounding whitespace and lower-cased before
     it is validated, so ``"Y"`` and ``" y "`` are accepted like ``"y"``.

     NOTE(review): takes no ``self`` although it is defined at method
     indentation -- presumably decorated with ``@staticmethod`` outside this
     view; confirm.

     Args:
         post_cleanup: When True, ask whether to push to the database;
             otherwise ask whether to perform the cleanup.

     Returns:
         True when the user answers ``y``, False when the user answers ``n``.

     Raises:
         SystemExit: With exit code 1 when the user answers ``a``.
     """
     acceptable_answers = ("y", "n", "a")
     if post_cleanup:
         prompt = "Would you like to push to db? (y/n):"
     else:
         prompt = "Would you like to perform cleanup? (y/n/a): "

     user_input = ""
     while user_input not in acceptable_answers:
         # The hint is shown before every prompt (the former `is not None`
         # check was always true because the value is always a str).
         logger.info("Choose 'y':yes, 'n':no, 'a':abort'")
         # Normalise once here so that validation and the branches below
         # agree on what counts as a valid answer.
         user_input = input(prompt).strip().lower()

     if user_input == "a":
         logger.info(red("User aborted."))
         sys.exit(1)
     return user_input == "y"
Ejemplo n.º 6
0
    def run_cleanup(self,
                    df: pandas.DataFrame) -> Tuple[bool, pandas.DataFrame]:
        """Optionally clean the DataFrame, asking for confirmation when requested.

        In interactive mode a pre-cleaning preview is shown and the user is
        asked whether to clean up. When cleanup runs and either the dry-run
        or the interactive flag is set, a post-cleaning preview is shown and
        the user must confirm; declining exits the process with code 1.

        Args:
            df: The DataFrame loaded from the sheet.

        Returns:
            A ``(True, DataFrame)`` tuple. The first element is always True in
            this implementation. The second is the cleaned DataFrame, or the
            input unchanged when cleanup was declined.
        """
        wants_cleanup = True
        # check for interactive mode
        if self.flags.interactive:
            preview_banner = yellow(
                "PRE-CLEANING PREVIEW: The DataFrame you would push to the database would look like this:"
            )
            logger.info(preview_banner)
            self._show_dry_run_preview(df)
            wants_cleanup = self._collect_and_check_answer()

        # Anything other than an explicit True leaves the sheet untouched.
        if wants_cleanup is not True:
            return True, df

        logger.debug("Performing clean ups")
        snake_case_camel = bool(
            self.config.sheet_config.get("snake_case_camel", False))
        cleaned = SheetCleaner(df, snake_case_camel).cleanup()

        if self.flags.dry_run or self.flags.interactive:
            logger.info(yellow("\nPOST-CLEANING PREVIEW:"))
            self._show_dry_run_preview(cleaned)
            confirmed = self._collect_and_check_answer(post_cleanup=True)
            if not confirmed:
                logger.info(timed_message(red("User Aborted.")))
                sys.exit(1)
        return True, cleaned
Ejemplo n.º 7
0
    def run(self):
        """Print the banner, pause for three seconds, then run the init steps in order.

        The steps are: assert the project name, override paths, create the
        project directory and file, create the profiles directory and file,
        create the google directory and file, and finally show the
        completion message.
        """
        # print something cos it's fun!
        banner = r"""
           ______           __                  __
          / __/ /  ___ ___ / /__    _____  ____/ /__
         _\ \/ _ \/ -_) -_) __/ |/|/ / _ \/ __/  '_/
        /___/_//_/\__/\__/\__/|__,__/\___/_/ /_/\_\\
        """
        print(banner)
        for greeting in (
            "Alright let's get to work",
            "❤️ Taking peanut butter and jelly out of the cupboard 🍇",
        ):
            logger.info(greeting)
        time.sleep(3)

        # do the actual work people cared about in the first place.
        self.assert_project_name()
        self.override_paths()

        self.create_project_dir()
        self.create_project_file()

        self.create_profiles_dir()
        self.create_profiles_file()

        self.create_google_dir_and_file()
        self.show_complete()
Ejemplo n.º 8
0
 def check_table(self, target_schema: str, target_table: str) -> None:
     """Count the columns and rows of a pushed table and log the result.

     Two queries are run through ``self.excecute_query``: a column count
     from ``information_schema.columns`` (names upper-cased for the lookup)
     and a row count from ``target_schema.target_table``.

     Args:
         target_schema: Schema holding the table.
         target_table: Name of the table to check.

     Raises:
         TableDoesNotExist: When either query yields a falsy result.
     """
     columns_query = f"""
             select count(*)
             from {self._database}.information_schema.columns
             where table_catalog = '{self._database.upper()}'
             and table_schema = '{target_schema.upper()}'
             and table_name = '{target_table.upper()}'
             ;
             """
     rows_query = f"select count(*) from {target_schema}.{target_table}"
     columns = self.excecute_query(columns_query, return_results=True)
     rows = self.excecute_query(rows_query, return_results=True)

     # NOTE(review): both queries are `count(*)`, which presumably always
     # returns one row, so this guard likely fires only when the adapter
     # returns nothing at all -- a zero count still passes. Confirm intent.
     if not (columns and rows):
         raise TableDoesNotExist(
             f"Table {self._database}.{target_schema}.{target_table} seems empty"
         )

     logger.info(
         timed_message(
             green(
                 f"Push successful for "
                 f"{self._database}.{target_schema}.{target_table} \n"
                 f"Found {columns[0][0]} columns and {rows[0][0]} rows."
             )))