Beispiel #1
0
 def execute(self, method, retries):
     """Invoke the no-argument method called ``method`` on this object.

     Every exception raised by the call is appended to the final report.
     The attempt counter is then increased; while it does not exceed
     ``retries`` the call is repeated after a 60 second pause. Once it
     does, ``SystemExiter`` is asked to exit with ``isThread=True`` and a
     message that includes the jobs execution report.

     :param method: name of the attribute of ``self`` to call.
     :param retries: number of attempts allowed before giving up.
     :return: the value returned by the first successful call.
     """
     attempt = 1
     # The only regular way out of this loop is the ``return`` below.
     while True:
         try:
             return getattr(self, method)()
         except Exception as error:
             Log.Instance().appendFinalReport("".join([
                 "THREAD EXCEPTION at ", method, ", ", self.name, ": ",
                 str(error), ", type: ", str(type(error)),
                 ", RETRYING (", str(retries), " times), retry: ",
                 str(attempt),
             ]))
             attempt += 1
             if attempt <= retries:
                 # Attempts left: wait a minute before calling again.
                 time.sleep(60)
                 continue
             abort_text = "<<< Tried " + str(retries) + \
                          " times and failed, ABORTING THREAD! >>>\n\n"
             abort_text += self.name + " - " + str(error) + ", type: " + \
                 str(type(error))
             abort_text += JobsExecutionInfo.Instance().report()
             SystemExiter.Instance().exit(abort_text, isThread=True)
Beispiel #2
0
 def _config_job_variables(self):
     """Fill ``Config.JOBS_NAMES`` / ``Config.RUN_JOBS`` from ``--jobs``.

     When ``--jobs`` was given, its comma separated value is split into
     job names (surrounding whitespace removed, blank entries dropped) and
     stored in ``Config.JOBS_NAMES``. If nothing usable remains, the run
     is stopped through ``SystemExiter``. When ``--jobs`` was not given,
     ``Config.JOBS_NAMES`` stays empty and ``Config.RUN_JOBS`` is set to
     ``True``.
     """
     Config.JOBS_NAMES = []
     if self.args.jobs:
         # str.split(',') never returns an empty list (',' -> ['', '']),
         # so blank entries must be filtered out explicitly; otherwise the
         # "must choose a job" check below could never trigger.
         Config.JOBS_NAMES = [
             name.strip()
             for name in self.args.jobs.split(',')
             if name.strip()
         ]
         if not Config.JOBS_NAMES:
             SystemExiter.Instance().exit("Error: If you use -j, you must choose a job to run!")
     else:
         Config.RUN_JOBS = True
 def _validate_jobs(self):
     """Check the loaded job definitions and the list of jobs to run.

     Steps, in order:

     1. Every loaded job whose ``action`` is ``'insert'`` must have a
        ``table_name``.
     2. If ``Config.JOBS_NAMES`` is empty and ``Config.RUN_JOBS`` is set,
        it is extended with every loaded job for which ``is_kpi()`` is
        true.
     3. Every name in ``Config.JOBS_NAMES`` must be a loaded job.

     Each violation is reported through ``SystemExiter``.
     """
     Log.Instance().appendFinalReport('\nValidating kpis/jobs...\n')
     jobs = self.loaded_jobs

     for name in list(jobs.keys()):
         definition = jobs[name]
         if definition.action == 'insert' and definition.table_name is None:
             SystemExiter.Instance().exit(
                 'Error: ' + name + ' needs table name to insert data')

     if not Config.JOBS_NAMES:
         kpi_names = [
             name for name in list(jobs.keys()) if jobs[name].is_kpi()
         ]
         if Config.RUN_JOBS:
             Config.JOBS_NAMES += kpi_names

     for job_name in Config.JOBS_NAMES:
         if job_name in jobs:
             continue
         # The two messages differ only by a trailing space.
         if Config.RUN_JOBS:
             tail = ' not found in jobs definitions'
         else:
             tail = ' not found in jobs definitions '
         SystemExiter.Instance().exit('Error: ' + job_name + tail)
Beispiel #4
0
 def _config_general_variables(self):
     """Copy the general command-line switches from ``self.args`` to ``Config``.

     Besides the plain copies, this method:

     * sets ``Config.TEMP_TABLES`` from ``--temp-tables`` only when the
       current environment's label is ``'prod'``, and to ``True``
       otherwise;
     * rejects ``--partial-run`` without ``--jobs`` through ``SystemExiter``;
     * turns ``Config.PARTIAL_RUN`` on when ``Config.SKIP_TO_RESULTS`` is set;
     * stores the temp label, prefixed with ``"_"``, in
       ``Config.TEMP_TABLES_LABEL`` when a label is present.
     """
     args = self.args

     # (Config attribute, parsed argument) pairs that are copied as-is.
     direct_copies = (
         ('DEBUG_MODE', 'debug'),
         ('VERBOSE_MODE', 'verbose'),
         ('PARTIAL_RUN', 'partial_run'),
         ('SEND_REPORT', 'report'),
         ('SKIP_TO_RESULTS', 'skip_to_results'),
         ('SKIP_WARMUP', 'skip_warmup'),
         ('SKIP_INSERT', 'skip_insert'),
     )
     for setting, option in direct_copies:
         setattr(Config, setting, getattr(args, option))

     if Config.CURRENT_ENV['label'] != 'prod':
         Config.TEMP_TABLES = True
     else:
         Config.TEMP_TABLES = args.temp_tables

     # Checked before SKIP_TO_RESULTS forces PARTIAL_RUN on below.
     if Config.PARTIAL_RUN and not args.jobs:
         SystemExiter.Instance().exit('Error: You can\'t omit param --jobs if you are using --partial-run.')

     if Config.SKIP_TO_RESULTS:
         Config.PARTIAL_RUN = True

     if args.temp_label:
         Config.TEMP_TABLES_LABEL = "_" + args.temp_label
Beispiel #5
0
 def _execute_load(self):
     """Run the loader, retrying on ``RequestException``, and log the timing.

     ``Loader.Instance().run()`` is called until it finishes without a
     ``RequestException``; other exceptions propagate. After each failure
     a retry note is appended to the final report and the method sleeps
     60 seconds before trying again.

     :return: ``execution_data`` of the ``ETLExecutionInfo`` created for
         this step.
     """
     execution_info = ETLExecutionInfo("LOAD")
     attempt = 1
     while True:
         try:
             Loader.Instance().run()
         except RequestException:
             # NOTE(review): the abort fires when the 21st run fails,
             # while the message says 20 -- confirm which is intended.
             if attempt == 21:
                 SystemExiter.Instance().exit(
                     "<<< Tried 20 times to LOAD and failed, ABORTING! >>>")
             retry_note = ("<<< EXCEPTION RequestException, RETRY! (" +
                           str(attempt) + " time)>>>")
             Log.Instance().appendFinalReport(retry_note)
             attempt += 1
             time.sleep(60)
         else:
             # The loader finished without a RequestException.
             break

     execution_info.end()
     elapsed = str(execution_info.execution_data['value'])
     Log.Instance().appendFinalReport(
         "[LOAD executed in: " + elapsed + " minutes ]")
     return execution_info.execution_data
Beispiel #6
0
 def _configure_environment(self):
     """Select the active environment named by ``--env``.

     If ``self.args.env`` is not a key of ``Config.ENVIRONMENTS``, the run
     is stopped through ``SystemExiter`` with a message listing the valid
     names. Otherwise the matching entry is stored in
     ``Config.CURRENT_ENV``.
     """
     # Materialise the keys as a list: on Python 3 str(dict.keys()) renders
     # as "dict_keys([...])", which would leak into the error message.
     valid_envs = list(Config.ENVIRONMENTS.keys())
     if self.args.env not in valid_envs:
         SystemExiter.Instance().exit("Error: invalid environment! (valid envs: " + str(valid_envs) + ")")
     Config.CURRENT_ENV = Config.ENVIRONMENTS[self.args.env]
    def parse_arguments(self):
        """Parse the command line and normalise dates and stage flags.

        After parsing:

        * ``start_date`` is converted with ``_check_date_format`` when
          given, otherwise it is yesterday's date;
        * ``end_date`` is converted the same way when given, otherwise it
          equals ``start_date``;
        * a start date later than the end date is reported through
          ``SystemExiter``;
        * when neither ``--transform`` nor ``--load`` was requested, both
          are switched on.

        :return: the ``argparse`` namespace with the adjustments above.
        """
        parser = argparse.ArgumentParser(description='ETL Ambev')

        # Option table: (flags, keyword arguments), registered in this
        # exact order so the generated --help output keeps its layout.
        option_table = (
            # ---------------- dates ----------------
            (('-sd', '--start-date'),
             dict(type=str, default=None, metavar='DATE',
                  help='Date (start) to run ETL for (yyyy-mm-dd). Default: Yesterday')),
            (('-ed', '--end-date'),
             dict(type=str, default=None, metavar='DATE',
                  help='Date (end) to run ETL for (yyyy-mm-dd). Default: Equal to start-date')),
            # ---------------- stages ----------------
            (('-T', '--transform'),
             dict(action='store_true',
                  help='Execute the transform step. Note that all needed dumps must have been successfully extracted to MySQL. Default: All steps true')),
            (('-L', '--load'),
             dict(action='store_true',
                  help='Execute the load step. Note that all needed outputs must have been sucessfully transformed. Default: All steps true')),
            # ---------------- clients, indicators, etc ----------------
            (('-j', '--jobs'),
             dict(type=str, default=None, metavar='JOBS',
                  help='Jobs to run, split by comma. Default: all jobs that are kpis')),
            # ---------------- others ----------------
            (('-e', '--env'),
             dict(type=str, default='dev', metavar='ENV',
                  help="If the env is dev, test or prod. Default: dev")),
            (('-f', '--force-config'),
             dict(action='store_true',
                  help='Dont ask for configuration confirmation')),
            (('-o', '--output'),
             dict(type=str, default=None, metavar='OUTPUT',
                  help='Output files directory. Default: HOME/etl4-data/')),
            (('-p', '--partial-run'),
             dict(action="store_true",
                  help="Boolean flag, run ETL skipping job dependencies")),
            (('-sr', '--skip-to-results'),
             dict(action="store_true",
                  help="Boolean flag, skip jobs to just get the results.")),
            (('-v', '--verbose'),
             dict(action="store_true",
                  help="Boolean flag, prints all the queries and their first results.")),
            (('-d', '--debug'),
             dict(action="store_true",
                  help="Boolean flag, debug mode that get all queries results (including non-kpis queries). WARNING! Debug mode is slower than normal mode and consumes a lot of memory!")),
            (('-r', '--report'),
             dict(action="store_true",
                  help="Boolean flag, send a report by mail after the execution")),
            (('-t', '--temp-tables'),
             dict(action="store_true",
                  help="Boolean flag, run queries on temp tables, in order to don't overwrite the main tables. If env is not prod, it is always True.")),
            (('-l', '--temp-label'),
             dict(type=str, default='temp', metavar='LABEL',
                  help="If using temp tables, sets the label for the temp tables. Default: _temp")),
            (('-sw', '--skip-warmup'),
             dict(action="store_true",
                  help="Boolean flag, skip warmup. Useful when running ETL for old dumps.")),
            (('-si', '--skip-insert'),
             dict(action="store_true",
                  help="Boolean flag, skip insert into table data. Useful when reprocess.")),
            (('-opq', '--only-print-query'),
             dict(action="store_true",
                  help="Boolean flag, only print query used to run ETL. Useful when debug.")),
        )
        for flags, settings in option_table:
            parser.add_argument(*flags, **settings)

        args = parser.parse_args()

        # Dates: convert the given strings, fall back to yesterday for the
        # start and to the start date for the end.
        args.start_date = (self._check_date_format(args.start_date)
                           if args.start_date
                           else date.today() - timedelta(days=1))
        args.end_date = (self._check_date_format(args.end_date)
                         if args.end_date
                         else args.start_date)

        if args.start_date > args.end_date:
            SystemExiter.Instance().exit("".join([
                'Error: Start date (', str(args.start_date),
                ') must be smaller than or equal end date (',
                str(args.end_date), ').',
            ]))

        # No stage selected explicitly means "run every stage".
        if not (args.transform or args.load):
            args.transform = True
            args.load = True

        return args
 def _check_date_format(self, date):
     try:
         return datetime.strptime(date, "%Y-%m-%d").date()
     except ValueError:
         SystemExiter.Instance().exit("Error: argument date: invalid format: '%s' (use yyyy-mm-dd)\n" % date)
Beispiel #9
0
        cont = 1
        while test is False:
            test = True
            try:
                Loader.Instance().run()
            except RequestException:
                if cont == 21:
                    SystemExiter.Instance().exit(
                        "<<< Tried 20 times to LOAD and failed, ABORTING! >>>")
                Log.Instance().appendFinalReport(
                    "<<< EXCEPTION RequestException, RETRY! (" + str(cont) +
                    " time)>>>")
                test = False
                cont += 1
                time.sleep(60)
        execution_info.end()
        Log.Instance().appendFinalReport(
            "[LOAD executed in: " +
            str(execution_info.execution_data['value']) + " minutes ]")
        return execution_info.execution_data


### script entry point
if __name__ == "__main__":
    runner = Run()
    try:
        runner.run()
    except Exception as error:
        # Top-level boundary: hand the error text, its type and the full
        # traceback to SystemExiter.
        trace = traceback.format_exc()
        report = "".join([
            "Exception - ", str(error), ", type: ", str(type(error)),
            "\n\n", str(trace),
        ])
        SystemExiter.Instance().exit(report)