Esempio n. 1
0
class TestLoadStateODBC(unittest.TestCase, LoadStateBase):
    """
    LoadState test case wired to an OXDB connection.

    Inherits from LoadStateBase, which is not visible here (presumably it
    supplies the shared test methods -- confirm).
    """

    def setUp(self):
        """Create the LoadState under test on a new OXDB connection."""
        connection = OXDB(DB_CONNECTION_NAME).connection
        self.load_state = LoadState(connection, variable_name=VARIABLE_NAME)

    def tearDown(self):
        """Call delete() on the LoadState created in setUp."""
        self.load_state.delete()
Esempio n. 2
0
    def main(self):
        """
        Write the options (or permutations) file, load it and upsert the
        table's load state.

        On any failure the error is logged, the connection is rolled back and
        a generic Exception carrying the upper-cased table name and the
        original error text is raised. The connection is closed in every
        case; errors raised while closing are ignored.
        """
        try:
            self.logger.info("START Create: %s;", self.tmp_file)
            # Pick the writer first, then run it: permutations take priority.
            write_file = (self._write_permutations
                          if self.config.get('permutations')
                          else self._write_options)
            write_file()
            self.logger.info("FINISH Create: %s", self.tmp_file)

            self._load_options()

            connection = self.oxdb.connection
            state_name = LOAD_STATE_TEMPLATE % self.table_name.upper()
            load_state = LoadState(connection, variable_name=state_name)
            load_state.upsert(commit=True)
        except Exception as error:
            table = self.table_name.upper()
            self.logger.error("Options Loader failed for %s. Error %s",
                              table, error)
            self.oxdb.rollback()
            message = "%s options loader failed. Error %s" % (table, error)
            raise Exception(message)
        finally:
            # Best-effort close: a failure here must not mask the outcome.
            try:
                self.oxdb.close()
            except Exception:
                pass
 def update_load_state(self):
     """
     Upsert the CONFIG['LOAD_STATE_VAR'] load state with commit=True and
     log that it was done.
     """
     connection = self.oxdb.connection
     variable_name = CONFIG['LOAD_STATE_VAR']
     LoadState(connection, variable_name).upsert(commit=True)
     LOGGER.info("Updated and Committed load_state variable for %s",
                 variable_name)
Esempio n. 4
0
 def update_load_state(self, dataset):
     """
     Bring the load state(s) up to date for ``dataset``.

     A republished dataset only increments GRID_FACT_RELOAD_VERSION from
     SEQ_GRID_FACT_RELOAD_VERSION. Any other dataset upserts the job's load
     state, and every name listed under AUXILIARY_LOAD_STATE_VARS in the job
     config, with the dataset's readable interval.
     """
     if self.is_republish(dataset):
         # Increment grid_fact_reload_version load state variable
         reload_version = LoadState(self.job.dbh, GRID_FACT_RELOAD_VERSION)
         reload_version.increment_by_seq(SEQ_GRID_FACT_RELOAD_VERSION)
         return

     # Not a republish, so update the load_state
     interval_value = readable_interval_datetime(
         dataset.meta['readableInterval'])
     self.job.logger.info("DATASET %s(%s) is loaded", interval_value,
                          dataset.serial)
     self.job.load_state.upsert(interval_value)

     # If any, then set the AUXILIARY_LOAD_STATE_VARS
     auxiliary_names = self.job.config.get('AUXILIARY_LOAD_STATE_VARS', [])
     for auxiliary_name in auxiliary_names:
         LoadState(self.job.dbh, auxiliary_name).upsert(interval_value)
Esempio n. 5
0
 def __init__(self, name, dbh, logger, options=None):
     """
     Store the name, DB handle, logger and options, then resolve the job's
     configuration, its feed and its load state.

     :param name: Name passed to get_conf() to read the configuration.
     :param dbh: DB handle given to the LoadState.
     :param logger: Logger kept on the instance.
     :param options: Optional options object; stored unchanged.
     """
     self.name, self.dbh, self.logger = name, dbh, logger
     self.config = get_conf(self.name)
     # feed_name is not assigned here; it must be provided by the class.
     feeds = Feeds(ODFI_CONF)
     self.feed = feeds[self.feed_name]
     load_state_var = self.config['LOAD_STATE_VAR']
     self.load_state = LoadState(self.dbh, variable_name=load_state_var)
     self.options, self._depends_on = options, None
Esempio n. 6
0
 def load_tmp_file(self):
     """
     Execute each entry of STATEMENTS, formatted with the temp file, on a
     'SNOWFLAKE' OXDB connection and then upsert the LOAD_STATE_NAME load
     state with commit=True.
     """
     with OXDB('SNOWFLAKE') as oxdb:
         for template in STATEMENTS:
             sql = template.format(self.temp_file)
             self.logger.info("Executing:%s;", sql)
             outcome = oxdb.execute(sql)
             self.logger.info("Results:%s;", outcome)
         self.logger.info("Setting load state '%s'", LOAD_STATE_NAME)
         load_state = LoadState(oxdb.connection,
                                variable_name=LOAD_STATE_NAME)
         load_state.upsert(commit=True)
Esempio n. 7
0
    def __call__(self, max_datasets=None, since_serial=None):
        """
        Walk the feed's datasets since a serial and upload each one in turn.

        Serials already reported by get_serials_since() are skipped. The walk
        stops at the first dataset whose interval does not follow the previous
        one, or at the first upload that does not occur. When the walk
        completes and has_data is set, the serial load state is upserted with
        the last accepted dataset's serial.

        :param max_datasets: Optional. ONLY used for TESTING.
        :param since_serial: Optional. ONLY used for TESTING.
        NOTE: Above optional parameters should only be used for testing
              as they can break the upload logic leading to data issues
              downstream.
        :return: False when an upload did not occur, otherwise the value of
                 self.has_data.
        """
        if since_serial is None:
            since_serial = self.get_since_serial()
        self.job.logger.debug("SINCE_SERIAL: %s;", since_serial)

        last_dataset = self.job.get_current_dataset()

        loaded_datasets = self.get_serials_since(since_serial)
        self.job.logger.debug("LOADED_DATASETS: %s;", loaded_datasets)

        candidates = self.job.feed.get_datasets_since_serial(
            since_serial, max_datasets=max_datasets, sort_key=UPLOAD_SORT_KEY)
        for dataset in candidates:
            if dataset.serial in loaded_datasets:
                self.job.logger.info(
                    "Dataset serial %s is already loaded, skipping.",
                    dataset.serial)
                continue

            if not self.is_correct_interval(last_dataset, dataset):
                # GAP detected stop the loop
                return self.has_data

            last_dataset = dataset
            start_time = datetime.utcnow()
            if not self.upload(dataset):
                # Upload did not occur.
                return False

            self.record_state(dataset, start_time)
            self.queue_rev_adjustment(dataset.readable_interval,
                                      self.is_republish(dataset))
            self.queue_rollup(dataset.readable_interval)

        if self.has_data:
            serial_state = LoadState(self.job.dbh,
                                     self.load_state_serial_name)
            serial_state.upsert(last_dataset.serial, commit=True)

        return self.has_data
Esempio n. 8
0
def _update_load_state(db, rollup_config, time_rollup, last_hour):
    # Update load_state only if last_hour is <= source data's last hour or if
    #  we don't have source data's last hour
    if 'source_load_state_variable_name' in time_rollup and \
                    'load_state_variable_name' in time_rollup:
        src_load_state_val = [
            row for row in db.cursor().execute(
                rollup_config['QUERY_LOAD_STATE'],
                tuple(time_rollup['source_load_state_variable_name']))
        ]

        if src_load_state_val:
            LoadState(
                db, time_rollup['load_state_variable_name']).upsert(last_hour)
Esempio n. 9
0
def bootstrap(options, dbh):
    """
    Initialize the load_state for a given job_name/feed_name so that the job
    can run.

    When the job's load state already has a value, the user is prompted until
    they answer 'y' or 'n' (an empty answer counts as 'y'); 'n' exits the
    process with status 0.

    :param options: NamedTuple including options defined for this action in
                    .actions.py
    :param dbh: DB handle passed to the Job.
    """
    job = Job(options.job_name, dbh, get_logger(NAME, debug=options.debug))
    proceed = 'n'
    if job.load_state.variable_value:
        answer = None
        while answer not in ('y', 'n'):
            answer = str(
                input("%s already exists with value %s. "
                      "Okay to Update to new values?(Y/n) " %
                      (job.load_state.variable_name,
                       job.load_state.variable_value))).lower() or 'y'
        proceed = answer
        if proceed == 'n':
            sys.exit(0)

    now = datetime.utcnow()

    downloader, uploader = Downloader(job), Uploader(job)

    # Create if not exists stage and status table
    downloader.create_stage()
    downloader.create_status_table()
    uploader.create_status_table()

    # Add new data to the DB
    try:
        first_dataset = job.feed.get_min_dataset(
            options.readable_interval_str)
        interval_value = readable_interval_datetime(
            options.readable_interval_str)
        job.load_state.upsert(interval_value)
        downloader.add_status(first_dataset, now)
        serial_state = LoadState(job.dbh, uploader.load_state_serial_name)
        serial_state.upsert(first_dataset.serial)

        # 'y' can only come from the confirmation prompt above.
        if proceed == 'y':
            action = 'Updated'
        else:
            action = 'Added'
        sys.stderr.write("%s load_state:%s;%s\n" %
                         (action, job.load_state.variable_name,
                          job.load_state.variable_value))
    except NoDataSetException as error:
        job.logger.error(error)
        print('ERROR', error)
Esempio n. 10
0
 def sync_table(self):
     """
     Run the configured DESTALE, DELETE and INSERT statements, in that
     order, on the destination DB, then upsert the '<table_name>_updated'
     load state with commit=True.

     Each mode's config entry may be a single statement or a list/tuple of
     statements; every statement is %-interpolated with DEST_SCHEMA and
     SRC_TABLE before execution.
     """
     for sync_mode in ('DESTALE', 'DELETE', 'INSERT'):
         configured = self.config.get(sync_mode)
         if isinstance(configured, (list, tuple)):
             statements = configured
         else:
             statements = [configured]
         for template in statements:
             sql = template % {
                 'DEST_SCHEMA': DB_NAMESPACE,
                 'SRC_TABLE': self.table_name_raw
             }
             self.logger.debug(sql)
             self.dest_db.execute(sql)
     state_name = '%s_updated' % self.table_name
     load_state = LoadState(self.dest_db.connection,
                            variable_name=state_name)
     load_state.upsert(commit=True)
 def setUp(self):
     """Create the LoadState under test on a new OXDB connection."""
     connection = OXDB(DB_CONNECTION_NAME).connection
     self.load_state = LoadState(connection, variable_name=VARIABLE_NAME)
Esempio n. 12
0
 def update_load_state(self, var_name):
     """
     Call update_variable_datetime on the ``var_name`` load state with the
     revenue job's "jq_utc_timestamp" value.
     """
     state = LoadState(self.job.dbh, variable_name=var_name)
     timestamp = self.revenue_job["jq_utc_timestamp"]
     state.update_variable_datetime(variable_value=timestamp)
 def setUp(self):
     """Create the LoadState under test on a sqlite3 connection to DB_FILE."""
     connection = sqlite3.connect(DB_FILE)
     self.load_state = LoadState(connection, variable_name=VARIABLE_NAME)
Esempio n. 14
0
def run(config):
    """
    Run the configured SQL statements, in order, while holding a PID lock.

    Required in the config are:
        STATEMENTS: Ordered list of SQL statements to run.
    Optional:
        DW_NAME: Data warehouse name.
        Required either in config file or sql_runner argument, -d (--dw_name).
        APP_NAME: Used for the logger and the lock file name.
              Defaults to the module-level APP_NAME.
        VARIABLES: Mapping %-interpolated into every statement. A value
              starting with 'select ' is itself interpolated and executed,
              and is replaced by the single column of the first row returned.
        FIELD_SEP: Delimiter for SELECT output written to stdout.
              Defaults to DEFAULT_FIELD_SEP.
        HEADERS: When true, the column names are written before the rows.
        LOAD_STATE_VAR: If present will update this load state var.
    This will fail at the first statement that fails and will not continue.
    Be sure to use local temporary or temporary tables as there is no clean up.

    If the lock cannot be acquired a warning is logged and nothing is run.

    :raises Exception: wrapping any other failure, after logging it.
    """
    job_name = config.get('APP_NAME', APP_NAME)
    logger = get_etl_logger(job_name)
    try:
        with pid.PidFile(pidname="%s.LOCK" % job_name,
                         piddir=LOCK_ROOT,
                         enforce_dotpid_postfix=False) as p_lock:
            logger.info("-------------------------------")
            logger.info("Running %s application with process id: %d", job_name,
                        p_lock.pid)
            logger.info("Starting %s for load_state_variable %s", job_name,
                        config.get('LOAD_STATE_VAR'))
            if sys.stdout.isatty():
                sys.stderr.write("Logging all output to %s\n" %
                                 logger.handlers[0].baseFilename)
            logger.info("Connecting to %s", config.get('DW_NAME'))
            with OXDB(config.get('DW_NAME')) as oxdb:
                statements = config.get('STATEMENTS')
                size = len(statements)
                # VARIABLES is optional: without it there is nothing to
                # interpolate. When present, this is the config's own mapping,
                # so resolved values are still written back into it.
                variables = config.get('VARIABLES')
                if variables is None:
                    variables = {}
                # Set dynamic variables
                for key, val in variables.items():
                    if str(val).lower().startswith('select '):
                        val %= variables
                        variables[key], = \
                            oxdb.get_executed_cursor(val).fetchone()
                for index, statement in enumerate(statements, start=1):
                    statement %= variables
                    logger.info("STATEMENT(%s/%s) %s;", index, size, statement)
                    # get_executed_cursor returns a cursor on which the
                    # statement has already run (its result is read directly
                    # for SELECTs), so it must not be executed a second time
                    # for non-SELECT statements.
                    cursor = oxdb.get_executed_cursor(statement)
                    if not str(statement).lower().startswith('select '):
                        continue
                    writer = csv.writer(
                        sys.stdout,
                        delimiter=config.get('FIELD_SEP', DEFAULT_FIELD_SEP))
                    if config.get('HEADERS', False):
                        writer.writerow(col[0]
                                        for col in cursor.description)
                    for row in cursor:
                        writer.writerow(row)
                if config.get('LOAD_STATE_VAR') is not None:
                    logger.info("SETTING %s in load_state.",
                                config.get('LOAD_STATE_VAR'))
                    LoadState(
                        oxdb.connection,
                        variable_name=config.get('LOAD_STATE_VAR')).upsert()
            logger.info("Completed %s for load_state_variable %s", job_name,
                        config.get('LOAD_STATE_VAR'))
    except (pid.PidFileAlreadyRunningError, pid.PidFileAlreadyLockedError):
        logger.warning("Unable to get lock for %s application. Exiting...",
                       job_name)
    except Exception as err:
        logger.error("Application %s FAILED. ERROR %s", job_name, err)
        raise Exception("Application %s FAILED. ERROR %s" % (job_name, err))