Example #1
    def __init__(self, start_date=None, end_date=None):
        self._start_date = start_date
        self._end_date = end_date
        self.lock = pid.PidFile(pidname="%s.LOCK" % APP_NAME,
                                piddir=LOCK_ROOT,
                                enforce_dotpid_postfix=False)
        self.logger = get_etl_logger(APP_NAME)
Example #2
    def logger(self):
        """
        Where to output our logging. Defaults to the app name and table name.
        """
        if self._logger is None:
            self._logger = get_etl_logger(self.table_name, LOG_DIR)

        return self._logger
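Every snippet on this page calls `ox_dw_logger.get_etl_logger` (the import appears in Example #10). The implementation itself is not shown anywhere on this page; judging from the call signatures here and the assertions in Examples #3 and #11, a minimal sketch could look like the following. The fallback constant and the defaults are assumptions, not the package's actual code.

import logging
import os

LOG_DIRECTORY = '/var/log/etl'  # assumed fallback; the real package defines its own


def get_etl_logger(log_name, log_directory=None, log_format=None,
                   log_level=logging.INFO):
    """Return a logger writing to <log_directory>/<log_name>.log."""
    logger = logging.getLogger(log_name)
    if not logger.handlers:  # configure each named logger only once
        directory = os.path.abspath(log_directory or LOG_DIRECTORY)
        handler = logging.FileHandler(
            os.path.join(directory, '.'.join([log_name, 'log'])))
        if log_format:
            handler.setFormatter(logging.Formatter(log_format))
        logger.addHandler(handler)
    logger.setLevel(log_level)
    return logger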
Example #3
    def test_None_log_directory(self):
        logger = get_etl_logger(log_name='test',
                                log_directory=None,
                                log_format=None,
                                log_level=NEW_LOG_LEVEL)
        self.assertEqual(
            logger.handlers[0].baseFilename,
            os.path.join(os.path.abspath(LOG_DIRECTORY),
                         '.'.join(['test', 'log'])))
Example #4
    def setUp(self):
        print('')
        self.extractor = \
            Extractor(
                NAME, SchemaConfig(SCHEMA_CONFIG_FILE),
                get_etl_logger(NAME))
        self.table_name = os.path.splitext(os.path.basename(JSON_FILE))[0]
        with open(JSON_FILE, 'r') as in_json:
            for raw in in_json:
                self.extractor.update_handler(json.loads(raw))
Example #5
def get_logger():
    """
    Look through the known loggers for this package and return the first
    one that exists, falling back to a default logger.
    """
    for name in [APP_NAME, HARMONIZER_NAME, DEFAULT]:
        logger = logging.Logger.manager.loggerDict.get(name)
        # loggerDict can hold PlaceHolder objects for dotted-name parents,
        # so only accept real Logger instances.
        if isinstance(logger, logging.Logger):
            return logger

    return get_etl_logger(DEFAULT, log_directory=None)
Example #6
def get_logger(name, debug=False):
    """
    Returns a logger with the appropriate log level.
    """
    log_level = (logging.DEBUG if debug
                 else getattr(logging, ENV.get('LOG_LEVEL', 'INFO')))
    logger = get_etl_logger(name, log_level=log_level)
    if sys.stdin.isatty():
        sys.stderr.write("Logging to %s\n" % logger.handlers[0].baseFilename)

    return logger
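A hypothetical call site, assuming `ENV` is a mapping loaded from the environment as in Example #10; the job name is made up:

logger = get_logger('nightly_load')  # level comes from ENV['LOG_LEVEL'], default INFO
logger.debug('suppressed unless debug=True or LOG_LEVEL=DEBUG')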
Example #7
    def __init__(self, name, conf, queues, schema_config):
        logger = get_etl_logger(name, LOG_DIR)
        super(APIExtractorClient, self).__init__(name,
                                                 conf,
                                                 queues,
                                                 logger=logger)
        self.schema_config = schema_config
        self.lock = pid.PidFile(pidname="%s.LOCK" % APP_NAME,
                                piddir=LOCK_ROOT,
                                enforce_dotpid_postfix=False)

        self._worker = None
Example #8
    def __init__(self, schema_file, tables=None):
        # Required
        self.schema_file = schema_file

        # Constructor setup
        self.lock = pid.PidFile(pidname="%s.LOCK" % LOADER_APP_NAME, piddir=LOCK_ROOT,
                                enforce_dotpid_postfix=False)
        self.logger = get_etl_logger(LOADER_APP_NAME, LOG_DIR)
        self.schema_config = SchemaConfig(self.schema_file)

        # Optional
        self.tables = tables
        if not self.tables:
            self.tables = self.schema_config.configured_tables
Example #9
    def __init__(self, table_name, config):
        self.table_name = table_name
        self.config = config

        self.tmp_table_name = '_'.join(['tmp', self.table_name])
        self.name = '-'.join([APP_NAME, self.table_name])

        self.lock = pid.PidFile(pidname="%s.LOCK" % self.name,
                                piddir=LOCK_ROOT,
                                enforce_dotpid_postfix=False)
        self.logger = get_etl_logger(self.name)
        self.tmp_file = \
            os.path.join(OUTPUT_DIR, '.'.join([self.table_name, 'csv']))

        # These values are pivoted from the config and used for kwargs.
        self.keys_keys = \
            [list(item.keys())[0] for item in self.config.get('keys', [])]
        self.keys_values = \
            [list(item.values())[0] for item in self.config.get('keys', [])]
        self.values_keys = \
            [list(item.keys())[0] for item in self.config.get('values', [])]
        self.values_values = \
            [list(item.values())[0] for item in self.config.get('values', [])]

        # These items can be overridden in the config if needed but defaults
        # usually work fine.
        self.join_keys = self.config.get('join_keys', self.keys_values)
        self.join_values = self.config.get('join_values', self.values_values)
        self.key_columns = self.config.get('key_columns', self.join_keys)
        self.set_columns = self.config.get('set_columns', self.values_values)

        # These are used for kwargs.
        self.destination_fields = self.keys_values + self.values_values
        self.source_fields = self.keys_keys + self.values_keys
        self.key_sep = self.config.get('key_sep')

        self._kwargs = None
        self._oxdb = None
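The `keys`/`values` pivot above expects lists of single-item mappings (source field to destination field). A small worked example, with hypothetical field names, shows what the pivoted lists would contain:

config = {
    'keys': [{'src_account_id': 'account_id'}],
    'values': [{'src_total': 'total'}, {'src_status': 'status'}],
}
# keys_keys          -> ['src_account_id']            (source key fields)
# keys_values        -> ['account_id']                (destination key fields)
# values_keys        -> ['src_total', 'src_status']
# values_values      -> ['total', 'status']
# source_fields      -> ['src_account_id', 'src_total', 'src_status']
# destination_fields -> ['account_id', 'total', 'status']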
Example #10
"""
Re-exporting data from the API DB.
"""
import sys
import psycopg2
import pid
from progressbar import ProgressBar, widgets, UnknownLength
from retrying import retry, RetryError
from ox_dw_logger import get_etl_logger
from .extractor import Extractor
from .settings import DEFAULT_SCHEMA_FILE, HARMONIZER_NAME, LOCK_ROOT, ENV
from .schema_config import SchemaConfig

API_JSONDB_CONFIG = ENV.get('API_JSONDB_CONFIG')
LOGGER = get_etl_logger(HARMONIZER_NAME)
MAX_ATTEMPTS = 5
WAIT_BETWEEN_ATTEMPTS = 2000  # ms

# Object name to JSONDB table mapping.
# Most of the time the object and table name are the same;
# this mapping is for when they don't match.
# Keep this here since it is specific to the harmonizer and may change when
# switching to postgres someday.
OBJECT_TYPE_TO_TABLE = {'order': 'order_', 'user': '******'}


def retry_if_db_error(exception):
    """
    Only retry on a psycopg2.Error. Else raise error immediately.
    """
    if isinstance(exception, psycopg2.Error):
        return True
    return False
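`retry_if_db_error` has the shape the `retrying` package expects for its `retry_on_exception` callback. A sketch of how it presumably pairs with the `MAX_ATTEMPTS` and `WAIT_BETWEEN_ATTEMPTS` constants above; the decorated function is hypothetical:

@retry(retry_on_exception=retry_if_db_error,
       stop_max_attempt_number=MAX_ATTEMPTS,
       wait_fixed=WAIT_BETWEEN_ATTEMPTS)
def fetch_rows(cursor, query):
    # Retries only on psycopg2.Error, up to 5 attempts, 2000 ms apart.
    cursor.execute(query)
    return cursor.fetchall()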
Example #11
    def test_log_same_name(self):
        logger = get_etl_logger(log_name=NEW_LOG_NAME)
        self.assertEqual(self.logger.handlers[0].baseFilename,
                         logger.handlers[0].baseFilename)
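The test passes because the standard library hands back the same `Logger` object for a given name, so the second `get_etl_logger` call sees the handler the first call installed:

import logging
assert logging.getLogger('same-name') is logging.getLogger('same-name')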
Example #12
    def setUp(self):
        self.logger = \
            get_etl_logger(
                log_name=NEW_LOG_NAME, log_directory=NEW_LOG_DIR,
                log_format=NEW_LOG_FORMAT, log_level=NEW_LOG_LEVEL)
Example #13
    return {}


ENV = get_config('env')
CONFIG = get_config(APP_NAME)
DB_CONFIG = ENV['DB_CONNECTIONS']['SNOWFLAKE']['KWARGS']
DB_NAME = DB_CONFIG.get('database')
DB_SCHEMA = DB_CONFIG.get('schema')

TMP = os.environ.get('TMP', '/tmp')
TEMP_FILE_DIR = os.path.join(TMP, APP_NAME)
DELIMITER = chr(30)
MAX_ATTEMPTS = 5
WAIT_BETWEEN_ATTEMPTS = 2000  # ms

LOGGER = get_etl_logger(APP_NAME)

GET_COLUMNS = """
    SELECT COLUMN_NAME, ORDINAL_POSITION
    FROM   {0}.INFORMATION_SCHEMA.COLUMNS
    WHERE  TABLE_SCHEMA = ?
      AND  TABLE_NAME = ?
    ORDER BY ORDINAL_POSITION"""

INSERT_MISSING_COLUMNS = """
    INSERT INTO monitor_sf_load
    (table_name, column_name)
    VALUES (?, ?)"""

TRUNCATE_MONITOR_TABLE = """
    TRUNCATE TABLE monitor_sf_load"""
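The `?` placeholders are qmark paramstyle (the Snowflake connector supports this when `snowflake.connector.paramstyle` is set to `'qmark'`). A minimal sketch of executing GET_COLUMNS with a DB-API cursor; `table_name` is hypothetical:

sql = GET_COLUMNS.format(DB_NAME)             # {0} is the database name
cursor.execute(sql, (DB_SCHEMA, table_name))  # the two ?s bind schema and table
positions = dict(cursor.fetchall())           # COLUMN_NAME -> ORDINAL_POSITION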
Example #14
    def __init__(self):
        self.lock = pid.PidFile(pidname="%s.LOCK" % APP_NAME,
                                piddir=LOCK_ROOT,
                                enforce_dotpid_postfix=False)
        self.logger = get_etl_logger(APP_NAME)
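Note that this constructor (like Examples #1, #7, #8, and #9) only builds the `pid.PidFile`; nothing is locked until the object is entered, presumably by the caller, the way Example #15 below uses its lock. A sketch of that pattern with a hypothetical job class:

job = NightlyJob()  # hypothetical class using the __init__ above
with job.lock:      # creates <LOCK_ROOT>/<APP_NAME>.LOCK, raising if already held
    job.logger.info("Running with process id %d", job.lock.pid)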
Example #15
def run(config):
    """
    Required in the config:
        STATEMENTS: Ordered list of SQL statements to run.
    Optional:
        DW_NAME: Data warehouse name. Required either in the config file or
            via the sql_runner argument -d (--dw_name).
        APP_NAME: Used for the logger. Defaults to LOAD_STATE_VAR or
            sql_runner.
        LOAD_STATE_VAR: If present, this load state variable is updated on
            success.
    This will fail at the first statement that fails and will not continue.
    Be sure to use local temporary or temporary tables, as there is no
    cleanup.
    """
    job_name = config.get('APP_NAME', APP_NAME)
    logger = get_etl_logger(job_name)
    try:
        with pid.PidFile(pidname="%s.LOCK" % job_name,
                         piddir=LOCK_ROOT,
                         enforce_dotpid_postfix=False) as p_lock:
            logger.info("-------------------------------")
            logger.info("Running %s application with process id: %d", job_name,
                        p_lock.pid)
            logger.info("Starting %s for load_state_variable %s", job_name,
                        config.get('LOAD_STATE_VAR'))
            if sys.stdout.isatty():
                sys.stderr.write("Logging all output to %s\n" %
                                 logger.handlers[0].baseFilename)
            logger.info("Connecting to %s", config.get('DW_NAME'))
            with OXDB(config.get('DW_NAME')) as oxdb:
                size = len(config.get('STATEMENTS'))
                # Set dynamic variables
                for key, val in config.get('VARIABLES', {}).items():
                    if str(val).lower().startswith('select '):
                        val %= config.get('VARIABLES')
                        config['VARIABLES'][key], = \
                            oxdb.get_executed_cursor(val).fetchone()
                for index, statement in enumerate(config.get('STATEMENTS'),
                                                  start=1):
                    statement %= config.get('VARIABLES', {})
                    logger.info("STATEMENT(%s/%s) %s;", index, size, statement)
                    # get_executed_cursor has already run the statement, so
                    # only SELECTs need anything more: stream their rows out.
                    cursor = oxdb.get_executed_cursor(statement)
                    if str(statement).lower().startswith('select '):
                        writer = csv.writer(
                            sys.stdout,
                            delimiter=config.get('FIELD_SEP',
                                                 DEFAULT_FIELD_SEP))
                        if config.get('HEADERS', False):
                            writer.writerow(col[0]
                                            for col in cursor.description)
                        for row in cursor:
                            writer.writerow(row)
                if config.get('LOAD_STATE_VAR') is not None:
                    logger.info("SETTING %s in load_state.",
                                config.get('LOAD_STATE_VAR'))
                    LoadState(
                        oxdb.connection,
                        variable_name=config.get('LOAD_STATE_VAR')).upsert()
            logger.info("Completed %s for load_state_variable %s", job_name,
                        config.get('LOAD_STATE_VAR'))
    except (pid.PidFileAlreadyRunningError, pid.PidFileAlreadyLockedError):
        logger.warning("Unable to get lock for %s application. Exiting...",
                       job_name)
    except Exception as err:
        logger.error("Application %s FAILED. ERROR %s", job_name, err)
        raise Exception("Application %s FAILED. ERROR %s" % (job_name, err))
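Pulling the docstring together, a minimal config for `run` might look like this. Every value is hypothetical; the STATEMENTS use `%(name)s` placeholders because they are interpolated against VARIABLES before execution:

run({
    'APP_NAME': 'nightly_rollup',
    'DW_NAME': 'SNOWFLAKE',
    'VARIABLES': {'batch_date': '2018-03-14'},
    'STATEMENTS': [
        "DELETE FROM rollup WHERE batch_date = '%(batch_date)s'",
        "INSERT INTO rollup SELECT * FROM staging "
        "WHERE batch_date = '%(batch_date)s'",
    ],
    'LOAD_STATE_VAR': 'rollup_last_run',
})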
Example #16
    def setUp(self):
        setup_schema()
        self.logger = get_etl_logger(JOB_NAME)  # , log_directory=None)
        self.options = OPTIONS(JOB_NAME, '2018-03-14_04', 1181, True)