Example #1
0
 def __init__(self, cfg_mgr):
     """Set up the inventory helpers, logger and message buffer.

     Args:
         cfg_mgr: configuration object; stored on the instance and passed
             to ITInventory, ExportITInventory and get_logger.
     """
     self.log_msg = ''
     self.cfg_mgr = cfg_mgr
     # Construction order: IT inventory, export inventory, then logger.
     self.it_inventory = ITInventory(cfg_mgr)
     self.export_it_inventory = ExportITInventory(cfg_mgr)
     self.logger = get_logger(cfg_mgr)
Example #2
0
 def __init__(self, meta_dict, cfg_mgr):
     """Keep a private deep copy of ``meta_dict`` and attach a logger.

     Args:
         meta_dict: value that is deep-copied onto ``self.meta_dict`` so
             the instance does not share state with the caller's object.
         cfg_mgr: configuration object; stored and passed to get_logger.
     """
     self.cfg_mgr = cfg_mgr
     record_copy = copy.deepcopy(meta_dict)
     self.meta_dict = record_copy
     # NOTE: no check for allowed frequencies is performed here.
     self.logger = get_logger(cfg_mgr)
Example #3
0
 def __init__(self, cfg_mgr, database, table, jdbcurl):
     """Store the connection coordinates and create a logger.

     Args:
         cfg_mgr: configuration object; stored and passed to get_logger.
         database: stored as ``self.database``.
         table: stored as ``self.table``.
         jdbcurl: stored as ``self.jdbcurl``.
     """
     self.cfg_mgr = cfg_mgr
     self.database, self.table = database, table
     self.jdbcurl = jdbcurl
     self.logger = get_logger(cfg_mgr)
Example #4
0
 def __init__(self, cfg_mgr):
     """Create the utilities helper and logger, then cache config values.

     Args:
         cfg_mgr: configuration object; must expose ``queue_name``,
             ``it_table`` and ``freq_ingest``.
     """
     self.cfg_mgr = cfg_mgr
     self.utilities = Utilities(cfg_mgr)
     self.logger = get_logger(cfg_mgr)
     # Values read once from the configuration object.
     self.queue = cfg_mgr.queue_name
     self.table = cfg_mgr.it_table
     self.freq_ingest_table = cfg_mgr.freq_ingest
Example #5
0
 def __init__(self, action_type, name, ok, error, cfg_mgr):
     """Store the action description and create a logger.

     Args:
         action_type: stored as ``self.action_type``.
         name: stored as ``self.name``.
         ok: stored as ``self.ok``.
         error: stored as ``self.error``.
         cfg_mgr: configuration object; stored and passed to get_logger.
     """
     self.action_type, self.name = action_type, name
     self.ok, self.error = ok, error
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
Example #6
0
 def __init__(self, workflow_name, cfg_mgr):
     """Prepare the state needed to write a workflow XML file.

     Opens ``<cfg_mgr.files>/<workflow_name>.xml`` for binary writing. The
     handle is kept on ``self.file_out`` and is not closed here.

     Args:
         workflow_name: base name of the workflow; used for the output
             file name and set on the action builder as ``workflowName``.
         cfg_mgr: configuration object passed to ActionBuilder, Utilities
             and get_logger; must expose ``files``.
     """
     self.cfg_mgr = cfg_mgr
     builder = ActionBuilder(cfg_mgr)
     self.action_builder = builder
     self.utilities = Utilities(cfg_mgr)
     builder.workflowName = workflow_name
     self.logger = get_logger(cfg_mgr)
     # Workflow being written
     xml_name = workflow_name + '.xml'
     self.wf_file_path = os.path.join(cfg_mgr.files, xml_name)
     self.file_out = open(self.wf_file_path, "wb")
     self.workflow_name = workflow_name
     self.workflow_started = False
     self.sub_workflow_template = ''
     # used for subworkflow generation
     self.workflows_chunks = []
Example #7
0
 def __init__(self, meta_dict, cfg_mgr):
     """Store a private copy of the record and the known property names.

     Args:
         meta_dict: A hive db record {column_name: value}; deep-copied so
             the instance does not share state with the caller.
         cfg_mgr: instance of ibis.utilities.config_manager.ConfigManager
     """
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
     self.meta_dict = copy.deepcopy(meta_dict)
     # Property names kept on the instance, in a fixed order.
     self._props = [
         'domain', 'split_by', 'mappers', 'jdbcurl', 'target_dir',
         'schema', 'query', 'username', 'password_file', 'load',
         'frequency', 'fetch_size', 'hold', 'esp_appl_id', 'views',
         'esp_group', 'check_column', 'table_name', 'database',
         'db_env'
     ]
Example #8
0
 def __init__(self, meta_dict, cfg_mgr):
     """Store a private copy of the record and the known property names.

     Args:
         meta_dict: A hive db record {column_name: value}; deep-copied so
             the instance does not share state with the caller.
         cfg_mgr: instance of ibis.utilities.config_manager.ConfigManager
     """
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
     self.meta_dict = copy.deepcopy(meta_dict)
     # Property names kept on the instance, in a fixed order.
     self._props = [
         'mappers', 'jdbcurl', 'source_dir', 'schema',
         'username', 'password_file', 'load', 'frequency',
         'fetch_size', 'automation_appl_id', 'table_name',
         'database', 'target_schema', 'target_table', 'weight',
         'db_env', 'staging_database'
     ]
Example #9
0
 def __init__(self, cfg_mgr, pre_defined_actions, scripts_dir):
     """Store the inputs and build the grammar for an ``action`` line.

     The grammar is the keyword ``action`` (result name
     ``action_header``) followed by a group (``action_body``) holding one
     word of alphanumerics, ``.``, ``_`` or ``/`` (``action_id``).

     Args:
         cfg_mgr: Instance of ibis.utilities.config_manager.ConfigManager
         pre_defined_actions: list of default action ids
         scripts_dir: stored as ``self.scripts_dir``
     """
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
     self.pre_defined_actions = pre_defined_actions
     header = pp.Keyword('action').setResultsName('action_header')
     action_id = pp.Word(pp.alphanums + '._/').setResultsName('action_id')
     body = pp.Group(action_id).setResultsName('action_body')
     self.pattern = header + body
     self.scripts_dir = scripts_dir
Example #10
0
 def __init__(self, cfg_mgr, it_table):
     """Copy the fields needed from ``it_table`` onto the instance.

     The database and table names are stored upper-cased; every other
     field is copied unchanged.

     Args:
         cfg_mgr: configuration object; stored and passed to get_logger.
         it_table(ibis.model.table.ItTable): instance of ItTable
     """
     self.cfg_mgr = cfg_mgr
     source = it_table
     self.it_table_obj = source
     # Upper-cased identifiers.
     self.db = source.database.upper()
     self.table = source.table_name.upper()
     # Fields copied as-is.
     self.domain = source.domain
     self.user_name = source.username
     self.password_file = source.password_file
     self.driver = source.connection_factories
     self.jdbc_url = source.jdbcurl
     self.mappers = source.mappers
     self.db_env = source.db_env
     self.logger = get_logger(cfg_mgr)
Example #11
0
 def __init__(self, cfg_mgr):
     """Remember the configuration, its oozie URL, and create a logger.

     Args:
         cfg_mgr: configuration object; must expose ``oozie_url``.
     """
     url = cfg_mgr.oozie_url
     self.oozie_url = url
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
Example #12
0
 def __init__(self, action_type, name, cfg_mgr):
     """Store the action type and name, and create a logger.

     Args:
         action_type: stored as ``self.action_type``.
         name: stored as ``self.name``.
         cfg_mgr: configuration object; stored and passed to get_logger.
     """
     self.action_type, self.name = action_type, name
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
Example #13
0
def _build_arg_parser():
    """Build the argument parser holding every ibis command line option.

    Returns:
        argparse.ArgumentParser: parser with all options registered, in
        the order they appear in ``--help``.
    """
    parser = ArgumentParser()

    # Properties
    parser.add_argument('--db',
                        nargs=1,
                        type=str,
                        help='Used to provide a database name')
    parser.add_argument('--table',
                        nargs=1,
                        type=str,
                        help='Used to provide a table name')
    parser.add_argument('--frequency',
                        nargs=1,
                        type=str,
                        help='Used to provide a frequency')
    parser.add_argument('--teamname',
                        nargs=1,
                        type=str,
                        help='Used to provide a team name')
    parser.add_argument('--activate',
                        nargs=1,
                        type=str,
                        help='Used to provide a activator(yes/no)')
    parser.add_argument('--env',
                        nargs=1,
                        type=str,
                        required=True,
                        help='REQUIRED. Used to provide the ibis '
                        'environment for properties file.')
    parser.add_argument('--for-env',
                        nargs=1,
                        type=str,
                        help='Optional. To create workflow of dev on prod.')
    # Checks and Balances
    parser.add_argument('--checks-balances',
                        action='store_true',
                        help='Used to interact with check balances table. '
                        'required: --db {db_name}, --table {tbl_name} '
                        'options: --update-lifespan list[str], '
                        '--update-all-lifespan')

    # Business Table
    parser.add_argument('--submit-request',
                        type=FileType('r'),
                        help='Used to generate oozie workflow')
    parser.add_argument('--export-request',
                        type=FileType('r'),
                        help='Used to generate oozie workflow')
    parser.add_argument('--submit-request-prod',
                        type=FileType('r'),
                        help='Used to mark changes in it table '
                        'into staging_it_table')

    # IT Table
    parser.add_argument('--save-it-table',
                        action='store_true',
                        help='Saves all records in it_table to file')

    parser.add_argument('--update-it-table',
                        type=FileType('r'),
                        help='Used to submit text file containing table '
                        'properties for the IT table')

    # IT Table Export
    parser.add_argument('--update-it-table-export',
                        type=FileType('r'),
                        help='Used to submit text file containing table '
                        'properties for the IT table export')

    # Run
    parser.add_argument('--run-job',
                        type=str,
                        help='Used to submit a workflow to run an oozie job')

    # View generation
    parser.add_argument('--view',
                        action='store_true',
                        help='Create a view. required: --view-name '
                        '{name}, --db {db_name}, '
                        '--table {tbl_name} optional param: '
                        '--select {cols}, '
                        '--where {statement}')
    parser.add_argument('--view-name',
                        nargs=1,
                        type=str,
                        help='Used to provide a view name')
    parser.add_argument('--select',
                        nargs='+',
                        type=str,
                        help='Used to provide a list of columns')
    parser.add_argument('--where',
                        nargs=1,
                        type=str,
                        help='Used to provide a where statement')

    # Generate workflows base on filter
    parser.add_argument('--gen-esp-workflow',
                        nargs='+',
                        type=str,
                        help='Create workflow(s) based on a list of ESP '
                        'ids separated by spaces.')
    parser.add_argument('--gen-esp-workflow-tables',
                        type=FileType('r'),
                        help='Create workflow(s) based on a list of '
                        'tables from request file')

    # config based workflows
    parser.add_argument('--gen-config-workflow',
                        nargs=1,
                        type=FileType('r'),
                        help='Used to generate custom hive or'
                        ' shell scripts in workflows')
    parser.add_argument('--config-workflow-properties',
                        nargs=1,
                        type=str,
                        help='Used to provide config workflow properties')
    parser.add_argument('--queue-name',
                        nargs=1,
                        type=str,
                        help='Used for providing hadoop queue name')

    parser.add_argument('--esp-id', nargs=1, type=str, help='esp-appl-id')
    parser.add_argument('--message',
                        nargs=1,
                        type=str,
                        help='Provide description for bmrs')

    parser.add_argument('--export',
                        action='store_true',
                        help='Export hadoop table to teradata. '
                        'required: --db {db}, '
                        'name of db you want to export, '
                        '--table {table}, name of table '
                        'you want to export, --to {db}.{table}, '
                        'name of database and '
                        'table to export to')
    parser.add_argument('--to',
                        nargs=1,
                        type=str,
                        help='Used to provide {database}.{table} '
                        'to export to in Teradata')

    parser.add_argument('--auth-test',
                        action='store_true',
                        help='Test sqoop auth. '
                        'required: --source-db {db}, name of db'
                        ' you want to export, '
                        '--source-table {table}, name of table '
                        'you want to export, '
                        '--jdbc-url {jdbcurl}, connection string '
                        'for target schema, '
                        '--user-name {user_name}, db user name, '
                        '--password-file {hdfs_path}, hdfs'
                        ' password file path')

    # Export to Oracle
    parser.add_argument('--export-oracle',
                        action='store_true',
                        help='Export hadoop table to Oracle. '
                        'required: --source-db {db}, name of db '
                        'you want to export, '
                        '--source-table {table}, name of table '
                        'you want to export, '
                        '--source-dir {dir}, hdfs location of '
                        'export table, '
                        '--jdbc-url {jdbcurl}, connection string'
                        ' for target schema, '
                        '--target-schema {targetdb}, oracle schema, '
                        '--target-table {targettable}, oracle table, '
                        '--update-key {updatekey}, non mandatory'
                        ' param - primary key on target table, '
                        '--user-name {username}, oracle username, '
                        '--pass-alias {passalias}, oracle password'
                        ' alias or jceks url')
    parser.add_argument('--source-db',
                        nargs=1,
                        type=str,
                        help='Used to provide source hive schema to'
                        ' export to oracle')
    parser.add_argument('--source-table',
                        nargs=1,
                        type=str,
                        help='Used to provide source hive table to '
                        'export to oracle')
    parser.add_argument('--source-dir',
                        nargs=1,
                        type=str,
                        help='Used to provide hdfs source directory'
                        ' to export to '
                        'oracle, directory should not include'
                        ' the final table directory')
    parser.add_argument('--jdbc-url',
                        nargs=1,
                        type=str,
                        help='Used to provide oracle connection '
                        'information to export to oracle')
    parser.add_argument('--target-schema',
                        nargs=1,
                        type=str,
                        help='Used to provide oracle target schema '
                        'to export to oracle')
    parser.add_argument('--target-table',
                        nargs=1,
                        type=str,
                        help='Used to provide oracle target table to '
                        'export to oracle')
    parser.add_argument('--update-key',
                        nargs='*',
                        type=str,
                        help='Used to provide oracle primary key to'
                        ' export to oracle')
    parser.add_argument('--user-name',
                        nargs=1,
                        type=str,
                        help='Used to provide oracle user name to export'
                        ' to oracle')
    parser.add_argument('--password-file',
                        nargs=1,
                        type=str,
                        help='Used to provide oracle password file')
    parser.add_argument('--pass-alias',
                        nargs=1,
                        type=str,
                        help='Used to provide oracle password alias to'
                        ' export to oracle')
    parser.add_argument('--source-type',
                        nargs=1,
                        type=str,
                        help='Used to provide source vendor type')

    # Export to Teradata
    # NOTE(review): this flag uses an underscore unlike every other option
    # and has no entry in main()'s dispatch table - confirm it is handled
    # elsewhere. The spelling is kept so existing invocations still parse.
    parser.add_argument(
        '--export_teradata',
        action='store_true',
        help='Export hadoop table to Teradata. '
        'required: --source-db {db}, name of db you want to export, '
        '--source-table {table}, name of table you want to export, '
        '--source-dir {dir}, hdfs location of export table, '
        '--jdbc-url {jdbcurl}, connection string for target schema, '
        '--target-schema {targetdb}, teradata Database, '
        '--target-table {targettable}, teradata table, '
        '--user-name {username}, oracle username, '
        '--pass-alias {passalias}, oracle password alias or jceks url')

    # Generate IT request file input file
    parser.add_argument('--gen-it-table',
                        type=FileType('r'),
                        help='Generate IT table with automatic split-by '
                        'if possible')
    parser.add_argument('--gen-qa-data-sampling',
                        type=FileType('r'),
                        help='Generate workflow for QA data sampling')
    parser.add_argument('--parse-request-file',
                        type=FileType('r'),
                        help='Print each table in request file as json')

    # Workflow actions
    parser.add_argument('--hive',
                        nargs='*',
                        type=str,
                        help='Generates hive action workflow')
    parser.add_argument('--shell',
                        nargs='*',
                        type=str,
                        help='Generates shell action workflow')
    parser.add_argument('--impala',
                        nargs='*',
                        type=str,
                        help='Generate impala action workflow')
    parser.add_argument('--gen-action',
                        nargs='*',
                        type=str,
                        help='Generates action for hive,shell,impala '
                        'in one xml')

    # Copy backup files to live
    parser.add_argument('--retrieve-backup',
                        action='store_true',
                        help='Copies backup files to live. required: '
                        '--db {name} --table {name}')

    # Update freq_ingest Activator
    parser.add_argument('--update-activator',
                        action='store_true',
                        help='provide team frequency, Activator(yes/no), '
                        'team name and full table name')

    # Drop all the table from selected database
    parser.add_argument('--wipe-perf-env',
                        nargs=1,
                        type=str,
                        help='Provide the team_name or database '
                        'name for dropping all tables')

    parser.add_argument('--reingest-all',
                        action='store_true',
                        help='Use this option with wipe-perf-env to '
                        'reingest all tables')

    # Not saving workflows to git
    parser.add_argument('--no-git',
                        action='store_true',
                        help='Not saving workflow to git')
    # No dry run workflow
    parser.add_argument('--no-dry-run',
                        action='store_true',
                        help='Dont dry run workflow')

    parser.add_argument('--timeout',
                        type=str,
                        help='Timeout duration for auto split by')
    parser.add_argument('--ingest-version',
                        action='store_true',
                        help='Get the ingest version used for the xml')
    parser.add_argument('--kite-ingest',
                        type=FileType('r'),
                        help='Used to generate kite-ingest workflow')
    return parser


def _ensure_dir(path, mode):
    """Create directory ``path`` with permission ``mode`` if it is missing."""
    if not os.path.isdir(path):
        os.mkdir(path)
        # chmod explicitly: the mode given to mkdir is subject to the umask.
        os.chmod(path, mode)


def main():
    """Command line arguments parser.
    Calls the appropriate handler method

    Parses the command line, prepares the working directories and log
    file named by the configuration, then calls the handler registered
    for every option the user supplied. Afterwards the log file content
    is echoed to stdout.

    Side effects:
        Sets the module globals ``driver`` and ``logger``.
        Exits the process with status 1 when a handler returns ``False``,
        when a handler raises, or when no environment was given.
    """
    global driver
    global logger

    args = _build_arg_parser().parse_args()

    usr_opts = vars(args)
    # Filter usr_opt of None values
    usr_opts = {k: usr_opts[k] for k in usr_opts if usr_opts[k] is not None}
    # Filter usr_opt of False values
    usr_opts = {k: usr_opts[k] for k in usr_opts if usr_opts[k] is not False}

    # Option name (argparse dest) -> handler taking the parsed args.
    ibis_opts = {
        'checks_balances': checks_balances,
        'export': export,
        'gen_esp_workflow_tables': gen_esp_workflow_tables,
        'update_activator': update_activator,
        'wipe_perf_env': wipe_perf_env,
        'gen_esp_workflow': gen_esp_workflow,
        'gen_config_workflow': gen_config_workflow,
        'retrieve_backup': retrieve_backup,
        'run_job': run_job,
        'gen_it_table': gen_it_table,
        'submit_request': submit_request,
        'export_request': export_request,
        'export_oracle': export_oracle,
        'save_it_table': save_it_table,
        'update_it_table': update_it_table,
        'update_it_table_export': update_it_table_export,
        'auth_test': auth_test,
        'ingest_version': ingest_version,
        'parse_request_file': parse_request_file,
        'kite_ingest': gen_kite_workflow
    }

    is_failed = False
    if args.env:
        # NOTE(review): --for-env uses nargs=1, so args.for_env is a
        # one-element list (or None) while env is unwrapped with [0];
        # confirm ConfigManager expects the list form.
        cfg_mgr = ConfigManager(args.env[0], args.for_env)
        # 0o774 is the same value as the legacy 0774 literal and is
        # accepted by Python 2.6+ as well as Python 3.
        file_permission = 0o774

        for work_dir in (cfg_mgr.files, cfg_mgr.logs, cfg_mgr.saves):
            _ensure_dir(work_dir, file_permission)

        # clear log file
        with open(cfg_mgr.log_file, 'wb'):
            pass
        logger = get_logger(cfg_mgr)

        driver = Driver(cfg_mgr)

        try:
            # Utilize ibis_opts to call correct function(s)
            for key in usr_opts:
                handler = ibis_opts.get(key)
                if handler:
                    # call the appropriate method
                    success = handler(args)
                    # Only an explicit False marks failure; handlers that
                    # return None are treated as successful.
                    if success is False:
                        is_failed = True
            inventory.Inventory.close()
        except Exception:
            # Top-level boundary: log the traceback and report failure
            # through the exit status instead of crashing.
            logger.error('\n' + traceback.format_exc())
            is_failed = True

        # print the log
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and also parses on Python 3.
        with open(cfg_mgr.log_file, 'rb') as file_handler:
            log_text = file_handler.read()
            if log_text:
                print('+' * 20)
                print('Printing ibis.log')
                print('=' * 20)
                print(log_text)
                print('+' * 20)
    else:
        is_failed = True
        err_msg = ('Environment required for ibis. '
                   'Please specify --env argument and provide a environment.')
        print(err_msg)

    if is_failed:
        # expose ibis failure to the calling env
        sys.exit(1)
Example #14
0
 def __init__(self, cfg_mgr):
     """Store the configuration object and create a logger from it.

     Args:
         cfg_mgr: configuration object; stored and passed to get_logger.
     """
     self.cfg_mgr = cfg_mgr
     log = get_logger(cfg_mgr)
     self.logger = log
Example #15
0
 def __init__(self, cfg_mgr):
     """Store the configuration, start with no table, create a logger.

     Args:
         cfg_mgr: ConfigManager object; stored and passed to get_logger.
     """
     self.table = None
     self.cfg_mgr = cfg_mgr
     self.logger = get_logger(cfg_mgr)
Example #16
0
 def __init__(self, cfg_mgr):
     """Store the configuration, the name patterns, and create a logger.

     ``pattern_non_alphanumeric`` is a compiled regular expression; the
     two ``*_at_start`` patterns are kept as plain pattern strings.

     Args:
         cfg_mgr: configuration object; stored and passed to get_logger.
     """
     self.cfg_mgr = cfg_mgr
     # Anything other than ASCII letters, digits and underscore.
     non_alnum = re.compile(r'[^A-Za-z0-9_]')
     self.pattern_non_alphanumeric = non_alnum
     # Leading digit / leading underscore (uncompiled).
     self.pattern_numeric_at_start = r'^\d'
     self.pattern_underscore_at_start = r'^_'
     self.logger = get_logger(cfg_mgr)
Example #17
0
 def __init__(self, cfg_mgr):
     """Store the configuration and create the logger and Oozie client.

     Args:
         cfg_mgr: configuration object; stored and passed to get_logger
             and OozieAPi.
     """
     self.cfg_mgr = cfg_mgr
     # The logger is created before the Oozie API wrapper.
     self.logger = get_logger(cfg_mgr)
     self.oozie_api = OozieAPi(cfg_mgr)