Example #1
0
    def test_add_map_column_java(self):
        """Check the column=Java-type mapping string built per source type.

        Each source table is given TIMESTAMP columns whose names contain
        special characters; element [1] of the builder's return value is
        compared with the expected ``name=String`` list for that source.
        """
        ora_tbl = ItTable(fake_fact_tbl_prop, self.cfg_mgr)
        sqls_tbl = ItTable(sqlserver_fake_tablename, self.cfg_mgr)
        td_tbl = ItTable(td_fake_tablename, self.cfg_mgr)
        db2_tbl = ItTable(db2_fake_tablename, self.cfg_mgr)
        mysql_tbl = ItTable(fake_fact_tbl_prop_mysql, self.cfg_mgr)

        col_types_ora = [['Col1', 'TIMESTAMP'], ['Account_#', 'TIMESTAMP'],
                         ['colu_', 'TIMESTAMP']]
        col_types = [['Col1', 'TIMESTAMP'], ['1name', 'TIMESTAMP'],
                     ['Account_#', 'TIMESTAMP'], ['cc-oun-t', 'TIMESTAMP'],
                     ['o unt', 'TIMESTAMP'], ['_colu_', 'TIMESTAMP']]

        ora_test = self.builder.add_map_column_java(ora_tbl, col_types_ora)
        sqls_test = self.builder.add_map_column_java(sqls_tbl, col_types)
        td_test = self.builder.add_map_column_java(td_tbl, col_types)
        db2_test = self.builder.add_map_column_java(db2_tbl, col_types)
        mysql_test = self.builder.add_map_column_java(mysql_tbl, col_types)

        ora_exp = 'COL1=String,ACCOUNT_=String,COLU_=String'
        sqls_exp = ('Col1=String,i_1name=String,Account_=String,'
                    'ccount=String,ount=String,i_colu_=String')
        td_exp = ('Col1=String,i_1name=String,Account_=String,'
                  'ccount=String,ount=String,i_colu_=String')
        db2_exp = ('COL1=String,I_1NAME=String,ACCOUNT_=String,CCOUNT=String,'
                   'OUNT=String,I_COLU_=String')
        mysql_exp = ('Col1=String,i_1name=String,Account_=String,'
                     'ccount=String,ount=String,i_colu_=String')

        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(ora_test[1], ora_exp)
        self.assertEqual(sqls_test[1], sqls_exp)
        self.assertEqual(td_test[1], td_exp)
        self.assertEqual(db2_test[1], db2_exp)
        self.assertEqual(mysql_test[1], mysql_exp)
Example #2
0
 def test_sort_table_prop_by_load(self):
     """Tests that sort_table_prop_by_load returns the expected tables
     under each expected key.

     Every expected key must be present in the result and, position by
     position, the tables stored under it must equal the expected ones.
     """
     tables = [
         ItTable(fake_fct_tbl_prop, self.cfg_mgr),
         ItTable(fake_prof_tbl_prop, self.cfg_mgr),
         ItTable(fake_fact_tbl_prop, self.cfg_mgr),
         ItTable(fake_ben_tbl_prop, self.cfg_mgr),
         ItTable(fake_cens_tbl_prop, self.cfg_mgr)
     ]
     expected = {
         '100': [
             ItTable(fake_ben_tbl_prop, self.cfg_mgr),
             ItTable(fake_cens_tbl_prop, self.cfg_mgr)
         ],
         '010': [
             ItTable(fake_fct_tbl_prop, self.cfg_mgr),
             ItTable(fake_fact_tbl_prop, self.cfg_mgr)
         ],
         '001': [ItTable(fake_prof_tbl_prop, self.cfg_mgr)]
     }
     expected = collections.OrderedDict(sorted(expected.items()))
     result = self.generator.sort_table_prop_by_load(tables)
     # Assert directly instead of folding everything into one boolean, so
     # a failure reports the offending key/table.
     for key, exp_tables in expected.items():
         self.assertIn(key, result)
         for i, table in enumerate(exp_tables):
             self.assertEqual(result[key][i], table)
Example #3
0
 def test_update_frequency_load(self):
     """Assign frequency/load attributes one after another and check the
     related attributes after each assignment."""
     tbl = ItTable(self.it_table_dict, self.cfg_mgr)

     # assign the readable form, read back the coded form
     tbl.frequency_readable = 'monthly'
     self.assertEqual(tbl.frequency, '010')

     # assign the coded form, read back the readable form
     tbl.frequency = '110'
     self.assertEqual(tbl.frequency_readable, 'fortnightly')

     # assigning load must leave the readable frequency untouched
     tbl.load = '001'
     self.assertEqual(tbl.frequency_readable, 'fortnightly')
     self.assertEqual(tbl.load, '001')
Example #4
0
 def test_gen_full_ingest_actions_authinfo_fail(self):
     """test that a ValueError with the expected message is raised when
     the table's username is empty"""
     self.generator = WorkflowGenerator('test_workflow_dev', self.cfg_mgr)
     ok_to_action = {'sqoop_to': 'test', 'end_to': 'test'}
     it_table = ItTable(fake_ben_pwdfile_tbl_prop, self.cfg_mgr)
     # empty username value
     it_table.username = ''
     with self.assertRaises(ValueError) as exp_cm:
         self.generator.gen_full_ingest_actions(it_table, ok_to_action)
     err_path = os.path.join(BASE_DIR, 'expected/auth_info_missing.txt')
     # read via a context manager so the file handle is always closed
     with open(err_path) as err_file:
         test_err_msg = err_file.read()
     # NOTE(review): `.message` only exists on Python 2 exceptions
     bool_test = self.strings_equal(exp_cm.exception.message, test_err_msg)
     self.assertTrue(bool_test)
Example #5
0
    def test_load(self):
        """Check load_readable for three different 'load' strings."""
        cases = (
            ('101100', 'small'),
            ('100010', 'medium'),
            ('011001', 'heavy'),
        )
        for raw_load, readable in cases:
            # a fresh ItTable is built for every load value
            self.it_table_dict['load'] = raw_load
            table = ItTable(self.it_table_dict, self.cfg_mgr)
            self.assertEqual(table.load_readable, readable)
Example #6
0
    def test_frequency(self):
        """Check frequency_readable for a series of 'load' strings, plus
        the frequency_load produced for an empty 'load'."""
        # first case also checks the load attribute of the built table
        self.it_table_dict['load'] = '101010'
        table = ItTable(self.it_table_dict, self.cfg_mgr)
        self.assertEqual(table.frequency_readable, 'daily')
        self.assertEqual(table.load, '010')

        remaining_cases = (
            ('100010', 'weekly'),
            ('011010', 'biweekly'),
            ('110010', 'fortnightly'),
            ('010010', 'monthly'),
            ('001010', 'quarterly'),
            ('111010', 'yearly'),
        )
        for raw_load, readable in remaining_cases:
            self.it_table_dict['load'] = raw_load
            table = ItTable(self.it_table_dict, self.cfg_mgr)
            self.assertEqual(table.frequency_readable, readable)

        # an empty load string yields this frequency_load value
        self.it_table_dict['load'] = ''
        table = ItTable(self.it_table_dict, self.cfg_mgr)
        self.assertEqual(table.frequency_load, '000001')
Example #7
0
 def test_gen_schedule_subworkflows_int_sys(self):
     """test gen schedule subworkflows for same table from sys and int

     Expects four generated files and a test_subwf.xml matching the
     stored subwf_sys_int.xml resource.
     """
     sub_wf_file_name = 'test_subwf'
     tab1 = ItTable(sqls_int, self.cfg_mgr)
     tab2 = ItTable(sqls_sys, self.cfg_mgr)
     heavy_tables = [tab1, tab2]
     workflows_chunks = [[tab1, 'int_full_load'], [tab2, 'sys_full_load']]
     gen_files = self.driver.gen_schedule_subworkflows(
         sub_wf_file_name, workflows_chunks, heavy_tables, 'FAKED001')
     # assertEqual instead of the deprecated assertEquals alias
     self.assertEqual(len(gen_files), 4)
     expected_file = os.path.join(BASE_DIR,
                                  'test_resources/subwf_sys_int.xml')
     test_file = os.path.join(self.cfg_mgr.files, 'test_subwf.xml')
     self.assertTrue(self.files_equal(expected_file, test_file))
Example #8
0
 def test_gen_wld_tables(self):
     """Generate a .wld file for three tables and compare it with the
     stored expected file."""
     tables = [
         ItTable(fake_fact_tbl_prop, self.cfg_mgr),
         ItTable(fake_ben_tbl_prop, self.cfg_mgr),
         ItTable(fake_prof_tbl_prop, self.cfg_mgr),
     ]
     # one workflow name per table, taken from its db_table_name
     workflow_names = [tbl.db_table_name for tbl in tables]
     self.inventory.gen_wld_tables('FAKED306', tables, workflow_names)
     generated_wld = os.path.join(self.inventory.cfg_mgr.files,
                                  'FAKED306.wld')
     expected_wld = os.path.join(BASE_DIR, 'expected/test_tables.wld')
     self.assertTrue(self.files_equal(generated_wld, expected_wld))
Example #9
0
    def test_get_sqoop_query(self):
        """Check the sqoop SELECT built for columns with special chars.

        Element [1] of get_sqoop_query's return value is compared with the
        expected query for Oracle (incl. a table name containing '$'),
        SQL Server, Teradata and DB2 tables.
        """
        ora_tbl = ItTable(fake_fact_tbl_prop, self.cfg_mgr)
        ora_dollar_tbl = ItTable(dollar_fake_tablename, self.cfg_mgr)
        sqls_tbl = ItTable(sqlserver_fake_tablename, self.cfg_mgr)
        td_tbl = ItTable(td_fake_tablename, self.cfg_mgr)
        db2_tbl = ItTable(db2_fake_tablename, self.cfg_mgr)
        col_types_ora = [['Col1', 'varchar'], ['Account_#', 'varchar'],
                         ['colu_', 'varchar']]
        col_types_ora_dollar = [['account', 'varchar']]
        col_types = [['Col1', 'varchar'], ['1name', 'varchar'],
                     ['Account_#', 'varchar'], ['cc-oun-t', 'varchar'],
                     ['o unt', 'varchar'], ['_colu_', 'varchar']]

        ora_test = self.builder.get_sqoop_query(ora_tbl, col_types_ora)
        ora_dollar_test = self.builder.get_sqoop_query(ora_dollar_tbl,
                                                       col_types_ora_dollar)
        sqls_test = self.builder.get_sqoop_query(sqls_tbl, col_types)
        td_test = self.builder.get_sqoop_query(td_tbl, col_types)
        db2_test = self.builder.get_sqoop_query(db2_tbl, col_types)

        ora_exp = ('SELECT Col1, "Account_#" AS Account_, '
                   'colu_ FROM fake_database.risk_fake_tablename t WHERE 1=1  '
                   'AND $CONDITIONS')
        ora_dollar_exp = (
            'SELECT "account" AS account FROM '
            'fake_database.fake_$tablename t WHERE 1=1  AND $CONDITIONS')
        sqls_exp = (
            "SELECT Col1, [1name] AS [i_1name], "
            "[Account_#] AS [Account_], "
            "[cc-oun-t] AS [ccount], [o unt] AS [ount], "
            "[_colu_] AS [i_colu_]"
            " FROM [fake_database].[dbo].[sqlserver_fake_tablename] WHERE"
            " 1=1  AND $CONDITIONS")
        td_exp = (
            "SELECT Col1, 1name AS i_1name, Account_# AS Account_, "
            "cc-oun-t AS ccount, o unt AS ount, _colu_ AS i_colu_ "
            "FROM FAKE_DATABASE.TD_FAKE_TABLENAME WHERE 1=1  AND $CONDITIONS")
        db2_exp = (
            "SELECT Col1, 1name AS i_1name, Account_# AS Account_, "
            "cc-oun-t AS ccount, o unt AS ount, _colu_ AS i_colu_ "
            "FROM FAKE_DATABASE.DB2_FAKE_TABLENAME WHERE 1=1  AND $CONDITIONS")

        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(ora_test[1], ora_exp)
        self.assertEqual(ora_dollar_test[1], ora_dollar_exp)
        self.assertEqual(sqls_test[1], sqls_exp)
        self.assertEqual(td_test[1], td_exp)
        self.assertEqual(db2_test[1], db2_exp)
Example #10
0
    def test_gen_import_incremental_failure(self, m_get_col_types):
        """Test that gen_import_action raises ValueError ('Unknown
        incremental options') for the incremental settings used here."""
        m_get_col_types.return_value = []
        incremental = {
            'check_column': "test_col",
            'incremental': "lastmodified",
            'last_value': "5555555"
        }
        _tbl = {
            'domain': 'member',
            'jdbcurl': 'jdbc:oracle:thin:@//fake.oracle:1521/'
            'fake_servicename',
            'db_username': '******',
            'password_file': 'jceks://hdfs/user/dev/fake.passwords.jceks#'
            'fake.password.alias',
            'load': '000001',
            'fetch_size': 50000,
            'hold': 0,
            'source_database_name': 'fake_database',
            'source_table_name': 'fake_mem_tablename'
        }
        self.builder.it_table = ItTable(_tbl, self.cfg_mgr)
        # the incremental options above are expected to be rejected
        with self.assertRaises(ValueError) as context:
            self.builder.gen_import_action('fake_mem_tablename_import',
                                           incremental=incremental)

        # assertIn shows both strings on failure, unlike assertTrue(a in b)
        self.assertIn('Unknown incremental options', str(context.exception))
Example #11
0
 def test_get_workflow_name_table_export(self, m_getuser):
     """test _get_workflow_name_table_export

     With the mocked user 'userId' the export workflow name is expected
     to be 'dev_userId_fake_database_fake_cens_tablename_export'.
     """
     m_getuser.return_value = 'userId'
     tab1 = ItTable(fake_cens_tbl_prop, self.cfg_mgr)
     val = self.driver._get_workflow_name_table_export(tab1)
     # assertEqual instead of the deprecated assertEquals alias
     self.assertEqual(
         val, 'dev_userId_fake_database_fake_cens_tablename_export')
Example #12
0
 def insert_placeholder(self, db_name, table_name):
     """Insert a placeholder IT-table row for a new table request.

     The row carries the given source database/table names, hold=1 and
     the lower-cased default db_env from the config manager; all other
     fields are blank strings or 0.

     Args:
         db_name: value stored as source_database_name
         table_name: value stored as source_table_name
     """
     placeholder = dict(
         full_table_name='',
         domain='',
         target_dir='',
         split_by='',
         mappers=0,
         jdbcurl='',
         connection_factories='',
         db_username='',
         password_file='',
         load='',
         fetch_size=0,
         hold=1,
         esp_appl_id='',
         views='',
         esp_group='',
         check_column='',
         source_schema_name='',
         sql_query='',
         actions='',
         source_database_name=db_name,
         source_table_name=table_name,
         db_env=self.cfg_mgr.default_db_env.lower(),
     )
     self.insert(ItTable(placeholder, self.cfg_mgr))
Example #13
0
def parallel_sqoop_output(info):
    """Run a group-by query for one column and summarise the bin counts.

    Kept as a top level function for the sake of multiprocessing.Pool.

    Args:
        info: indexable holding, in order, the config manager, the
            it_table data passed to ItTable, the column name and the
            query to evaluate
    Returns:
        single-element list [(column_name, std_deviation, num_groups)]
    Raises:
        ValueError: when the eval returns a non-zero return code
    """
    cfg_mgr = info[0]
    it_table = info[1]
    column_name = info[2]
    query = info[3]

    source_obj = SourceTable(cfg_mgr, ItTable(it_table, cfg_mgr))
    returncode, output, err = source_obj.eval(query)
    if returncode != 0:
        source_obj.logger.error(err)
        raise ValueError(err)
    _, groupby_counts = source_obj.fetch_rows_sqoop(output)

    # flatten the fetched rows into one list of integer counts
    bin_counts = []
    for row in groupby_counts:
        for value in row:
            bin_counts.append(int(value))
    num_groups = len(bin_counts)
    std_deviation = numpy.std(numpy.array(bin_counts))

    # drop the intermediate collections before returning
    del groupby_counts
    del bin_counts
    gc.collect()

    return [(column_name, std_deviation, num_groups)]
Example #14
0
    def test_gen_import_incremental_lastmodified(self, m_get_col_types,
                                                 mock_eval):
        """Generate an incremental (lastmodified) import action for an
        Oracle jdbc url and compare its xml with the expected workflow."""
        m_get_col_types.return_value = []
        mock_eval.return_value = [['table']]

        where_clause = "x > y"
        incr_opts = {
            'check_column': "test_col",
            'incremental': "lastmodified",
            'last_value': "2012-10-11 11:11:11",
        }
        table_props = {
            'domain': 'member',
            'jdbcurl': 'jdbc:oracle:thin:@//fake.oracle:1521/fake_servicename',
            'db_username': '******',
            'password_file': ('jceks://hdfs/user/dev/fake.passwords.jceks#'
                              'fake.password.alias'),
            'load': '000001',
            'fetch_size': 50000,
            'hold': 0,
            'source_database_name': 'fake_database',
            'source_table_name': 'fake_mem_tablename',
        }
        self.builder.it_table = ItTable(table_props, self.cfg_mgr)

        # ORACLE
        import_action = self.builder.gen_import_action(
            'fake_mem_tablename_import',
            incremental=incr_opts,
            sqoop_where_query=where_clause)
        import_action.ok = 'fake_mem_tablename_avro'
        generated_xml = import_action.generate_action()

        expected_path = os.path.join(
            BASE_DIR, 'expected_workflows/oracle_incremental_sqoop.xml')
        with open(expected_path, 'r') as expected_file:
            expected_xml = expected_file.read()
        self.assertTrue(self.compare_files(generated_xml, expected_xml))
Example #15
0
 def test_gen_import_ora_where(self, mock_get_col_types, mock_eval):
     """Generate an import action for an Oracle table that carries a
     sql_query filter and compare its xml with the expected workflow."""
     mock_get_col_types.return_value = [
         ('Col1', 'varchar'),
         ('Col2', 'number'),
         ('Col3', 'number'),
         ('Col4', 'number'),
     ]
     mock_eval.return_value = [['table']]
     table_props = {
         'domain': 'member',
         'jdbcurl': 'jdbc:oracle:thin:@//fake.oracle:1600/fake_servicename',
         'db_username': '******',
         'password_file': ('jceks://hdfs/user/dev/fake.passwords.jceks#'
                           'fake.password.alias'),
         'load': '000001',
         'fetch_size': 50000,
         'hold': 0,
         'mappers': 6,
         'sql_query': 'ASSMT_DT_KEY > 1',
         'source_database_name': 'fake_database',
         'source_table_name': 'fake_mth_tablename',
     }
     self.builder.it_table = ItTable(table_props, self.cfg_mgr)

     import_action = self.builder.gen_import_action(
         'fake_mth_tablename_import')
     import_action.ok = 'fake_mth_tablename_avro'
     generated_xml = import_action.generate_action()

     expected_path = os.path.join(
         BASE_DIR, 'expected_workflows/ora_sqoop_with_where.xml')
     with open(expected_path, 'r') as expected_file:
         expected_xml = expected_file.read()
     self.assertTrue(self.compare_files(generated_xml, expected_xml))
Example #16
0
    def test_sql_windows_auth(self, m_get_col_types, mock_eval):
        """Generate an import action for a jtds SQL Server url that carries
        useNTLMv2/domain settings and compare it with the expected xml."""
        m_get_col_types.return_value = []
        mock_eval.return_value = [['table']]
        table_props = {
            'domain': 'qa_domain',
            'jdbcurl': ('jdbc:jtds:sqlserver://fake.sqlserver:1433;'
                        'useNTLMv2=true;domain=fake_domain;database=qa_db'),
            'db_username': '******',
            'password_file': ('jceks://hdfs/user/fake_username0/'
                              'fake.passwords.jceks#fake.password.alias'),
            'mappers': 1,
            'load': '000001',
            'fetch_size': 5000,
            'hold': 0,
            'source_database_name': 'qa_db',
            'source_table_name': 'qa_tbl',
        }
        self.builder.it_table = ItTable(table_props, self.cfg_mgr)

        import_action = self.builder.gen_import_action('qa_tbl_import')
        import_action.ok = 'qa_tbl_avro'
        generated_xml = import_action.generate_action()

        expected_path = os.path.join(
            BASE_DIR, 'expected_workflows/sqlserver_windows_auth.xml')
        with open(expected_path, 'r') as expected_file:
            expected_xml = expected_file.read()
        self.assertTrue(self.compare_files(generated_xml, expected_xml))
Example #17
0
 def test_gen_full_table_ingest_custom_scripts(self, m_get_col_types,
                                               mock_eval):
     """Test the generation of all xml actions for a table ingest
     when custom config scripts are provided.

     A temporary requests dir is populated with the fixture .dsl, .hql
     and .sh files, and the config host is swapped for a fake one while
     the workflow is generated.
     """
     m_get_col_types.return_value = [('trans_time', 'TIMESTAMP')]
     mock_eval.return_value = [['table']]
     real_host = self.cfg_mgr.host
     # Expected workflows hardcoded w/ dev host
     self.builder.cfg_mgr.host = 'fake.workflow.host'
     try:
         # setup alternative requests dir
         self.cfg_mgr.requests_dir = os.path.join(
             self.cfg_mgr.files, 'requests')
         os.makedirs(self.cfg_mgr.requests_dir)
         os.makedirs(os.path.join(self.cfg_mgr.requests_dir, 'DEV'))
         self.builder.dsl_parser.scripts_dir = self.cfg_mgr.requests_dir
         fixture_config_path = os.path.join(
             BASE_DIR, 'test_resources/custom_config_no_views.dsl')
         shutil.copy(fixture_config_path, self.cfg_mgr.requests_dir)
         fixture_hql_path = os.path.join(
             BASE_DIR, 'test_resources/hive_test.hql')
         shutil.copy(fixture_hql_path,
                     os.path.join(self.cfg_mgr.requests_dir, 'DEV'))
         fixture_sh_path = os.path.join(
             BASE_DIR, 'test_resources/shell_test.sh')
         shutil.copy(fixture_sh_path, self.cfg_mgr.requests_dir)
         table_obj = ItTable(full_ingest_tbl_custom_config, self.cfg_mgr)
         ingest_xml = self.builder.gen_full_table_ingest(table_obj)
     finally:
         # Return host to real value even when setup/generation raises,
         # so the fake host cannot stay set on the config manager
         self.builder.cfg_mgr.host = real_host
     path = os.path.join(
         BASE_DIR, 'expected_workflows/'
                   'full_ingest_custom_config_scripts.xml')
     with open(path, 'r') as my_file:
         expected = my_file.read()
     self.assertTrue(self.compare_files(ingest_xml, expected))
Example #18
0
 def test_get_split_by_DB2_int(self, mock1):
     """Test get_split_by_column for a DB2 table, feeding the mock four
     successive eval outputs read from fixture files; expects "KEY"."""
     fixture_names = [
         'db2_table_eval_mock.txt',    # Mock table found DB2
         'eval_empty.txt',             # Mock no primary key
         'db2_primary_eval_mock.txt',  # Mock integer columns
         'db2_uniqidx_eval_mock.txt',  # Mock integer column in index found
     ]
     side_effects = []
     for fixture_name in fixture_names:
         with open(BASE_DIR + '/fixtures/' + fixture_name, 'r') as file_h:
             # each mocked call returns (returncode, output, err)
             side_effects.append((0, file_h.read(), ''))
     mock1.side_effect = side_effects
     table_obj = ItTable(self.mock_claim_tbl_dict_DB2[0], self.cfg_mgr)
     split_by_obj = Get_Auto_Split(self.cfg_mgr)
     result = split_by_obj.get_split_by_column(table_obj)
     # debug print dropped; assertEqual reports the value on failure
     self.assertEqual(result, "KEY")
Example #19
0
 def test_get_all_tables_for_automation(self, mock_rows):
     """Test get_all_tables_for_automation with mocked rows: the first
     call yields the mocked claim table, the second an empty list."""
     mock_rows.side_effect = [mock_claim_tbl, []]
     result = self.inventory.get_all_tables_for_automation('TEST01')
     table_obj = ItTable(mock_claim_tbl_dict[0], self.cfg_mgr)
     # assertEqual instead of the deprecated assertEquals alias
     self.assertEqual(result[0], table_obj)
     result = self.inventory.get_all_tables_for_automation('TEST02')
     self.assertEqual(result, [])
Example #20
0
 def test_gen_import_custom_query(self, mock_get_col_types):
     """Generate an import action for a Teradata table with a user
     supplied sql_query and compare its xml with the expected workflow."""
     mock_get_col_types.return_value = [
         ('Col1', 'varchar'),
         ('Col2', 'DATETIME'),
         ('client_struc_key', 'INT'),
     ]
     table_props = {
         'domain': 'member',
         'jdbcurl': 'jdbc:teradata://fake.teradata/DATABASE=fake_database',
         'db_username': '******',
         'split_by': 'client_struc_key',
         'password_file': ('jceks://hdfs/user/dev/fake.passwords.jceks#'
                           'fake.password.alias'),
         'load': '000001',
         'fetch_size': 50000,
         'hold': 0,
         'mappers': 6,
         'source_database_name': 'fake_database',
         'source_table_name': 'fake_mth_tablename',
         'sql_query': "ID > 2 AND COL = 'TEST'",
     }
     self.builder.it_table = ItTable(table_props, self.cfg_mgr)

     import_action = self.builder.gen_import_action(
         'fake_mth_tablename_import')
     import_action.ok = 'fake_mth_tablename_avro'
     generated_xml = import_action.generate_action()

     expected_path = os.path.join(
         BASE_DIR, 'expected_workflows/sqoop_custom_query.xml')
     with open(expected_path, 'r') as expected_file:
         expected_xml = expected_file.read()
     # argument order (expected first) kept as in the original call
     self.assertTrue(self.compare_files(expected_xml, generated_xml))
Example #21
0
    def test_gen_prod_workflow_perf_nodomain(self, m_freq_ingest, m_convert_pdf,
                                            m_v_xml, m_eval, m_c,
                                            m_get_id, m_get_t_automation,
                                            m_dryrun, m_put_w, m_sqoop_cache,
                                            m_sqoop_cache_view, m_dryrun_all):
        """ Tests generate_prod_workflows with 3 tables. One light,
        one medium and one heavy.

        Runs with cfg_mgr.env set to 'perf', compares the generated
        full_fake_open_fake_prog_tablename.hql with the stored no-domain
        hql and checks the number of generated files and the message.
        """
        m_eval.return_value = [['Col1', 'varchar'], ['Col2', 'varchar']]
        m_get_id.side_effect = [appl_ref_id_tbl_01, appl_ref_id_tbl_02]
        _mock_automation_tables = [ItTable(tbl, self.cfg_mgr) for tbl in
                            mock_automation_tbl_perf_domain]
        m_get_t_automation.return_value = _mock_automation_tables
        self.cfg_mgr.env = 'perf'
        status, msg, git_files = self.driver.gen_prod_workflow('FAKED001')
        # loop-invariant values hoisted out of the loop
        git_file = 'full_fake_open_fake_prog_tablename.hql'
        expected_hql_nm = (BASE_DIR +
                           '/test_resources/git_team_hql_nodomain.hql')
        for file_name in git_files:
            if git_file not in file_name:
                continue
            actual_hql_nm = os.path.join(self.cfg_mgr.files, file_name)

            with open(actual_hql_nm, 'r') as file_h:
                actual_hql = file_h.read()
            with open(expected_hql_nm, 'r') as file_h:
                expected_hql = file_h.read()
            # assertTrue(expected, actual) only checked that expected was
            # truthy (actual was taken as the failure message); compare
            # the two contents instead
            self.assertEqual(expected_hql, actual_hql)
        self.assertEqual(len(git_files), 23)
        self.assertIn('Generated', msg)
        self.assertIn('workflow:', msg)
        self.assertIn('subworkflow:', msg)
        self.assertTrue(status)
Example #22
0
 def test__add_split_by(self):
     """test that _add_split_by raises ValueError when the table's
     split_by column is not among the given columns"""
     td_tbl = ItTable(td_fake_tablename, self.cfg_mgr)
     self.builder.it_table = td_tbl
     with self.assertRaises(ValueError) as context:
         self.builder._add_split_by([], [['invalid_col', 'varchar']])
     msg = "Not a valid column name for split_by: 'fake_split_by'"
     # assertIn shows both strings on failure, unlike assertTrue(a in b)
     self.assertIn(msg, str(context.exception))
Example #23
0
    def test_gen_schedule_subworkflows_heavy(self):
        """test gen schedule subworkflows for 3 heavy tables

        Expects four generated files and a test_subwf.xml matching the
        stored subwf_heavy.xml resource.
        """
        sub_wf_file_name = 'test_subwf'
        tab1 = ItTable(heavy_2_prop, self.cfg_mgr)
        tab2 = ItTable(heavy_3_prop, self.cfg_mgr)
        tab3 = ItTable(full_ingest_tbl_mysql, self.cfg_mgr)

        heavy_tables = [tab1, tab2, tab3]
        workflows_chunks = [[tab1, 'tab1_full_load'], [tab2, 'tab2_full_load'],
                            [tab3, 'tab3_full_load']]
        gen_files = self.driver.gen_schedule_subworkflows(
            sub_wf_file_name, workflows_chunks, heavy_tables, 'FAKED001')
        # assertEqual instead of the deprecated assertEquals alias
        self.assertEqual(len(gen_files), 4)
        expected_file = os.path.join(BASE_DIR,
                                     'test_resources/subwf_heavy.xml')
        test_file = os.path.join(self.cfg_mgr.files, 'test_subwf.xml')
        self.assertTrue(self.files_equal(expected_file, test_file))
Example #24
0
 def get_available_requests(self, requests):
     """Sort requests into available tables, held tables and requests
     that have no record in the it table.

     A request without db_env falls back to the lower-cased default
     db_env of the config manager. A warning is logged for held tables
     and for requests without a record.

     Args:
         requests: list(ibis.inventory.request_inventory.Request) objects
     Returns:
         list(ibis.model.table.ItTable) with hold == 0,
         list(ibis.model.table.ItTable) with any other hold value,
         list(ibis.inventory.request_inventory.Request) without a record
     """
     available_tables, hold_tables, unavailable_requests = [], [], []

     for req_obj in requests:
         src_db = req_obj.database
         src_table = req_obj.table_name
         db_env = (req_obj.db_env if req_obj.db_env
                   else self.cfg_mgr.default_db_env.lower())
         table = self.it_inventory.get_table_mapping(
             src_db, src_table, db_env)
         if not table:
             # Record doesn't exist
             unavailable_requests.append(req_obj)
             continue
         # A record of the table exists in the it table;
         # hold = 1 means it is disabled temporarily
         bucket = available_tables if table['hold'] == 0 else hold_tables
         bucket.append(ItTable(table, self.cfg_mgr))

     if hold_tables:
         held_names = ', '.join([tbl.table_name for tbl in hold_tables])
         self.logger.warning('Tables: {0} are on hold'.format(held_names))
     if unavailable_requests:
         missing_names = ', '.join(
             [req.table_name for req in unavailable_requests])
         self.logger.warning(
             'Tables: {0} are missing in it_table'.format(missing_names))
     return available_tables, hold_tables, unavailable_requests
Example #25
0
 def test_gen_prod_workflow_tables(self, m_s_it_file, gen_prod_workflow,
                                   mock_get_available_requests):
     """Test gen_prod_workflow_tables with a valid request file while the
     available-requests lookup and gen_prod_workflow are mocked."""
     mock_get_available_requests.return_value = \
         ([ItTable(mock_table_mapping_val, self.cfg_mgr)], [], [])
     gen_prod_workflow.return_value = (None, None, None)
     request_path = os.path.join(
         BASE_DIR, 'test_resources/request_test_valid.txt')
     # context manager closes the request file, which was left open before
     with open(request_path, 'r') as file_h:
         self.assertTrue(self.driver.gen_prod_workflow_tables(file_h))
Example #26
0
 def test_gen_refresh(self):
     """Generate the refresh action and compare its xml with the stored
     expected workflow."""
     self.builder.it_table = ItTable(full_ingest_tbl, self.cfg_mgr)
     refresh_action = self.builder.gen_refresh('fake_mem_tablename_refresh')
     refresh_action.ok = 'end'
     generated_xml = refresh_action.generate_action()

     expected_path = os.path.join(BASE_DIR,
                                  'expected_workflows/refresh.xml')
     with open(expected_path, 'r') as expected_file:
         expected_xml = expected_file.read()
     self.assertTrue(self.compare_files(generated_xml, expected_xml))
Example #27
0
 def test_gen_schedule_subworkflows_light(self):
     """test gen schedule subworkflows for 5 light tables

     Expects four generated files and a test_subwf.xml matching the
     stored subwf_light.xml resource.
     """
     sub_wf_file_name = 'test_subwf'
     tab1 = ItTable(fake_cens_tbl_prop, self.cfg_mgr)
     tab2 = ItTable(light_3_prop, self.cfg_mgr)
     tab3 = ItTable(light_4_prop, self.cfg_mgr)
     tab4 = ItTable(light_5_prop, self.cfg_mgr)
     tab5 = ItTable(fake_ben_tbl_prop, self.cfg_mgr)
     light_tables = [tab1, tab2, tab3, tab4, tab5]
     workflows_chunks = [[tab1, 'tab1_full_load'], [tab2, 'tab2_full_load'],
                         [tab3, 'tab3_full_load'], [tab4, 'tab4_full_load'],
                         [tab5, 'tab5_full_load']]
     gen_files = self.driver.gen_schedule_subworkflows(
         sub_wf_file_name, workflows_chunks, light_tables, 'FAKED001')
     # assertEqual instead of the deprecated assertEquals alias
     self.assertEqual(len(gen_files), 4)
     expected_file = os.path.join(BASE_DIR,
                                  'test_resources/subwf_light.xml')
     test_file = os.path.join(self.cfg_mgr.files, 'test_subwf.xml')
     self.assertTrue(self.files_equal(expected_file, test_file))
Example #28
0
 def test_get_split_by_teradata_pk(self, mock1, mock2):
     """Test get_split_by_column with the Teradata primary-key eval
     fixture as mocked output; expects "KEY"."""
     mock2.return_value = [['KEY']]
     with open(BASE_DIR + '/fixtures/td_primary_eval_mock.txt',
               'r') as file_h:
         sqoop_eval_output = file_h.read()
     # mocked call returns (returncode, output, err)
     mock1.return_value = (0, sqoop_eval_output, '')
     table_obj = ItTable(self.mock_claim_tbl_dict[0], self.cfg_mgr)
     split_by_obj = Get_Auto_Split(self.cfg_mgr)
     result = split_by_obj.get_split_by_column(table_obj)
     # assertEqual instead of the deprecated assertEquals alias
     self.assertEqual(result, "KEY")
Example #29
0
 def test_gen_avro_action(self):
     """Generate the avro action and compare its xml with the stored
     expected workflow."""
     self.builder.it_table = ItTable(fake_algnmt_tbl, self.cfg_mgr)
     avro_action = self.builder.gen_avro_action(
         'fake_algnmt_tablename_avro')
     avro_action.ok = 'fake_algnmt_tablename_avro_parquet'
     generated_xml = avro_action.generate_action()

     expected_path = os.path.join(BASE_DIR,
                                  'expected_workflows/avro_action.xml')
     with open(expected_path, 'r') as expected_file:
         expected_xml = expected_file.read()
     self.assertTrue(self.compare_files(generated_xml, expected_xml))
Example #30
0
 def get_all_tables_for_esp(self, esp_id):
     """Return the tables whose esp_appl_id equals the given ESP id.

     Args:
         esp_id: value matched against the esp_appl_id column
     Returns:
         list of ItTable, one per fetched row; empty list when the query
         yields nothing
     """
     # NOTE(review): esp_id is formatted straight into the SQL text -
     # confirm callers never pass untrusted input
     query = "SELECT * FROM {tbl} WHERE esp_appl_id='{id}'".format(
         tbl=self.table, id=esp_id)
     rows = self.get_rows(query)
     if not rows:
         return []
     return [ItTable(self._build_row_dict(row), self.cfg_mgr)
             for row in rows]