def test_add_map_column_java(self):
    """Verify add_map_column_java sanitizes column names per source DB.

    Each source (Oracle, SQL Server, Teradata, DB2, MySQL) gets the same
    problematic column names (leading digit/underscore, '#', '-', spaces)
    and the resulting Java map-column string is compared against the
    expected sanitized form for that database.
    """
    ora_tbl = ItTable(fake_fact_tbl_prop, self.cfg_mgr)
    sqls_tbl = ItTable(sqlserver_fake_tablename, self.cfg_mgr)
    td_tbl = ItTable(td_fake_tablename, self.cfg_mgr)
    db2_tbl = ItTable(db2_fake_tablename, self.cfg_mgr)
    mysql_tbl = ItTable(fake_fact_tbl_prop_mysql, self.cfg_mgr)
    # Oracle fixture uses a smaller set of columns than the other sources.
    col_types_ora = [['Col1', 'TIMESTAMP'], ['Account_#', 'TIMESTAMP'],
                     ['colu_', 'TIMESTAMP']]
    col_types = [['Col1', 'TIMESTAMP'], ['1name', 'TIMESTAMP'],
                 ['Account_#', 'TIMESTAMP'], ['cc-oun-t', 'TIMESTAMP'],
                 ['o unt', 'TIMESTAMP'], ['_colu_', 'TIMESTAMP']]
    ora_test = self.builder.add_map_column_java(ora_tbl, col_types_ora)
    sqls_test = self.builder.add_map_column_java(sqls_tbl, col_types)
    td_test = self.builder.add_map_column_java(td_tbl, col_types)
    db2_test = self.builder.add_map_column_java(db2_tbl, col_types)
    mysql_test = self.builder.add_map_column_java(mysql_tbl, col_types)
    # Oracle/DB2 upper-case names; names starting with a digit or '_'
    # get an 'i_' prefix; '#', '-', and spaces are stripped.
    ora_exp = 'COL1=String,ACCOUNT_=String,COLU_=String'
    sqls_exp = ('Col1=String,i_1name=String,Account_=String,'
                'ccount=String,ount=String,i_colu_=String')
    td_exp = ('Col1=String,i_1name=String,Account_=String,'
              'ccount=String,ount=String,i_colu_=String')
    db2_exp = ('COL1=String,I_1NAME=String,ACCOUNT_=String,CCOUNT=String,'
               'OUNT=String,I_COLU_=String')
    mysql_exp = ('Col1=String,i_1name=String,Account_=String,'
                 'ccount=String,ount=String,i_colu_=String')
    # Element [1] of each result holds the map-column string.
    self.assertEquals(ora_test[1], ora_exp)
    self.assertEquals(sqls_test[1], sqls_exp)
    self.assertEquals(td_test[1], td_exp)
    self.assertEquals(db2_test[1], db2_exp)
    self.assertEquals(mysql_test[1], mysql_exp)
def test_sort_table_prop_by_load(self):
    """Tests that a map of tables gets sorted by load.

    Builds five tables with mixed load codes and checks that
    sort_table_prop_by_load groups them under their load key
    ('100' heavy, '010' medium, '001' light) in the expected order.
    """
    tables = [
        ItTable(fake_fct_tbl_prop, self.cfg_mgr),
        ItTable(fake_prof_tbl_prop, self.cfg_mgr),
        ItTable(fake_fact_tbl_prop, self.cfg_mgr),
        ItTable(fake_ben_tbl_prop, self.cfg_mgr),
        ItTable(fake_cens_tbl_prop, self.cfg_mgr)
    ]
    expected = {
        '100': [
            ItTable(fake_ben_tbl_prop, self.cfg_mgr),
            ItTable(fake_cens_tbl_prop, self.cfg_mgr)
        ],
        '010': [
            ItTable(fake_fct_tbl_prop, self.cfg_mgr),
            ItTable(fake_fact_tbl_prop, self.cfg_mgr)
        ],
        '001': [ItTable(fake_prof_tbl_prop, self.cfg_mgr)]
    }
    expected = collections.OrderedDict(sorted(expected.items()))
    result = self.generator.sort_table_prop_by_load(tables)
    equals = True
    # .items() (not the Python-2-only .iteritems()) keeps this loop
    # working under both Python 2 and 3 with identical behavior.
    for key, val in expected.items():
        if key in result:
            for i, table in enumerate(val):
                if not result[key][i] == table:
                    equals = False
        else:
            equals = False
    self.assertTrue(equals)
def test_update_frequency_load(self):
    """Exercise the frequency/load setters and their readable forms."""
    table = ItTable(self.it_table_dict, self.cfg_mgr)
    # Assigning the readable name must update the coded frequency.
    table.frequency_readable = 'monthly'
    self.assertEqual(table.frequency, '010')
    # Assigning the code must update the readable name.
    table.frequency = '110'
    self.assertEqual(table.frequency_readable, 'fortnightly')
    # Changing load must leave frequency untouched.
    table.load = '001'
    self.assertEqual(table.frequency_readable, 'fortnightly')
    self.assertEqual(table.load, '001')
def test_gen_full_ingest_actions_authinfo_fail(self):
    """Verify gen_full_ingest_actions raises ValueError when auth info
    (the username) is missing, and that the error message matches the
    expected fixture text.
    """
    self.generator = WorkflowGenerator('test_workflow_dev', self.cfg_mgr)
    ok_to_action = {'sqoop_to': 'test', 'end_to': 'test'}
    it_table = ItTable(fake_ben_pwdfile_tbl_prop, self.cfg_mgr)
    # empty username value
    it_table.username = ''
    with self.assertRaises(ValueError) as exp_cm:
        self.generator.gen_full_ingest_actions(it_table, ok_to_action)
    # Use a context manager so the fixture file handle is closed
    # (the original open(...).read() leaked the handle).
    with open(os.path.join(
            BASE_DIR, 'expected/auth_info_missing.txt')) as msg_file:
        test_err_msg = msg_file.read()
    bool_test = self.strings_equal(exp_cm.exception.message, test_err_msg)
    self.assertTrue(bool_test)
def test_load(self):
    """Check that load codes map to their readable names."""
    # (load code, expected readable name) pairs, checked in order.
    cases = [
        ('101100', 'small'),
        ('100010', 'medium'),
        ('011001', 'heavy'),
    ]
    for load_code, readable in cases:
        self.it_table_dict['load'] = load_code
        table = ItTable(self.it_table_dict, self.cfg_mgr)
        self.assertEqual(table.load_readable, readable)
def test_frequency(self):
    """Verify readable frequency names for each coded 'load' prefix.

    The first three digits of the combined load value encode the
    frequency; each case below sets the code and checks the readable
    form. An empty value falls back to the default '000001'.
    """
    self.it_table_dict['load'] = '101010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'daily')
    # The trailing digits ('010') are exposed as the load portion.
    self.assertEqual(it_obj.load, '010')
    self.it_table_dict['load'] = '100010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'weekly')
    self.it_table_dict['load'] = '011010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'biweekly')
    self.it_table_dict['load'] = '110010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'fortnightly')
    self.it_table_dict['load'] = '010010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'monthly')
    self.it_table_dict['load'] = '001010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'quarterly')
    self.it_table_dict['load'] = '111010'
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_readable, 'yearly')
    # Empty load: the combined frequency_load defaults to '000001'.
    self.it_table_dict['load'] = ''
    it_obj = ItTable(self.it_table_dict, self.cfg_mgr)
    self.assertEqual(it_obj.frequency_load, '000001')
def test_gen_schedule_subworkflows_int_sys(self):
    """test gen schedule subworkflows for same table from sys and int

    The same table is requested from both the 'int' and 'sys'
    environments; the generated subworkflow XML must match the fixture.
    """
    sub_wf_file_name = 'test_subwf'
    tab1 = ItTable(sqls_int, self.cfg_mgr)
    tab2 = ItTable(sqls_sys, self.cfg_mgr)
    heavy_tables = [tab1, tab2]
    workflows_chunks = [[tab1, 'int_full_load'], [tab2, 'sys_full_load']]
    gen_files = self.driver.gen_schedule_subworkflows(
        sub_wf_file_name, workflows_chunks, heavy_tables, 'FAKED001')
    # Generation of one subworkflow is expected to produce 4 files.
    self.assertEquals(len(gen_files), 4)
    expected_file = os.path.join(BASE_DIR,
                                 'test_resources/subwf_sys_int.xml')
    test_file = os.path.join(self.cfg_mgr.files, 'test_subwf.xml')
    self.assertTrue(self.files_equal(expected_file, test_file))
def test_gen_wld_tables(self):
    """test wld for table workflow

    Generates a .wld file for three tables and compares it against the
    expected fixture byte-for-byte (via files_equal).
    """
    table1 = ItTable(fake_fact_tbl_prop, self.cfg_mgr)
    table2 = ItTable(fake_ben_tbl_prop, self.cfg_mgr)
    table3 = ItTable(fake_prof_tbl_prop, self.cfg_mgr)
    tables = [table1, table2, table3]
    # Workflow names mirror each table's db_table_name.
    workflow_names = [
        table1.db_table_name, table2.db_table_name, table3.db_table_name
    ]
    self.inventory.gen_wld_tables('FAKED306', tables, workflow_names)
    test = os.path.join(self.inventory.cfg_mgr.files, 'FAKED306.wld')
    expected = os.path.join(BASE_DIR, 'expected/test_tables.wld')
    bool_val = self.files_equal(test, expected)
    self.assertTrue(bool_val)
def test_get_sqoop_query(self):
    """Verify get_sqoop_query escapes special characters per source DB.

    Column names containing '#', '-', spaces, leading digits/underscores
    (and a '$' in an Oracle table name) must be quoted/aliased correctly
    in the generated SELECT for Oracle, SQL Server, Teradata and DB2.
    """
    ora_tbl = ItTable(fake_fact_tbl_prop, self.cfg_mgr)
    ora_dollar_tbl = ItTable(dollar_fake_tablename, self.cfg_mgr)
    sqls_tbl = ItTable(sqlserver_fake_tablename, self.cfg_mgr)
    td_tbl = ItTable(td_fake_tablename, self.cfg_mgr)
    db2_tbl = ItTable(db2_fake_tablename, self.cfg_mgr)
    col_types_ora = [['Col1', 'varchar'], ['Account_#', 'varchar'],
                     ['colu_', 'varchar']]
    col_types_ora_dollar = [['account', 'varchar']]
    col_types = [['Col1', 'varchar'], ['1name', 'varchar'],
                 ['Account_#', 'varchar'], ['cc-oun-t', 'varchar'],
                 ['o unt', 'varchar'], ['_colu_', 'varchar']]
    ora_test = self.builder.get_sqoop_query(ora_tbl, col_types_ora)
    ora_dollar_test = self.builder.get_sqoop_query(ora_dollar_tbl,
                                                   col_types_ora_dollar)
    sqls_test = self.builder.get_sqoop_query(sqls_tbl, col_types)
    td_test = self.builder.get_sqoop_query(td_tbl, col_types)
    db2_test = self.builder.get_sqoop_query(db2_tbl, col_types)
    # Oracle: special names double-quoted and aliased.
    ora_exp = ('SELECT Col1, "Account_#" AS Account_, '
               'colu_ FROM fake_database.risk_fake_tablename t WHERE 1=1 '
               'AND $CONDITIONS')
    ora_dollar_exp = (
        'SELECT "account" AS account FROM '
        'fake_database.fake_$tablename t WHERE 1=1 AND $CONDITIONS')
    # SQL Server: names bracketed [..] and aliased.
    sqls_exp = (
        "SELECT Col1, [1name] AS [i_1name], "
        "[Account_#] AS [Account_], "
        "[cc-oun-t] AS [ccount], [o unt] AS [ount], "
        "[_colu_] AS [i_colu_]"
        " FROM [fake_database].[dbo].[sqlserver_fake_tablename] WHERE"
        " 1=1 AND $CONDITIONS")
    td_exp = (
        "SELECT Col1, 1name AS i_1name, Account_# AS Account_, "
        "cc-oun-t AS ccount, o unt AS ount, _colu_ AS i_colu_ "
        "FROM FAKE_DATABASE.TD_FAKE_TABLENAME WHERE 1=1 AND $CONDITIONS")
    db2_exp = (
        "SELECT Col1, 1name AS i_1name, Account_# AS Account_, "
        "cc-oun-t AS ccount, o unt AS ount, _colu_ AS i_colu_ "
        "FROM FAKE_DATABASE.DB2_FAKE_TABLENAME WHERE 1=1 AND $CONDITIONS")
    # Element [1] of each result holds the generated query string.
    self.assertEquals(ora_test[1], ora_exp)
    self.assertEquals(ora_dollar_test[1], ora_dollar_exp)
    self.assertEquals(sqls_test[1], sqls_exp)
    self.assertEquals(td_test[1], td_exp)
    self.assertEquals(db2_test[1], db2_exp)
def test_gen_import_incremental_failure(self, m_get_col_types):
    """Test the generation of the import action for various sources.

    Supplies an incremental spec whose combination is not supported and
    asserts gen_import_action raises ValueError mentioning
    'Unknown incremental options'.
    """
    # Column-type lookup is mocked out; not relevant for this failure.
    m_get_col_types.return_value = []
    incremental = {
        'check_column': "test_col",
        'incremental': "lastmodified",
        'last_value': "5555555"
    }
    _tbl = {
        'domain': 'member',
        'jdbcurl': 'jdbc:oracle:thin:@//fake.oracle:1521/'
                   'fake_servicename',
        'db_username': '******',
        'password_file': 'jceks://hdfs/user/dev/fake.passwords.jceks#'
                         'fake.password.alias',
        'load': '000001',
        'fetch_size': 50000,
        'hold': 0,
        'source_database_name': 'fake_database',
        'source_table_name': 'fake_mem_tablename'
    }
    self.builder.it_table = ItTable(_tbl, self.cfg_mgr)
    # test invalid source
    with self.assertRaises(ValueError) as context:
        self.builder.gen_import_action('fake_mem_tablename_import',
                                       incremental=incremental)
    self.assertTrue(
        'Unknown incremental options' in str(context.exception))
def test_get_workflow_name_table_export(self, m_getuser):
    """Check the export workflow name built from env/user/db/table."""
    # The OS user lookup is mocked so the name is deterministic.
    m_getuser.return_value = 'userId'
    census_table = ItTable(fake_cens_tbl_prop, self.cfg_mgr)
    workflow_name = self.driver._get_workflow_name_table_export(
        census_table)
    expected = 'dev_userId_fake_database_fake_cens_tablename_export'
    self.assertEquals(workflow_name, expected)
def insert_placeholder(self, db_name, table_name):
    """Insert a placeholder row, containing source_database_name,
    source_table_name, and hold for a new table request into IT table.
    """
    # Every text field starts empty; numeric fields start at zero.
    placeholder = {key: '' for key in (
        'full_table_name', 'domain', 'target_dir', 'split_by',
        'jdbcurl', 'connection_factories', 'db_username',
        'password_file', 'load', 'esp_appl_id', 'views', 'esp_group',
        'check_column', 'source_schema_name', 'sql_query', 'actions')}
    placeholder['mappers'] = 0
    placeholder['fetch_size'] = 0
    # hold=1 keeps the new table disabled until explicitly enabled.
    placeholder['hold'] = 1
    placeholder['source_database_name'] = db_name
    placeholder['source_table_name'] = table_name
    placeholder['db_env'] = self.cfg_mgr.default_db_env.lower()
    self.insert(ItTable(placeholder, self.cfg_mgr))
def parallel_sqoop_output(info):
    """Fetch distinct column count

    For sake of multiprocessing.Pool, this needs to be a top level
    function.

    Args:
        info: 4-tuple/list of (cfg_mgr, it_table dict, column_name,
            sqoop eval query string).

    Returns:
        list with a single (column_name, std_deviation, num_groups)
        tuple describing the group-count distribution for the column.

    Raises:
        ValueError: if the sqoop eval command returns a nonzero code.
    """
    col_quality_list = []
    cfg_mgr, it_table, column_name, query = info[0], info[1], info[2], info[3]
    it_table_obj = ItTable(it_table, cfg_mgr)
    source_obj = SourceTable(cfg_mgr, it_table_obj)
    returncode, output, err = source_obj.eval(query)
    if returncode == 0:
        _, groupby_counts = source_obj.fetch_rows_sqoop(output)
    else:
        source_obj.logger.error(err)
        raise ValueError(err)
    # collect counts per bin, calculate relevant stats on them, and sort
    # to most preferred first
    # groupby_counts is a list of rows; flatten into one int per group.
    bin_counts = [int(item) for sublist in groupby_counts
                  for item in sublist]
    num_groups = len(bin_counts)
    bin_counts_arr = numpy.array(bin_counts)
    std_deviation = numpy.std(bin_counts_arr)
    # print column_name, os.getpid(), os.getppid()
    # memory_usage_ps()
    # Free the large intermediates eagerly: this runs inside a
    # multiprocessing.Pool worker and many columns are processed.
    del groupby_counts
    del bin_counts
    del bin_counts_arr
    gc.collect()
    # memory_usage_ps()
    # print '--' * 50
    col_quality_list.append((column_name, std_deviation, num_groups))
    return col_quality_list
def test_gen_import_incremental_lastmodified(self, m_get_col_types,
                                             mock_eval):
    """Test the generation of the import action for various sources.

    Uses a valid 'lastmodified' incremental spec plus a where-clause and
    compares the generated Oracle sqoop XML against the fixture.
    """
    # Column-type lookup and table-exists eval are mocked.
    m_get_col_types.return_value = []
    mock_eval.return_value = [['table']]
    incremental = {
        'check_column': "test_col",
        'incremental': "lastmodified",
        'last_value': "2012-10-11 11:11:11"}
    test_incr_query = "x > y"
    _tbl = {
        'domain': 'member',
        'jdbcurl': 'jdbc:oracle:thin:@//fake.oracle:'
                   '1521/fake_servicename',
        'db_username': '******',
        'password_file': 'jceks://hdfs/user/dev/fake.passwords.jceks#'
                         'fake.password.alias',
        'load': '000001',
        'fetch_size': 50000,
        'hold': 0,
        'source_database_name': 'fake_database',
        'source_table_name': 'fake_mem_tablename'}
    self.builder.it_table = ItTable(_tbl, self.cfg_mgr)
    # ORACLE
    action = self.builder.gen_import_action(
        'fake_mem_tablename_import', incremental=incremental,
        sqoop_where_query=test_incr_query)
    action.ok = 'fake_mem_tablename_avro'
    oracle_xml = action.generate_action()
    _path = os.path.join(
        BASE_DIR, 'expected_workflows/oracle_incremental_sqoop.xml')
    with open(_path, 'r') as my_file:
        expected = my_file.read()
    self.assertTrue(self.compare_files(oracle_xml, expected))
def test_gen_import_ora_where(self, mock_get_col_types, mock_eval):
    """test oracle where clause

    The fixture table carries a 'sql_query' where-clause; the generated
    import XML must embed it and match the expected fixture.
    """
    mock_get_col_types.return_value = [('Col1', 'varchar'),
                                       ('Col2', 'number'),
                                       ('Col3', 'number'),
                                       ('Col4', 'number')]
    # Table-exists eval is mocked to succeed.
    mock_eval.return_value = [['table']]
    _tbl = {
        'domain': 'member',
        'jdbcurl': 'jdbc:oracle:thin:@//fake.oracle'
                   ':1600/fake_servicename',
        'db_username': '******',
        'password_file': 'jceks://hdfs/user/dev/fake.passwords.jceks#'
                         'fake.password.alias',
        'load': '000001',
        'fetch_size': 50000,
        'hold': 0,
        'mappers': 6,
        'sql_query': 'ASSMT_DT_KEY > 1',
        'source_database_name': 'fake_database',
        'source_table_name': 'fake_mth_tablename'}
    self.builder.it_table = ItTable(_tbl, self.cfg_mgr)
    action = self.builder.gen_import_action(
        'fake_mth_tablename_import')
    action.ok = 'fake_mth_tablename_avro'
    ora_xml = action.generate_action()
    _path = os.path.join(
        BASE_DIR, 'expected_workflows/ora_sqoop_with_where.xml')
    with open(_path, 'r') as my_file:
        expected = my_file.read()
    self.assertTrue(self.compare_files(ora_xml, expected))
def test_sql_windows_auth(self, m_get_col_types, mock_eval):
    """test sql windows authentication

    The jdbcurl uses jTDS with NTLMv2/domain parameters; the generated
    sqoop import XML must match the windows-auth fixture.
    """
    m_get_col_types.return_value = []
    mock_eval.return_value = [['table']]
    _tbl = {
        'domain': 'qa_domain',
        'jdbcurl': 'jdbc:jtds:sqlserver://fake.sqlserver:1433;'
                   'useNTLMv2=true;domain=fake_domain;database=qa_db',
        'db_username': '******',
        'password_file':
            'jceks://hdfs/user/fake_username0/fake.passwords.jceks#'
            'fake.password.alias',
        'mappers': 1,
        'load': '000001',
        'fetch_size': 5000,
        'hold': 0,
        'source_database_name': 'qa_db',
        'source_table_name': 'qa_tbl'}
    self.builder.it_table = ItTable(_tbl, self.cfg_mgr)
    action = self.builder.gen_import_action('qa_tbl_import')
    action.ok = 'qa_tbl_avro'
    sql_windows_xml = action.generate_action()
    _path = os.path.join(
        BASE_DIR, 'expected_workflows/sqlserver_windows_auth.xml')
    with open(_path, 'r') as my_file:
        expected = my_file.read()
    self.assertTrue(self.compare_files(sql_windows_xml, expected))
def test_gen_full_table_ingest_custom_scripts(self, m_get_col_types,
                                              mock_eval):
    """Test the generation of all xml actions for a table ingest when
    custom config scripts are provided.

    Stages a requests directory with a custom DSL config plus hql/sh
    scripts, generates the full-ingest workflow, and compares the XML
    against the fixture (which is hardcoded with the dev host).
    """
    m_get_col_types.return_value = [('trans_time', 'TIMESTAMP')]
    mock_eval.return_value = [['table']]
    real_host = self.cfg_mgr.host
    self.builder.cfg_mgr.host = 'fake.workflow.host'
    # setup alternative requests dir
    self.cfg_mgr.requests_dir = os.path.join(
        self.cfg_mgr.files, 'requests')
    os.makedirs(self.cfg_mgr.requests_dir)
    os.makedirs(os.path.join(self.cfg_mgr.requests_dir, 'DEV'))
    self.builder.dsl_parser.scripts_dir = self.cfg_mgr.requests_dir
    fixture_config_path = os.path.join(
        BASE_DIR, 'test_resources/custom_config_no_views.dsl')
    shutil.copy(fixture_config_path, self.cfg_mgr.requests_dir)
    # The hql script lives under the DEV env subdirectory.
    fixture_hql_path = os.path.join(
        BASE_DIR, 'test_resources/hive_test.hql')
    shutil.copy(fixture_hql_path,
                os.path.join(self.cfg_mgr.requests_dir, 'DEV'))
    fixture_sh_path = os.path.join(
        BASE_DIR, 'test_resources/shell_test.sh')
    shutil.copy(fixture_sh_path, self.cfg_mgr.requests_dir)
    # Expected workflows hardcoded w/ dev host
    table_obj = ItTable(full_ingest_tbl_custom_config, self.cfg_mgr)
    ingest_xml = self.builder.gen_full_table_ingest(table_obj)
    self.builder.cfg_mgr.host = real_host  # Return host to real value
    path = os.path.join(
        BASE_DIR, 'expected_workflows/'
        'full_ingest_custom_config_scripts.xml')
    with open(path, 'r') as my_file:
        expected = my_file.read()
    self.assertTrue(self.compare_files(ingest_xml, expected))
def test_get_split_by_DB2_int(self, mock1):
    """Verify split-by selection for DB2 falls through to an integer
    column found in a unique index.

    Four successive sqoop eval calls are mocked: table found, no
    primary key, integer columns, integer column in an index.
    """
    # Mock table found DB2
    with open(BASE_DIR + '/fixtures/db2_table_eval_mock.txt',
              'r') as file_h:
        sqoop_eval_output1 = file_h.read()
    # Mock no primary key
    with open(BASE_DIR + '/fixtures/eval_empty.txt', 'r') as file_h:
        sqoop_eval_output2 = file_h.read()
    # Mock integer columns
    with open(BASE_DIR + '/fixtures/db2_primary_eval_mock.txt',
              'r') as file_h:
        sqoop_eval_output3 = file_h.read()
    # Mock integer column in index found
    with open(BASE_DIR + '/fixtures/db2_uniqidx_eval_mock.txt',
              'r') as file_h:
        sqoop_eval_output4 = file_h.read()
    mock1.side_effect = [(0, sqoop_eval_output1, ''),
                         (0, sqoop_eval_output2, ''),
                         (0, sqoop_eval_output3, ''),
                         (0, sqoop_eval_output4, '')]
    table_obj = ItTable(self.mock_claim_tbl_dict_DB2[0], self.cfg_mgr)
    split_by_obj = Get_Auto_Split(self.cfg_mgr)
    result = split_by_obj.get_split_by_column(table_obj)
    # (leftover debug print removed — the assertion below is the check)
    self.assertEquals(result, "KEY")
def test_get_all_tables_for_automation(self, mock_rows):
    """Check automation-table lookup for a hit and for a miss."""
    # First call returns rows, second returns nothing.
    mock_rows.side_effect = [mock_claim_tbl, []]
    # Hit: first returned table matches the fixture ItTable.
    found = self.inventory.get_all_tables_for_automation('TEST01')
    expected_table = ItTable(mock_claim_tbl_dict[0], self.cfg_mgr)
    self.assertEquals(found[0], expected_table)
    # Miss: no rows yields an empty list.
    missing = self.inventory.get_all_tables_for_automation('TEST02')
    self.assertEquals(missing, [])
def test_gen_import_custom_query(self, mock_get_col_types):
    """test user input - custom query

    A Teradata table with a user-supplied sql_query and split_by
    column; the generated import XML must match the fixture.
    """
    mock_get_col_types.return_value = [('Col1', 'varchar'),
                                       ('Col2', 'DATETIME'),
                                       ('client_struc_key', 'INT')]
    _tbl = {
        'domain': 'member',
        'jdbcurl': 'jdbc:teradata://fake.teradata/DATABASE='
                   'fake_database',
        'db_username': '******',
        'split_by': 'client_struc_key',
        'password_file': 'jceks://hdfs/user/dev/fake.passwords.jceks#'
                         'fake.password.alias',
        'load': '000001',
        'fetch_size': 50000,
        'hold': 0,
        'mappers': 6,
        'source_database_name': 'fake_database',
        'source_table_name': 'fake_mth_tablename',
        'sql_query': "ID > 2 AND COL = 'TEST'"}
    self.builder.it_table = ItTable(_tbl, self.cfg_mgr)
    action = self.builder.gen_import_action(
        'fake_mth_tablename_import')
    action.ok = 'fake_mth_tablename_avro'
    td_xml = action.generate_action()
    _path = os.path.join(
        BASE_DIR, 'expected_workflows/sqoop_custom_query.xml')
    with open(_path, 'r') as my_file:
        expected = my_file.read()
    self.assertTrue(self.compare_files(expected, td_xml))
def test_gen_prod_workflow_perf_nodomain(self, m_freq_ingest,
                                         m_convert_pdf, m_v_xml, m_eval,
                                         m_c, m_get_id,
                                         m_get_t_automation, m_dryrun,
                                         m_put_w, m_sqoop_cache,
                                         m_sqoop_cache_view,
                                         m_dryrun_all):
    """ Tests generate_prod_workflows with 3 tables. One light,
    one medium and one heavy."""
    m_eval.return_value = [['Col1', 'varchar'], ['Col2', 'varchar']]
    m_get_id.side_effect = [appl_ref_id_tbl_01, appl_ref_id_tbl_02]
    _mock_automation_tables = [ItTable(tbl, self.cfg_mgr) for tbl in
                               mock_automation_tbl_perf_domain]
    m_get_t_automation.return_value = _mock_automation_tables
    self.cfg_mgr.env = 'perf'
    status, msg, git_files = self.driver.gen_prod_workflow('FAKED001')
    for file_name in git_files:
        git_file = 'full_fake_open_fake_prog_tablename.hql'
        if git_file in file_name:
            actual_hql_nm = os.path.join(self.cfg_mgr.files, file_name)
            with open(actual_hql_nm, 'r') as file_h:
                actual_hql = file_h.read()
            with open(BASE_DIR +
                      '/test_resources/git_team_hql_nodomain.hql',
                      'r') as file_h:
                expected_hql = file_h.read()
            # BUG FIX: was assertTrue(expected_hql, actual_hql), which
            # only checked the truthiness of expected_hql (the second
            # argument is the failure message) and never compared the
            # two strings. assertEqual performs the real comparison.
            self.assertEqual(expected_hql, actual_hql)
    self.assertEquals(len(git_files), 23)
    self.assertIn('Generated', msg)
    self.assertIn('workflow:', msg)
    self.assertIn('subworkflow:', msg)
    self.assertTrue(status)
def test__add_split_by(self):
    """A split_by column absent from the column list must raise."""
    self.builder.it_table = ItTable(td_fake_tablename, self.cfg_mgr)
    # The only available column does not match the table's split_by.
    with self.assertRaises(ValueError) as context:
        self.builder._add_split_by([], [['invalid_col', 'varchar']])
    expected_fragment = (
        "Not a valid column name for split_by: 'fake_split_by'")
    self.assertTrue(expected_fragment in str(context.exception))
def test_gen_schedule_subworkflows_heavy(self):
    """test gen schedule subworkflows for 3 heavy tables

    The generated subworkflow XML is compared against the heavy-tables
    fixture.
    """
    sub_wf_file_name = 'test_subwf'
    tab1 = ItTable(heavy_2_prop, self.cfg_mgr)
    tab2 = ItTable(heavy_3_prop, self.cfg_mgr)
    tab3 = ItTable(full_ingest_tbl_mysql, self.cfg_mgr)
    heavy_tables = [tab1, tab2, tab3]
    workflows_chunks = [[tab1, 'tab1_full_load'],
                        [tab2, 'tab2_full_load'],
                        [tab3, 'tab3_full_load']]
    gen_files = self.driver.gen_schedule_subworkflows(
        sub_wf_file_name, workflows_chunks, heavy_tables, 'FAKED001')
    # Generation of one subworkflow is expected to produce 4 files.
    self.assertEquals(len(gen_files), 4)
    expected_file = os.path.join(BASE_DIR,
                                 'test_resources/subwf_heavy.xml')
    test_file = os.path.join(self.cfg_mgr.files, 'test_subwf.xml')
    self.assertTrue(self.files_equal(expected_file, test_file))
def get_available_requests(self, requests):
    """Given a list[{Request}] return a List[{ItTable}] of tables
    available, not on hold in the it table, and a List[{ItTable}] of
    tables on hold, due to a hold value of 1 and List[{Request}]

    Args:
        requests: list(ibis.inventory.request_inventory.Request) objects
    Returns:
        list(ibis.model.table.ItTable), list(ibis.model.table.ItTable),
        list(ibis.inventory.request_inventory.Request)
    """
    available_tables = []
    hold_tables = []
    unavailable_requests = []
    for req_obj in requests:
        src_db = req_obj.database
        src_table = req_obj.table_name
        # Fall back to the configured default env when the request
        # does not name one.
        if req_obj.db_env:
            db_env = req_obj.db_env
        else:
            db_env = self.cfg_mgr.default_db_env.lower()
        table = self.it_inventory.get_table_mapping(
            src_db, src_table, db_env)
        if table:
            # A record of the table exists in the it table
            if table['hold'] == 0:
                available_tables.append(ItTable(table, self.cfg_mgr))
            else:
                # hold = 1 means it is disabled temporarily
                hold_tables.append(ItTable(table, self.cfg_mgr))
        else:
            # Record doesn't exist
            unavailable_requests.append(req_obj)
    # Warn (don't fail) about held and unknown tables so callers can
    # proceed with whatever is available.
    if hold_tables:
        hold_tables_names = [table.table_name for table in hold_tables]
        msg = 'Tables: {0} are on hold'
        msg = msg.format(', '.join(hold_tables_names))
        self.logger.warning(msg)
    if unavailable_requests:
        unavail_names = [req.table_name for req in unavailable_requests]
        msg = 'Tables: {0} are missing in it_table'
        msg = msg.format(', '.join(unavail_names))
        self.logger.warning(msg)
    return available_tables, hold_tables, unavailable_requests
def test_gen_prod_workflow_tables(self, m_s_it_file, gen_prod_workflow,
                                  mock_get_available_requests):
    """Verify gen_prod_workflow_tables succeeds for a valid request
    file when one available table is returned and workflow generation
    is stubbed out.
    """
    mock_get_available_requests.return_value = \
        ([ItTable(mock_table_mapping_val, self.cfg_mgr)], [], [])
    gen_prod_workflow.return_value = (None, None, None)
    # Use a context manager so the request-file handle is closed
    # (the original opened it and never closed it).
    request_path = os.path.join(
        BASE_DIR, 'test_resources/request_test_valid.txt')
    with open(request_path, 'r') as file_h:
        self.assertTrue(self.driver.gen_prod_workflow_tables(file_h))
def test_gen_refresh(self):
    """Compare the generated refresh action XML with its fixture."""
    self.builder.it_table = ItTable(full_ingest_tbl, self.cfg_mgr)
    refresh_action = self.builder.gen_refresh(
        'fake_mem_tablename_refresh')
    refresh_action.ok = 'end'
    generated_xml = refresh_action.generate_action()
    fixture = os.path.join(BASE_DIR, 'expected_workflows/refresh.xml')
    with open(fixture, 'r') as fixture_file:
        expected = fixture_file.read()
    self.assertTrue(self.compare_files(generated_xml, expected))
def test_gen_schedule_subworkflows_light(self):
    """test gen schedule subworkflows for 5 light tables

    The generated subworkflow XML is compared against the light-tables
    fixture.
    """
    sub_wf_file_name = 'test_subwf'
    tab1 = ItTable(fake_cens_tbl_prop, self.cfg_mgr)
    tab2 = ItTable(light_3_prop, self.cfg_mgr)
    tab3 = ItTable(light_4_prop, self.cfg_mgr)
    tab4 = ItTable(light_5_prop, self.cfg_mgr)
    tab5 = ItTable(fake_ben_tbl_prop, self.cfg_mgr)
    light_tables = [tab1, tab2, tab3, tab4, tab5]
    workflows_chunks = [[tab1, 'tab1_full_load'],
                        [tab2, 'tab2_full_load'],
                        [tab3, 'tab3_full_load'],
                        [tab4, 'tab4_full_load'],
                        [tab5, 'tab5_full_load']]
    gen_files = self.driver.gen_schedule_subworkflows(
        sub_wf_file_name, workflows_chunks, light_tables, 'FAKED001')
    # Generation of one subworkflow is expected to produce 4 files.
    self.assertEquals(len(gen_files), 4)
    expected_file = os.path.join(BASE_DIR,
                                 'test_resources/subwf_light.xml')
    test_file = os.path.join(self.cfg_mgr.files, 'test_subwf.xml')
    self.assertTrue(self.files_equal(expected_file, test_file))
def test_get_split_by_teradata_pk(self, mock1, mock2):
    """Verify the Teradata split-by column comes from the mocked
    primary-key eval output.
    """
    # Second mock supplies the primary-key rows directly.
    mock2.return_value = [['KEY']]
    with open(BASE_DIR + '/fixtures/td_primary_eval_mock.txt',
              'r') as file_h:
        sqoop_eval_output = file_h.read()
    # First mock simulates a successful sqoop eval call.
    mock1.return_value = (0, sqoop_eval_output, '')
    table_obj = ItTable(self.mock_claim_tbl_dict[0], self.cfg_mgr)
    split_by_obj = Get_Auto_Split(self.cfg_mgr)
    result = split_by_obj.get_split_by_column(table_obj)
    self.assertEquals(result, "KEY")
def test_gen_avro_action(self):
    """Compare the generated avro action XML with its fixture."""
    self.builder.it_table = ItTable(fake_algnmt_tbl, self.cfg_mgr)
    avro_action = self.builder.gen_avro_action(
        'fake_algnmt_tablename_avro')
    avro_action.ok = 'fake_algnmt_tablename_avro_parquet'
    generated_xml = avro_action.generate_action()
    fixture = os.path.join(BASE_DIR,
                           'expected_workflows/avro_action.xml')
    with open(fixture, 'r') as fixture_file:
        expected = fixture_file.read()
    self.assertTrue(self.compare_files(generated_xml, expected))
def get_all_tables_for_esp(self, esp_id):
    """Returns a list[table] of all sql-tables that match with ESP id"""
    # NOTE(review): esp_id is interpolated directly into the SQL; fine
    # for trusted internal ids, but not for untrusted input.
    query = "SELECT * FROM {tbl} WHERE esp_appl_id='{id}'".format(
        tbl=self.table, id=esp_id)
    rows = self.get_rows(query)
    if not rows:
        return []
    return [ItTable(self._build_row_dict(row), self.cfg_mgr)
            for row in rows]