def test_get_lock_status_call(self, mock_exec):
    """get_lock_status must issue the expected SELECT against thrive_dataset_lock."""
    ds = "foo"
    expected = '''
        select locked, release_attempts
        from thrive_dataset_lock
        where dataset_name = '%s';
    ''' % ds
    _ = self.mm.get_lock_status(ds)
    # Compare whitespace-normalized SQL actually passed to the executor.
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_increment_release_attempt_call(self, mock_exec):
    """increment_release_attempt must issue the expected UPDATE statement."""
    ds = "foo"
    expected = '''
        update thrive_dataset_lock
        set release_attempts = release_attempts + 1
        where dataset_name = '%s';
    ''' % ds
    self.mm.increment_release_attempt(ds)
    # Whitespace-insensitive comparison of the generated SQL.
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_release_call(self, mock_exec):
    """release must issue the expected UPDATE that clears the lock flag."""
    ds = "foo"
    # NOTE: `dataset_name='%s'` intentionally has no spaces around '=' to
    # match the statement built by release() exactly.
    expected = '''
        update thrive_dataset_lock
        set locked = FALSE
        where dataset_name='%s';
    ''' % ds
    self.mm.release(ds)
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_lock_call(self, mock_exec):
    """lock must set the lock flag and reset the release-attempt counter."""
    ds = "foo"
    expected = '''
        update thrive_dataset_lock
        set locked = TRUE, release_attempts = 0
        where dataset_name = '%s';
    ''' % ds
    self.mm.lock(ds)
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_delete_call(self, mock_exec):
    """delete must issue a DELETE filtered on dataset and a metadata column."""
    dataset = "foo"
    mdcolname = "bar"
    mdcolvalue = "baz"
    expected = '''
        delete from thrive_load_metadata
        where dataset_name = '%s'
        and %s = '%s';
    ''' % (dataset, mdcolname, mdcolvalue)
    self.mm.delete(dataset, mdcolname, mdcolvalue)
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_get_last_dir_call(self, mock_exec):
    """get_lastdir must query the most recent load folder for a dataset/table/load-type."""
    dataset_name = "foo"
    hive_table = "bar"
    load_type = "baz"
    expected = '''
        select last_load_folder from thrive_load_metadata
        where dataset_name = '%s'
        and hive_table = '%s'
        and load_type = '%s'
        order by hive_end_ts desc limit 1;
    ''' % (dataset_name, hive_table, load_type)
    _ = self.mm.get_lastdir(dataset_name, hive_table, load_type)
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_get_unprocessed_partitions_call(self, mock_exec):
    """get_unprocessed_partitions must select partitions loaded to Hive but not Vertica.

    Fix: the call previously passed the duplicate literals "foo"/"bar"
    instead of the hive_db/hive_table locals used to build the expected
    statement; using the variables keeps the fixture single-sourced.
    """
    hive_db = "foo"
    hive_table = "bar"
    expected = '''
        SELECT load_id, hive_last_partition, hive_rows_loaded,
        hadoop_records_processed
        from thrive_load_metadata
        where hive_db = '%s'
        and hive_table = '%s'
        and hive_last_partition <> ''
        and vertica_last_partition is NULL;
    ''' % (hive_db, hive_table)
    _ = self.mm.get_unprocessed_partitions(hive_db, hive_table)
    self.assertEqual(squeeze(expected), squeeze(mock_exec.call_args[0][0]))
def test_update(self, mock_exec):
    """update must build the correct UPDATE for both metadata table types."""
    pk = ("pk1", "pk2")
    mdmap = {"setup": "thrive_setup", "load": "thrive_load_metadata"}
    mdfilter = {
        "setup": "dataset_id = '%s'" % pk[0],
        "load": "(load_id, hive_last_partition) = ('%s', '%s')" % pk
    }
    data = {"fookey": "fooval", "barkey": "barval"}
    # Exercise each metadata type with its table and primary-key filter.
    for mdtype, mdtable in mdmap.items():
        updates = ",".join('%s="%s"' % kv for kv in data.items())
        stmt = "update %s set %s where %s;" % (
            mdtable, updates, mdfilter[mdtype])
        self.mm.update(pk, data, mdtype=mdtype)
        self.assertEqual(squeeze(stmt), squeeze(mock_exec.call_args[0][0]))
def test__make_schema(self):
    """_make_schema must substitute placeholders into the DDL template file."""
    tf_cols = make_tempfile()
    tf_template = make_tempfile()
    cols = "col1 int\ncol2 float\ncol3 varchar(20)"
    template = """
        use @DATABASE;
        create external table @TABLE
        (
        @COLUMNMAPPINGS
        )
        partitioned by (year string, month string, day string, hour string, part string)
        row format delimited
        fields terminated by '\u0001'
        null defined as '';
    """
    tempfile_write(tf_cols, cols)
    tempfile_write(tf_template, template)
    substitutions = {
        "@DATABASE": "dbfoo",
        "@COLUMNMAPPINGS": cols,
        "@TABLE": "tablefoo"
    }
    tf_outfile = make_tempfile()
    self.sh._make_schema(tf_cols.name, tf_template.name,
                         tf_outfile.name, substitutions)
    # Rewind the output tempfile before reading what was written.
    tf_outfile.seek(0)
    expected = """
        use dbfoo;
        create external table tablefoo
        (
        col1 int,
        col2 float,
        col3 varchar(20)
        )
        partitioned by (year string, month string, day string, hour string, part string)
        row format delimited
        fields terminated by '\u0001'
        null defined as '';
    """
    self.assertEqual(squeeze(expected), squeeze(tf_outfile.read()))
def test_make_workflowpropsfile(self, mock_mtz, mock_dt, mock_open):
    """make_workflowpropsfile must render the Oozie properties template.

    Verifies the staging dir is created via the shell executor and the
    template substitution helper is called with the expected mapping and
    timestamped output filename.
    """
    mdt = datetime(2016, 8, 18, 14, 10)
    # Pin "now" so the generated properties filename is deterministic.
    mock_dt.now.return_value = mdt
    cv = self.config_value
    workflow_xml_path = "%s/workflow/%s" % (cv, cv)
    template_str = squeeze("""
        jobTracker=@JOBTRACKER
        oozie.wf.application.path=@WORKFLOWXML
        nameNode=@NAMENODE
        inputFile=@INPUTFILES
        outputDir=@OUTPUTDIR
        numReducers=@NUM_REDUCERS
    """)
    dirlist = ["foo", "bar"]
    # Escaped-comma glob of input dirs, e.g. cv/{foo\\\\,bar}.
    input_files = os.path.join(cv, "{" + "\\\\\\\\,".join(dirlist) + "}")
    output_path = "output/path"
    substitutions = {
        "@JOBTRACKER": cv,
        "@WORKFLOWXML": workflow_xml_path,
        "@NAMENODE": cv,
        "@INPUTFILES": input_files,
        "@OUTPUTDIR": output_path,
        "@NUM_REDUCERS": cv
    }
    # NOTE(review): spec=file is the Python 2 builtin file object — this
    # module appears to target py2; confirm before porting.
    mf = mock.MagicMock(spec=file)
    mock_open.return_value.__enter__.return_value = mf
    mf.read.return_value = template_str
    lh = tlh.LoadHandler(datacfg_file="foo", envcfg_file="bar",
                         resources_file="baz.zip")
    lh.make_workflowpropsfile(output_path, dirlist)
    shexec = self.mock_shell.return_value
    shexec.safe_execute.assert_called_with("mkdir -p %s" % cv)
    propfile_name = "%s/workflow_%s.properties" % (
        cv, mdt.strftime("%Y%m%d-%H"))
    mock_mtz.assert_called_with(template_str, substitutions,
                                outfile=propfile_name)