コード例 #1
0
 def test_proceed_dataset_locked(self):
     """proceed() must be falsy when get_lock_status() reports (1, 0)."""
     self.mock_mm.return_value.get_lock_status.return_value = (1, 0)
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     self.assertFalse(handler.proceed())
コード例 #2
0
 def test_execute_no_proceed(self, mock_proceed):
     """execute() must return None when proceed() is False.

     ``mock_proceed`` is injected by a patch decorator that is outside this
     snippet (presumably patching ``LoadHandler.proceed`` -- confirm).
     """
     mock_proceed.return_value = False
     lh = tlh.LoadHandler(datacfg_file="foo",
                          envcfg_file="bar",
                          resources_file="baz.zip")
     # assertIsNone checks identity with None and gives a clearer failure
     # message than assertEqual(..., None).
     self.assertIsNone(lh.execute())
コード例 #3
0
    def test_execute_vertica_load_false_hdfs_path_exists(
            self, mock_wpf, mock_proceed, mock_lock, mock_chunk_dirs,
            mock_iso_fmt):
        """Check the workflow-properties call when path_exists() is True.

        The ``mock_*`` arguments come from patch decorators that are not
        visible in this snippet.
        """
        cfg = self.config_value
        partition = "2016/08/19/14"
        load_folder = "d_20160819-1410"
        job_id = "job: 12345"

        # Patched collaborators.
        mock_proceed.return_value = True
        mock_chunk_dirs.return_value = {partition: [load_folder]}
        mock_iso_fmt.return_value = "2016-08-19 14:10:00"

        # HDFS reports that the partition path exists with two subdirs.
        hdfs = self.mock_hdfs.return_value
        hdfs.path_exists.return_value = True
        hdfs.get_subdirs.return_value = ["0", "1"]

        # Oozie returns a job id and a set of counters.
        oozie = self.mock_oozie.return_value
        oozie.launch.return_value = job_id
        oozie.get_counts.return_value = {
            "map_input_records": 12466493,
            "map_output_records": 28701197,
            "reduce_input_records": 28701197,
            "reduce_output_records": 28701197,
            "skipped": 9
        }

        metadata = self.mock_mm.return_value
        metadata.get_unprocessed_partitions.return_value = [partition]

        # NOTE(review): get_subdirs() yields ["0", "1"] but the expected
        # suffix below is "3334" -- confirm this is the intended value.
        expected_path = "%s/%s/3334" % (cfg, partition)

        handler = tlh.LoadHandler(
            datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
        handler.execute()

        # workflow properties file tests
        mock_wpf.assert_called_with(expected_path, [load_folder])
コード例 #4
0
 def test_init_hdfs_manager_exception(self):
     """Constructing LoadHandler must raise LoadHandlerException when
     get_primary_namenode() raises HdfsManagerException."""
     hdfs = self.mock_hdfs.return_value
     hdfs.get_primary_namenode.side_effect = thex.HdfsManagerException()
     with self.assertRaises(thex.LoadHandlerException):
         tlh.LoadHandler(
             datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
コード例 #5
0
    def test_execute_vertica_load_false(self, mock_wpf, mock_proceed,
                                        mock_lock, mock_chunk_dirs,
                                        mock_iso_fmt):
        """Exercise execute() with path_exists() False and verify the calls
        made to the workflow-props, Oozie, Hive, HDFS and metadata mocks.

        The ``mock_*`` arguments come from patch decorators that are not
        visible in this snippet.
        """
        cfg = self.config_value
        partition = "2016/08/19/14"
        load_folder = "d_20160819-1410"
        timestamp = "2016-08-19 14:10:00"
        job_id = "job: 12345"
        expected_path = "%s/%s/0" % (cfg, partition)

        # Patched collaborators.
        mock_proceed.return_value = True
        mock_chunk_dirs.return_value = {partition: [load_folder]}
        mock_iso_fmt.return_value = timestamp

        hdfs = self.mock_hdfs.return_value
        hdfs.path_exists.return_value = False

        oozie = self.mock_oozie.return_value
        oozie.launch.return_value = job_id
        oozie.get_counts.return_value = {
            "map_input_records": 12466493,
            "map_output_records": 28701197,
            "reduce_input_records": 28701197,
            "reduce_output_records": 28701197,
            "skipped": 9
        }

        metadata = self.mock_mm.return_value
        metadata.get_unprocessed_partitions.return_value = [partition]

        handler = tlh.LoadHandler(
            datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
        handler.execute()

        # workflow properties file tests
        mock_wpf.assert_called_with(expected_path, [load_folder])

        # Oozie tests
        oozie.launch.assert_called_with(propfile=None)
        oozie.poll.assert_called_with(job_id, interval=10)
        oozie.get_counts.assert_called_with(job_id)

        # Hive tests
        self.mock_hive.return_value.create_partition.assert_called_with(
            expected_path)

        # HDFS tests
        hdfs.grantall.assert_called_with("rx", cfg)

        # Metadata manager tests
        expected_metadata = {
            "load_id": "12345",
            "load_type": "scheduled",
            "dataset_name": cfg,
            "hive_db": cfg,
            "hive_table": cfg,
            "hive_start_ts": timestamp,
            "hive_end_ts": timestamp,
            "last_load_folder": load_folder,
            "hive_last_partition": "2016/08/19/14/0",
            "hadoop_records_processed": 12466493,
            "hive_rows_loaded": 28701197
        }
        metadata.insert.assert_called_with(expected_metadata, mdtype="load")
コード例 #6
0
 def test_make_tmproot_hdfs_mkdir_exception(self):
     """make_tmproot() must raise LoadHandlerException when the HDFS
     manager's makedir() raises HdfsManagerException."""
     hdfs_mgr = self.mock_hdfs.return_value
     # Take the exception class from the exceptions module, as the sibling
     # test for get_primary_namenode does, rather than reaching through the
     # module under test.
     hdfs_mgr.makedir.side_effect = thex.HdfsManagerException()
     lh = tlh.LoadHandler(datacfg_file="foo",
                          envcfg_file="bar",
                          resources_file="baz.zip")
     with self.assertRaises(thex.LoadHandlerException):
         lh.make_tmproot()
コード例 #7
0
 def test_proceed_no_newdirs(self, mock_gn):
     """proceed() must be falsy when the patched ``mock_gn`` returns an
     empty list and get_lock_status() reports (0, 0)."""
     mock_gn.return_value = []
     self.mock_mm.return_value.get_lock_status.return_value = (0, 0)
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     self.assertFalse(handler.proceed())
コード例 #8
0
 def test_get_newdirs_call_metadata(self):
     """get_newdirs() must call the metadata manager's get_lastdir() with
     (config_value, config_value, None)."""
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.get_newdirs()
     cfg = self.config_value
     metadata = self.mock_mm.return_value
     metadata.get_lastdir.assert_called_with(cfg, cfg, None)
コード例 #9
0
 def test_lock_exception(self):
     """lock() must raise LoadHandlerException when the metadata manager's
     lock() raises MetadataManagerException."""
     metadata = self.mock_mm.return_value
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     # The failure is armed only after the handler has been constructed.
     metadata.lock.side_effect = thex.MetadataManagerException()
     with self.assertRaises(thex.LoadHandlerException):
         handler.lock()
コード例 #10
0
 def test_lock(self):
     """lock() must call the metadata manager's lock() with config_value."""
     cfg = self.config_value
     metadata = self.mock_mm.return_value
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.lock()
     metadata.lock.assert_called_with(cfg)
コード例 #11
0
    def test_init(self):
        """Constructing LoadHandler must query the primary namenode and
        instantiate the Oozie mock with no arguments."""
        cfg = self.config_value
        tlh.LoadHandler(
            datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")

        hdfs = self.mock_hdfs.return_value
        hdfs.get_primary_namenode.assert_called_with([cfg], cfg, cfg)
        self.mock_oozie.assert_called_with()
コード例 #12
0
    def test_get_newdirs_val(self):
        """get_newdirs() must return what the HDFS manager's get_newdirs()
        returns."""
        expected = ["foo", "bar", "baz"]
        self.mock_hdfs.return_value.get_newdirs.return_value = expected

        handler = tlh.LoadHandler(
            datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
        self.assertEqual(handler.get_newdirs(), expected)
コード例 #13
0
 def test_proceed_all_conditions_satisfied(self, mock_gn):
     """proceed() must be truthy when ``mock_gn`` returns entries,
     get_lock_status() reports (0, 0) and a primary namenode is returned."""
     mock_gn.return_value = ["foo", "bar"]
     self.mock_mm.return_value.get_lock_status.return_value = (0, 0)
     self.mock_hdfs.return_value.get_primary_namenode.return_value = "foo"
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     self.assertTrue(handler.proceed())
コード例 #14
0
 def test_proceed_no_primary_namenode(self, mock_gn):
     """proceed() must be falsy when get_primary_namenode() returns None,
     even though ``mock_gn`` returns entries and the lock status is (0, 0)."""
     mock_gn.return_value = ["foo", "bar"]
     self.mock_mm.return_value.get_lock_status.return_value = (0, 0)
     self.mock_hdfs.return_value.get_primary_namenode.return_value = None
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     self.assertFalse(handler.proceed())
コード例 #15
0
 def test_make_tmproot_val(self):
     """make_tmproot() must return the value produced by the HDFS manager's
     makedir()."""
     hdfs = self.mock_hdfs.return_value
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     cfg = self.config_value
     expected = "/tmp/thrive/%s/%s" % (cfg, cfg)
     hdfs.makedir.return_value = expected
     self.assertEqual(handler.make_tmproot(), expected)
コード例 #16
0
 def test_make_tmproot_call(self):
     """make_tmproot() must call rmdir() and makedir() on the HDFS manager
     with "/tmp/thrive/<config_value>/<config_value>"."""
     hdfs = self.mock_hdfs.return_value
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.make_tmproot()
     cfg = self.config_value
     expected_root = "/tmp/thrive/%s/%s" % (cfg, cfg)
     hdfs.rmdir.assert_called_with(expected_root)
     hdfs.makedir.assert_called_with(expected_root)
コード例 #17
0
 def test_vload_copy_direct_val(self):
     """vload_copy(..., mode="direct") must return the row count reported
     by the Vertica manager's load()."""
     cfg = self.config_value
     partition = "2016/08/16/14/0"
     self.mock_hdfs.return_value.get_primary_namenode.return_value = cfg
     self.mock_vtica.return_value.load.return_value = 100
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     self.assertEqual(handler.vload_copy(partition, mode="direct"), 100)
コード例 #18
0
 def test_get_newdirs_call_hdfs(self, mock_dt):
     """get_newdirs() must call the HDFS manager's get_newdirs() with
     (config_value, last directory, mocked now(), 4.0)."""
     frozen_now = datetime(2016, 8, 18, 14, 10)
     mock_dt.now.return_value = frozen_now
     cfg = self.config_value
     last_folder = "d_20160818-1410"
     self.mock_mm.return_value.get_lastdir.return_value = last_folder
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.get_newdirs()
     hdfs = self.mock_hdfs.return_value
     hdfs.get_newdirs.assert_called_with(cfg, last_folder, frozen_now, 4.0)
コード例 #19
0
 def test_execute_BaseException(self, mock_wpf, mock_proceed, mock_lock,
                                mock_chunk_dirs, mock_iso_fmt):
     """execute() must let a BaseException from the metadata manager's
     insert() propagate to the caller."""
     mock_proceed.return_value = True
     mock_chunk_dirs.return_value = {"2016/08/19/14": ["d_20160819-1410"]}
     self.mock_hdfs.return_value.path_exists.return_value = False
     self.mock_mm.return_value.insert.side_effect = BaseException()
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     with self.assertRaises(BaseException):
         handler.execute()
コード例 #20
0
 def test_execute_HdfsManager_grantall_exception(self, mock_wpf,
                                                 mock_proceed, mock_lock,
                                                 mock_chunk_dirs,
                                                 mock_iso_fmt):
     """execute() must raise LoadHandlerException when the HDFS manager's
     grantall() raises HdfsManagerException."""
     mock_proceed.return_value = True
     mock_chunk_dirs.return_value = {"2016/08/19/14": ["d_20160819-1410"]}
     hdfs = self.mock_hdfs.return_value
     hdfs.path_exists.return_value = False
     hdfs.grantall.side_effect = thex.HdfsManagerException
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     with self.assertRaises(thex.LoadHandlerException):
         handler.execute()
コード例 #21
0
 def test_execute_finally(self, mock_wpf, mock_proceed, mock_lock,
                          mock_chunk_dirs, mock_iso_fmt, mock_dtd):
     """With ``locked`` set to True, execute() must call the metadata
     manager's release() with config_value."""
     mock_proceed.return_value = True
     mock_chunk_dirs.return_value = {"2016/08/19/14": ["d_20160819-1410"]}
     self.mock_hdfs.return_value.path_exists.return_value = False
     metadata = self.mock_mm.return_value
     unprocessed = [("12345", "2016/08/19/14/0", "100", "50")]
     metadata.get_unprocessed_partitions.return_value = unprocessed
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.locked = True
     handler.execute()
     metadata.release.assert_called_with(self.config_value)
コード例 #22
0
 def test_vload_copy_decompress_call(self, mock_join):
     """vload_copy(..., mode="decompress") must call makedir(), decompress()
     and the Vertica load() with the path produced by the patched
     ``mock_join``."""
     joined_path = "a/b/c"
     mock_join.return_value = joined_path
     cfg = self.config_value
     partition = "2016/08/16/14/0"
     hdfs = self.mock_hdfs.return_value
     hdfs.get_primary_namenode.return_value = cfg
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.vload_copy(partition, mode="decompress")
     vertica = self.mock_vtica.return_value
     hdfs.makedir.assert_called_with(joined_path)
     hdfs.decompress.assert_called_with(joined_path, joined_path)
     vertica.load.assert_called_with(
         cfg, joined_path, cfg, cfg, cfg, mode="decompress")
コード例 #23
0
 def test_execute_OozieManager_poll_exception(self, mock_wpf, mock_proceed,
                                              mock_lock, mock_chunk_dirs,
                                              mock_iso_fmt):
     """execute() must raise LoadHandlerException when the Oozie manager's
     poll() raises OozieManagerException."""
     mock_proceed.return_value = True
     mock_chunk_dirs.return_value = {"2016/08/19/14": ["d_20160819-1410"]}
     mock_iso_fmt.return_value = "2016-08-19 14:10:00"
     self.mock_hdfs.return_value.path_exists.return_value = False
     oozie = self.mock_oozie.return_value
     oozie.poll.side_effect = tlh.OozieManagerException()
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     with self.assertRaises(thex.LoadHandlerException):
         handler.execute()
コード例 #24
0
    def test_make_workflowpropsfile(self, mock_mtz, mock_dt, mock_open):
        """make_workflowpropsfile() must create its directory via the shell
        mock and pass the template, substitutions and output file name to
        the patched ``mock_mtz``.

        The ``mock_*`` arguments come from patch decorators that are not
        visible in this snippet.
        """
        frozen_now = datetime(2016, 8, 18, 14, 10)
        mock_dt.now.return_value = frozen_now

        cfg = self.config_value
        dirlist = ["foo", "bar"]
        output_path = "output/path"

        template_str = squeeze("""
            jobTracker=@JOBTRACKER
            oozie.wf.application.path=@WORKFLOWXML
            nameNode=@NAMENODE
            inputFile=@INPUTFILES
            outputDir=@OUTPUTDIR
            numReducers=@NUM_REDUCERS
            """)

        # Input directories are joined with an escaped comma inside braces.
        joined_dirs = "\\\\\\\\,".join(dirlist)
        input_files = os.path.join(cfg, "{" + joined_dirs + "}")

        substitutions = {
            "@JOBTRACKER": cfg,
            "@WORKFLOWXML": "%s/workflow/%s" % (cfg, cfg),
            "@NAMENODE": cfg,
            "@INPUTFILES": input_files,
            "@OUTPUTDIR": output_path,
            "@NUM_REDUCERS": cfg
        }

        # The patched open() yields a file-like mock whose read() returns
        # the template text.
        template_file = mock.MagicMock(spec=file)
        mock_open.return_value.__enter__.return_value = template_file
        template_file.read.return_value = template_str

        handler = tlh.LoadHandler(
            datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
        handler.make_workflowpropsfile(output_path, dirlist)

        shell = self.mock_shell.return_value
        shell.safe_execute.assert_called_with("mkdir -p %s" % cfg)

        stamp = frozen_now.strftime("%Y%m%d-%H")
        propfile_name = "%s/workflow_%s.properties" % (cfg, stamp)
        mock_mtz.assert_called_with(
            template_str, substitutions, outfile=propfile_name)
コード例 #25
0
 def test_vload_copy_direct_call(self):
     """vload_copy(..., mode="direct") must call the Vertica manager's
     load() with the partition glob and config values."""
     cfg = self.config_value
     partition = "2016/08/16/14/0"
     part_glob = "%s/%s/*" % (cfg, partition)
     self.mock_hdfs.return_value.get_primary_namenode.return_value = cfg
     handler = tlh.LoadHandler(
         datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
     handler.vload_copy(partition, mode="direct")
     vertica = self.mock_vtica.return_value
     # Schema, data table and reject table all resolve to the config value.
     vertica.load.assert_called_with(
         cfg, part_glob, cfg, cfg, cfg, mode="direct")
コード例 #26
0
 def test_execute_MetadataManager_update_exception(self, mock_wpf,
                                                   mock_proceed, mock_lock,
                                                   mock_chunk_dirs,
                                                   mock_iso_fmt, mock_dtd):
     """execute() must raise LoadHandlerException when the metadata
     manager's update() raises MetadataManagerException.

     The ``mock_*`` arguments come from patch decorators that are not
     visible in this snippet.
     """
     # Every config lookup returns "true" (presumably this enables the
     # Vertica load path that reaches update() -- confirm).
     self.mock_get_config.return_value = "true"
     mock_proceed.return_value = True
     mock_chunk_dirs.return_value = {"2016/08/19/14": ["d_20160819-1410"]}
     hm = self.mock_hdfs.return_value
     hm.path_exists.return_value = False
     mm = self.mock_mm.return_value
     mm.get_unprocessed_partitions.return_value = \
         [("12345", "2016/08/19/14/0", "100", "50")]
     # Take the exception class from the exceptions module, as
     # test_lock_exception does.
     mm.update.side_effect = thex.MetadataManagerException()
     lh = tlh.LoadHandler(datacfg_file="foo",
                          envcfg_file="bar",
                          resources_file="baz.zip")
     with self.assertRaises(thex.LoadHandlerException):
         lh.execute()
コード例 #27
0
 def test_init_float_exception(self):
     """Constructing LoadHandler must raise LoadHandlerException when the
     patched float() raises ValueError."""
     self.mock_float.side_effect = ValueError()
     with self.assertRaises(thex.LoadHandlerException):
         tlh.LoadHandler(
             datacfg_file="foo", envcfg_file="bar", resources_file="baz.zip")
コード例 #28
0
    def test_execute_vertica_load_true(self, mock_vload_copy, mock_wpf,
                                       mock_proceed, mock_lock,
                                       mock_chunk_dirs, mock_iso_fmt):
        """Exercise execute() with every config value set to "true" and
        verify the calls made to the workflow-props, Oozie, Hive, HDFS,
        metadata and vload_copy mocks.

        The ``mock_*`` arguments come from patch decorators that are not
        visible in this snippet.
        """
        # Every config lookup returns "true", so cv doubles as the expected
        # value wherever a config value is passed through.
        cv = self.mock_get_config.return_value = "true"
        mock_proceed.return_value = True
        mock_chunk_dirs.return_value = {"2016/08/19/14": ["d_20160819-1410"]}
        ts = "2016-08-19 14:10:00"
        mock_iso_fmt.return_value = ts
        hm = self.mock_hdfs.return_value
        hm.path_exists.return_value = False
        hive_ptn = "2016/08/19/14/0"
        ptn_path = "%s/%s" % (cv, hive_ptn)
        mo = self.mock_oozie.return_value
        oozie_jobid = "job: 12345"
        mo.launch.return_value = oozie_jobid
        mo.get_counts.return_value = {
            "map_input_records": 12466493,
            "map_output_records": 28701197,
            "reduce_input_records": 28701197,
            "reduce_output_records": 28701197,
            "skipped": 9
        }
        mm = self.mock_mm.return_value
        mm.get_unprocessed_partitions.return_value = \
            [("12345", hive_ptn, "100", "50")]

        mock_vload_copy.return_value = 100

        lh = tlh.LoadHandler(datacfg_file="foo",
                             envcfg_file="bar",
                             resources_file="baz.zip")

        mn = self.mock_newrelic.return_value
        mn.get_count.return_value = 100

        lh.execute()

        # workflow properties file tests
        mock_wpf.assert_called_with(ptn_path, ["d_20160819-1410"])

        # Oozie tests
        mo.launch.assert_called_with(propfile=None)
        mo.poll.assert_called_with(oozie_jobid, interval=10)
        mo.get_counts.assert_called_with(oozie_jobid)

        # Hive tests
        hive_mgr = self.mock_hive.return_value
        hive_mgr.create_partition.assert_called_with(ptn_path)

        # HDFS tests
        hm.grantall.assert_called_with("rx", cv)

        # Metadata manager tests
        hive_load_metadata = {
            "load_id": "12345",
            "load_type": "scheduled",
            "dataset_name": cv,
            "hive_db": cv,
            "hive_table": cv,
            "hive_start_ts": ts,
            "hive_end_ts": ts,
            "last_load_folder": "d_20160819-1410",
            "hive_last_partition": hive_ptn,
            "hadoop_records_processed": 12466493,
            "hive_rows_loaded": 28701197
        }
        mm.insert.assert_called_with(hive_load_metadata, mdtype="load")

        vertica_metadata = {
            "vertica_db": cv,
            "vertica_schema": cv,
            "vertica_table": cv,
            "vertica_start_ts": ts,
            "vertica_end_ts": ts,
            "vertica_last_partition": hive_ptn,
            "vertica_rows_loaded": 100,
            "status": "SUCCESS"
        }
        mm.update.assert_called_with(("12345", hive_ptn),
                                     vertica_metadata,
                                     mdtype="load")

        # Vertica manager tests
        mock_vload_copy.assert_called_with(hive_ptn, mode="direct")