Ejemplo n.º 1
0
 def test_get_lock_status_call(self, mock_exec):
     """Check the SQL text produced by ``self.mm.get_lock_status``.

     The expected statement is compared with the first positional argument
     of the most recent ``mock_exec`` call; both sides go through
     ``squeeze`` before the comparison.
     """
     dataset = "foo"
     query_template = '''
               select locked, release_attempts
               from thrive_dataset_lock
               where dataset_name = '%s'; '''
     expected_sql = query_template % dataset

     # Only the issued statement matters here, so the result is discarded.
     self.mm.get_lock_status(dataset)

     issued_sql = mock_exec.call_args[0][0]
     self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 2
0
 def test_increment_release_attempt_call(self, mock_exec):
     """Check the SQL text produced by ``self.mm.increment_release_attempt``.

     The expected update statement is compared with the first positional
     argument of the most recent ``mock_exec`` call; both sides go through
     ``squeeze`` before the comparison.
     """
     dataset = "foo"
     query_template = '''
              update thrive_dataset_lock
              set release_attempts = release_attempts + 1
              where dataset_name = '%s';
            '''
     expected_sql = query_template % dataset

     self.mm.increment_release_attempt(dataset)

     issued_sql = mock_exec.call_args[0][0]
     self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 3
0
 def test_release_call(self, mock_exec):
     """Check the SQL text produced by ``self.mm.release``.

     The expected statement (setting ``locked = FALSE`` for the dataset) is
     compared with the first positional argument of the most recent
     ``mock_exec`` call; both sides go through ``squeeze`` first.
     """
     dataset = "foo"
     query_template = '''
               update thrive_dataset_lock
               set locked = FALSE
               where dataset_name='%s';
            '''
     expected_sql = query_template % dataset

     self.mm.release(dataset)

     issued_sql = mock_exec.call_args[0][0]
     self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 4
0
 def test_lock_call(self, mock_exec):
     """Check the SQL text produced by ``self.mm.lock``.

     The expected statement (setting ``locked = TRUE`` and resetting
     ``release_attempts`` to 0) is compared with the first positional
     argument of the most recent ``mock_exec`` call; both sides go through
     ``squeeze`` first.
     """
     dataset = "foo"
     query_template = '''
               update thrive_dataset_lock
               set locked = TRUE, release_attempts = 0
               where  dataset_name = '%s';
            '''
     expected_sql = query_template % dataset

     self.mm.lock(dataset)

     issued_sql = mock_exec.call_args[0][0]
     self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 5
0
    def test_delete_call(self, mock_exec):
        """Check the SQL text produced by ``self.mm.delete``.

        The expected delete statement is built from the dataset name plus a
        metadata column name/value pair and compared with the first
        positional argument of the most recent ``mock_exec`` call; both
        sides go through ``squeeze`` first.
        """
        call_args = ("foo", "bar", "baz")  # dataset, column name, column value
        query_template = '''
                  delete from thrive_load_metadata
                  where dataset_name = '%s'
                  and %s = '%s';
               '''
        expected_sql = query_template % call_args

        self.mm.delete(*call_args)

        issued_sql = mock_exec.call_args[0][0]
        self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 6
0
 def test_get_last_dir_call(self, mock_exec):
     """Check the SQL text produced by ``self.mm.get_lastdir``.

     The expected select statement is built from the dataset name, Hive
     table and load type, then compared with the first positional argument
     of the most recent ``mock_exec`` call; both sides go through
     ``squeeze`` first.
     """
     call_args = ("foo", "bar", "baz")  # dataset_name, hive_table, load_type
     query_template = '''
              select last_load_folder
              from thrive_load_metadata
              where dataset_name = '%s'
              and hive_table = '%s'
              and load_type = '%s'
              order by hive_end_ts desc
              limit 1;
           '''
     expected_sql = query_template % call_args

     # Only the issued statement matters here, so the result is discarded.
     self.mm.get_lastdir(*call_args)

     issued_sql = mock_exec.call_args[0][0]
     self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 7
0
 def test_get_unprocessed_partitions_call(self, mock_exec):
     """Check the SQL text produced by ``self.mm.get_unprocessed_partitions``.

     The expected select statement is built from ``hive_db`` and
     ``hive_table`` and compared with the first positional argument of the
     most recent ``mock_exec`` call; both sides go through ``squeeze``
     first.
     """
     hive_db, hive_table = "foo", "bar"
     stmt = '''
               SELECT load_id,
                      hive_last_partition,
                      hive_rows_loaded,
                      hadoop_records_processed
               from thrive_load_metadata
               where hive_db = '%s'
               and hive_table = '%s'
               and hive_last_partition <> ''
               and vertica_last_partition is NULL;
           ''' % (hive_db, hive_table)

     # Pass the same variables used to build the expected statement so the
     # call and the expectation cannot drift apart if the values change.
     self.mm.get_unprocessed_partitions(hive_db, hive_table)

     self.assertEqual(squeeze(stmt), squeeze(mock_exec.call_args[0][0]))
Ejemplo n.º 8
0
    def test_update(self, mock_exec):
        """Check the SQL text produced by ``self.mm.update`` per metadata type.

        For each entry in the type-to-table map ("setup" and "load") the
        expected update statement is assembled from the table name, the
        ``key="value"`` assignments and the matching filter, then compared
        with the first positional argument of the most recent ``mock_exec``
        call; both sides go through ``squeeze`` first.
        """
        primary_key = ("pk1", "pk2")
        table_by_type = {"setup": "thrive_setup", "load": "thrive_load_metadata"}
        filter_by_type = {
            "setup": "dataset_id = '%s'" % primary_key[0],
            "load": "(load_id, hive_last_partition) = ('%s', '%s')" % primary_key
        }
        payload = {"fookey": "fooval", "barkey": "barval"}

        for mdtype, table in table_by_type.items():
            # Build the expectation before the call, from the same dict that
            # is handed to update().
            assignments = ['%s="%s"' % (column, value)
                           for column, value in payload.items()]
            set_clause = ",".join(assignments)
            where_clause = filter_by_type[mdtype]
            expected_sql = "update %s set %s where %s;" % (
                table, set_clause, where_clause)

            self.mm.update(primary_key, payload, mdtype=mdtype)

            issued_sql = mock_exec.call_args[0][0]
            self.assertEqual(squeeze(expected_sql), squeeze(issued_sql))
Ejemplo n.º 9
0
    def test__make_schema(self):
        """Run ``self.sh._make_schema`` on temp files and compare its output.

        A column list and a DDL template are written to files obtained from
        ``make_tempfile``; ``_make_schema`` is called with those file names,
        an output file name and the substitution map. The output file is
        then rewound, read, and compared with the expected DDL, with both
        sides going through ``squeeze`` first.
        """
        columns_file = make_tempfile()
        template_file = make_tempfile()
        column_defs = "col1 int\ncol2 float\ncol3 varchar(20)"
        ddl_template = """
        use @DATABASE;
        create external table @TABLE (
            @COLUMNMAPPINGS
        )
          partitioned by (year string, month string, day string, hour string, part string)
          row format delimited
          fields terminated by '\u0001'
          null defined as '';
        """
        tempfile_write(columns_file, column_defs)
        tempfile_write(template_file, ddl_template)
        placeholder_values = {
            "@DATABASE": "dbfoo",
            "@COLUMNMAPPINGS": column_defs,
            "@TABLE": "tablefoo"
        }

        output_file = make_tempfile()
        self.sh._make_schema(
            columns_file.name,
            template_file.name,
            output_file.name,
            placeholder_values,
        )
        # Rewind so the read below starts at the beginning of the file.
        output_file.seek(0)
        generated = output_file.read()

        # In the expected DDL the column lines appear separated by commas.
        expected = """
            use dbfoo;
                    create external table tablefoo (
                        col1 int,
                        col2 float,
                        col3 varchar(20)
                    )
                      partitioned by (year string, month string, day string, hour string, part string)
                      row format delimited
                      fields terminated by '\u0001'
                      null defined as '';

            """
        self.assertEqual(squeeze(expected), squeeze(generated))
Ejemplo n.º 10
0
    def test_make_workflowpropsfile(self, mock_mtz, mock_dt, mock_open):
        """Check the calls made by ``LoadHandler.make_workflowpropsfile``.

        ``mock_dt.now`` is pinned to a fixed datetime and the mocked ``open``
        context manager yields a file mock whose ``read`` returns the
        properties template. After calling ``make_workflowpropsfile`` the
        test asserts that ``safe_execute`` on ``self.mock_shell``'s return
        value received ``mkdir -p <config value>`` and that ``mock_mtz``
        received the template, the substitution map and the timestamped
        properties file name as ``outfile``.
        """
        fixed_now = datetime(2016, 8, 18, 14, 10)
        mock_dt.now.return_value = fixed_now

        cfg = self.config_value
        workflow_xml = "%s/workflow/%s" % (cfg, cfg)

        props_template = squeeze("""
            jobTracker=@JOBTRACKER
            oozie.wf.application.path=@WORKFLOWXML
            nameNode=@NAMENODE
            inputFile=@INPUTFILES
            outputDir=@OUTPUTDIR
            numReducers=@NUM_REDUCERS
            """)

        input_dirs = ["foo", "bar"]
        joined_dirs = "\\\\\\\\,".join(input_dirs)
        input_glob = os.path.join(cfg, "{" + joined_dirs + "}")
        out_dir = "output/path"

        expected_substitutions = {
            "@JOBTRACKER": cfg,
            "@WORKFLOWXML": workflow_xml,
            "@NAMENODE": cfg,
            "@INPUTFILES": input_glob,
            "@OUTPUTDIR": out_dir,
            "@NUM_REDUCERS": cfg
        }

        # Make ``with open(...) as f`` yield a file mock serving the template.
        fake_file = mock.MagicMock(spec=file)
        mock_open.return_value.__enter__.return_value = fake_file
        fake_file.read.return_value = props_template

        handler = tlh.LoadHandler(datacfg_file="foo",
                                  envcfg_file="bar",
                                  resources_file="baz.zip")
        handler.make_workflowpropsfile(out_dir, input_dirs)

        shell = self.mock_shell.return_value
        shell.safe_execute.assert_called_with("mkdir -p %s" % cfg)

        timestamp = fixed_now.strftime("%Y%m%d-%H")
        expected_propfile = "%s/workflow_%s.properties" % (cfg, timestamp)
        mock_mtz.assert_called_with(props_template,
                                    expected_substitutions,
                                    outfile=expected_propfile)