Example #1
0
    def runModule(self, fqn, forceRun=False, quickRun=False):
        """Run a single SmvModule identified by its Fully Qualified Name (FQN)

        Args:
            fqn (str): FQN of the module to run
            forceRun (bool): True to force the module to run even if it
                already has persisted output, False otherwise.
            quickRun (bool): True to skip computing dqm+metadata and
                persisting csv.

        Example:
            Only the dataframe of the module:
                dataframe = smvApp.runModule('package.module.SmvModuleClass')[0]
            The dataframe together with the run info collector:
                dataframe, collector = smvApp.runModule('package.module.SmvModuleClass')

        Returns:
            (DataFrame, SmvRunInfoCollector) tuple
            - DataFrame is the computed result of the module
            - SmvRunInfoCollector carries additional information about
              the run, such as validation results.
        """
        runner = SmvModuleRunner([self.dsm.load(fqn)[0]], self)

        # Both entry points receive forceRun; quickRun only selects which one.
        execute = runner.quick_run if quickRun else runner.run
        return self._to_single_run_res(execute(forceRun))
Example #2
0
    def getRunInfo(self, fqn):
        """Returns the run information of a module and all its dependencies
        from the last run.

        Unlike the runModule() method, which returns the run
        information just for that run, this method returns the run
        information from the last run.

        If no module was run (e.g. the code did not change, so the
        data is read from persistent storage), the SmvRunInfoCollector
        returned from the runModule() method would be empty.  But the
        SmvRunInfoCollector returned from this method would contain
        all latest run information about all dependent modules.

        Args:
            fqn (str): fqn of target module

        Returns:
            SmvRunInfoCollector

        """
        # dsm.load returns a sequence; the single requested module is its first item.
        target_ds = self.dsm.load(fqn)[0]
        return SmvModuleRunner([target_ds], self).get_runinfo()
Example #3
0
    def test_basic_metadata_creation(self):
        """Run M2 and check the fqn and DQM rule-error total stored in its metadata."""
        target_fqn = "stage.modules.M2"
        module = self.load(target_fqn)[0]

        runner = SmvModuleRunner([module], self.smvApp)
        runner.run()

        # Walk down into the DQM validation section one level at a time.
        validation = module.module_meta._metadata['_dqmValidation']
        rule_errors = validation['dqmStateSnapshot']['ruleErrors']
        b_lt_04_total = rule_errors['b_lt_04']['total']

        self.assertEqual(module.module_meta._metadata['_fqn'], target_fqn)
        self.assertEqual(b_lt_04_total, 1)
Example #4
0
    def test_publish(self):
        """Publish M3 and check that the csv, meta and hist outputs exist."""
        target_fqn = "stage.modules.M3"
        publish_dir = self.smvApp.all_data_dirs().publishDir

        module = self.load(target_fqn)[0]
        SmvModuleRunner([module], self.smvApp).publish(publish_dir)

        # Build every expected path up front, then verify them in order.
        templates = ('{}/{}.csv', '{}/{}.meta', '{}/{}.hist')
        expected_paths = [tpl.format(publish_dir, module.fqn()) for tpl in templates]

        for path in expected_paths:
            self.assertTrue(os.path.exists(path))
Example #5
0
    def test_purge_persisted(self):
        """Check that purging via M3's runner removes M2's persisted file."""
        upstream_fqn = "stage.modules.M2"
        target_fqn = "stage.modules.M3"

        upstream, target = self.load(upstream_fqn, target_fqn)

        # Resolve the target's dataframe so the upstream output gets persisted.
        self.df(target_fqn)

        persisted_before = os.path.exists(upstream.persistStrategy()._file_path)
        self.assertTrue(persisted_before)

        # Purge through a runner built on the target module only.
        runner = SmvModuleRunner([target], self.smvApp)
        runner.purge_persisted()

        persisted_after = os.path.exists(upstream.persistStrategy()._file_path)
        self.assertFalse(persisted_after)
Example #6
0
 def _generate_output_modules(self, mods):
     """Run the given modules through an SmvModuleRunner; the result is discarded."""
     runner = SmvModuleRunner(mods, self)
     runner.run()
Example #7
0
 def _publish_modules_locally(self, mods):
     """Publish the given modules to the directory held in cmd_line.exportCsv."""
     export_dir = self.cmd_line.exportCsv
     runner = SmvModuleRunner(mods, self)
     runner.publish_local(export_dir)
Example #8
0
 def _publish_modules_through_jdbc(self, mods):
     """Hand the given modules to an SmvModuleRunner and call publish_to_jdbc()."""
     runner = SmvModuleRunner(mods, self)
     runner.publish_to_jdbc()
Example #9
0
 def _publish_modules_to_hive(self, mods):
     """Hand the given modules to an SmvModuleRunner and call publish_to_hive()."""
     runner = SmvModuleRunner(mods, self)
     runner.publish_to_hive()
Example #10
0
 def _publish_modules(self, mods):
     """Hand the given modules to an SmvModuleRunner and call publish() with no arguments."""
     runner = SmvModuleRunner(mods, self)
     runner.publish()
Example #11
0
 def _purge_current_output_files(self, mods):
     """Hand the given modules to an SmvModuleRunner and call purge_persisted()."""
     runner = SmvModuleRunner(mods, self)
     runner.purge_persisted()
Example #12
0
 def publishModuleToHiveByName(self, name):
     """Publish an SmvModule to Hive by its name (can be partial FQN)

     The name is first resolved to a full FQN with dsm.inferFqn, the
     matching dataset is loaded, and the runner's publish_to_hive()
     result is returned.
     """
     target_ds = self.load_single_ds(self.dsm.inferFqn(name))
     runner = SmvModuleRunner([target_ds], self)
     return runner.publish_to_hive()
Example #13
0
 def quickRunModule(self, fqn):
     """Quick-run the module with the given FQN.

     Returns the first element of what the runner's quick_run() gives back.
     """
     target_ds = self.dsm.load(fqn)[0]
     quick_results = SmvModuleRunner([target_ds], self).quick_run()
     return quick_results[0]
Example #14
0
 def test_publish_to_hive2(self):
     """Publish module M to Hive and check that table "M" reads back the same data."""
     module_fqn = "stage.modules.M"
     table_name = "M"

     module = self.load(module_fqn)[0]
     expected_df = self.df(module_fqn)

     runner = SmvModuleRunner([module], self.smvApp)
     runner.publish_to_hive()

     actual_df = self.smvApp.sqlContext.sql("select * from " + table_name)
     self.should_be_same(expected_df, actual_df)