def FailAny(): """Any rule fail or fix with FailAny will cause the entire DF to fail Returns: (DQMTaskPolicy): policy for DQM Task """ return SmvApp.getInstance()._jvm.DqmTaskPolicies.failAny()
def FailNone(): """Tasks with FailNone will not trigger any DF level policy Returns: (DQMTaskPolicy): policy for DQM Task """ return SmvApp.getInstance()._jvm.DqmTaskPolicies.failNone()
def FailTotalFixPercentPolicy(threshold): """For all the fixes in a DQM, if the total number of times they are triggered is >= threshold * total Records, fail the DF Args: threshold (double): the threshold after which the DF fails. value is between 0.0 and 1.0 Returns: (DQMPolicy): policy for DQM """ return SmvApp.getInstance()._jvm.FailTotalFixPercentPolicy(threshold * 1.0)
def FailParserCountPolicy(threshold): """If the total time of parser fails >= threshold, fail the DF Args: threshold (int): the threshold after which the DF fails Returns: (DQMPolicy): policy for DQM """ return SmvApp.getInstance()._jvm.FailParserCountPolicy(threshold)
def FailTotalFixCountPolicy(threshold): """For all the fixes in a DQM, if the total number of times they are triggered is >= threshold, fail the DF Args: threshold (int): the threshold after which the DF fails Returns: (DQMPolicy): policy for DQM """ return SmvApp.getInstance()._jvm.FailTotalFixCountPolicy(threshold)
def FailCount(threshold): """Tasks with FailCount(n) will fail the DF if the task is triggered >= n times Args: threshold (int): the threshold after which the DF fails Returns: (DQMTaskPolicy): policy for DQM Task """ return SmvApp.getInstance()._jvm.FailCount(threshold)
def FailPercent(threshold): """Tasks with FailPercent(r) will fail the DF if the task is triggered >= r percent of the total number of records in the DF Args: threshold (double): the threshold after which the DF fails. value is between 0.0 and 1.0 Returns: (DQMTaskPolicy): policy for DQM Task """ return SmvApp.getInstance()._jvm.FailPercent(threshold * 1.0)
def DQMRule(rule, name=None, taskPolicy=None): """DQMRule defines a requirement on the records of a DF Example: # Require the sum of "a" and "b" columns less than 100 DQMRule(col('a') + col('b') < 100.0, 'a_b_sum_lt100', FailPercent(0.01)) Args: rule (Column): boolean condition that defines the requirement on the records of a DF name (string): optional parameter for naming the DQMRule. if not specified, defaults to the rule text taskPolicy (DQMTaskPolicy): optional parameter for the DQM policy. if not specified, defaults to FailNone() Returns: (DQMRule): a DQMRule object """ task = taskPolicy or FailNone() return SmvApp.getInstance()._jvm.DQMRule(rule._jc, name, task)
def DQMFix(condition, fix, name=None, taskPolicy=None): """DQMFix will fix a column with a default value Example: # If "age" greater than 100, make it 100 val f = DQMFix($"age" > 100, lit(100) as "age", "age_cap100", FailNone) DQMFix(col('age') > 100, lit(100).alias('age'), 'age_cap100', FailNone) Args: condition (Column): boolean condition that determines when the fix should occur on the records of a DF fix (Column): the fix to use when replacing a value that does not pass the condition name (String): optional parameter for naming the DQMFix. if not specified, defaults to the condition text taskPolicy (DQMTaskPolicy): optional parameter for the DQM policy. if not specified, defaults to FailNone() Returns: (DQMFix): a DQMFix object """ task = taskPolicy or FailNone() return SmvApp.getInstance()._jvm.DQMFix(condition._jc, fix._jc, name, task)
def SmvExtDataSet(refname): """Creates an SmvDataSet representing an external (Scala) SmvDataSet E.g. MyExtMod = SmvExtDataSet("the.scala.mod") Args: fqn (str): fqn of the Scala SmvDataSet Returns: (SmvExtDataSet): external dataset with given fqn """ if refname in PyExtDataSetCache: return PyExtDataSetCache[refname] cls = type( "SmvExtDataSet", (SmvPyDataSet, ), { "refname": refname, "smvApp": SmvApp.getInstance(), "doRun": lambda self, validator, known: smvApp.runModule(self.urn) }) cls.fqn = classmethod(lambda klass: refname) PyExtDataSetCache[refname] = cls return cls
def SmvDQM(): """Factory method for Scala SmvDQM""" return SmvApp.getInstance()._jvm.SmvDQM.apply()
def _smvGetRunConfigHash(self): """return the app level hash of the all the current user config values""" return SmvApp.getInstance().j_smvPyClient.getRunConfigHash()
def smvGetRunConfig(self, key): """return the current user run configuration value for the given key.""" return SmvApp.getInstance().j_smvPyClient.getRunConfig(key)
def CsvAttributes(delimiter=',', quotechar='"', hasHeader=False): return SmvApp.getInstance()._mkCsvAttr(delimiter, quotechar, hasHeader)