예제 #1
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailAny():
    """Task policy under which any rule failure or fix causes the entire DF to fail

        Returns:
            (DQMTaskPolicy): policy for DQM Task, obtained from the JVM-side
                ``DqmTaskPolicies.failAny()``
    """
    jvm = SmvApp.getInstance()._jvm
    return jvm.DqmTaskPolicies.failAny()
예제 #2
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailNone():
    """Task policy that never triggers any DF level policy

        Returns:
            (DQMTaskPolicy): policy for DQM Task, obtained from the JVM-side
                ``DqmTaskPolicies.failNone()``
    """
    jvm = SmvApp.getInstance()._jvm
    return jvm.DqmTaskPolicies.failNone()
예제 #3
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailTotalFixPercentPolicy(threshold):
    """DQM policy that fails the DF when the total number of times all fixes are
        triggered is >= threshold * total Records

        Args:
            threshold (double): the threshold after which the DF fails. value is between 0.0 and 1.0

        Returns:
            (DQMPolicy): policy for DQM
    """
    make_policy = SmvApp.getInstance()._jvm.FailTotalFixPercentPolicy
    # Multiplying by 1.0 turns an int threshold into a float before it is handed over.
    ratio = threshold * 1.0
    return make_policy(ratio)
예제 #4
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailParserCountPolicy(threshold):
    """DQM policy that fails the DF when the total number of parser fails is >= threshold

        Args:
            threshold (int): the threshold after which the DF fails

        Returns:
            (DQMPolicy): policy for DQM
    """
    jvm = SmvApp.getInstance()._jvm
    return jvm.FailParserCountPolicy(threshold)
예제 #5
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailTotalFixCountPolicy(threshold):
    """DQM policy that fails the DF when the total number of times all fixes are
        triggered is >= threshold

        Args:
            threshold (int): the threshold after which the DF fails

        Returns:
            (DQMPolicy): policy for DQM
    """
    jvm = SmvApp.getInstance()._jvm
    return jvm.FailTotalFixCountPolicy(threshold)
예제 #6
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailCount(threshold):
    """Task policy FailCount(n): fail the DF if the task is triggered >= n times

        Args:
            threshold (int): the threshold after which the DF fails

        Returns:
            (DQMTaskPolicy): policy for DQM Task
    """
    jvm = SmvApp.getInstance()._jvm
    return jvm.FailCount(threshold)
예제 #7
0
파일: dqm.py 프로젝트: hubertp/SMV
def FailPercent(threshold):
    """Task policy FailPercent(r): fail the DF if the task is triggered on >= r percent
        of the total number of records in the DF

        Args:
            threshold (double): the threshold after which the DF fails. value is between 0.0 and 1.0

        Returns:
            (DQMTaskPolicy): policy for DQM Task
    """
    make_policy = SmvApp.getInstance()._jvm.FailPercent
    # Multiplying by 1.0 turns an int threshold into a float before it is handed over.
    ratio = threshold * 1.0
    return make_policy(ratio)
예제 #8
0
파일: dqm.py 프로젝트: hubertp/SMV
def DQMRule(rule, name=None, taskPolicy=None):
    """Create a DQMRule, i.e. a requirement that the records of a DF must satisfy

        Example:
            # Require the sum of "a" and "b" columns less than 100
            DQMRule(col('a') + col('b') < 100.0, 'a_b_sum_lt100', FailPercent(0.01))

        Args:
            rule (Column): boolean condition that defines the requirement on the records of a DF
            name (string): optional name for the DQMRule. if not specified, defaults to the rule text
            taskPolicy (DQMTaskPolicy): optional DQM task policy. if not specified (or falsy),
                FailNone() is used

        Returns:
            (DQMRule): a DQMRule object
    """
    # Fall back to the no-op policy when the caller did not supply one.
    if taskPolicy:
        policy = taskPolicy
    else:
        policy = FailNone()
    jvm = SmvApp.getInstance()._jvm
    # The JVM constructor receives the underlying Java column (``_jc``), not the Python wrapper.
    return jvm.DQMRule(rule._jc, name, policy)
예제 #9
0
파일: dqm.py 프로젝트: hubertp/SMV
def DQMFix(condition, fix, name=None, taskPolicy=None):
    """Create a DQMFix, which fixes a column with a default value

        Example:
            # If "age" greater than 100, make it 100
            DQMFix(col('age') > 100, lit(100).alias('age'), 'age_cap100', FailNone())

        Args:
            condition (Column): boolean condition that determines when the fix should occur on the records of a DF
            fix (Column): the replacement column expression applied by the fix
            name (String): optional name for the DQMFix. if not specified, defaults to the condition text
            taskPolicy (DQMTaskPolicy): optional DQM task policy. if not specified (or falsy),
                FailNone() is used

        Returns:
            (DQMFix): a DQMFix object
    """
    # Fall back to the no-op policy when the caller did not supply one.
    if taskPolicy:
        policy = taskPolicy
    else:
        policy = FailNone()
    jvm = SmvApp.getInstance()._jvm
    # Both columns are passed as their underlying Java columns (``_jc``).
    return jvm.DQMFix(condition._jc, fix._jc, name, policy)
예제 #10
0
파일: smvpydataset.py 프로젝트: hubertp/SMV
def SmvExtDataSet(refname):
    """Creates an SmvDataSet representing an external (Scala) SmvDataSet

        The generated class is cached in PyExtDataSetCache, so repeated calls with
        the same refname return the same class object.

        E.g. MyExtMod = SmvExtDataSet("the.scala.mod")

        Args:
            refname (str): fqn of the Scala SmvDataSet

        Returns:
            (SmvExtDataSet): external dataset class with given fqn
    """
    if refname in PyExtDataSetCache:
        return PyExtDataSetCache[refname]
    # Bind the app to a local so the doRun lambda can close over it. Entries of the
    # class dict (such as "smvApp") are NOT visible as bare names inside the lambda,
    # so referring to `smvApp` there would depend on an unrelated module global.
    app = SmvApp.getInstance()
    cls = type(
        "SmvExtDataSet", (SmvPyDataSet, ), {
            "refname": refname,
            "smvApp": app,
            # validator and known are accepted for signature compatibility but unused.
            "doRun": lambda self, validator, known: app.runModule(self.urn)
        })
    cls.fqn = classmethod(lambda klass: refname)
    PyExtDataSetCache[refname] = cls
    return cls
예제 #11
0
파일: dqm.py 프로젝트: hubertp/SMV
def SmvDQM():
    """Factory method for Scala SmvDQM

        Returns:
            the object produced by the JVM-side ``SmvDQM.apply()``
    """
    jvm = SmvApp.getInstance()._jvm
    return jvm.SmvDQM.apply()
예제 #12
0
파일: runconfig.py 프로젝트: hubertp/SMV
 def _smvGetRunConfigHash(self):
     """Return the app level hash of all the current user config values.

     The value is fetched from the app's ``j_smvPyClient.getRunConfigHash()``.
     """
     client = SmvApp.getInstance().j_smvPyClient
     return client.getRunConfigHash()
예제 #13
0
파일: runconfig.py 프로젝트: hubertp/SMV
 def smvGetRunConfig(self, key):
     """Return the current user run configuration value for the given key.

     The lookup is delegated to the app's ``j_smvPyClient.getRunConfig(key)``.
     """
     client = SmvApp.getInstance().j_smvPyClient
     return client.getRunConfig(key)
예제 #14
0
def CsvAttributes(delimiter=',', quotechar='"', hasHeader=False):
    """Build a CSV attributes object through the SmvApp instance

        Args:
            delimiter (str): field delimiter, defaults to ','
            quotechar (str): quote character, defaults to '"'
            hasHeader (bool): header flag, defaults to False

        Returns:
            whatever ``SmvApp._mkCsvAttr(delimiter, quotechar, hasHeader)`` returns
    """
    app = SmvApp.getInstance()
    return app._mkCsvAttr(delimiter, quotechar, hasHeader)