Beispiel #1
0
def determine_modules_to_test(changed_modules):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    The closure is collected with a single worklist walk over ``dependent_modules``: every
    module is expanded at most once and only the final set is topologically sorted.

    :param changed_modules: iterable of module objects exposing ``dependent_modules`` and
        ``dependencies``.
    :return: ``[modules.root]`` when the root module is part of the closure; otherwise a
        topologically-sorted list of modules (ties are broken by sorting on module names).

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
    >>> x # doctest: +NORMALIZE_WHITESPACE
    ['sql', 'hive', 'mllib', 'examples', 'hive-thriftserver', 'hivecontext-compatibility',
     'pyspark-sql', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
    """
    modules_to_test = set()
    # Explicit worklist instead of recursion: a module reachable through several paths is
    # expanded only once, and no intermediate topological sorts are computed and discarded.
    pending = list(changed_modules)
    while pending:
        module = pending.pop()
        if module in modules_to_test:
            continue
        modules_to_test.add(module)
        pending.extend(module.dependent_modules)
    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    # Restrict each module's dependencies to the selected set so the sort only orders
    # modules that are actually going to be tested.
    return toposort_flatten(
        {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test}, sort=True)
Beispiel #2
0
def determine_modules_to_test(changed_modules):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    The dependents graph is walked once with a visited set, so shared dependents are not
    re-expanded and the topological sort runs a single time on the complete closure.

    :param changed_modules: iterable of module objects exposing ``dependent_modules`` and
        ``dependencies``.
    :return: ``[modules.root]`` when the root module is part of the closure; otherwise a
        topologically-sorted list of modules (ties are broken by sorting on module names).

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
    >>> x # doctest: +NORMALIZE_WHITESPACE
    ['sql', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
     'pyspark-sql', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
    """
    modules_to_test = set()
    # Iterative closure: the previous recursive form sorted every partial result only to
    # have the caller flatten it back into a set, and revisited shared dependents per path.
    worklist = list(changed_modules)
    while worklist:
        current = worklist.pop()
        if current not in modules_to_test:
            modules_to_test.add(current)
            worklist.extend(current.dependent_modules)
    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    # Only dependencies that are themselves selected matter for the ordering.
    return toposort_flatten(
        {m: set(m.dependencies).intersection(modules_to_test) for m in modules_to_test}, sort=True)
Beispiel #3
0
def determine_modules_to_test(changed_modules, deduplicated=True):
    """
    Given a set of modules that have changed, compute the transitive closure of those modules'
    dependent modules in order to determine the set of modules that should be tested.

    Returns a topologically-sorted list of modules (ties are broken by sorting on module names).
    If ``deduplicated`` is disabled, the modules are returned without taking the deduplication
    by dependencies into account.

    The closure is gathered with one worklist walk over ``dependent_modules`` (each module is
    expanded at most once); sorting, when requested, happens once on the complete closure.

    :param changed_modules: iterable of module objects exposing ``dependent_modules`` and,
        when ``deduplicated`` is enabled, ``dependencies``.
    :param deduplicated: when true, collapse the result to ``[modules.root]`` if the root
        module is in the closure and otherwise return the topologically-sorted list; when
        false, return the whole closure as an unordered ``set``.

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.core])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.launcher])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> [x.name for x in determine_modules_to_test([modules.sql])]
    ... # doctest: +NORMALIZE_WHITESPACE
    ['sql', 'avro', 'docker-integration-tests', 'hive', 'mllib', 'sql-kafka-0-10', 'examples',
     'hive-thriftserver', 'pyspark-sql', 'repl', 'sparkr',
     'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-ml']
    >>> sorted([x.name for x in determine_modules_to_test(
    ...     [modules.sparkr, modules.sql], deduplicated=False)])
    ... # doctest: +NORMALIZE_WHITESPACE
    ['avro', 'docker-integration-tests', 'examples', 'hive', 'hive-thriftserver', 'mllib',
     'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-sql',
     'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
    >>> sorted([x.name for x in determine_modules_to_test(
    ...     [modules.sql, modules.core], deduplicated=False)])
    ... # doctest: +NORMALIZE_WHITESPACE
    ['avro', 'catalyst', 'core', 'docker-integration-tests', 'examples', 'graphx', 'hive',
     'hive-thriftserver', 'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
     'pyspark-pandas', 'pyspark-pandas-slow', 'pyspark-resource', 'pyspark-sql',
     'pyspark-streaming', 'repl', 'root', 'sparkr', 'sql', 'sql-kafka-0-10', 'streaming',
     'streaming-kafka-0-10', 'streaming-kinesis-asl']
    """
    modules_to_test = set()
    # Explicit worklist with a visited set: shared dependents are expanded once instead of
    # once per path, and no partial results are sorted just to be merged back into a set.
    pending = list(changed_modules)
    while pending:
        module = pending.pop()
        if module in modules_to_test:
            continue
        modules_to_test.add(module)
        pending.extend(module.dependent_modules)

    if not deduplicated:
        # Raw closure requested: skip both the root short-circuit and the sort.
        return modules_to_test

    # If we need to run all of the tests, then we should short-circuit and return 'root'
    if modules.root in modules_to_test:
        return [modules.root]
    # Only dependencies that are themselves selected matter for the ordering.
    return toposort_flatten(
        {
            m: set(m.dependencies).intersection(modules_to_test)
            for m in modules_to_test
        },
        sort=True)
Beispiel #4
0
def determine_modules_to_test(changed_modules):
    """
    Work out which modules need testing for a given collection of changed modules.

    The changed modules are expanded, recursively, with everything listed in their
    ``dependent_modules``. If the root module ends up in that selection, only
    ``[modules.root]`` is returned. Otherwise the selection is returned as a
    topologically-sorted list (ties are broken by sorting on module names), with the
    ``hive-thriftserver`` module removed when ``modules.hadoop_version`` is ``"hadoop3.2"``.

    >>> [x.name for x in determine_modules_to_test([modules.root])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.build])]
    ['root']
    >>> [x.name for x in determine_modules_to_test([modules.graphx])]
    ['graphx', 'examples']
    >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
    >>> x # doctest: +NORMALIZE_WHITESPACE
    ...   # doctest: +SKIP
    ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
     'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
    """
    selected = set()
    for changed in changed_modules:
        # Recurse into the modules that depend on this one and merge what comes back.
        selected |= set(determine_modules_to_test(changed.dependent_modules))
    selected |= set(changed_modules)

    # Testing the root module means testing everything, so nothing else needs listing.
    if modules.root in selected:
        return [modules.root]

    # Keep only the dependency edges that point at other selected modules.
    dependency_graph = {}
    for candidate in selected:
        dependency_graph[candidate] = set(candidate.dependencies) & selected
    ordered = toposort_flatten(dependency_graph, sort=True)

    # TODO: Skip hive-thriftserver module for hadoop-3.2. remove this once hadoop-3.2 support it
    if modules.hadoop_version != "hadoop3.2":
        return ordered
    return [m for m in ordered if m.name != "hive-thriftserver"]