Exemple #1
0
def get_default_metrics(config):
    """
    Collect the default metric of every operator recorded in the index.

    :param config: The configuration
    :type  config: :class:`wily.config.WilyConfig`

    :return: The default metrics, each formatted as
        ``"<operator name>.<metric name>"``. An empty list is returned as
        soon as any archiver turns out to have an empty index.
    :rtype: ``list`` of ``str``
    """
    collected = []

    for archiver in list_archivers(config):
        index = get_archiver_index(config, archiver)

        if len(index) == 0:
            logger.warning("No records found in the index, no metrics available")
            return []

        # Only the first index entry is consulted for the operator names.
        for operator_name in index[0]["operators"]:
            operator_cls = resolve_operator(operator_name).cls
            position = operator_cls.default_metric_index
            if position is None:
                # This operator does not declare a default metric.
                continue
            default_metric = operator_cls.metrics[position]
            collected.append(f"{operator_cls.name}.{default_metric.name}")
    return collected
Exemple #2
0
def diff(config, files, metrics, changes_only=True, detail=True):
    """
    Show the differences in metrics for each of the files.

    Every operator referenced by ``metrics`` is run against the working copy
    (from within ``config.path``) and the fresh values are compared with the
    values stored for the first revision of the default archiver's index.
    The result is printed as a table; nothing is returned.

    :param config: The wily configuration (``config.targets`` is overwritten
        with ``files``).
    :type  config: :namedtuple:`wily.config.WilyConfig`

    :param files: The files to compare.
    :type  files: ``list`` of ``str``

    :param metrics: The metrics to measure, as ``"<operator>.<metric>"``.
    :type  metrics: ``list`` of ``str``

    :param changes_only: Only include changes files in output.
    :type  changes_only: ``bool``

    :param detail: Show details (function-level)
    :type  detail: ``bool``
    """
    config.targets = files
    files = list(files)
    state = State(config)
    last_revision = state.index[state.default_archiver].revisions[0]

    # Convert the list of metrics to a list of metric instances
    operators = {resolve_operator(metric.split(".")[0]) for metric in metrics}
    metrics = [(metric.split(".")[0], resolve_metric(metric)) for metric in metrics]
    data = {}
    results = []

    # Instantiate each distinct operator once
    _operators = [operator.cls(config) for operator in operators]

    # Operators are run from inside config.path; always restore the previous
    # working directory, even when an operator raises.
    cwd = os.getcwd()
    os.chdir(config.path)
    try:
        for operator in _operators:
            logger.debug(f"Running {operator.name} operator")
            data[operator.name] = operator.run(None, config)
    finally:
        os.chdir(cwd)

    # Add one "<file>:<name>" row per detailed (dict-valued) entry reported by
    # object-level operators.
    # NOTE(review): entries are appended once per requested metric, so two
    # metrics of the same object-level operator appear to add the same rows
    # twice -- confirm whether that is intended.
    extra = []
    for operator, metric in metrics:
        if detail and resolve_operator(operator).level == OperatorLevel.Object:
            for file in files:
                try:
                    extra.extend(
                        [
                            f"{file}:{k}"
                            for k in data[operator][file]["detailed"].keys()
                            if k != metric.name
                            and isinstance(data[operator][file]["detailed"][k], dict)
                        ]
                    )
                except KeyError:
                    logger.debug(f"File {file} not in cache")
                    logger.debug("Cache follows -- ")
                    logger.debug(data[operator])
    files.extend(extra)
    logger.debug(files)

    # Build one table row per file: "<stored> -> <new>" for every metric.
    for file in files:
        metrics_data = []
        has_changes = False
        for operator, metric in metrics:
            # "-" marks a value that is missing on either side.
            try:
                current = last_revision.get(
                    config, state.default_archiver, operator, file, metric.name
                )
            except KeyError:
                current = "-"
            try:
                new = get_metric(data, operator, file, metric.name)
            except KeyError:
                new = "-"
            if new != current:
                has_changes = True
            if metric.type in (int, float) and new != "-" and current != "-":
                # Numeric values: colour the new value depending on whether it
                # went down (BAD_COLORS) or up (GOOD_COLORS) for this measure.
                if current > new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, BAD_COLORS[metric.measure]
                        )
                    )
                elif current < new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, GOOD_COLORS[metric.measure]
                        )
                    )
                else:
                    metrics_data.append("{0:n} -> {1:n}".format(current, new))
            else:
                if current == "-" and new == "-":
                    metrics_data.append("-")
                else:
                    metrics_data.append("{0} -> {1}".format(current, new))
        if has_changes or not changes_only:
            results.append((file, *metrics_data))
        else:
            logger.debug(metrics_data)

    descriptions = [metric.description for operator, metric in metrics]
    headers = ("File", *descriptions)
    # Print nothing at all when no row qualified.
    if results:
        print(
            tabulate.tabulate(
                headers=headers, tabular_data=results, tablefmt=DEFAULT_GRID_STYLE
            )
        )
Exemple #3
0
def diff(config,
         files,
         metrics,
         changes_only=True,
         detail=True,
         revision=None):
    """
    Show the differences in metrics for each of the files.

    Every operator referenced by ``metrics`` is run against the working copy
    and the fresh values are compared with the values stored in the cache for
    the target revision. The result is printed as a table; nothing is
    returned. The process exits with status 1 when ``revision`` resolves to a
    key that is not in the cache.

    :param config: The wily configuration (``config.targets`` is overwritten
        with ``files``).
    :type  config: :namedtuple:`wily.config.WilyConfig`

    :param files: The files to compare.
    :type  files: ``list`` of ``str``

    :param metrics: The metrics to measure, as ``"<operator>.<metric>"``.
    :type  metrics: ``list`` of ``str``

    :param changes_only: Only include changes files in output.
    :type  changes_only: ``bool``

    :param detail: Show details (function-level)
    :type  detail: ``bool``

    :param revision: Compare with specific revision; when falsy the index's
        ``last_revision`` is used.
    :type  revision: ``str``
    """
    config.targets = files
    files = list(files)
    state = State(config)

    # Resolve target paths when the cli has specified --path
    if config.path != DEFAULT_PATH:
        targets = [str(Path(config.path) / Path(file)) for file in files]
    else:
        targets = files

    # Expand directories to paths
    files = [
        os.path.relpath(fn, config.path)
        for fn in radon.cli.harvest.iter_filenames(targets)
    ]
    logger.debug(f"Targeting - {files}")

    if not revision:
        target_revision = state.index[state.default_archiver].last_revision
    else:
        rev = resolve_archiver(
            state.default_archiver).cls(config).find(revision)
        logger.debug(f"Resolved {revision} to {rev.key} ({rev.message})")
        try:
            target_revision = state.index[state.default_archiver][rev.key]
        except KeyError:
            logger.error(
                f"Revision {revision} is not in the cache, make sure you have run wily build."
            )
            exit(1)

    logger.info(
        f"Comparing current with {format_revision(target_revision.revision.key)} by {target_revision.revision.author_name} on {format_date(target_revision.revision.date)}."
    )

    # Convert the list of metrics to a list of metric instances
    operators = {resolve_operator(metric.split(".")[0]) for metric in metrics}
    metrics = [(metric.split(".")[0], resolve_metric(metric))
               for metric in metrics]
    results = []

    # Run every distinct operator in its own worker process. A pool cannot be
    # created with zero processes (ValueError), so skip it when no metric was
    # requested.
    if operators:
        with multiprocessing.Pool(processes=len(operators)) as pool:
            operator_exec_out = pool.starmap(run_operator,
                                             [(operator, None, config, targets)
                                              for operator in operators])
    else:
        operator_exec_out = []
    # Each worker result is an (operator name, result) pair.
    data = {}
    for operator_name, result in operator_exec_out:
        data[operator_name] = result

    # Add one "<file>:<name>" row per detailed (dict-valued) entry reported by
    # object-level operators.
    extra = []
    for operator, metric in metrics:
        if detail and resolve_operator(operator).level == OperatorLevel.Object:
            for file in files:
                try:
                    extra.extend([
                        f"{file}:{k}"
                        for k in data[operator][file]["detailed"].keys()
                        if k != metric.name and isinstance(
                            data[operator][file]["detailed"][k], dict)
                    ])
                except KeyError:
                    logger.debug(f"File {file} not in cache")
                    logger.debug("Cache follows -- ")
                    logger.debug(data[operator])
    files.extend(extra)
    logger.debug(files)

    # Build one table row per file: "<stored> -> <new>" for every metric.
    for file in files:
        metrics_data = []
        has_changes = False
        for operator, metric in metrics:
            # "-" marks a value that is missing on either side.
            try:
                current = target_revision.get(config, state.default_archiver,
                                              operator, file, metric.name)
            except KeyError:
                current = "-"
            try:
                new = get_metric(data, operator, file, metric.name)
            except KeyError:
                new = "-"
            if new != current:
                has_changes = True
            if metric.type in (int, float) and new != "-" and current != "-":
                # Numeric values: colour the new value depending on whether it
                # went down (BAD_COLORS) or up (GOOD_COLORS) for this measure.
                if current > new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, BAD_COLORS[metric.measure]))
                elif current < new:
                    metrics_data.append(
                        "{0:n} -> \u001b[{2}m{1:n}\u001b[0m".format(
                            current, new, GOOD_COLORS[metric.measure]))
                else:
                    metrics_data.append("{0:n} -> {1:n}".format(current, new))
            else:
                if current == "-" and new == "-":
                    metrics_data.append("-")
                else:
                    metrics_data.append("{0} -> {1}".format(current, new))
        if has_changes or not changes_only:
            results.append((file, *metrics_data))
        else:
            logger.debug(metrics_data)

    descriptions = [metric.description for operator, metric in metrics]
    headers = ("File", *descriptions)
    # Print nothing at all when no row qualified.
    if results:
        print(
            tabulate.tabulate(headers=headers,
                              tabular_data=results,
                              tablefmt=DEFAULT_GRID_STYLE))
Exemple #4
0
def build(config, archiver, operators):
    """
    Build the history given a archiver and collection of operators.

    Every revision that is not yet in the index is checked out, all operators
    are run on it in a process pool, per-directory aggregates are computed
    and the result is stored. The first revision processed is analysed over
    ``config.targets``; later revisions only analyse ``revision.files`` and
    copy the entries of files that were not re-analysed from the previous
    revision.

    The process exits with status 1 when the archiver cannot be set up (other
    than by falling back to the filesystem archiver for a non-git directory).

    :param config: The wily configuration
    :type  config: :namedtuple:`wily.config.WilyConfig`

    :param archiver: The archiver to use
    :type  archiver: :namedtuple:`wily.archivers.Archiver`

    :param operators: The list of operators to execute
    :type operators: `list` of :namedtuple:`wily.operators.Operator`

    :raises Exception: Any error raised while processing revisions is logged
        and re-raised; ``archiver.finish()`` is called in every case.
    """
    try:
        logger.debug(f"Using {archiver.name} archiver module")
        archiver = archiver.cls(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except InvalidGitRepositoryError:
        # TODO: This logic shouldn't really be here (SoC)
        logger.info("Defaulting back to the filesystem archiver, not a valid git repo")
        archiver = FilesystemArchiver(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except Exception as e:
        if hasattr(e, "message"):
            logger.error(f"Failed to setup archiver: '{e.message}'")
        else:
            logger.error(f"Failed to setup archiver: '{type(e)} - {e}'")
        exit(1)

    state = State(config, archiver=archiver)
    # Check for existence of cache, else provision
    state.ensure_exists()

    index = state.index[archiver.name]

    # Remove existing revisions from the list and process the remainder in the
    # reverse of the order the archiver returned them.
    revisions = [revision for revision in revisions if revision not in index][::-1]

    logger.info(
        f"Found {len(revisions)} revisions from '{archiver.name}' archiver in '{config.path}'."
    )

    _op_desc = ",".join([operator.name for operator in operators])
    logger.info(f"Running operators - {_op_desc}")

    bar = Bar("Processing", max=len(revisions) * len(operators))
    state.operators = operators

    # Index all files the first time, only scan changes afterward
    seed = True
    # State carried over from the previously processed result; assigned
    # during the seed run before it is ever read.
    prev_roots = None
    prev_indices = None
    prev_stats = None
    try:
        with multiprocessing.Pool(processes=len(operators)) as pool:
            for revision in revisions:
                # Checkout target revision
                archiver.checkout(revision, config.checkout_options)
                stats = {"operator_data": {}}

                if seed:
                    targets = config.targets
                else:  # Only target changed files
                    # TODO : Check that changed files are children of the targets
                    targets = [
                        str(pathlib.Path(config.path) / pathlib.Path(file))
                        for file in revision.files
                    ]

                # Run each operator as a separate process
                data = pool.starmap(
                    run_operator,
                    [(operator, revision, config, targets) for operator in operators],
                )
                # data holds one tuple per operator; its second element is the
                # collected data. Warn about operators that collected nothing.
                operator_data_len = 2
                for operator, operator_output in zip(operators, data):
                    if (
                        len(operator_output) >= operator_data_len
                        and len(operator_output[1]) == 0
                    ):
                        logger.warning(f"In revision {revision.key}, for operator {operator.name}: No data collected")

                # Map the data back into a dictionary
                # NOTE(review): prev_roots / prev_indices are updated after
                # every operator, so within one revision the second operator
                # sees the first operator's values -- confirm this is intended.
                for operator_name, result in data:
                    # find all unique directories in the results
                    roots = {pathlib.Path(entry).parents[0] for entry in result.keys()}
                    indices = set(result.keys())

                    # For a seed run, there is no previous change set, so use current
                    if seed:
                        prev_roots = roots
                        prev_indices = indices
                    roots = prev_roots | roots

                    # Copy the ir from any unchanged files from the prev revision
                    if not seed:
                        missing_indices = prev_indices - indices
                        # TODO: Check existence of file path.
                        for missing in missing_indices:
                            # Don't copy aggregate keys as their values may have changed
                            if missing in roots:
                                continue
                            # previous index may not have that operator
                            if operator_name not in prev_stats["operator_data"]:
                                continue
                            # previous index may not have file either
                            if (
                                missing
                                not in prev_stats["operator_data"][operator_name]
                            ):
                                continue
                            result[missing] = prev_stats["operator_data"][
                                operator_name
                            ][missing]

                    # Aggregate metrics across all root paths using the aggregate function in the metric
                    for root in roots:
                        # find all matching entries recursively
                        aggregates = [
                            path
                            for path in result.keys()
                            if root in pathlib.Path(path).parents
                        ]
                        result[str(root)] = {"total": {}}
                        # aggregate values
                        for metric in resolve_operator(operator_name).cls.metrics:
                            func = metric.aggregate
                            values = [
                                result[aggregate]["total"][metric.name]
                                for aggregate in aggregates
                                if aggregate in result
                                and metric.name in result[aggregate]["total"]
                            ]
                            if len(values) > 0:
                                result[str(root)]["total"][metric.name] = func(values)

                    prev_indices = set(result.keys())
                    prev_roots = roots
                    stats["operator_data"][operator_name] = result
                    bar.next()

                prev_stats = stats
                seed = False
                ir = index.add(revision, operators=operators)
                ir.store(config, archiver, stats)
        index.save()
        bar.finish()
    except Exception as e:
        logger.error(f"Failed to build cache: {type(e)}: '{e}'")
        raise
    finally:
        # Reset the archive after every run back to the head of the branch
        archiver.finish()
Exemple #5
0
def build(config, archiver, operators):
    """
    Build the history given a archiver and collection of operators.

    Each revision that is not already in the index is checked out, every
    operator is run on it in a process pool, the results are aggregated per
    directory and the outcome is stored in the index. The process exits with
    status 1 when the archiver cannot be set up.

    :param config: The wily configuration
    :type  config: :namedtuple:`wily.config.WilyConfig`

    :param archiver: The archiver to use
    :type  archiver: :namedtuple:`wily.archivers.Archiver`

    :param operators: The list of operators to execute
    :type operators: `list` of :namedtuple:`wily.operators.Operator`
    """
    try:
        logger.debug(f"Using {archiver.name} archiver module")
        archiver = archiver.cls(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except InvalidGitRepositoryError:
        # TODO: This logic shouldn't really be here (SoC)
        logger.info(
            f"Defaulting back to the filesystem archiver, not a valid git repo"
        )
        archiver = FilesystemArchiver(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except Exception as e:
        if hasattr(e, "message"):
            logger.error(f"Failed to setup archiver: '{e.message}'")
        else:
            logger.error(f"Failed to setup archiver: '{type(e)} - {e}'")
        exit(1)

    state = State(config, archiver=archiver)
    # Provision the cache if it does not exist yet
    state.ensure_exists()

    index = state.index[archiver.name]

    # Keep only the revisions that have not been indexed yet
    revisions = [candidate for candidate in revisions if candidate not in index]

    logger.info(
        f"Found {len(revisions)} revisions from '{archiver.name}' archiver in '{config.path}'."
    )

    _op_desc = ",".join(op.name for op in operators)
    logger.info(f"Running operators - {_op_desc}")

    bar = Bar("Processing", max=len(revisions) * len(operators))
    state.operators = operators
    try:
        with multiprocessing.Pool(processes=len(operators)) as pool:
            for revision in revisions:
                # Checkout target revision
                archiver.checkout(revision, config.checkout_options)
                stats = {"operator_data": {}}

                # Run each operator as a separate process
                jobs = [(op, revision, config) for op in operators]
                data = pool.starmap(run_operator, jobs)

                # Map the data back into a dictionary
                for operator_name, result in data:
                    # Unique parent directories of the result keys, kept in
                    # order of first appearance.
                    roots = list(
                        dict.fromkeys(
                            pathlib.Path(entry).parents[0] for entry in result
                        )
                    )

                    # Aggregate values to directories
                    for root in roots:
                        # every entry located anywhere below this directory
                        members = [
                            key for key in result
                            if root in pathlib.Path(key).parents
                        ]
                        root_totals = {}
                        result[str(root)] = {"total": root_totals}
                        for metric in resolve_operator(
                                operator_name).cls.metrics:
                            combine = metric.aggregate
                            values = [
                                result[member]["total"][metric.name]
                                for member in members
                                if member in result
                                and metric.name in result[member]["total"]
                            ]
                            # Metrics without any value get no aggregate entry
                            if values:
                                root_totals[metric.name] = combine(values)

                    stats["operator_data"][operator_name] = result
                    bar.next()

                ir = index.add(revision, operators=operators)
                ir.store(config, archiver, stats)
        index.save()
        bar.finish()
    except Exception as e:
        logger.error(f"Failed to build cache: '{e}'")
        raise e
    finally:
        # Reset the archive after every run back to the head of the branch
        archiver.finish()