Esempio n. 1
0
def exists(config):
    """
    Check whether the .wily/ directory exists.

    :param config: The configuration
    :type  config: :class:`wily.config.WilyConfig`

    :return: Whether the .wily directory exists
    :rtype: ``boolean``
    """
    exists = (
        pathlib.Path(config.cache_path).exists()
        and pathlib.Path(config.cache_path).is_dir()
    )
    if not exists:
        return False
    index_path = pathlib.Path(config.cache_path) / "index.json"
    if index_path.exists():
        with open(index_path, "r") as out:
            index = json.load(out)
        if index["version"] != __version__:
            # TODO: Inspect the versions properly.
            logger.warning(
                "Wily cache is old, you may incur errors until you rebuild the cache."
            )
    else:
        logger.warning(
            "Wily cache was not versioned, you may incur errors until you rebuild the cache."
        )
        create_index(config)
    return True
Esempio n. 2
0
    def run(self, module, options):
        """
        Run the operator.

        :param module: The target module path.
        :type  module: ``str``

        :param options: Any runtime options.
        :type  options: ``dict``

        :return: The operator results.
        :rtype: ``dict``
        """
        logger.debug("Running halstead harvester")
        results = {}
        for filename, details in dict(self.harvester.results).items():
            results[filename] = {"detailed": {}, "total": {}}
            for instance in details:
                if isinstance(instance, list):
                    for item in instance:
                        function, report = item
                        results[filename]["detailed"][
                            function] = self._report_to_dict(report)
                else:
                    if isinstance(instance, str) and instance == "error":
                        logger.warning(
                            f"Failed to run Halstead harvester on {filename} : {details['error']}"
                        )
                        continue
                    results[filename]["total"] = self._report_to_dict(instance)
        return results
Esempio n. 3
0
def get_default_metrics(config):
    """
    Get the default metrics for a configuration.

    :param config: The configuration
    :type  config: :class:`wily.config.WilyConfig`

    :return: Return the list of default metrics in this index
    :rtype: ``list`` of ``str``
    """
    archivers = list_archivers(config)
    default_metrics = []

    for archiver in archivers:
        index = get_archiver_index(config, archiver)

        if len(index) == 0:
            logger.warning(
                _("No records found in the index, no metrics available"))
            return []

        operators = index[0]["operators"]
        for operator in operators:
            o = resolve_operator(operator)
            if o.cls.default_metric_index is not None:
                metric = o.cls.metrics[o.cls.default_metric_index]
                default_metrics.append("{0}.{1}".format(
                    o.cls.name, metric.name))
    return default_metrics
Esempio n. 4
0
    def run(self, module, options):
        """
        Run the operator.

        :param module: The target module path.
        :type  module: ``str``

        :param options: Any runtime options.
        :type  options: ``dict``

        :return: The operator results.
        :rtype: ``dict``
        """
        logger.debug("Running CC harvester")
        results = {}
        for filename, details in dict(self.harvester.results).items():
            results[filename] = {}
            total = 0  # running CC total
            for instance in details:
                if isinstance(instance, Class):
                    i = self._dict_from_class(instance)
                elif isinstance(instance, Function):
                    i = self._dict_from_function(instance)
                else:
                    if isinstance(instance, str) and instance == "error":
                        logger.warning(
                            f"Failed to run CC harvester on {filename} : {details['error']}"
                        )
                        continue
                    else:
                        logger.warning(
                            f"Unexpected result from Radon : {instance} of {type(instance)}. Please report on Github."
                        )
                        continue
                results[filename][i["fullname"]] = i
                del i["fullname"]
                total += i["complexity"]
            results[filename]["complexity"] = total
        return results
Esempio n. 5
0
def build(config, archiver, operators):
    """
    Build the history given a archiver and collection of operators.

    :param config: The wily configuration
    :type  config: :namedtuple:`wily.config.WilyConfig`

    :param archiver: The archiver to use
    :type  archiver: :namedtuple:`wily.archivers.Archiver`

    :param operators: The list of operators to execute
    :type operators: `list` of :namedtuple:`wily.operators.Operator`
    """
    try:
        logger.debug(f"Using {archiver.name} archiver module")
        archiver = archiver.cls(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except InvalidGitRepositoryError:
        # TODO: This logic shouldn't really be here (SoC)
        logger.info(f"Defaulting back to the filesystem archiver, not a valid git repo")
        archiver = FilesystemArchiver(config)
        revisions = archiver.revisions(config.path, config.max_revisions)
    except Exception as e:
        if hasattr(e, "message"):
            logger.error(f"Failed to setup archiver: '{e.message}'")
        else:
            logger.error(f"Failed to setup archiver: '{type(e)} - {e}'")
        exit(1)

    state = State(config, archiver=archiver)
    # Check for existence of cache, else provision
    state.ensure_exists()

    index = state.index[archiver.name]

    # remove existing revisions from the list
    revisions = [revision for revision in revisions if revision not in index][::-1]

    logger.info(
        f"Found {len(revisions)} revisions from '{archiver.name}' archiver in '{config.path}'."
    )

    _op_desc = ",".join([operator.name for operator in operators])
    logger.info(f"Running operators - {_op_desc}")

    bar = Bar("Processing", max=len(revisions) * len(operators))
    state.operators = operators

    # Index all files the first time, only scan changes afterward
    seed = True
    try:
        with multiprocessing.Pool(processes=len(operators)) as pool:
            for revision in revisions:
                # Checkout target revision
                archiver.checkout(revision, config.checkout_options)
                stats = {"operator_data": {}}

                # TODO : Check that changed files are children of the targets
                targets = [
                    str(pathlib.Path(config.path) / pathlib.Path(file))
                    for file in revision.added_files + revision.modified_files
                    # if any([True for target in config.targets if
                    #         target in pathlib.Path(pathlib.Path(config.path) / pathlib.Path(file)).parents])
                ]

                # Run each operator as a separate process
                data = pool.starmap(
                    run_operator,
                    [(operator, revision, config, targets) for operator in operators],
                )
                # data is a list of tuples, where for each operator, it is a tuple of length 2,
                operator_data_len = 2
                # second element in the tuple, i.e data[i][1]) has the collected data
                for i in range(0, len(operators)):
                    if (
                        i < len(data)
                        and len(data[i]) >= operator_data_len
                        and len(data[i][1]) == 0
                    ):
                        logger.warning(
                            f"In revision {revision.key}, for operator {operators[i].name}: No data collected"
                        )

                # Map the data back into a dictionary
                for operator_name, result in data:
                    # find all unique directories in the results
                    indices = set(result.keys())

                    # For a seed run, there is no previous change set, so use current
                    if seed:
                        prev_indices = indices

                    # Copy the ir from any unchanged files from the prev revision
                    if not seed:
                        missing_indices = set(revision.tracked_files) - indices
                        # TODO: Check existence of file path.
                        for missing in missing_indices:
                            # Don't copy aggregate keys as their values may have changed
                            if missing in revision.tracked_dirs:
                                continue
                            # previous index may not have that operator
                            if operator_name not in prev_stats["operator_data"]:
                                continue
                            # previous index may not have file either
                            if (
                                missing
                                not in prev_stats["operator_data"][operator_name]
                            ):
                                continue
                            result[missing] = prev_stats["operator_data"][
                                operator_name
                            ][missing]
                        for deleted in revision.deleted_files:
                            result.pop(deleted, None)

                    # Aggregate metrics across all root paths using the aggregate function in the metric
                    # Note assumption is that nested dirs are listed after parent..
                    for root in revision.tracked_dirs:
                        # find all matching entries recursively
                        aggregates = [
                            path for path in result.keys() if path.startswith(root)
                        ]

                        result[str(root)] = {"total": {}}
                        # aggregate values
                        for metric in resolve_operator(operator_name).cls.metrics:
                            func = metric.aggregate
                            values = [
                                result[aggregate]["total"][metric.name]
                                for aggregate in aggregates
                                if aggregate in result
                                and metric.name in result[aggregate]["total"]
                            ]
                            if len(values) > 0:
                                result[str(root)]["total"][metric.name] = func(values)

                    prev_indices = set(result.keys())
                    stats["operator_data"][operator_name] = result
                    bar.next()

                prev_stats = stats
                seed = False
                ir = index.add(revision, operators=operators)
                ir.store(config, archiver, stats)
        index.save()
        bar.finish()
    except Exception as e:
        logger.error(f"Failed to build cache: {type(e)}: '{e}'")
        raise e
    finally:
        # Reset the archive after every run back to the head of the branch
        archiver.finish()