Exemple #1
0
    def matches_output(self, file_to_check):
        """ "Are you my mother" test.

        Reports whether the file matches one of the 'outputs' of this code.
        Should be tested against inputs of other code to find a parent dependency.

        The cleaned ``file_to_check`` matches when it is any of:

        * equal to this code's ``path_relative``;
        * a trailing portion of ``path_absolute`` that starts on a path-component
          boundary (so ``"a.sas"`` matches ``".../code/a.sas"`` but not
          ``".../code/data.sas"``);
        * a member of ``output_datasets``.

        Args:
            file_to_check (str, path): File to check against this one's 'outputs', the relative path.

        Returns:
            bool: ``True`` if the file matches this code's path or one of its output datasets,
            otherwise ``False``. An empty path never matches.

        Example:
            parent = CodeParser(path_relative="a.sas")
            child = CodeParser(path_relative="b.sas")

            for f in child.input_files | child.input_datasets:
                if parent.matches_output(f):
                    print(f"{parent.path_relative} produces {f}")
                else:
                    print(f"No relation for {f}")
        """
        file_to_check = _clnpth(file_to_check)

        # An empty suffix is a suffix of every string; treat "nothing to check" as no match.
        if not file_to_check:
            return False

        if file_to_check == self.path_relative:
            return True

        if self.path_absolute.endswith(file_to_check):
            # Only accept the suffix when it begins at a path-component boundary,
            # otherwise "a.sas" would wrongly match ".../data.sas".
            separators = ("/", "\\")
            head = self.path_absolute[: len(self.path_absolute) - len(file_to_check)]
            if not head or head.endswith(separators) or file_to_check.startswith(separators):
                return True

        return file_to_check in self.output_datasets
Exemple #2
0
    def analyze_code(self, code):
        """Collect dependency tags from the text of a code file.

        If the ignore pattern matches and its first group (stripped, lower-cased)
        is one of ``'true'``, ``'yes'`` or ``'1'``, nothing is collected.
        Otherwise every match of the input-file, input-dataset and output-dataset
        patterns has its first group cleaned and added to ``input_files``,
        ``input_datasets`` and ``output_datasets`` respectively. When at least one
        tag was found, ``_parsed_file`` is set to ``path_relative``.

        Args:
            code (str): Text of the file to be analyzed.

        Returns:
            bool: Returns True if the code had tags to parse.
        """
        ignore_match = self._ignore_comment_regex.search(code)
        if ignore_match is not None:
            flag = ignore_match.group(1).strip().lower()
            if flag in ('true', 'yes', '1'):
                # The file asked to be left out of the analysis
                return False

        # Each tag pattern is paired with the set that collects its matches
        targets = (
            (self._input_file_comment_regex, self.input_files),
            (self._input_dataset_comment_regex, self.input_datasets),
            (self._output_dataset_comment_regex, self.output_datasets),
        )

        tag_count = 0
        for pattern, bucket in targets:
            for hit in pattern.finditer(code):
                bucket.add(_clnpth(hit.group(1)))
                tag_count += 1

        if tag_count == 0:
            return False

        # We found something, this is a file worth 'keeping'
        self._parsed_file = self.path_relative
        return True
Exemple #3
0
    def set_path(
        self, path_relative=None, path_absolute=None, project_root=".", code_path_prefix=None, data_path_prefix=None
    ):
        """Set the file path of the analyzed object, and calculate its position relative to the code directory.

        Any previously collected dependencies (``input_files``, ``input_datasets``, ``output_datasets``) and the
        ``_parsed_file`` marker are reset. If neither ``path_relative`` nor ``path_absolute`` is given, only the
        project settings are updated and ``self.path_relative`` / ``self.path_absolute`` are left untouched.

        Arguments:
            path_relative (str, path, None): Path of the code to be analyzed, relative to
                ``os.path.join(project_root, code_path_prefix)``. Defaults to None.
            path_absolute (str, path, None): Absolute path of the code to be analyzed, from which ``path_relative`` is
                calculated. A non-absolute value is used as the relative path as-is. Ignored if ``path_relative`` is
                provided. Defaults to None.
            code_path_prefix (str, path, None): Relative path to the code directory, starting from ``project_root``.
                Defaults to ``None``, meaning all code paths are relative to ``project_root``.
            data_path_prefix (str, path, None): Relative path to the data directory, starting from ``project_root``.
                Defaults to ``None``, meaning all data paths are relative to ``project_root``.
            project_root (str, path): Root of the project. A leading ``~`` is expanded to the user's home directory.
                If your project has multiple roots, I can't help you friend.
        """
        # Reset dependencies when analyzing
        self._parsed_file = None
        self.input_files = set()
        self.input_datasets = set()
        self.output_datasets = set()

        # expanduser accepts str and os.PathLike alike and leaves paths without a leading "~" unchanged,
        # so it is safe to call unconditionally (a substring test would fail on pathlib.Path objects).
        project_root = os.path.expanduser(project_root)
        self.project_root = _clnpth(os.path.abspath(project_root))

        self.code_path_prefix = _clnpth(code_path_prefix)
        self.data_path_prefix = _clnpth(data_path_prefix)

        # If they provided path_absolute, not path_relative, derive path_relative from it;
        # path_absolute itself is recalculated below.
        if path_relative is None and path_absolute is not None:
            if os.path.isabs(path_absolute):
                code_root = _pthjoin(self.project_root, self.code_path_prefix)
                path_relative = os.path.relpath(path_absolute, code_root)
            else:
                # Not actually absolute: treat it as already relative to the code directory
                path_relative = path_absolute

        if path_relative is not None:
            self.path_relative = _clnpth(path_relative)
            # Recalculate the path_absolute to the code file
            self.path_absolute = _pthjoin(self.project_root, self.code_path_prefix, self.path_relative)
Exemple #4
0
    def scan(self):
        """
        Scan through the directory tree starting from ``self.project_root`` (joined with
        ``self.code_path_prefix`` when that is set), building a parser object for each file
        accepted by one of the classes in ``self.parser_list``.

        For every file, the first parser whose ``matches(path)`` is truthy is instantiated with
        the file's cleaned path as ``path_absolute``; later parsers are not tried for that file.
        Only results whose ``is_parsed`` is truthy are kept.

        Directories whose path contains any name from ``self._ignore_folders`` are skipped,
        and the walk does not descend into them.

        Each CodeParser object contains four important values:

            path_relative
            input_files
            input_datasets
            output_datasets

        Returns:
            list: The kept parser objects, also stored in ``self._scanned_code``.
        """
        self._scanned_code = []

        start_dir = self.project_root
        if self.code_path_prefix is not None:
            start_dir = _pthjoin(start_dir, self.code_path_prefix)

        for _dir, _subdirs, _files in os.walk(start_dir):
            # Ignore if the ignore_folders are anywhere in the path
            if not set(_dir.split(os.path.sep)).isdisjoint(self._ignore_folders):
                # Everything below would be skipped for the same reason; don't walk it
                _subdirs[:] = []
                continue

            # Prune ignored sub-directories in place so os.walk never descends into them
            _subdirs[:] = [_sub for _sub in _subdirs if _sub not in self._ignore_folders]

            for _file in _files:
                path = _clnpth(_pthjoin(_dir, _file))
                for parser in self.parser_list:
                    if not parser.matches(path):
                        continue
                    # Create the parser object with this code path
                    res = parser(
                        path_absolute=path,
                        project_root=self.project_root,
                        code_path_prefix=self.code_path_prefix,
                        data_path_prefix=self.data_path_prefix,
                    )

                    if res.is_parsed:
                        self._scanned_code.append(res)

                    break  # break out of parser_list, we found our match

        return self._scanned_code
Exemple #5
0
    def matches_input(self, file_to_check):
        """Asks "Is this one of your inputs", for testing if this is your 'child'.

        The path is cleaned and then looked up in this code's ``input_files``
        (file dependencies) and ``input_datasets`` (dataset dependencies).

        Args:
            file_to_check (str, path): File to check against this one's inputs.

        Returns:
            (bool): Returns ``True`` if file_to_check is in the input files or datasets.

        Example:
            parent = CodeParser(path_relative="a.sas")
            child = CodeParser(path_relative="b.sas")

            for f in parent.output_datasets:
                if child.matches_input(f):
                    print(f"{child.path_relative} reads {f}")
                else:
                    print(f"No relation for {f}")
        """
        candidate = _clnpth(file_to_check)
        if candidate in self.input_files:
            return True
        return candidate in self.input_datasets