def matches_output(self, file_to_check):
    """
    "Are you my mother" test: is ``file_to_check`` one of this code's outputs?

    Intended to be called with the inputs of *other* code in order to find a
    parent dependency. The cleaned path matches when it

    * equals this code's ``path_relative`` (file dependency),
    * is a trailing portion of ``path_absolute`` that starts on a path
      component boundary (file dependency), or
    * is a member of ``output_datasets`` (dataset dependency).

    Args:
        file_to_check (str, path): Relative path of the file or dataset to
            test against this code's outputs.

    Returns:
        bool: ``True`` if the path matches one of the outputs, otherwise
        ``False``. An empty or missing path never matches.

    Example:
        parent = CodeParser(path_relative="a.sas")
        child = CodeParser(path_relative="b.sas")
        for f in child.input_files | child.input_datasets:
            if parent.matches_output(f):
                print(f"{parent.path_relative} produces {f}")
            else:
                print(f"No relation for {f}")
    """
    file_to_check = _clnpth(file_to_check)
    if not file_to_check:
        # An empty suffix would otherwise "match" every absolute path.
        return False

    if file_to_check == self.path_relative:
        return True

    absolute = self.path_absolute
    if absolute.endswith(file_to_check):
        # Only accept the suffix when it starts on a path boundary, so that
        # "a.sas" does not match ".../data.sas".
        separators = ("/", "\\")
        preceding = absolute[: -len(file_to_check)][-1:]
        if preceding == "" or preceding in separators or file_to_check[0] in separators:
            return True

    return file_to_check in self.output_datasets
def analyze_code(self, code):
    """Parse dependency tags out of the text of a code file.

    If the ignore-comment pattern matches and its first group is one of
    ``true``, ``yes`` or ``1`` (case-insensitive, surrounding whitespace
    stripped), nothing is recorded. Otherwise the first group of every match
    of the input-file, input-dataset and output-dataset patterns is cleaned
    and added to ``input_files``, ``input_datasets`` and ``output_datasets``
    respectively.

    Args:
        code (str): Text of the file to be analyzed.

    Returns:
        bool: ``True`` if at least one tag was found, ``False`` otherwise
        (including when the file asks to be ignored).
    """
    skip = self._ignore_comment_regex.search(code)
    if skip is not None and skip.group(1).strip().lower() in ('true', 'yes', '1'):
        return False

    targets = (
        (self._input_file_comment_regex, self.input_files),
        (self._input_dataset_comment_regex, self.input_datasets),
        (self._output_dataset_comment_regex, self.output_datasets),
    )

    has_tags = False
    for pattern, bucket in targets:
        cleaned = [_clnpth(hit.group(1)) for hit in pattern.finditer(code)]
        if cleaned:
            bucket.update(cleaned)
            # At least one tag was present, so this file is worth 'keeping'
            has_tags = True

    if has_tags:
        self._parsed_file = self.path_relative
    return has_tags
def set_path(
    self, path_relative=None, path_absolute=None, project_root=".", code_path_prefix=None, data_path_prefix=None
):
    """Set the file path of the analyzed object and reset its dependencies.

    Clears ``_parsed_file`` and the input/output sets, stores the cleaned
    ``project_root``, ``code_path_prefix`` and ``data_path_prefix``, and then
    derives ``path_relative`` and ``path_absolute``. If neither
    ``path_relative`` nor ``path_absolute`` is given, those two attributes
    are left untouched.

    Arguments:
        path_relative (str, path, None): Path of the code to be analyzed,
            relative to ``os.path.join(project_root, code_path_prefix)``.
            Defaults to None.
        path_absolute (str, path, None): Absolute path of the code to be
            analyzed, from which ``path_relative`` is calculated. A value
            that is not absolute is used as the relative path as-is. Ignored
            if ``path_relative`` is provided. Defaults to None.
        project_root (str, path): Root of the project. A leading ``~`` is
            expanded to the user's home directory and the result is made
            absolute. If your project has multiple roots, I can't help you
            friend.
        code_path_prefix (str, path, None): Relative path to the code
            directory, starting from ``project_root``. Defaults to ``None``,
            meaning all code paths are relative to ``project_root``.
        data_path_prefix (str, path, None): Relative path to the data
            directory, starting from ``project_root``. Defaults to ``None``,
            meaning all data paths are relative to ``project_root``.
    """
    # Reset dependencies when analyzing
    self._parsed_file = None
    self.input_files = set()
    self.input_datasets = set()
    self.output_datasets = set()

    # expanduser() only touches a leading "~" and accepts os.PathLike, so it
    # can be applied unconditionally (a substring test fails on Path objects).
    project_root = os.path.expanduser(project_root)
    self.project_root = _clnpth(os.path.abspath(project_root))
    self.code_path_prefix = _clnpth(code_path_prefix)
    self.data_path_prefix = _clnpth(data_path_prefix)

    # If they provided path_absolute, not path_relative, derive path_relative
    # from it, and recalculate path_absolute below
    if path_relative is None and path_absolute is not None:
        if os.path.isabs(path_absolute):
            path_absolute = os.path.relpath(path_absolute, _pthjoin(self.project_root, self.code_path_prefix))
        path_relative = path_absolute

    if path_relative is not None:
        self.path_relative = _clnpth(path_relative)
        # Recalculate the path_absolute to the code file
        self.path_absolute = _pthjoin(self.project_root, self.code_path_prefix, self.path_relative)
def scan(self):
    """
    Walk the code directory and build a parser object for every matching file.

    The walk starts at ``self.project_root``, joined with
    ``self.code_path_prefix`` when that is not ``None``. Any directory whose
    path contains one of ``self._ignore_folders`` as a component is skipped,
    and the walk does not descend below it.

    Each remaining file is offered to the parsers in ``self.parser_list`` in
    order. The first parser whose ``matches(path)`` is truthy is instantiated
    with the file path; the resulting object is kept when its ``is_parsed``
    is truthy, and no further parsers are tried for that file.

    The path handed to the parser is built from the walked directory, so an
    absolute ``project_root`` yields absolute paths and a relative one
    yields relative paths.

    Each CodeParser object contains four important values:
        path_relative
        input_files
        input_datasets
        output_datasets

    Returns:
        list: The parser objects that were kept (also stored on
        ``self._scanned_code``).
    """
    self._scanned_code = []

    start_dir = self.project_root
    if self.code_path_prefix is not None:
        start_dir = _pthjoin(start_dir, self.code_path_prefix)

    for _dir, _subdirs, _files in os.walk(start_dir):
        # Ignore if the ignore_folders are anywhere in the path
        if set(_dir.split(os.path.sep)) & self._ignore_folders:
            # Everything below would be skipped as well, so prune the walk
            # in place rather than visiting each sub-directory.
            _subdirs[:] = []
            continue

        for _file in _files:
            path = _clnpth(_pthjoin(_dir, _file))
            for parser in self.parser_list:
                if not parser.matches(path):
                    continue
                # Create the parser object with this code path
                res = parser(
                    path_absolute=path,
                    project_root=self.project_root,
                    code_path_prefix=self.code_path_prefix,
                    data_path_prefix=self.data_path_prefix,
                )
                if res.is_parsed:
                    self._scanned_code.append(res)
                break  # break out of parser_list, we found our match

    return self._scanned_code
def matches_input(self, file_to_check):
    """Asks "Is this one of your inputs", for testing if this is your 'child'.

    The cleaned path is looked up in this code's ``input_files`` (file
    dependency) and ``input_datasets`` (dataset dependency).

    Args:
        file_to_check (str, path): File or dataset to check against this
            code's inputs.

    Returns:
        (bool): ``True`` if file_to_check is in the input files or datasets.

    Example:
        parent = CodeParser(path_relative="a.sas")
        child = CodeParser(path_relative="b.sas")
        for f in parent.output_datasets:
            if child.matches_input(f):
                print(f"{child.path_relative} consumes {f}")
            else:
                print(f"No relation for {f}")
    """
    candidate = _clnpth(file_to_check)
    if candidate in self.input_files:
        return True
    return candidate in self.input_datasets