Beispiel #1
0
 def _substring_occurrences(
     cls, in_str: str, substrings: Iterable[str]
 ) -> Dict[str, List[int]]:
     """Map each given substring to the list of its `findall` hits in `in_str`.

     Args:
         in_str: The string which is searched.
         substrings: The substrings to look for. If a substring is given
             more than once, it still appears only once in the result.

     Returns:
         A dict keyed by substring, where each value is the materialised
         list of whatever `findall(substring, in_str)` yields.
     """
     return {needle: list(findall(needle, in_str)) for needle in substrings}
    def fix_string(self) -> Tuple[Any, bool]:
        """Obtain the changes to a path as a string.

        We use the source mapping features of TemplatedFile
        to generate a list of "patches" which cover the non
        templated parts of the file and refer back to the locations
        in the original file.

        NB: This is MUCH FASTER than the original approach
        using difflib in pre 0.4.0.

        There is an important distinction here between Slices and
        Segments. A Slice is a portion of a file which is determined
        by the templater based on which portions of the source file
        are templated or not, and therefore before Lexing and so is
        completely dialect agnostic. A Segment is determined by the
        Lexer from portions of strings after templating.

        Returns:
            A tuple of ``(fixed_source, changed)`` where ``fixed_source``
            is the source string with all accepted patches applied and
            ``changed`` is True when it differs from the original source.
        """
        linter_logger.debug("Original Tree: %r",
                            self.templated_file.templated_str)
        assert self.tree
        linter_logger.debug("Fixed Tree: %r", self.tree.raw)

        # The sliced file is contiguous in the TEMPLATED space.
        # NB: It has gaps and repeats in the source space.
        # It's also not the FIXED file either.
        linter_logger.debug("### Templated File.")
        for idx, file_slice in enumerate(self.templated_file.sliced_file):
            t_str = self.templated_file.templated_str[
                file_slice.templated_slice]
            s_str = self.templated_file.source_str[file_slice.source_slice]
            if t_str == s_str:
                linter_logger.debug("    File slice: %s %r [invariant]", idx,
                                    file_slice)
            else:
                linter_logger.debug("    File slice: %s %r", idx, file_slice)
                linter_logger.debug("    \t\t\ttemplated: %r\tsource: %r",
                                    t_str, s_str)

        original_source = self.templated_file.source_str

        # Make sure no patches overlap and divide up the source file into slices.
        # Any Template tags in the source file are off limits.
        source_only_slices = self.templated_file.source_only_slices()

        linter_logger.debug("Source-only slices: %s", source_only_slices)

        # Iterate patches, filtering and translating as we go:
        linter_logger.debug("### Beginning Patch Iteration.")
        filtered_source_patches = []
        dedupe_buffer = []
        # We use enumerate so that we get an index for each patch. This is entirely
        # so when debugging logs we can find a given patch again!
        patch: Union[EnrichedFixPatch, FixPatch]
        for idx, patch in enumerate(
                self.tree.iter_patches(
                    templated_str=self.templated_file.templated_str)):
            linter_logger.debug("  %s Yielded patch: %s", idx, patch)
            self._log_hints(patch, self.templated_file)

            # Attempt to convert to source space.
            try:
                source_slice = self.templated_file.templated_slice_to_source_slice(
                    patch.templated_slice, )
            except ValueError:
                linter_logger.info(
                    "      - Skipping. Source space Value Error. i.e. attempted insertion within templated section."
                )
                # If we try and slice within a templated section, then we may fail
                # in which case, we should skip this patch.
                continue

            # Check for duplicates
            dedupe_tuple = (source_slice, patch.fixed_raw)
            if dedupe_tuple in dedupe_buffer:
                linter_logger.info(
                    "      - Skipping. Source space Duplicate: %s",
                    dedupe_tuple)
                continue

            # We now evaluate patches in the source-space for whether they overlap
            # or disrupt any templated sections.
            # The intent here is that unless explicitly stated, a fix should never
            # disrupt a templated section.
            # NOTE: We rely here on the patches being sorted.
            # TODO: Implement a mechanism for doing templated section fixes. For
            # now it's just not allowed.

            # Get the affected raw slices.
            local_raw_slices = self.templated_file.raw_slices_spanning_source_slice(
                source_slice)
            local_type_list = [slc.slice_type for slc in local_raw_slices]

            enriched_patch = EnrichedFixPatch(
                source_slice=source_slice,
                templated_slice=patch.templated_slice,
                patch_category=patch.patch_category,
                fixed_raw=patch.fixed_raw,
                templated_str=self.templated_file.templated_str[
                    patch.templated_slice],
                source_str=self.templated_file.source_str[source_slice],
            )

            # Deal with the easy case of only literals
            if set(local_type_list) == {"literal"}:
                linter_logger.info(
                    "      * Keeping patch on literal-only section: %s",
                    enriched_patch)
                filtered_source_patches.append(enriched_patch)
                dedupe_buffer.append(enriched_patch.dedupe_tuple())
            # Is it a zero length patch.
            # NOTE: If no raw slices span the patch there is no first slice
            # to compare against, so we must not index into the empty list.
            # A zero length patch with no spanning slices is kept as an
            # insertion rather than raising an IndexError.
            elif (enriched_patch.source_slice.start
                  == enriched_patch.source_slice.stop
                  and (not local_raw_slices
                       or enriched_patch.source_slice.start
                       == local_raw_slices[0].source_idx)):
                linter_logger.info(
                    "      * Keeping insertion patch on slice boundary: %s",
                    enriched_patch,
                )
                filtered_source_patches.append(enriched_patch)
                dedupe_buffer.append(enriched_patch.dedupe_tuple())
            # If it's ONLY templated then we should skip it.
            elif "literal" not in local_type_list:
                linter_logger.info(
                    "      - Skipping patch over templated section: %s",
                    enriched_patch)
            # If we span more than two slices then we should just skip it. Too Hard.
            elif len(local_raw_slices) > 2:
                linter_logger.info(
                    "      - Skipping patch over more than two raw slices: %s",
                    enriched_patch,
                )
            # If it's an insertion (i.e. the string in the pre-fix template is '') then we
            # won't be able to place it, so skip.
            elif not enriched_patch.templated_str:
                linter_logger.info(
                    "      - Skipping insertion patch in templated section: %s",
                    enriched_patch,
                )
            # If the string from the templated version isn't in the source, then we can't fix it.
            elif enriched_patch.templated_str not in enriched_patch.source_str:
                linter_logger.info(
                    "      - Skipping edit patch on templated content: %s",
                    enriched_patch,
                )
            else:
                # Identify all the places the string appears in the source content.
                positions = list(
                    findall(enriched_patch.templated_str,
                            enriched_patch.source_str))
                if len(positions) != 1:
                    linter_logger.debug(
                        "        - Skipping edit patch on non-unique templated content: %s",
                        enriched_patch,
                    )
                    continue
                # We have a single occurrence of the thing we want to patch. This
                # means we can use its position to place our patch.
                new_source_slice = slice(
                    enriched_patch.source_slice.start + positions[0],
                    enriched_patch.source_slice.start + positions[0] +
                    len(enriched_patch.templated_str),
                )
                enriched_patch = EnrichedFixPatch(
                    source_slice=new_source_slice,
                    templated_slice=enriched_patch.templated_slice,
                    patch_category=enriched_patch.patch_category,
                    fixed_raw=enriched_patch.fixed_raw,
                    templated_str=enriched_patch.templated_str,
                    source_str=enriched_patch.source_str,
                )
                linter_logger.debug(
                    "      * Keeping Tricky Case. Positions: %s, New Slice: %s, Patch: %s",
                    positions,
                    new_source_slice,
                    enriched_patch,
                )
                filtered_source_patches.append(enriched_patch)
                dedupe_buffer.append(enriched_patch.dedupe_tuple())

        # Sort the patches before building up the file.
        filtered_source_patches = sorted(filtered_source_patches,
                                         key=lambda x: x.source_slice.start)
        # We now slice up the file using the patches and any source only slices.
        # This gives us regions to apply changes to.
        slice_buff = []
        source_idx = 0
        # Walk the source-only slices with a cursor rather than popping from
        # the front, so the list handed to us by the templated file is left
        # unmodified.
        so_cursor = 0
        for patch in filtered_source_patches:
            # Are there templated slices at or before the start of this patch?
            while (so_cursor < len(source_only_slices)
                   and source_only_slices[so_cursor].source_idx
                   < patch.source_slice.start):
                next_so_slice = source_only_slices[so_cursor].source_slice()
                so_cursor += 1
                # Add a pre-slice before the next templated slices if needed.
                if next_so_slice.start > source_idx:
                    slice_buff.append(slice(source_idx, next_so_slice.start))
                # Add the templated slice.
                slice_buff.append(next_so_slice)
                source_idx = next_so_slice.stop

            # Is there a gap between current position and this patch?
            if patch.source_slice.start > source_idx:
                # Add a slice up to this patch.
                slice_buff.append(slice(source_idx, patch.source_slice.start))

            # Is this patch covering an area we've already covered?
            if patch.source_slice.start < source_idx:
                linter_logger.info(
                    "Skipping overlapping patch at Index %s, Patch: %s",
                    source_idx,
                    patch,
                )
                # Ignore the patch for now...
                continue

            # Add this patch.
            slice_buff.append(patch.source_slice)
            source_idx = patch.source_slice.stop
        # Add a tail slice.
        if source_idx < len(original_source):
            slice_buff.append(slice(source_idx, len(original_source)))

        linter_logger.debug("Final slice buffer: %s", slice_buff)

        # Iterate through the patches, building up the new string.
        # The pieces are collected and joined once at the end.
        str_parts = []
        for source_slice in slice_buff:
            # Is it one in the patch buffer:
            for patch in filtered_source_patches:
                if patch.source_slice == source_slice:
                    # Use the patched version
                    linter_logger.debug(
                        "%-30s    %s    %r > %r",
                        f"Appending {patch.patch_category} Patch:",
                        patch.source_slice,
                        patch.source_str,
                        patch.fixed_raw,
                    )
                    str_parts.append(patch.fixed_raw)
                    break
            else:
                # Use the raw string
                linter_logger.debug(
                    "Appending Raw:                    %s     %r",
                    source_slice,
                    original_source[source_slice],
                )
                str_parts.append(original_source[source_slice])
        str_buff = "".join(str_parts)

        # The success metric here is whether anything ACTUALLY changed.
        return str_buff, str_buff != original_source
Beispiel #3
0
def test__parser__helper_findall(mainstr, substr, positions):
    """Check that findall yields the expected positions of substr in mainstr."""
    found = list(findall(substr, mainstr))
    assert found == positions
Beispiel #4
0
    def _generate_source_patches(
            cls, tree: BaseSegment,
            templated_file: TemplatedFile) -> List[FixPatch]:
        """Build the ordered list of source patches from a fixed tree.

        Patches yielded by the tree are deduplicated, filtered so that
        they do not disrupt templated sections, and finally sorted by
        the start of their source slice.

        Args:
            tree: The fixed tree; its `iter_patches` method supplies the
                candidate patches.
            templated_file: The templated file used to look up the raw
                slices which each patch spans.

        Returns:
            The kept patches, sorted by `source_slice.start`.
        """
        linter_logger.debug("### Beginning Patch Iteration.")
        kept_patches = []
        seen_keys = []
        # The index from enumerate exists purely so that a given patch can
        # be found again when reading the debug logs.
        candidates = tree.iter_patches(templated_file=templated_file)
        for idx, patch in enumerate(candidates):
            linter_logger.debug("  %s Yielded patch: %s", idx, patch)
            cls._log_hints(patch, templated_file)

            # Drop anything we have already accepted.
            if patch.dedupe_tuple() in seen_keys:
                linter_logger.info(
                    "      - Skipping. Source space Duplicate: %s",
                    patch.dedupe_tuple(),
                )
                continue

            # Patches are now assessed in source space. Unless explicitly
            # stated, a fix should never disrupt a templated section.
            # NOTE: We rely here on the patches being generated in order.
            # TODO: Implement a mechanism for doing templated section fixes.
            # Given these patches are currently generated from fixed
            # segments, there will likely need to be an entirely different
            # mechanism.
            spanned_slices = templated_file.raw_slices_spanning_source_slice(
                patch.source_slice)
            spanned_types = [raw.slice_type for raw in spanned_slices]

            # Work out whether this is one of the easy cases, and if so
            # which message explains why the patch is kept.
            if not spanned_types or set(spanned_types) == {"literal"}:
                # 1) New code at end 2) only literals.
                keep_message = (
                    "      * Keeping patch on new or literal-only section: %s")
            elif patch.patch_category == "source":
                # An explicit source fix.
                keep_message = "      * Keeping explicit source fix patch: %s"
            elif (patch.source_slice.start == patch.source_slice.stop
                  and patch.source_slice.start
                  == spanned_slices[0].source_idx):
                # A zero length patch sitting on the start of a raw slice.
                keep_message = (
                    "      * Keeping insertion patch on slice boundary: %s")
            else:
                keep_message = None

            if keep_message is not None:
                linter_logger.info(keep_message, patch)
                kept_patches.append(patch)
                seen_keys.append(patch.dedupe_tuple())
                continue

            # Otherwise the ends of the patch need to be mapped more
            # carefully. Likely we are greedily including a section of
            # source templating with our fix and need to work around it
            # gracefully.

            # Identify all the places the string appears in the source content.
            positions = list(findall(patch.templated_str, patch.source_str))
            if len(positions) != 1:
                # NOTE: This section is not covered in tests. While we
                # don't have an example of its use (we should), the
                # code after this relies on there being only one
                # instance found - so the safety check remains.
                linter_logger.debug(  # pragma: no cover
                    "        - Skipping edit patch on non-unique templated "
                    "content: %s",
                    patch,
                )
                continue  # pragma: no cover

            # Exactly one occurrence: use its position to place the patch.
            relocated_start = patch.source_slice.start + positions[0]
            new_source_slice = slice(
                relocated_start,
                relocated_start + len(patch.templated_str),
            )
            linter_logger.debug(
                "      * Keeping Tricky Case. Positions: %s, New Slice: %s, "
                "Patch: %s",
                positions,
                new_source_slice,
                patch,
            )
            patch.source_slice = new_source_slice
            kept_patches.append(patch)
            seen_keys.append(patch.dedupe_tuple())

        # Sort the patches before building up the file.
        return sorted(kept_patches, key=lambda x: x.source_slice.start)