Beispiel #1
0
class ZincAnalysisParser(AnalysisParser):
    """Parser for zinc analysis files.

    All work is delegated to an underlying zincutils.ZincAnalysisParser instance
    (``UnderlyingParser``). An ``UnderlyingParser.ParseError`` raised by the delegate
    is re-raised wrapped in ``ParseError``.
    """

    # AnalysisParser properties.
    empty_test_header = b'products'
    current_test_header = ZincAnalysis.FORMAT_VERSION_LINE

    def __init__(self):
        self._underlying_parser = UnderlyingParser()

    def _call_underlying(self, infile, operation):
        """Run ``operation()`` inside ``raise_on_eof(infile)`` and return its result.

        ``UnderlyingParser.ParseError`` is converted to ``ParseError``.
        """
        with raise_on_eof(infile):
            try:
                return operation()
            except UnderlyingParser.ParseError as err:
                raise ParseError(err)

    # AnalysisParser methods.

    def parse(self, infile):
        """Parse a ZincAnalysis instance from an open text file."""
        return self._call_underlying(
            infile,
            lambda: ZincAnalysis(self._underlying_parser.parse(infile)))

    def parse_products(self, infile, classes_dir):
        """An efficient parser of just the products section.

        ``classes_dir`` is accepted but not forwarded to the underlying parser.
        """
        return self._call_underlying(
            infile,
            lambda: self._underlying_parser.parse_products(infile))

    def parse_deps(self, infile, classpath_indexer, classes_dir):
        """Parse dependencies via the underlying parser.

        Only ``infile`` and ``classes_dir`` are forwarded; ``classpath_indexer`` is unused.
        """
        return self._call_underlying(
            infile,
            lambda: self._underlying_parser.parse_deps(infile, classes_dir))

    def rebase(self,
               infile,
               outfile,
               pants_home_from,
               pants_home_to,
               java_home=None):
        """Forward a rebase of ``infile`` into ``outfile`` to the underlying parser.

        Nothing is returned.
        """
        self._call_underlying(
            infile,
            lambda: self._underlying_parser.rebase(infile, outfile,
                                                   pants_home_from,
                                                   pants_home_to,
                                                   java_home))
  def test_large(self):
    """Exercise parse, write, merge, split and rebase on the analyses in large.zip.

    Each stage is run through self._time with a descriptive label; self.total_time is
    printed once the temporary test dir has been released.
    """
    parser = ZincAnalysisParser()

    with _temp_test_dir('large.zip') as testdir:
      print('Operating in test dir: {}'.format(testdir))

      # Input analysis files.
      names = [b'downstream.analysis', b'upstream.analysis']
      paths = [os.path.join(testdir, name) for name in names]
      # Every label ends with the same bracketed list of input file names.
      files_label = ', '.join(names)

      def label(prefix):
        return '{0} [{1}]'.format(prefix, files_label)

      def parse_all():
        return [parser.parse_from_path(path) for path in paths]

      analyses = self._time(parse_all, label('Parsed'))

      # Write each parsed analysis back out under its original file name.
      out_dir = os.path.join(testdir, b'write')
      os.mkdir(out_dir)

      def write_each():
        for path, analysis in zip(paths, analyses):
          analysis.write_to_path(os.path.join(out_dir, os.path.basename(path)))

      self._time(write_each, label('Wrote'))

      # Merge.
      def merge_all():
        return ZincAnalysis.merge(analyses)

      merged = self._time(merge_all, label('Merged'))

      # Persist the merged analysis.
      merged_path = os.path.join(testdir, b'merged.analysis')

      def write_merged():
        return merged.write_to_path(merged_path)

      self._time(write_merged, label('Wrote merge of'))

      # Split the merged analysis by the sources of each original analysis.
      per_analysis_sources = [analysis.stamps.sources.keys() for analysis in analyses]

      def split_merged():
        return merged.split(per_analysis_sources, catchall=True)

      self._time(split_merged, label('Split'))

      # Rebase the merged analysis file.
      rebased_path = os.path.join(testdir, b'rebased.merged.analysis')

      def rebase_merged():
        return ZincAnalysisParser().rebase_from_path(
          merged_path, rebased_path, b'/Users/kermit/src/acme.web', b'$PANTS_HOME')

      self._time(rebase_merged, label('Rebase'))

    print('Total time: %f seconds' % self.total_time)
Beispiel #3
0
  def test_large(self):
    """Exercise parse, write, merge, split and rebase on the analyses in large.zip.

    Each stage is run through self._time with a descriptive label; self.total_time is
    printed once the temporary test dir has been released.
    """
    parser = ZincAnalysisParser()

    with _temp_test_dir('large.zip') as testdir:
      print('Operating in test dir: {}'.format(testdir))

      # Input analysis files.
      names = [b'downstream.analysis', b'upstream.analysis']
      paths = [os.path.join(testdir, name) for name in names]
      # Every label ends with the same bracketed list of input file names.
      files_label = ', '.join(names)

      def label(prefix):
        return '{0} [{1}]'.format(prefix, files_label)

      def parse_all():
        return [parser.parse_from_path(path) for path in paths]

      analyses = self._time(parse_all, label('Parsed'))

      # Write each parsed analysis back out under its original file name.
      out_dir = os.path.join(testdir, b'write')
      os.mkdir(out_dir)

      def write_each():
        for path, analysis in zip(paths, analyses):
          analysis.write_to_path(os.path.join(out_dir, os.path.basename(path)))

      self._time(write_each, label('Wrote'))

      # Merge.
      def merge_all():
        return ZincAnalysis.merge(analyses)

      merged = self._time(merge_all, label('Merged'))

      # Persist the merged analysis.
      merged_path = os.path.join(testdir, b'merged.analysis')

      def write_merged():
        return merged.write_to_path(merged_path)

      self._time(write_merged, label('Wrote merge of'))

      # Split the merged analysis by the sources of each original analysis.
      per_analysis_sources = [analysis.stamps.sources.keys() for analysis in analyses]

      def split_merged():
        return merged.split(per_analysis_sources, catchall=True)

      self._time(split_merged, label('Split'))

      # Rebase the merged analysis file.
      rebased_path = os.path.join(testdir, b'rebased.merged.analysis')

      def rebase_merged():
        return ZincAnalysisParser().rebase_from_path(
          merged_path, rebased_path, b'/Users/kermit/src/acme.web', b'$PANTS_HOME')

      self._time(rebase_merged, label('Rebase'))

    print('Total time: %f seconds' % self.total_time)
class ZincAnalysisParser(AnalysisParser):
  """Parser for zinc analysis files.

  All work is delegated to an underlying zincutils.ZincAnalysisParser instance
  (``UnderlyingParser``). An ``UnderlyingParser.ParseError`` raised by the delegate
  is re-raised wrapped in ``ParseError``.
  """

  # AnalysisParser properties.
  empty_test_header = b'products'
  current_test_header = ZincAnalysis.FORMAT_VERSION_LINE

  def __init__(self):
    self._underlying_parser = UnderlyingParser()

  def _call_underlying(self, infile, operation):
    """Run ``operation()`` inside ``raise_on_eof(infile)`` and return its result.

    ``UnderlyingParser.ParseError`` is converted to ``ParseError``.
    """
    with raise_on_eof(infile):
      try:
        return operation()
      except UnderlyingParser.ParseError as err:
        raise ParseError(err)

  # AnalysisParser methods.

  def parse(self, infile):
    """Parse a ZincAnalysis instance from an open text file."""
    return self._call_underlying(
      infile,
      lambda: ZincAnalysis(self._underlying_parser.parse(infile)))

  def parse_products(self, infile, classes_dir):
    """An efficient parser of just the products section.

    ``classes_dir`` is accepted but not forwarded to the underlying parser.
    """
    return self._call_underlying(
      infile,
      lambda: self._underlying_parser.parse_products(infile))

  def parse_deps(self, infile, classpath_indexer, classes_dir):
    """Parse dependencies via the underlying parser.

    Only ``infile`` and ``classes_dir`` are forwarded; ``classpath_indexer`` is unused.
    """
    return self._call_underlying(
      infile,
      lambda: self._underlying_parser.parse_deps(infile, classes_dir))

  def rebase(self, infile, outfile, pants_home_from, pants_home_to, java_home=None):
    """Forward a rebase of ``infile`` into ``outfile`` to the underlying parser.

    Nothing is returned.
    """
    self._call_underlying(
      infile,
      lambda: self._underlying_parser.rebase(
        infile, outfile, pants_home_from, pants_home_to, java_home))
  def _generate_testworthy_splits(self):
    """Turn a directory of non-canonical analysis files into canonical test data.

    "Canonical" means that merging the resulting files and splitting them again yields
    the same files, which makes them usable as test data (after eyeballing them for
    pathologies).

    Getting input: run a scala compile on some targets with --strategy=isolated; the
    per-target analysis files then live in .pants.d/compile/jvm/scala/isolated-analysis/.
    Anonymize those files (see anonymize_analysis.py), ideally adding some non-ASCII words
    as explained there, then set ZINC_ANALYSIS_TEST_DATA_SOURCE=<dir> in the environment
    to point at the anonymized files and run this test.

    Output goes to a freshly created 'canonical' subdirectory of that directory: one
    'all.merged.analysis' plus one re-split file per input '.analysis' file.

    Note: the code under test is also what produces the test inputs, which is slightly
    problematic. That is why the results need a spot-check for obvious pathologies.
    """
    source_dir = os.environ.get(_TEST_DATA_SOURCE_ENV_VAR)

    # Always start from an empty output directory.
    canonical_dir = os.path.join(source_dir, 'canonical')
    if os.path.exists(canonical_dir):
      shutil.rmtree(canonical_dir, True)
    os.mkdir(canonical_dir)

    # Collect the input analysis files.
    decoded_names = [entry.decode('utf-8') for entry in os.listdir(source_dir)]
    input_paths = []
    for name in decoded_names:
      if name.endswith('.analysis'):
        input_paths.append(os.path.join(source_dir, name))

    # Parse and merge all inputs, then persist the merged result.
    parser = ZincAnalysisParser()
    input_analyses = [parser.parse_from_path(path) for path in input_paths]
    merged = ZincAnalysis.merge(input_analyses)
    merged_path = os.path.join(canonical_dir, 'all.merged.analysis')
    merged.write_to_path(merged_path)

    # Split the merged analysis back out, one piece per input file.
    per_analysis_sources = [analysis.stamps.sources.keys() for analysis in input_analyses]
    # NOTE(review): what the second positional argument binds to is not visible here;
    # confirm against the signature of split().
    resplit_analyses = merged.split(per_analysis_sources, os.path.dirname(__file__))
    for input_path, resplit in zip(input_paths, resplit_analyses):
      resplit.write_to_path(os.path.join(canonical_dir, os.path.basename(input_path)))

    print('Wrote canonical analysis data to {}'.format(canonical_dir))
Beispiel #6
0
  def _generate_testworthy_splits(self):
    """Turn a directory of non-canonical analysis files into canonical test data.

    "Canonical" means that merging the resulting files and splitting them again yields
    the same files, which makes them usable as test data (after eyeballing them for
    pathologies).

    Getting input: run a scala compile on some targets with --strategy=isolated; the
    per-target analysis files then live in .pants.d/compile/jvm/scala/isolated-analysis/.
    Anonymize those files (see anonymize_analysis.py), ideally adding some non-ASCII words
    as explained there, then set ZINC_ANALYSIS_TEST_DATA_SOURCE=<dir> in the environment
    to point at the anonymized files and run this test.

    Output goes to a freshly created 'canonical' subdirectory of that directory: one
    'all.merged.analysis' plus one re-split file per input '.analysis' file.

    Note: the code under test is also what produces the test inputs, which is slightly
    problematic. That is why the results need a spot-check for obvious pathologies.
    """
    source_dir = os.environ.get(_TEST_DATA_SOURCE_ENV_VAR)

    # Always start from an empty output directory.
    canonical_dir = os.path.join(source_dir, 'canonical')
    if os.path.exists(canonical_dir):
      shutil.rmtree(canonical_dir, True)
    os.mkdir(canonical_dir)

    # Collect the input analysis files.
    decoded_names = [entry.decode('utf-8') for entry in os.listdir(source_dir)]
    input_paths = []
    for name in decoded_names:
      if name.endswith('.analysis'):
        input_paths.append(os.path.join(source_dir, name))

    # Parse and merge all inputs, then persist the merged result.
    parser = ZincAnalysisParser()
    input_analyses = [parser.parse_from_path(path) for path in input_paths]
    merged = ZincAnalysis.merge(input_analyses)
    merged_path = os.path.join(canonical_dir, 'all.merged.analysis')
    merged.write_to_path(merged_path)

    # Split the merged analysis back out, one piece per input file.
    per_analysis_sources = [analysis.stamps.sources.keys() for analysis in input_analyses]
    # NOTE(review): what the second positional argument binds to is not visible here;
    # confirm against the signature of split().
    resplit_analyses = merged.split(per_analysis_sources, os.path.dirname(__file__))
    for input_path, resplit in zip(input_paths, resplit_analyses):
      resplit.write_to_path(os.path.join(canonical_dir, os.path.basename(input_path)))

    print('Wrote canonical analysis data to {}'.format(canonical_dir))
 def __init__(self):
     """Create a fresh UnderlyingParser and store it on self._underlying_parser."""
     delegate = UnderlyingParser()
     self._underlying_parser = delegate
  def test_complex(self):
    """Round-trip the analyses in complex.zip through write, merge, re-read and split.

    Steps, each run through self._time:
      1. Parse every '.analysis' file in the test dir (excluding '.merged.analysis' files).
      2. Write them back out individually.
      3. Merge them, write the merged analysis, and read it back.
      4. Check the merged analysis equals both its re-read copy and the expected
         'all.merged.analysis' shipped with the test data.
      5. Split the re-read merged analysis (with a catchall), check the catchall has no
         sources, and check each split equals the corresponding original analysis.

    If the environment variable named by _TEST_DATA_SOURCE_ENV_VAR is set, none of the
    above runs; canonical test data is generated via _generate_testworthy_splits instead.
    """
    with environment_as(ZINCUTILS_SORTED_ANALYSIS='1'):
      if os.environ.get(_TEST_DATA_SOURCE_ENV_VAR):
        print('\n>>>>>>>>> {} set: skipping test, generating canonical test data instead.'.format(
          _TEST_DATA_SOURCE_ENV_VAR))
        self._generate_testworthy_splits()
        return

      parser = ZincAnalysisParser()

      with _temp_test_dir('complex.zip') as testdir:
        # Parse analysis files.
        analysis_files = [os.path.join(testdir, f)
                          for f in os.listdir(testdir)
                          if f.endswith(b'.analysis') and not f.endswith(b'.merged.analysis')]
        num_analyses = len(analysis_files)

        def parse(f):
          return parser.parse_from_path(f)

        analyses = self._time(lambda: [parse(f) for f in analysis_files],
                              'Parsed %d files' % num_analyses)

        # Write them back out individually.
        writeout_dir = os.path.join(testdir, b'write')
        os.mkdir(writeout_dir)
        def write(file_name, analysis):
          outpath = os.path.join(writeout_dir, file_name)
          analysis.write_to_path(outpath)

        def _write_all():
          for analysis_file, analysis in zip(analysis_files, analyses):
            write(os.path.basename(analysis_file), analysis)

        self._time(_write_all, 'Wrote %d files' % num_analyses)

        # Merge them.
        merged_analysis = self._time(lambda: ZincAnalysis.merge(analyses),
                                     'Merged %d files' % num_analyses)

        # Write merged analysis to file.
        merged_analysis_path = os.path.join(writeout_dir, b'merged.analysis')
        self._time(lambda: merged_analysis.write_to_path(merged_analysis_path),
                   'Wrote merged analysis to %s' % merged_analysis_path)

        # Read merged analysis from file.
        merged_analysis2 = self._time(lambda: parser.parse_from_path(merged_analysis_path),
                                      'Read merged analysis from %s' % merged_analysis_path)

        # Read the expected merged analysis from file.
        expected_merged_analysis_path = os.path.join(testdir, b'all.merged.analysis')
        expected_merged_analysis = self._time(
          lambda: parser.parse_from_path(expected_merged_analysis_path),
          'Read expected merged analysis from %s' % expected_merged_analysis_path)

        # Compare the merge result with the re-read one. The diffs are only used to build
        # a readable failure message.
        diffs = merged_analysis.diff(merged_analysis2)
        self.assertTrue(merged_analysis.is_equal_to(merged_analysis2), ''.join(
          [unicode(diff) for diff in diffs]))

        # Compare the merge result with the expected.
        diffs = expected_merged_analysis.diff(merged_analysis2)
        self.assertTrue(expected_merged_analysis.is_equal_to(merged_analysis2), ''.join(
          [unicode(diff) for diff in diffs]))

        # Split the merged analysis back to individual analyses.
        sources_per_analysis = [a.stamps.sources.keys() for a in analyses]
        split_analyses = self._time(lambda: merged_analysis2.split(
          sources_per_analysis, catchall=True),
          'Split back into %d analyses' % num_analyses)

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(num_analyses + 1, len(split_analyses))  # +1 for the catchall.
        catchall_analysis = split_analyses[-1]

        # We expect an empty catchall.
        self.assertEqual(0, len(catchall_analysis.stamps.sources))

        # Diff the original analyses and the split ones.

        # Write the split to the tmpdir, for ease of debugging on failure.
        splits_dir = os.path.join(testdir, b'splits')
        os.mkdir(splits_dir)
        # zip() stops at the shortest input, so the trailing catchall split is not compared.
        for analysis_file, analysis, split_analysis in zip(analysis_files, analyses, split_analyses):
          outfile_path = os.path.join(splits_dir, os.path.basename(analysis_file))
          split_analysis.write_to_path(outfile_path)
          diffs = analysis.diff(split_analysis)
          # Note that it's not true in general that merging splits and then splitting them back out
          # should yield the exact same analysis. Some small differences can happen. For example:
          # splitA may have an external src->class on a class from a source file in splitB; When
          # merging, that becomes a src->src dependency; And when splitting back out that src
          # dependency becomes a dependency on a representative class, which may not be
          # the original class SplitA depended on.
          #
          # This comparison works here only because we've taken care to prepare test data for which
          # it should hold. See _generate_testworthy_splits for how to do so.
          self.assertTrue(analysis.is_equal_to(split_analysis),
                          ''.join([unicode(diff) for diff in diffs]))

      print('Total time: %f seconds' % self.total_time)
 def __init__(self):
   """Create a fresh UnderlyingParser and store it on self._underlying_parser."""
   delegate = UnderlyingParser()
   self._underlying_parser = delegate
Beispiel #10
0
 def parse_analyis(name):
   """Parse the test analysis file called `name` using a new ZincAnalysisParser."""
   parse_from_path = ZincAnalysisParser().parse_from_path
   return parse_from_path(get_test_analysis_path(name))
Beispiel #11
0
  def test_simple(self):
    """Check split, merge and rebase against the expected files in testdata/simple.

    The full 'simple.analysis' is split by two source files and each split is compared,
    both as an object and as text, with 'simple_split{i}.analysis'. The splits are then
    merged and compared with the original. Finally the original text is rebased, once
    without and once with a java home argument, and compared with
    'simple.rebased.analysis' and 'simple.rebased.filtered.analysis' respectively.
    """
    with environment_as(ZINCUTILS_SORTED_ANALYSIS='1'):
      def get_test_analysis_path(name):
        return os.path.join(os.path.dirname(__file__), 'testdata', 'simple', name)

      def get_analysis_text(name):
        with open(get_test_analysis_path(name), 'r') as fp:
          return fp.read()

      def parse_analysis(name):
        return ZincAnalysisParser().parse_from_path(get_test_analysis_path(name))

      def analysis_to_string(analysis):
        buf = StringIO.StringIO()
        analysis.write(buf)
        return buf.getvalue()

      full_analysis = parse_analysis('simple.analysis')

      analysis_splits = full_analysis.split([
        [b'/src/pants/examples/src/scala/org/pantsbuild/example/hello/welcome/Welcome.scala'],
        [b'/src/pants/examples/src/scala/org/pantsbuild/example/hello/exe/Exe.scala'],
      ])
      # assertEqual rather than the deprecated assertEquals alias.
      self.assertEqual(len(analysis_splits), 2)

      def compare_split(i):
        expected_filename = 'simple_split{0}.analysis'.format(i)

        # First compare as objects. This is weaker than the text comparison because in
        # some cases there can be small text differences that don't affect logical
        # equivalence.
        expected_analysis = parse_analysis(expected_filename)
        self.assertTrue(expected_analysis.is_equal_to(analysis_splits[i]))

        # Then compare as text.  In this simple case we expect them to be byte-for-byte equal.
        expected = get_analysis_text(expected_filename)
        actual = analysis_to_string(analysis_splits[i])
        self.assertMultiLineEqual(expected, actual)

      compare_split(0)
      compare_split(1)

      # Now merge and check that we get what we started with.
      merged_analysis = ZincAnalysis.merge(analysis_splits)
      # Check that they compare as objects.
      self.assertTrue(full_analysis.is_equal_to(merged_analysis))
      # Check that they compare as text.
      expected = get_analysis_text('simple.analysis')
      actual = analysis_to_string(merged_analysis)
      self.assertMultiLineEqual(expected, actual)

      # Now check rebasing. The parser is fed an iterator over the lines of the original
      # text (line endings kept) rather than a file object.
      orig = iter(get_analysis_text('simple.analysis').splitlines(True))
      expected_rebased = get_analysis_text('simple.rebased.analysis')
      buf = StringIO.StringIO()
      ZincAnalysisParser().rebase(orig, buf, b'/src/pants', b'$PANTS_HOME')
      rebased = buf.getvalue()
      self.assertMultiLineEqual(expected_rebased, rebased)

      # And rebasing+filtering.
      orig = iter(get_analysis_text('simple.analysis').splitlines(True))
      expected_filtered_rebased = get_analysis_text('simple.rebased.filtered.analysis')
      buf = StringIO.StringIO()
      ZincAnalysisParser().rebase(orig, buf, b'/src/pants', b'$PANTS_HOME',
                                  b'/Library/Java/JavaVirtualMachines/jdk1.8.0_40.jdk')
      filtered_rebased = buf.getvalue()
      self.assertMultiLineEqual(expected_filtered_rebased, filtered_rebased)
Beispiel #12
0
  def test_complex(self):
    """Round-trip the analyses in complex.zip through write, merge, re-read and split.

    Steps, each run through self._time:
      1. Parse every '.analysis' file in the test dir (excluding '.merged.analysis' files).
      2. Write them back out individually.
      3. Merge them, write the merged analysis, and read it back.
      4. Check the merged analysis equals both its re-read copy and the expected
         'all.merged.analysis' shipped with the test data.
      5. Split the re-read merged analysis (with a catchall), check the catchall has no
         sources, and check each split equals the corresponding original analysis.

    If the environment variable named by _TEST_DATA_SOURCE_ENV_VAR is set, none of the
    above runs; canonical test data is generated via _generate_testworthy_splits instead.
    """
    with environment_as(ZINCUTILS_SORTED_ANALYSIS='1'):
      if os.environ.get(_TEST_DATA_SOURCE_ENV_VAR):
        print('\n>>>>>>>>> {} set: skipping test, generating canonical test data instead.'.format(
          _TEST_DATA_SOURCE_ENV_VAR))
        self._generate_testworthy_splits()
        return

      parser = ZincAnalysisParser()

      with _temp_test_dir('complex.zip') as testdir:
        # Parse analysis files.
        analysis_files = [os.path.join(testdir, f)
                          for f in os.listdir(testdir)
                          if f.endswith(b'.analysis') and not f.endswith(b'.merged.analysis')]
        num_analyses = len(analysis_files)

        def parse(f):
          return parser.parse_from_path(f)

        analyses = self._time(lambda: [parse(f) for f in analysis_files],
                              'Parsed %d files' % num_analyses)

        # Write them back out individually.
        writeout_dir = os.path.join(testdir, b'write')
        os.mkdir(writeout_dir)
        def write(file_name, analysis):
          outpath = os.path.join(writeout_dir, file_name)
          analysis.write_to_path(outpath)

        def _write_all():
          for analysis_file, analysis in zip(analysis_files, analyses):
            write(os.path.basename(analysis_file), analysis)

        self._time(_write_all, 'Wrote %d files' % num_analyses)

        # Merge them.
        merged_analysis = self._time(lambda: ZincAnalysis.merge(analyses),
                                     'Merged %d files' % num_analyses)

        # Write merged analysis to file.
        merged_analysis_path = os.path.join(writeout_dir, b'merged.analysis')
        self._time(lambda: merged_analysis.write_to_path(merged_analysis_path),
                   'Wrote merged analysis to %s' % merged_analysis_path)

        # Read merged analysis from file.
        merged_analysis2 = self._time(lambda: parser.parse_from_path(merged_analysis_path),
                                      'Read merged analysis from %s' % merged_analysis_path)

        # Read the expected merged analysis from file.
        expected_merged_analysis_path = os.path.join(testdir, b'all.merged.analysis')
        expected_merged_analysis = self._time(
          lambda: parser.parse_from_path(expected_merged_analysis_path),
          'Read expected merged analysis from %s' % expected_merged_analysis_path)

        # Compare the merge result with the re-read one. The diffs are only used to build
        # a readable failure message.
        diffs = merged_analysis.diff(merged_analysis2)
        self.assertTrue(merged_analysis.is_equal_to(merged_analysis2), ''.join(
          [unicode(diff) for diff in diffs]))

        # Compare the merge result with the expected.
        diffs = expected_merged_analysis.diff(merged_analysis2)
        self.assertTrue(expected_merged_analysis.is_equal_to(merged_analysis2), ''.join(
          [unicode(diff) for diff in diffs]))

        # Split the merged analysis back to individual analyses.
        sources_per_analysis = [a.stamps.sources.keys() for a in analyses]
        split_analyses = self._time(lambda: merged_analysis2.split(
          sources_per_analysis, catchall=True),
          'Split back into %d analyses' % num_analyses)

        # assertEqual rather than the deprecated assertEquals alias.
        self.assertEqual(num_analyses + 1, len(split_analyses))  # +1 for the catchall.
        catchall_analysis = split_analyses[-1]

        # We expect an empty catchall.
        self.assertEqual(0, len(catchall_analysis.stamps.sources))

        # Diff the original analyses and the split ones.

        # Write the split to the tmpdir, for ease of debugging on failure.
        splits_dir = os.path.join(testdir, b'splits')
        os.mkdir(splits_dir)
        # zip() stops at the shortest input, so the trailing catchall split is not compared.
        for analysis_file, analysis, split_analysis in zip(analysis_files, analyses, split_analyses):
          outfile_path = os.path.join(splits_dir, os.path.basename(analysis_file))
          split_analysis.write_to_path(outfile_path)
          diffs = analysis.diff(split_analysis)
          # Note that it's not true in general that merging splits and then splitting them back out
          # should yield the exact same analysis. Some small differences can happen. For example:
          # splitA may have an external src->class on a class from a source file in splitB; When
          # merging, that becomes a src->src dependency; And when splitting back out that src
          # dependency becomes a dependency on a representative class, which may not be
          # the original class SplitA depended on.
          #
          # This comparison works here only because we've taken care to prepare test data for which
          # it should hold. See _generate_testworthy_splits for how to do so.
          self.assertTrue(analysis.is_equal_to(split_analysis),
                          ''.join([unicode(diff) for diff in diffs]))

      print('Total time: %f seconds' % self.total_time)