    @staticmethod
    def _normalize_complexity_report_output(output, source_path):
        """Normalize complexity_report output.

        See https://github.com/escomplex/escomplex/blob/master/README.md#metrics

        :param output: output dict to be normalized
        :param source_path: path to the sources that were analyzed
        :return: normalized output
        """
        # Map raw escomplex keys onto our normalized names (for the metrics
        # meaning, see the documentation linked in the docstring).
        wanted_keys = (('maintainability', 'project_maintainability'),
                       ('changeCost', 'cost_change'),
                       ('cyclomatic', 'average_cyclomatic_complexity'),
                       ('effort', 'average_halstead_effort'),
                       ('firstOrderDensity', 'first_order_density'),
                       ('loc', 'average_function_lines_of_code'),
                       ('params', 'average_function_parameters_count'),
                       ('reports', 'modules'))
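        # Per the transform_keys tests further below: listed keys are kept or
        # renamed (e.g. 'changeCost' -> 'cost_change'), missing ones default
        # to None, and unlisted keys are dropped.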
        output = DataNormalizer.transform_keys(output, wanted_keys)

        wanted_module_keys = (('maintainability', 'module_maintainability'),
                              ('dependencies', ),
                              ('loc', 'average_function_lines_of_code'),
                              ('path', ),
                              ('params', 'average_function_parameters_count'),
                              ('functions', ))

        # strip the leading source path (and its trailing '/') from module paths
        source_path_len = len(source_path) + 1
        for idx, module in enumerate(output.get('modules', [])):
            output['modules'][idx] = DataNormalizer.transform_keys(
                module, wanted_module_keys)

            if 'path' in module:
                output['modules'][idx]['path'] = module['path'][
                    source_path_len:]

            # 'functions' may be missing for a module, hence the fallback
            for function in module.get('functions') or []:
                if 'cyclomaticDensity' in function:
                    function['cyclomatic_density'] = function.pop(
                        'cyclomaticDensity')

        return output

    def _get_generic_result(self, source_path):
        """Get the core result of CodeMetricsTask, which is based on the cloc tool.

        This output is later enriched with the output of tools chosen based on
        the languages that cloc detected.

        :param source_path: path to sources where the analyzed artefact resides
        :return: tuple of generic information and an ecosystem specific dict
        """
        command = ['cloc', '--json', source_path]
        status, output, error = self._run_analyzer(command)

        if status != 0:
            # Let the whole task fail
            raise RuntimeError("Running cloc command failed: '%s'" % error)

        # cloc places a generic summary under 'header'; we keep these totals
        # at the top level, so drop the now-misleading key afterwards
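        # Illustrative cloc --json shape:
        #   {"header": {"n_files": 8, "n_lines": 1050, ...},
        #    "Python": {"nFiles": 8, "blank": 100, "comment": 50, "code": 900},
        #    "SUM": {"blank": 100, "comment": 50, "code": 900, "nFiles": 8}}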
        header = {
            'total_files': output['header'].pop('n_files'),
            'total_lines': output['header'].pop('n_lines')
        }
        output.pop('header')

        if 'SUM' in output:
            header['blank_lines'] = output['SUM']['blank']
            header['comment_lines'] = output['SUM']['comment']
            header['code_lines'] = output['SUM']['code']
            output.pop('SUM', None)

        # rename keys so the naming is more precise
        wanted_keys = (('blank', 'blank_lines'), ('code', 'code_lines'),
                       ('comment', 'comment_lines'), ('nFiles', 'files_count'))
        for key in output.keys():
            # filter only language-specific results, leave statistics untouched
            if isinstance(output[key], dict):
                output[key] = DataNormalizer.transform_keys(
                    output[key], wanted_keys)

        return header, output


class MercatorTask(BaseTask):
    """Collects `Release` specific information from Mercator."""

    _analysis_name = 'metadata'
    _dependency_tree_lock = '_dependency_tree_lock'
    schema_ref = SchemaRef(_analysis_name, '3-3-0')
    _data_normalizer = DataNormalizer()

    def _parse_requires_txt(self, path):
        requires = []
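        # Typical requires.txt layout (illustrative): one requirement per line,
        # with the first ini-like [section] ending the runtime requirements:
        #   six>=1.10
        #   requests
        #   [dev]
        #   pytest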
        try:
            with open(path, 'r') as f:
                for line in f.readlines():
                    line = line.strip()
                    if line.startswith('['):
                        # the first named ini-like [section] ends the runtime requirements
                        break
                    elif line:
                        requires.append(line)
        except Exception as e:
            self.log.warning('Failed to process "{p}": {e}'.format(p=path,
                                                                   e=str(e)))

        return requires

    def _merge_python_items(self, topdir, data):
        # TODO: reduce cyclomatic complexity
        metadata_json = None
        pkg_info = None
        requirements_txt = None

        def get_depth(path):
            return path.rstrip('/').count('/')
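        # e.g. get_depth('pkg/src/module') == 2; a trailing '/' is ignored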

        def is_deeper(item1, item2):
            """Return True if item1 is deeper in directory hierarchy than item2."""
            if item1 is None:
                return True
            return get_depth(item1['path']) > get_depth(item2['path'])

        if not data.get('items'):
            return None

        # find outermost PKG_INFO/metadata.json/requirements.txt - there can be
        #  testing ones etc.
        for item in data['items']:
            if item['ecosystem'] == 'Python-Dist' and item['path'].endswith(
                    '.json'):
                if is_deeper(metadata_json, item):
                    metadata_json = item
            elif item['ecosystem'] == 'Python-Dist':  # PKG-INFO
                # we prefer PKG_INFO files from .egg-info directories,
                #  since these have the very useful `requires.txt` next to them
                if pkg_info is None:
                    pkg_info = item
                else:
                    pkg_info_in_egg = pkg_info['path'].endswith(
                        '.egg-info/PKG-INFO')
                    item_in_egg = item['path'].endswith('.egg-info/PKG-INFO')
                    # rather than one insane condition, we use several less complex ones
                    if pkg_info_in_egg and item_in_egg and is_deeper(
                            pkg_info, item):
                        # if both are in .egg-info, but current pkg_info is deeper
                        pkg_info = item
                    elif item_in_egg and not pkg_info_in_egg:
                        # if item is in .egg-info and current pkg_info is not
                        pkg_info = item
                    elif not (item_in_egg or pkg_info_in_egg) and is_deeper(
                            pkg_info, item):
                        # if none of them are in .egg-info, but current pkg_info is deeper
                        pkg_info = item
            elif item['ecosystem'] == 'Python-RequirementsTXT':
                if not requirements_txt or is_deeper(requirements_txt, item):
                    requirements_txt = item

        if pkg_info:
            self.log.info('Found PKG-INFO at {p}'.format(p=pkg_info['path']))
        if metadata_json:
            self.log.info(
                'Found metadata.json at {p}'.format(p=metadata_json['path']))
        if requirements_txt:
            self.log.info('Found requirements.txt at {p}'.format(
                p=requirements_txt['path']))

        ret = None
        # figure out if this was packaged as wheel => metadata.json would
        #  have depth of topdir + 2
        if metadata_json and get_depth(
                metadata_json['path']) == get_depth(topdir) + 2:
            self.log.info('Seems like this is a wheel, using metadata.json ...')
            ret = metadata_json
        # figure out if this was packaged as sdist => PKG_INFO would
        #  have depth of topdir + 3 (e.g. requests-2.18.1/requests.egg-info/PKG-INFO)
        #             or topdir + 4 (e.g. pydocstyle-2.0.0/src/pydocstyle.egg-info/PKG-INFO)
        #             or topdir + 5 (dxl-cluster-0.0.2/src/python/dxl_cluster.egg-info/PKG-INFO)
        #  (and perhaps there are requires.txt or requirements.txt that we could use)
        # NOTE: for now, we always treat requirements.txt as requires_dist
        elif pkg_info and get_depth(pkg_info['path']) <= get_depth(topdir) + 5:
            self.log.info(
                'Seems like this is an sdist or egg, using PKG-INFO ...')
            requires_dist = []
            # in well-made sdists, there is a requires.txt next to PKG-INFO
            #  (this is something different from requirements.txt)
            #  TODO: maybe mercator could do this in future
            requires = os.path.join(os.path.dirname(pkg_info['path']),
                                    'requires.txt')
            if os.path.exists(requires):
                self.log.info(
                    'Found a "requires.txt" file next to PKG-INFO, going to use it ...'
                )
                requires_dist = self._parse_requires_txt(requires)
            elif requirements_txt:
                self.log.info(
                    'No "requires.txt" file found next to PKG-INFO, but requirements.txt'
                    ' found, going to use it')
                # if requires.txt can't be found, try requirements.txt
                requires_dist = requirements_txt['result']['dependencies']
            else:
                self.log.info(
                    'Found no usable source of requirements for PKG-INFO :(')
            pkg_info['result']['requires_dist'] = requires_dist
            ret = pkg_info
        elif requirements_txt:
            self.log.info('Only requirements.txt found, going to use it ...')
            requirements_txt['result']['requires_dist'] = \
                requirements_txt['result'].get('dependencies')
            ret = requirements_txt

        return ret

    def execute(self, arguments):
        """Execute mercator and convert it's output to JSON object."""
        self._strict_assert(arguments.get('ecosystem'))

        if 'url' in arguments:
            # run mercator on a git repo
            return self.run_mercator_on_git_repo(arguments)

        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        # TODO: make this even uglier; looks like we didn't get the abstraction quite right
        #       when we were adding support for Java/Maven.
        if self.storage.get_ecosystem(arguments['ecosystem']).is_backed_by(
                EcosystemBackend.maven):
            # cache_path now points directly to the pom
            cache_path = ObjectCache.get_from_dict(arguments).get_pom_xml()
        else:
            cache_path = ObjectCache.get_from_dict(
                arguments).get_extracted_source_tarball()
        return self.run_mercator(arguments, cache_path)

    def run_mercator_on_git_repo(self, arguments):
        """Clone specified git url and run mercator on it."""
        self._strict_assert(arguments.get('url'))

        with TemporaryDirectory() as workdir:
            repo_url = arguments.get('url')
            repo = Git.clone(repo_url, path=workdir, depth='1')
            metadata = self.run_mercator(arguments,
                                         workdir,
                                         keep_path=True,
                                         outermost_only=False,
                                         timeout=900)
            if metadata.get('status', None) != 'success':
                self.log.error('Mercator failed on %s', repo_url)
                return None

            # add some auxiliary information so we can later find the manifest file
            head = repo.rev_parse(['HEAD'])[0]
            for detail in metadata['details']:
                path = detail['path'][len(workdir):]
                # path should look like this:
                # <git-sha1>/path/to/manifest.file
                detail['path'] = head + path

            return metadata

    def run_mercator(self,
                     arguments,
                     cache_path,
                     keep_path=False,
                     outermost_only=True,
                     timeout=300,
                     resolve_poms=True):
        """Run mercator tool."""
        # TODO: reduce cyclomatic complexity
        result_data = {'status': 'unknown', 'summary': [], 'details': []}
        mercator_target = arguments.get('cache_sources_path', cache_path)

        tc = TimedCommand(['mercator', mercator_target])
        update_env = {
            'MERCATOR_JAVA_RESOLVE_POMS': 'true'
        } if resolve_poms else {}
        status, data, err = tc.run(timeout=timeout,
                                   is_json=True,
                                   update_env=update_env)
        if status != 0:
            self.log.error(err)
            raise FatalTaskError(err)

        ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
        if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
            # TODO: attempt static setup.py parsing with mercator
            items = [self._merge_python_items(mercator_target, data)]
            if items == [None]:
                raise NotABugFatalTaskError(
                    'Found no usable PKG-INFO/metadata.json/requirements.txt')
        else:
            if outermost_only:
                # process only root level manifests (or the ones closest to the root level)
                items = self._data_normalizer.get_outermost_items(
                    data.get('items') or [])
            else:
                items = data.get('items') or []
            self.log.debug('mercator found %i projects, outermost %i',
                           len(data.get('items') or []), len(items))

            if ecosystem_object.is_backed_by(EcosystemBackend.maven):
                # for maven we download both Jar and POM, we consider POM to be *the*
                #  source of information and don't want to duplicate info by including
                #  data from pom included in artifact (assuming it's included)
                items = [
                    d for d in items if d['ecosystem'].lower() == 'java-pom'
                ]
            elif ecosystem_object.is_backed_by(EcosystemBackend.npm):
                # ignore other metadata files, e.g. requirements.txt
                items = [d for d in items if d['ecosystem'].lower() == 'npm']
            elif arguments['ecosystem'] == 'go':
                items = [
                    d for d in items if d['ecosystem'].lower() == 'go-glide'
                ]
                if not items:
                    # Mercator found no Go Glide files, run gofedlib
                    items = self.run_gofedlib(topdir=mercator_target,
                                              name=arguments.get('name'),
                                              version=arguments.get('version'),
                                              timeout=timeout)

        result_data['details'] = [
            self._data_normalizer.handle_data(d, keep_path=keep_path)
            for d in items
        ]
        result_data['status'] = 'success'
        return result_data

    def run_gofedlib(self, topdir, name, version, timeout):
        """Run gofedlib-cli to extract dependencies from golang sources."""
        tc = TimedCommand([
            'gofedlib-cli', '--dependencies-main', '--dependencies-packages',
            '--dependencies-test', '--skip-errors', topdir
        ])
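        # gofedlib-cli emits JSON with 'deps-main' and 'deps-packages' lists
        # of dependency repository URLs (cf. the gofedlib tests further below)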
        status, data, err = tc.run(timeout=timeout)
        if status != 0:
            # fail loudly, mirroring the mercator invocation above
            self.log.error(err)
            raise FatalTaskError(err)
        result = json.loads(data[0])
        main_deps_count = len(result.get('deps-main', []))
        packages_count = len(result.get('deps-packages', []))
        self.log.debug('gofedlib found %i dependencies',
                       main_deps_count + packages_count)

        result['code_repository'] = {
            'type': 'git',
            'url': 'https://{name}'.format(name=name)
        }
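        # for Go, 'name' is an import path such as 'github.com/gorilla/mux'
        # (illustrative), so prefixing 'https://' yields the repository URL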
        result['name'] = name
        result['version'] = version
        return [{'ecosystem': 'gofedlib', 'result': result}]


class TestDataNormalizer(object):
    """Test DataNormalizer."""

    def setup_method(self, method):
        """Set up any state tied to the execution of the given method in a class."""
        self.data = os.path.join(
            os.path.dirname(
                os.path.abspath(__file__)), 'data', 'dataNormalizer')
        self._dataNormalizer = DataNormalizer()

    def _load_json(self, f):
        """Load json from f."""
        with open(os.path.join(self.data, f), encoding='utf-8') as data_file:
            return json.load(data_file)

    @pytest.mark.parametrize('args, expected', [
        ({'keywords': None},
         []),
        ({'keywords': []},
         []),
        ({'keywords': ['x', 'y']},
         ['x', 'y']),
        ({'keywords': ''},
         ['']),
        ({'keywords': 'one'},
         ['one']),
        ({'keywords': 'one, two'},
         ['one', 'two']),
        ({'keywords': 'one two'},
         ['one', 'two']),
        ({'keywords': 'one two', 'separator': ' '},
         ['one', 'two']),
        ({'keywords': 'one, two', 'separator': ','},
         ['one', 'two']),
    ])
    def test__split_keywords(self, args, expected):
        """Test DataNormalizer._split_keywords()."""
        assert self._dataNormalizer._split_keywords(**args) == expected

    @pytest.mark.parametrize('args, expected', [
        # pick one key which IS there
        ({'data': {'author': 'me', 'version': '0.1.2'}, 'keymap': (('author',),)},
         {'author': 'me'}),
        # pick one key which IS NOT there
        ({'data': {'author-name': 'me', 'version': '0.1.2'}, 'keymap': (('author',),)},
         {'author': None}),
        # pick & and rename one key which IS there
        ({'data': {'author-name': 'me'}, 'keymap': (('author-name', 'author',),)},
         {'author': 'me'}),
        # pick & and rename one key which IS NOT there
        ({'data': {'authors': 'they'}, 'keymap': (('author-name', 'author',),)},
         {'author': None}),
        # pick one of keys
        ({'data': {'license': 'MIT'}, 'keymap': ((('license', 'licenses',), ),)},
         {'license': 'MIT'}),
        # pick one of keys
        ({'data': {'licenses': ['MIT', 'BSD']}, 'keymap': ((('license', 'licenses',),),)},
         {'licenses': ['MIT', 'BSD']}),
        # pick one of keys and rename it
        ({'data': {'license': 'MIT'}, 'keymap': ((('license', 'licenses',), 'declared_licenses'),)},
         {'declared_licenses': 'MIT'}),
    ])
    def test__transform_keys(self, args, expected):
        """Test DataNormalizer.transform_keys()."""
        assert self._dataNormalizer.transform_keys(**args) == expected
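
    # Keymap grammar exercised above: ('key',) keeps a key as-is,
    # ('old', 'new') renames it, (('a', 'b'),) picks whichever of 'a'/'b'
    # is present, and (('a', 'b'), 'new') additionally renames the match.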

    @pytest.mark.parametrize('args, expected', [
        ({'name_email_dict': {'name': 'A', 'email': '*****@*****.**'}},
         "A <*****@*****.**>"),
        ({'name_email_dict': {'name': 'A'}},
         "A"),
        ({'name_email_dict': {'email': '*****@*****.**'}},
         "<*****@*****.**>"),
        ({'name_email_dict': {'author': 'A', 'author-email': '*****@*****.**'},
          'name_key': 'author', 'email_key': 'author-email'},
         "A <*****@*****.**>"),
        ({'name_email_dict': {'url': 'https://github.com/o/p/issues', 'email': '*****@*****.**'},
          'name_key': 'url'},
         "https://github.com/o/p/issues <*****@*****.**>"),
    ])
    def test__join_name_email(self, args, expected):
        """Test DataNormalizer._join_name_email()."""
        assert self._dataNormalizer._join_name_email(**args) == expected

    @pytest.mark.parametrize('args, expected', [
        ({'data': {}},
         False),
        # package.json (nodejs), no 'scripts'
        ({'data': {"scripts": None}},
         False),
        # package.json (nodejs), missing "test"
        ({'data': {"scripts": {"docs": "jsdoc2md -t ..."}}},
         False),
        # package.json, default 'npm init' test script
        ({'data': {"scripts": {"test": "echo \"Error: no test specified\" && exit 1"}}},
         False),
        # package.json, ok
        ({'data': {"scripts": {"test": "tape test/*.js", "docs": "jsdoc2md -t"}}},
         True),
        # setup.py, ok
        ({'data': {'tests_require': ['mock']}},
         True),
        # metadata.json (Python)
        ({'data': {"test_requires": [{"requires": ["mock (==1.0.1)", "pytest (==2.9.1)"]}]}},
         True),
    ])
    def test__are_tests_implemented(self, args, expected):
        """Test DataNormalizer._are_tests_implemented()."""
        assert self._dataNormalizer._are_tests_implemented(**args) == expected

    def test_transforming_setup_py(self):
        """Test normalizing of data from setup.py."""
        data = self._load_json('setup-py-from-mercator')
        expected = self._load_json('setup-py-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    def test_transforming_pkginfo(self):
        """Test normalizing of data from PKG-INFO."""
        data = self._load_json('PKG-INFO-from-mercator')
        expected = self._load_json('PKG-INFO-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    def test_transforming_metadata_json(self):
        """Test normalizing of data from metadata.json."""
        data = self._load_json('metadata-json-from-mercator')
        expected = self._load_json('metadata-json-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    def test_transforming_rubygems_metadata_yaml(self):
        """Test DataNormalizer.()."""
        data = self._load_json('rubygems-metadata-json-from-mercator')
        expected = self._load_json('rubygems-metadata-json-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    @pytest.mark.parametrize('args, expected', [
        # correct
        ({'data': {'required_rubygem_version': {"requirements": [[">=", {"version": "1.9.2"}]]}},
          'key': 'required_rubygem_version'},
         '>=1.9.2'),
        # bad
        ({'data': {'required_ruby_version': {"requirement": [[">=", {"version": "1.9.2"}]]}},
          'key': 'required_ruby_version'},
         None),
        # bad
        ({'data': {'required_ruby_version': {"requirements": [[{"version": "1.9.2"}, ">="]]}},
          'key': 'required_ruby_version'},
         None),
    ])
    def test__extract_engine_requirements(self, args, expected):
        """Test DataNormalizer._extract_engine_requirements()."""
        assert self._dataNormalizer._extract_engine_requirements(**args) == expected

    @pytest.mark.parametrize('data, expected', [
        ({'author': {'name': 'Santa Claus', 'email': '*****@*****.**', 'url': 'north'}},
         {'author': 'Santa Claus <*****@*****.**>'}),
        ({'contributors': [{'email': '*****@*****.**', 'name': 'mscdex', 'url': 'there'},
                           {'email': '*****@*****.**', 'name': 'fishrock123'}]},
         {'contributors': ['mscdex <*****@*****.**>',
                           'fishrock123 <*****@*****.**>']}),
        ({'maintainers': [{'email': '*****@*****.**', 'name': 'mscdex', 'url': 'there'},
                          {'email': '*****@*****.**', 'name': 'fishrock123'}]},
         {'maintainers': ['mscdex <*****@*****.**>',
                          'fishrock123 <*****@*****.**>']}),
        ({'bugs': {'url': 'https://github.com/owner/project/issues', 'email': '*****@*****.**'}},
         {'bug_reporting': 'https://github.com/owner/project/issues <*****@*****.**>'}),
        ({'license': 'BSD-3-Clause'},
         {'declared_licenses': ['BSD-3-Clause']}),
        ({'license': '(ISC OR GPL-3.0)'},
         {'declared_licenses': ['ISC', 'GPL-3.0']}),
        # deprecated, but used in older packages
        ({'license': {'type': 'ISC',
                      'url': 'http://opensource.org/licenses/ISC'}},
         {'declared_licenses': ['ISC']}),
        # deprecated, but used in older packages
        ({'licenses': [{'type': 'MIT',
                        'url': 'http://www.opensource.org/licenses/mit-license.php'},
                       {'type': 'Apache-2.0',
                        'url': 'http://opensource.org/licenses/apache2.0.php'}]},
         {'declared_licenses': ['MIT', 'Apache-2.0']}),
        ({'repository': {'type': 'git', 'url': 'https://github.com/npm/npm.git'}},
         {'code_repository': {'type': 'git', 'url': 'https://github.com/npm/npm.git'}}),
        ({'repository': 'expressjs/express'},
         {'code_repository': {'type': 'git', 'url': 'https://github.com/expressjs/express.git'}}),
        ({'repository': 'bitbucket:exmpl/repo'},
         {'code_repository': {'type': 'git', 'url': 'https://[email protected]/exmpl/repo.git'}}),
        ({'repository': 'gitlab:another/repo'},
         {'code_repository': {'type': 'git', 'url': 'https://gitlab.com/another/repo.git'}}),
        ({'dependencies': {"escape-html": "1.0.1"}},
         {'dependencies': ["escape-html 1.0.1"]}),
        ({'devDependencies': {'mocha': '~2.0.0'}},
         {'devel_dependencies': ['mocha ~2.0.0']}),
    ])
    def test_transforming_javascript_data(self, data, expected):
        """Test normalizing of npm package metadata."""
        transformed_data = self._dataNormalizer._handle_javascript(data)
        for key, value in expected.items():
            assert key in transformed_data
            assert transformed_data[key] == value

    def test_transforming_npm_shrinkwrap_data(self):
        """Test normalizing of npm's shrinkwrap.json data."""
        data = self._load_json('npm-with-shrinkwrap-json-from-mercator')
        expected = self._load_json('npm-with-shrinkwrap-json-expected')
        assert compare_dictionaries(self._dataNormalizer.handle_data(data), expected)

    def test_transforming_java(self):
        """Test normalizing of pom.xml data."""
        data = self._load_json('pom-xml-from-mercator')
        expected = self._load_json('pom-xml-expected')
        assert compare_dictionaries(self._dataNormalizer.handle_data(data['items'][0]), expected)

    def test_transforming_nuspec(self):
        """Test normalizing of nuspec data."""
        data = self._load_json('nuspec-from-mercator')
        expected = self._load_json('nuspec-expected')
        assert compare_dictionaries(self._dataNormalizer.handle_data(data['items'][0]), expected)

    @pytest.mark.parametrize('transformed_data, expected', [
        ({'dependencies': ["escape-html 1.0.1"]},
         {'dependencies': ["escape-html 1.0.1"]}),
        ({'dependencies': None},
         {'dependencies': []}),
        ({'devel_dependencies': ['mocha ~2.0.0']},
         {'devel_dependencies': ['mocha ~2.0.0']}),
        ({'devel_dependencies': None},
         {'devel_dependencies': []}),
    ])
    def test_sanitizing_data(self, transformed_data, expected):
        """Test DataNormalizer._sanitize_data()."""
        sanitized_data = self._dataNormalizer._sanitize_data(transformed_data)
        for key, value in expected.items():
            assert key in sanitized_data
            assert sanitized_data[key] == value

    @staticmethod
    def sort_by_path(dict_):
        """Sort dict_ by length of 'path' of it's members."""
        return sorted(dict_, key=lambda a: len(a['path'].split(path.sep)))

    def test_get_outermost_items(self):
        """Test DataNormalizer.get_outermost_items()."""
        d = [{'path': '/a/b/c/d'}, {'path': '/a/b/c'}, {'path': '/a'}]
        assert self._dataNormalizer.get_outermost_items(d) == [{'path': '/a'}]

        d = [{'path': 'bbb'}, {'path': 'a/b/c/'}]
        assert self._dataNormalizer.get_outermost_items(d) == [{'path': 'bbb'}]

        d = [{'path': '/a/b'}, {'path': '/b/c'}, {'path': '/c/d/e'}]
        expected = self.sort_by_path([{'path': '/a/b'}, {'path': '/b/c'}])
        result = self.sort_by_path(self._dataNormalizer.get_outermost_items(d))
        assert len(result) == len(expected)
        for i in range(len(expected)):
            assert compare_dictionaries(result[i], expected[i])

    @pytest.mark.parametrize('data, expected', [
        ({'pom.xml': {'dependencies': {'compile': {'g:a::': '1.0'}}}},
         {'dependencies': ['g:a 1.0']}),
        ({'pom.xml': {'dependencies': {'runtime': {'g:a::': '1.0'}}}},
         {'dependencies': ['g:a 1.0']}),
        ({'pom.xml': {'dependencies': {'provided': {'g:a::': '1.0'}}}},
         {'dependencies': ['g:a 1.0']}),
        ({'pom.xml': {'dependencies': {'test': {'g:a::': '1.0'}}}},
         {'devel_dependencies': ['g:a 1.0']}),
        ({'pom.xml': {'dependencies': {'compile': {'g:a::': '1.0', 'g2:a2::': '1.0.3-SNAPSHOT'},
                                       'test': {'t:t::': '0'},
                                       'runtime': {'r:r::': 'version'},
                                       'provided': {'p:p::': '1000'}}}},
         {'dependencies': sorted(['g:a 1.0', 'g2:a2 1.0.3-SNAPSHOT', 'r:r version', 'p:p 1000']),
          'devel_dependencies': sorted(['t:t 0'])}),
        ({'pom.xml': {'scm_url': '[email protected]:fabric8-analytics/fabric8-analytics-worker.git'}},
         {'code_repository': {'url':
                              '[email protected]:fabric8-analytics/fabric8-analytics-worker.git',
                              'type': 'git'}}),
        ({'pom.xml': {'licenses': ['ASL 2.0', 'MIT']}},
         {'declared_licenses': ['ASL 2.0', 'MIT']}),
        ({'pom.xml': {'description': 'Ich bin ein Bayesianer'}},
         {'description': 'Ich bin ein Bayesianer'}),
        ({'pom.xml': {'url': 'https://github.com/fabric8-analytics/fabric8-analytics-worker'}},
         {'homepage': 'https://github.com/fabric8-analytics/fabric8-analytics-worker'}),
    ])
    def test_transforming_java_data(self, data, expected):
        """Test normalizing of pom.xml data."""
        transformed_data = self._dataNormalizer._handle_java(data)
        for key, value in expected.items():
            assert key in transformed_data
            transformed_value = sorted(transformed_data[key]) \
                if isinstance(transformed_data[key], list) else transformed_data[key]
            assert transformed_value == value

    @pytest.mark.parametrize('data, expected', [
        ({'ecosystem': 'gofedlib', 'result': {
            'deps-main': [],
            'deps-packages': ['https://github.com/gorilla/context']}},
         {'ecosystem': 'gofedlib', 'dependencies': ['github.com/gorilla/context']}),
        ({'ecosystem': 'gofedlib',
          'result': {'deps-main': ['https://github.com/gorilla/sessions',
                                   'https://github.com/gorilla/context'],
                     'deps-packages': ['https://github.com/gorilla/context']}},
         {'ecosystem': 'gofedlib', 'dependencies': ['github.com/gorilla/context',
                                                    'github.com/gorilla/sessions']}),
    ])
    def test_transforming_gofedlib_data(self, data, expected):
        """Test normalizing of gofedlib data."""
        transformed_data = self._dataNormalizer.handle_data(data)
        for key, value in expected.items():
            assert key in transformed_data
            actual_value = transformed_data[key]
            if isinstance(actual_value, list):
                actual_value.sort()
            assert actual_value == value

    @staticmethod
    def _normalize_javancss_output(output):
        """Parse and normalize JavaNCSS ASCII output.

        :param output: output dict to be normalized
        :return: normalized output
        """
        output = output.get('javancss', {})
        result = {'functions': {}, 'objects': {}, 'packages': {}}
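        # Target shape (illustrative): {'functions': {'function': [...],
        #   'average_cyclomatic_complexity': ..., 'average_javadocs': ...},
        #   'objects': {...}, 'packages': {...}}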

        # The output of JavaNCSS is an XML, which is parsed using anymarkup.
        # This can introduce some pitfalls here if there is found exactly one
        # item of a type. E.g.:
        #
        #  <functions>
        #    <function>...<function/>
        #  <functions>
        #
        # Is parsed as object 'functions' containing *one object* 'function', whereas:
        #
        #  <functions>
        #    <function>...<function/>
        #    <function>...<function/>
        #  <functions>
        #
        # Is parsed as object 'functions' containing a *list of objects*
        # 'function'. Thus the isinstance(.., list) checks.

        # Parse functions section
        if 'functions' in output:
            functions = output['functions']

            wanted_function_keys = (('ccn', 'cyclomatic_complexity'),
                                    ('javadocs', ), ('name', ))

            result['functions']['function'] = []
            if 'function' in functions:
                if not isinstance(functions['function'], list):
                    functions['function'] = [functions['function']]

                for function in functions['function']:
                    result['functions']['function'].append(
                        DataNormalizer.transform_keys(function,
                                                      wanted_function_keys))

            function_averages = functions.get('function_averages', {})

            result['functions'][
                'average_cyclomatic_complexity'] = function_averages.get('ccn')
            result['functions']['average_javadocs'] = function_averages.get(
                'javadocs')

        # Parse objects section
        if 'objects' in output:
            objects = output['objects']

            wanted_objects_keys = (('classes', ), ('functions', ), ('name', ),
                                   ('javadocs', ))

            result['objects']['object'] = []
            if 'object' in objects:
                if not isinstance(objects['object'], list):
                    objects['object'] = [objects['object']]

                for obj in objects['object']:
                    result['objects']['object'].append(
                        DataNormalizer.transform_keys(obj,
                                                      wanted_objects_keys))

            object_averages = objects.get('averages', {})

            result['objects']['average_classes'] = object_averages.get(
                'classes')
            result['objects']['average_functions'] = object_averages.get(
                'functions')
            result['objects']['average_javadocs'] = object_averages.get(
                'javadocs')

        # Parse packages section
        if 'packages' in output:
            packages = output['packages']

            packages_total = packages.get('total', {})

            result['packages']['classes'] = packages_total.get('classes')
            result['packages']['functions'] = packages_total.get('functions')
            result['packages']['javadoc_lines'] = packages_total.get(
                'javadoc_lines')
            result['packages']['javadocs'] = packages_total.get('javadocs')
            result['packages']['multi_comment_lines'] = packages_total.get(
                'multi_comment_lines')
            result['packages']['single_comment_lines'] = packages_total.get(
                'single_comment_lines')

        return result