def _normalize_complexity_report_output(output, source_path):
        """ Normalize complexity_report output
        See https://github.com/escomplex/escomplex/blob/master/README.md#metrics

        :param output: output dict to be normalized
        :param source_path: path to sources that was used
        :return: normalized output
        """
        # For metrics meaning see:
        wanted_keys = (('maintainability', 'project_maintainability'),
                       ('changeCost', 'cost_change'),
                       ('cyclomatic', 'average_cyclomatic_complexity'),
                       ('effort', 'average_halstead_effort'),
                       ('firstOrderDensity', 'first_order_density'),
                       ('loc', 'average_function_lines_of_code'),
                       ('params', 'average_function_parameters_count'),
                       ('reports', 'modules'))
        output = DataNormalizer.transform_keys(output, wanted_keys)

        wanted_module_keys = (('maintainability', 'module_maintainability'),
                              ('dependencies', ),
                              ('loc', 'average_function_lines_of_code'),
                              ('path', ),
                              ('params', 'average_function_parameters_count'),
                              ('functions', ))

        for idx, module in enumerate(output.get('modules', [])):
            output['modules'][idx] = DataNormalizer.transform_keys(
                module, wanted_module_keys)

            if 'path' in module:
                # strip the local source path prefix plus its trailing separator
                source_path_len = len(source_path) + 1
                output['modules'][idx]['path'] = module['path'][
                    source_path_len:]

            # 'functions' may be missing or None for a module
            for function in module.get('functions') or []:
                if 'cyclomaticDensity' in function:
                    function['cyclomatic_density'] = function.pop(
                        'cyclomaticDensity')

        return output
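
# DataNormalizer.transform_keys itself is not shown on this page. Below is a
# minimal sketch consistent with the TestDataNormalizer cases further down
# (assumed behaviour; the real implementation may differ):
def transform_keys_sketch(data, keymap):
    """Pick and/or rename keys from `data` according to `keymap` entries:

    ('key',)             -> keep 'key' as-is (None if absent)
    ('old', 'new')       -> copy 'old' under the name 'new' (None if absent)
    (('a', 'b'),)        -> keep whichever of 'a'/'b' is present, under its own name
    (('a', 'b'), 'new')  -> copy whichever of 'a'/'b' is present under 'new'
    """
    result = {}
    for entry in keymap:
        candidates = entry[0] if isinstance(entry[0], tuple) else (entry[0],)
        target = entry[1] if len(entry) > 1 else None
        found = next((key for key in candidates if key in data), None)
        if found is None:
            # absent keys are reported explicitly as None (assumed for the
            # several-candidates case, which the tests do not cover)
            result[target or candidates[0]] = None
        else:
            result[target or found] = data[found]
    return result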

    def _get_generic_result(self, source_path):
        """Get the core result of the CodeMetricsTask task, based on the cloc tool.

        This output is later enriched with the output of language-specific
        tools, for the languages that cloc found.

        :param source_path: path to sources where the analyzed artefact resides
        :return: tuple of (generic information, ecosystem-specific dict)
        """
        command = ['cloc', '--json', source_path]
        status, output, error = self._run_analyzer(command)

        if status != 0:
            # Let the whole task fail
            raise RuntimeError("Running cloc command failed: '%s'" % error)

        # cloc puts its generic summary under 'header'; keep those values at
        # the top level and drop the now-misleading key
        header = {
            'total_files': output['header'].pop('n_files'),
            'total_lines': output['header'].pop('n_lines')
        }
        output.pop('header')

        if 'SUM' in output:
            header['blank_lines'] = output['SUM']['blank']
            header['comment_lines'] = output['SUM']['comment']
            header['code_lines'] = output['SUM']['code']
            output.pop('SUM', None)

        # rename keys to be more descriptive
        wanted_keys = (('blank', 'blank_lines'), ('code', 'code_lines'),
                       ('comment', 'comment_lines'), ('nFiles', 'files_count'))
        for key in output.keys():
            # filter only language-specific results, leave statistics untouched
            if isinstance(output[key], dict):
                output[key] = DataNormalizer.transform_keys(
                    output[key], wanted_keys)

        return header, output
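
# Illustrative (trimmed) cloc --json output and what _get_generic_result makes
# of it; exact header fields vary with the cloc version:
#
#   {"header": {"n_files": 3, "n_lines": 120, ...},
#    "Python": {"nFiles": 3, "blank": 15, "comment": 20, "code": 85},
#    "SUM":    {"nFiles": 3, "blank": 15, "comment": 20, "code": 85}}
#
# becomes
#
#   header = {'total_files': 3, 'total_lines': 120, 'blank_lines': 15,
#             'comment_lines': 20, 'code_lines': 85}
#   output = {'Python': {'files_count': 3, 'blank_lines': 15,
#                        'comment_lines': 20, 'code_lines': 85}}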

def _normalize_javancss_output(output):
        """Normalize JavaNCSS XML output that has been pre-parsed into a dict

        :param output: output dict to be normalized
        :return: normalized output
        """
        output = output.get('javancss', {})
        result = {'functions': {}, 'objects': {}, 'packages': {}}

        # The output of JavaNCSS is XML, which is parsed using anymarkup. This
        # introduces a pitfall when exactly one item of a type is found. E.g.:
        #
        #  <functions>
        #    <function>...</function>
        #  </functions>
        #
        # is parsed as object 'functions' containing *one object* 'function', whereas:
        #
        #  <functions>
        #    <function>...</function>
        #    <function>...</function>
        #  </functions>
        #
        # is parsed as object 'functions' containing a *list of objects*
        # 'function'. Hence the isinstance(.., list) checks below.

        # Parse functions section
        if 'functions' in output:
            functions = output['functions']

            wanted_function_keys = (('ccn', 'cyclomatic_complexity'),
                                    ('javadocs', ), ('name', ))

            result['functions']['function'] = []
            if 'function' in functions:
                if not isinstance(functions['function'], list):
                    functions['function'] = [functions['function']]

                for function in functions['function']:
                    result['functions']['function'].append(
                        DataNormalizer.transform_keys(function,
                                                      wanted_function_keys))

            function_averages = functions.get('function_averages', {})

            result['functions']['average_cyclomatic_complexity'] = \
                function_averages.get('ccn')
            result['functions']['average_javadocs'] = \
                function_averages.get('javadocs')

        # Parse objects section
        if 'objects' in output:
            objects = output['objects']

            wanted_objects_keys = (('classes', ), ('functions', ), ('name', ),
                                   ('javadocs', ))

            result['objects']['object'] = []
            if 'object' in objects:
                if not isinstance(objects['object'], list):
                    objects['object'] = [objects['object']]

                for obj in objects['object']:
                    result['objects']['object'].append(
                        DataNormalizer.transform_keys(obj,
                                                      wanted_objects_keys))

            object_averages = objects.get('averages', {})

            result['objects']['average_classes'] = object_averages.get(
                'classes')
            result['objects']['average_functions'] = object_averages.get(
                'functions')
            result['objects']['average_javadocs'] = object_averages.get(
                'javadocs')

        # Parse packages section
        if 'packages' in output:
            packages = output['packages']

            packages_total = packages.get('total', {})

            result['packages']['classes'] = packages_total.get('classes')
            result['packages']['functions'] = packages_total.get('functions')
            result['packages']['javadoc_lines'] = packages_total.get(
                'javadoc_lines')
            result['packages']['javadocs'] = packages_total.get('javadocs')
            result['packages']['multi_comment_lines'] = packages_total.get(
                'multi_comment_lines')
            result['packages']['single_comment_lines'] = packages_total.get(
                'single_comment_lines')

        return result
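
# The single-item pitfall described above is generic to XML-to-dict parsing.
# A helper like this one (hypothetical, not part of the code above) would
# express the isinstance checks in one place:
def ensure_list(value):
    """Wrap a single parsed XML element so that callers can always iterate."""
    if value is None:
        return []
    return value if isinstance(value, list) else [value]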

import json
import os
from os import path

import pytest

# DataNormalizer and the compare_dictionaries() helper come from the project
# under test; their import paths are omitted here.


class TestDataNormalizer(object):
    def setup_method(self, method):
        self.data = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                 'data', 'dataNormalizer')
        self._dataNormalizer = DataNormalizer()

    def _load_json(self, fname):
        with open(os.path.join(self.data, fname), encoding='utf-8') as f:
            return json.load(f)

    @pytest.mark.parametrize(
        'args, expected',
        [
            # pick one key which IS there
            ({
                'data': {
                    'author': 'me',
                    'version': '0.1.2'
                },
                'keymap': (('author', ), )
            }, {
                'author': 'me'
            }),
            # pick one key which IS NOT there
            ({
                'data': {
                    'author-name': 'me',
                    'version': '0.1.2'
                },
                'keymap': (('author', ), )
            }, {
                'author': None
            }),
            # pick and rename one key which IS there
            ({
                'data': {
                    'author-name': 'me'
                },
                'keymap': ((
                    'author-name',
                    'author',
                ), )
            }, {
                'author': 'me'
            }),
            # pick and rename one key which IS NOT there
            ({
                'data': {
                    'authors': 'they'
                },
                'keymap': ((
                    'author-name',
                    'author',
                ), )
            }, {
                'author': None
            }),
            # pick one of several keys (first candidate present)
            ({
                'data': {
                    'license': 'MIT'
                },
                'keymap': (((
                    'license',
                    'licenses',
                ), ), )
            }, {
                'license': 'MIT'
            }),
            # pick one of several keys (second candidate present)
            ({
                'data': {
                    'licenses': ['MIT', 'BSD']
                },
                'keymap': (((
                    'license',
                    'licenses',
                ), ), )
            }, {
                'licenses': ['MIT', 'BSD']
            }),
            # pick one of several keys and rename it
            ({
                'data': {
                    'license': 'MIT'
                },
                'keymap': (((
                    'license',
                    'licenses',
                ), 'declared_license'), )
            }, {
                'declared_license': 'MIT'
            }),
        ])
    def test__transform_keys(self, args, expected):
        assert self._dataNormalizer.transform_keys(**args) == expected

    @pytest.mark.parametrize('args, expected', [
        ({
            'name_email_dict': {
                'name': 'A',
                'email': '*****@*****.**'
            }
        }, "A <*****@*****.**>"),
        ({
            'name_email_dict': {
                'name': 'A'
            }
        }, "A"),
        ({
            'name_email_dict': {
                'email': '*****@*****.**'
            }
        }, "<*****@*****.**>"),
        ({
            'name_email_dict': {
                'author': 'A',
                'author-email': '*****@*****.**'
            },
            'name_key': 'author',
            'email_key': 'author-email'
        }, "A <*****@*****.**>"),
        ({
            'name_email_dict': {
                'url': 'https://github.com/o/p/issues',
                'email': '*****@*****.**'
            },
            'name_key': 'url'
        }, "https://github.com/o/p/issues <*****@*****.**>"),
    ])
    def test__join_name_email(self, args, expected):
        assert self._dataNormalizer._join_name_email(**args) == expected

    @pytest.mark.parametrize(
        'args, expected',
        [
            ({
                'data': {}
            }, False),
            # package.json (nodejs), no 'scripts'
            ({
                'data': {
                    "scripts": None
                }
            }, False),
            # package.json (nodejs), missing "test"
            ({
                'data': {
                    "scripts": {
                        "docs": "jsdoc2md -t ..."
                    }
                }
            }, False),
            # package.json, default 'npm init' test script
            ({
                'data': {
                    "scripts": {
                        "test": "echo \"Error: no test specified\" && exit 1"
                    }
                }
            }, False),
            # package.json, ok
            ({
                'data': {
                    "scripts": {
                        "test": "tape test/*.js",
                        "docs": "jsdoc2md -t"
                    }
                }
            }, True),
            # setup.py, ok
            ({
                'data': {
                    'tests_require': ['mock']
                }
            }, True),
            # metadata.json (Python)
            ({
                'data': {
                    "test_requires": [{
                        "requires": ["mock (==1.0.1)", "pytest (==2.9.1)"]
                    }]
                }
            }, True),
        ])
    def test__are_tests_implemented(self, args, expected):
        assert self._dataNormalizer._are_tests_implemented(**args) == expected

    def test_transforming_setup_py(self):
        data = self._load_json('setup-py-from-mercator')
        expected = self._load_json('setup-py-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    def test_transforming_pkginfo(self):
        data = self._load_json('PKG-INFO-from-mercator')
        expected = self._load_json('PKG-INFO-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    def test_transforming_metadata_json(self):
        data = self._load_json('metadata-json-from-mercator')
        expected = self._load_json('metadata-json-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    def test_transforming_rubygems_metadata_yaml(self):
        data = self._load_json('rubygems-metadata-json-from-mercator')
        expected = self._load_json('rubygems-metadata-json-expected')
        assert self._dataNormalizer.handle_data(data['items'][0]) == expected

    @pytest.mark.parametrize(
        'args, expected',
        [
            # correct
            ({
                'data': {
                    'required_rubygem_version': {
                        "requirements": [[">=", {
                            "version": "1.9.2"
                        }]]
                    }
                },
                'key': 'required_rubygem_version'
            }, '>=1.9.2'),
            # bad
            ({
                'data': {
                    'required_ruby_version': {
                        "requirement": [[">=", {
                            "version": "1.9.2"
                        }]]
                    }
                },
                'key': 'required_ruby_version'
            }, None),
            # bad
            ({
                'data': {
                    'required_ruby_version': {
                        "requirements": [[{
                            "version": "1.9.2"
                        }, ">="]]
                    }
                },
                'key': 'required_ruby_version'
            }, None),
        ])
    def test__extract_engine_requirements(self, args, expected):
        assert self._dataNormalizer._extract_engine_requirements(
            **args) == expected

    @pytest.mark.parametrize(
        'data, expected',
        [
            ({
                'author': {
                    'name': 'Santa Claus',
                    'email': '*****@*****.**',
                    'url': 'north'
                }
            }, {
                'author': 'Santa Claus <*****@*****.**>'
            }),
            ({
                'contributors': [{
                    'email': '*****@*****.**',
                    'name': 'mscdex',
                    'url': 'there'
                }, {
                    'email': '*****@*****.**',
                    'name': 'fishrock123'
                }]
            }, {
                'contributors': [
                    'mscdex <*****@*****.**>',
                    'fishrock123 <*****@*****.**>'
                ]
            }),
            ({
                'maintainers': [{
                    'email': '*****@*****.**',
                    'name': 'mscdex',
                    'url': 'there'
                }, {
                    'email': '*****@*****.**',
                    'name': 'fishrock123'
                }]
            }, {
                'maintainers': [
                    'mscdex <*****@*****.**>',
                    'fishrock123 <*****@*****.**>'
                ]
            }),
            ({
                'bugs': {
                    'url': 'https://github.com/owner/project/issues',
                    'email': '*****@*****.**'
                }
            }, {
                'bug_reporting':
                'https://github.com/owner/project/issues <*****@*****.**>'
            }),
            ({
                'license': 'BSD-3-Clause'
            }, {
                'declared_license': 'BSD-3-Clause'
            }),
            ({
                'license': '(ISC OR GPL-3.0)'
            }, {
                'declared_license': '(ISC OR GPL-3.0)'
            }),
            # deprecated, but used in older packages
            ({
                'license': {
                    'type': 'ISC',
                    'url': 'http://opensource.org/licenses/ISC'
                }
            }, {
                'declared_license': 'ISC'
            }),
            # deprecated, but used in older packages
            ({
                'licenses':
                [{
                    'type': 'MIT',
                    'url': 'http://www.opensource.org/licenses/mit-license.php'
                }, {
                    'type': 'Apache-2.0',
                    'url': 'http://opensource.org/licenses/apache2.0.php'
                }]
            }, {
                'declared_license': 'MIT, Apache-2.0'
            }),
            ({
                'repository': {
                    'type': 'git',
                    'url': 'https://github.com/npm/npm.git'
                }
            }, {
                'code_repository': {
                    'type': 'git',
                    'url': 'https://github.com/npm/npm.git'
                }
            }),
            ({
                'repository': 'expressjs/express'
            }, {
                'code_repository': {
                    'type': 'git',
                    'url': 'https://github.com/expressjs/express.git'
                }
            }),
            ({
                'repository': 'bitbucket:exmpl/repo'
            }, {
                'code_repository': {
                    'type': 'git',
                    'url': 'https://[email protected]/exmpl/repo.git'
                }
            }),
            ({
                'repository': 'gitlab:another/repo'
            }, {
                'code_repository': {
                    'type': 'git',
                    'url': 'https://gitlab.com/another/repo.git'
                }
            }),
            ({
                'dependencies': {
                    "escape-html": "1.0.1"
                }
            }, {
                'dependencies': ["escape-html 1.0.1"]
            }),
            ({
                'devDependencies': {
                    'mocha': '~2.0.0'
                }
            }, {
                'devel_dependencies': ['mocha ~2.0.0']
            }),
        ])
    def test_transforming_javascript_data(self, data, expected):
        transformed_data = self._dataNormalizer._handle_javascript(data)
        for key, value in expected.items():
            assert key in transformed_data
            assert transformed_data[key] == value

    def test_transforming_npm_shrinkwrap_data(self):
        data = self._load_json('npm-with-shrinkwrap-json-from-mercator')
        expected = self._load_json('npm-with-shrinkwrap-json-expected')
        assert compare_dictionaries(self._dataNormalizer.handle_data(data),
                                    expected)

    @pytest.mark.parametrize('transformed_data, expected', [
        ({
            'dependencies': ["escape-html 1.0.1"]
        }, {
            'dependencies': ["escape-html 1.0.1"]
        }),
        ({
            'dependencies': None
        }, {
            'dependencies': []
        }),
        ({
            'devel_dependencies': ['mocha ~2.0.0']
        }, {
            'devel_dependencies': ['mocha ~2.0.0']
        }),
        ({
            'devel_dependencies': None
        }, {
            'devel_dependencies': []
        }),
    ])
    def test_sanitizing_data(self, transformed_data, expected):
        sanitized_data = self._dataNormalizer._sanitize_data(transformed_data)
        for key, value in expected.items():
            assert key in sanitized_data
            assert sanitized_data[key] == value

    def sort_by_path(self, dict_):
        return sorted(dict_, key=lambda a: len(a['path'].split(path.sep)))

    def test_get_outermost_items(self):
        d = [{'path': '/a/b/c/d'}, {'path': '/a/b/c'}, {'path': '/a'}]
        assert self._dataNormalizer.get_outermost_items(d) == [{'path': '/a'}]

        d = [{'path': 'bbb'}, {'path': 'a/b/c/'}]
        assert self._dataNormalizer.get_outermost_items(d) == [{'path': 'bbb'}]

        d = [{'path': '/a/b'}, {'path': '/b/c'}, {'path': '/c/d/e'}]
        expected = self.sort_by_path([{'path': '/a/b'}, {'path': '/b/c'}])
        result = self.sort_by_path(self._dataNormalizer.get_outermost_items(d))
        assert len(result) == len(expected)
        for i in range(len(expected)):
            assert compare_dictionaries(result[i], expected[i])

    @pytest.mark.parametrize('data, expected', [
        ({
            'pom.xml': {
                'dependencies': {
                    'compile': {
                        'g:a::': '1.0'
                    }
                }
            }
        }, {
            'dependencies': ['g:a 1.0']
        }),
        ({
            'pom.xml': {
                'dependencies': {
                    'runtime': {
                        'g:a::': '1.0'
                    }
                }
            }
        }, {
            'dependencies': ['g:a 1.0']
        }),
        ({
            'pom.xml': {
                'dependencies': {
                    'provided': {
                        'g:a::': '1.0'
                    }
                }
            }
        }, {
            'dependencies': ['g:a 1.0']
        }),
        ({
            'pom.xml': {
                'dependencies': {
                    'test': {
                        'g:a::': '1.0'
                    }
                }
            }
        }, {
            'devel_dependencies': ['g:a 1.0']
        }),
        ({
            'pom.xml': {
                'dependencies': {
                    'compile': {
                        'g:a::': '1.0',
                        'g2:a2::': '1.0.3-SNAPSHOT'
                    },
                    'test': {
                        't:t::': '0'
                    },
                    'runtime': {
                        'r:r::': 'version'
                    },
                    'provided': {
                        'p:p::': '1000'
                    }
                }
            }
        }, {
            'dependencies':
            sorted([
                'g:a 1.0', 'g2:a2 1.0.3-SNAPSHOT', 'r:r version', 'p:p 1000'
            ]),
            'devel_dependencies':
            sorted(['t:t 0'])
        }),
        ({
            'pom.xml': {
                'scm_url': '[email protected]:redhat-developer/Bayesian.git'
            }
        }, {
            'code_repository': {
                'url': '[email protected]:redhat-developer/Bayesian.git',
                'type': 'unknown'
            }
        }),
        ({
            'pom.xml': {
                'licenses': ['ASL 2.0', 'MIT']
            }
        }, {
            'declared_license': 'ASL 2.0, MIT'
        }),
        ({
            'pom.xml': {
                'description': 'Ich bin ein Bayesianer'
            }
        }, {
            'description': 'Ich bin ein Bayesianer'
        }),
        ({
            'pom.xml': {
                'url': 'https://github.com/redhat-developer/Bayesian'
            }
        }, {
            'homepage': 'https://github.com/redhat-developer/Bayesian'
        }),
    ])
    def test_transforming_java_data(self, data, expected):
        transformed_data = self._dataNormalizer._handle_java(data)
        for key, value in expected.items():
            assert key in transformed_data
            transformed_value = sorted(transformed_data[key]) if isinstance(
                transformed_data[key], list) else transformed_data[key]
            assert transformed_value == value
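
# DataNormalizer.get_outermost_items is exercised above but not shown on this
# page. A sketch consistent with those test cases (assumed behaviour; the real
# implementation may differ) keeps the items whose 'path' has the fewest
# components, i.e. the ones closest to the root:
def get_outermost_items_sketch(items):
    """Return the items whose 'path' is closest to the filesystem root."""
    def depth(item):
        # number of path components; trailing separators are ignored
        return len(item['path'].rstrip(os.path.sep).split(os.path.sep))

    if not items:
        return []
    shallowest = min(depth(item) for item in items)
    return [item for item in items if depth(item) == shallowest]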

import shutil
import tempfile

# BaseTask, SchemaRef, DataNormalizer, TimedCommand, ObjectCache, Git and
# EcosystemBackend come from the surrounding worker project; their import
# paths are omitted here.

class MercatorTask(BaseTask):
    _analysis_name = 'metadata'
    _dependency_tree_lock = '_dependency_tree_lock'
    description = 'Collects `Release` specific information from Mercator'
    schema_ref = SchemaRef(_analysis_name, '3-1-1')
    _data_normalizer = DataNormalizer()

    def _parse_requires_txt(self, path):
        requires = []
        try:
            with open(path, 'r') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('['):
                        # the first named ini-like [section] ends the runtime requirements
                        break
                    elif line:
                        requires.append(line)
        except Exception as e:
            self.log.warning('Failed to process "{p}": {e}'.format(p=path,
                                                                   e=str(e)))

        return requires
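
    # Example: given a requires.txt such as
    #
    #   six
    #   requests>=2.0
    #
    #   [docs]
    #   sphinx
    #
    # _parse_requires_txt() returns ['six', 'requests>=2.0']; everything from
    # the first ini-like [section] (the extras) onwards is skipped.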

    def _merge_python_items(self, topdir, data):
        metadata_json = None
        pkg_info = None
        requirements_txt = None

        def get_depth(path):
            return path.rstrip('/').count('/')

        def is_deeper(item1, item2):
            """Return True if item1 is deeper in the directory hierarchy than
            item2; a None item1 counts as deeper, so any real item replaces it."""
            if item1 is None:
                return True
            return get_depth(item1['path']) > get_depth(item2['path'])

        # find outermost PKG_INFO/metadata.json/requirements.txt - there can be
        #  testing ones etc.
        for item in data['items']:
            if item['ecosystem'] == 'Python-Dist' and item['path'].endswith(
                    '.json'):
                if is_deeper(metadata_json, item):
                    metadata_json = item
            elif item['ecosystem'] == 'Python-Dist':  # PKG-INFO
                # we prefer PKG_INFO files from .egg-info directories,
                #  since these have the very useful `requires.txt` next to them
                if pkg_info is None:
                    pkg_info = item
                else:
                    pkg_info_in_egg = pkg_info['path'].endswith(
                        '.egg-info/PKG-INFO')
                    item_in_egg = item['path'].endswith('.egg-info/PKG-INFO')
                    # rather than one insane condition, we use several less complex ones
                    if pkg_info_in_egg and item_in_egg and is_deeper(
                            pkg_info, item):
                        # if both are in .egg-info, but current pkg_info is deeper
                        pkg_info = item
                    elif item_in_egg and not pkg_info_in_egg:
                        # if item is in .egg-info and current pkg_info is not
                        pkg_info = item
                    elif not (item_in_egg or pkg_info_in_egg) and is_deeper(
                            pkg_info, item):
                        # if neither is in .egg-info, but current pkg_info is deeper
                        pkg_info = item
            elif item['ecosystem'] == 'Python-RequirementsTXT' and is_deeper(
                    requirements_txt, item):
                requirements_txt = item

        if pkg_info:
            self.log.info('Found PKG-INFO at {p}'.format(p=pkg_info['path']))
        if metadata_json:
            self.log.info(
                'Found metadata.json at {p}'.format(p=metadata_json['path']))
        if requirements_txt:
            self.log.info('Found requirements.txt at {p}'.format(
                p=requirements_txt['path']))

        ret = None
        # figure out if this was packaged as wheel => metadata.json would
        #  have depth of topdir + 2
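        # (e.g. a topdir of '/tmp/pkg' has depth 2, while a wheel's
        #  '/tmp/pkg/foo-1.0.dist-info/metadata.json' has depth 4; these
        #  paths are purely illustrative)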
        if metadata_json and get_depth(
                metadata_json['path']) == get_depth(topdir) + 2:
            self.log.info(
                'Seems like this is a wheel, using metadata.json ...')
            ret = metadata_json
        # figure out if this was packaged as sdist => PKG_INFO would
        #  have depth of topdir + 2 or topdir + 3
        #  (and perhaps there are requires.txt or requirements.txt that we could use)
        # NOTE: for now, we always treat requirements.txt as requires_dist
        elif pkg_info and get_depth(pkg_info['path']) <= get_depth(topdir) + 3:
            self.log.info(
                'Seems like this is an sdist or egg, using PKG-INFO ...')
            requires_dist = []
            # in well-made sdists there is a requires.txt next to PKG-INFO
            #  (this is something different from requirements.txt)
            #  TODO: maybe mercator could do this in future
            requires = os.path.join(os.path.dirname(pkg_info['path']),
                                    'requires.txt')
            if os.path.exists(requires):
                self.log.info(
                    'Found a "requires.txt" file next to PKG-INFO, going to use it ...'
                )
                requires_dist = self._parse_requires_txt(requires)
            elif requirements_txt:
                self.log.info(
                    'No "requires.txt" file found next to PKG-INFO, but requirements.txt'
                    ' found, going to use it')
                # if requires.txt can't be found, try requirements.txt
                requires_dist = requirements_txt['result']['dependencies']
            else:
                self.log.info(
                    'Found no usable source of requirements for PKG-INFO :(')
            pkg_info['result']['requires_dist'] = requires_dist
            ret = pkg_info
        elif requirements_txt:
            self.log.info('Only requirements.txt found, going to use it ...')
            requirements_txt['result']['requires_dist'] = \
                requirements_txt['result'].pop('dependencies')
            ret = requirements_txt

        return ret

    def execute(self, arguments):
        "Execute mercator and convert it's output to JSON object"
        self._strict_assert(arguments.get('ecosystem'))

        if 'url' in arguments:
            # run mercator on a git repo
            return self.run_mercator_on_git_repo(arguments)

        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        # TODO: make this even uglier; looks like we didn't get the abstraction quite right
        #       when we were adding support for Java/Maven.
        if self.storage.get_ecosystem(arguments['ecosystem']).is_backed_by(
                EcosystemBackend.maven):
            # cache_path now points directly to the pom
            cache_path = ObjectCache.get_from_dict(arguments).get_pom_xml()
        else:
            cache_path = ObjectCache.get_from_dict(
                arguments).get_extracted_source_tarball()
        return self.run_mercator(arguments, cache_path)

    def run_mercator_on_git_repo(self, arguments):
        self._strict_assert(arguments.get('url'))

        workdir = None
        try:
            workdir = tempfile.mkdtemp()
            repo_url = arguments.get('url')
            repo = Git.clone(repo_url, path=workdir, depth=str(1))
            metadata = self.run_mercator(arguments,
                                         workdir,
                                         keep_path=True,
                                         outermost_only=False,
                                         timeout=900)
            if metadata.get('status', None) != 'success':
                self.log.error('Mercator failed on %s', repo_url)
                return None

            # add some auxiliary information so we can later find the manifest file
            head = repo.rev_parse(['HEAD'])[0]
            for detail in metadata['details']:
                path = detail['path'][len(workdir):]
                # path should look like this:
                # <git-sha1>/path/to/manifest.file
                detail['path'] = head + path

            return metadata
        finally:
            if workdir:
                shutil.rmtree(workdir)

    def run_mercator(self,
                     arguments,
                     cache_path,
                     keep_path=False,
                     outermost_only=True,
                     timeout=300):
        result_data = {'status': 'unknown', 'summary': [], 'details': []}

        mercator_target = arguments.get('cache_sources_path', cache_path)
        tc = TimedCommand(['mercator', mercator_target])
        status, data, err = tc.run(
            timeout=timeout,
            is_json=True,
            update_env={'MERCATOR_JAVA_RESOLVE_POMS': 'true'})
        if status != 0:
            self.log.error(err)
            result_data['status'] = 'error'
            return result_data
        ecosystem_object = self.storage.get_ecosystem(arguments['ecosystem'])
        if ecosystem_object.is_backed_by(EcosystemBackend.pypi):
            # TODO: attempt static setup.py parsing with mercator
            items = [self._merge_python_items(mercator_target, data)]
        else:
            if outermost_only:
                # process only root level manifests (or the ones closest to the root level)
                items = self._data_normalizer.get_outermost_items(
                    data.get('items') or [])
            else:
                items = data.get('items') or []
            self.log.debug('mercator found %i projects, outermost %i',
                           len(data.get('items') or []), len(items))

            if ecosystem_object.is_backed_by(EcosystemBackend.maven):
                # for maven we download both Jar and POM, we consider POM to be *the*
                #  source of information and don't want to duplicate info by including
                #  data from pom included in artifact (assuming it's included)
                items = [
                    item for item in items
                    if item['ecosystem'].lower() == 'java-pom'
                ]
        result_data['details'] = [
            self._data_normalizer.handle_data(item, keep_path=keep_path)
            for item in items
        ]

        result_data['status'] = 'success'
        return result_data
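
# Illustrative shape of run_mercator()'s return value on success (the details
# entries are whatever DataNormalizer.handle_data produced; the values below
# are examples only):
#
#   {'status': 'success',
#    'summary': [],
#    'details': [{'declared_license': 'MIT', 'dependencies': [...], ...}]}
#
# On analyzer failure the status is 'error' and 'details' stays empty.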