def _identify_gh_repo(homepage):
    """Build a code-repository record for a homepage URL.

    :param homepage: URL handed to ``parse_gh_repo``
    :return: ``{'url': homepage, 'type': 'git'}`` when ``parse_gh_repo``
             yields a truthy result for ``homepage``, otherwise ``None``
    """
    # Guard clause: anything parse_gh_repo does not recognise yields no record.
    if not parse_gh_repo(homepage):
        return None
    return {'url': homepage, 'type': 'git'}
Example #2
0
    def get_files_github_url(self, github_url):
        """Clone the repository from GitHub and collect its manifest files.

        Temporary files are removed both before the work starts and after it
        finishes, whether it succeeded or not.

        :param github_url: repository URL, parsed with ``parse_gh_repo``
        :return: list of dicts with keys ``filename``, ``content`` (file bytes
                 decoded as UTF-8) and ``filepath`` (path with
                 ``self.CLONED_DIR`` stripped); empty when the availability
                 check does not answer with HTTP 200
        :raises HTTPError: (status 500) when anything inside the
                 clone-and-read step raises
        """
        collected = []
        repo_path = parse_gh_repo(github_url)
        try:
            self.del_temp_files()
            # Probe the repository first; cloning only happens on HTTP 200.
            probe = get(urljoin(self.PREFIX_URL, repo_path))
            if probe.status_code == 200:
                # NOTE(review): if ``urljoin`` is ``urllib.parse.urljoin``, its
                # third positional argument is ``allow_fragments``, so '.git'
                # would not be appended to the clone URL -- confirm intent.
                clone_url = urljoin(self.PREFIX_GIT_URL, repo_path, '.git')
                Git.clone(clone_url, self.CLONED_DIR)
                for manifest in self.get_manifest_files():
                    name = manifest.get('filename')
                    location = manifest.get('filepath')
                    with open(location, 'rb') as handle:
                        text = handle.read().decode('utf-8')
                    collected.append(dict(
                        filename=name,
                        content=text,
                        filepath=location.replace(self.CLONED_DIR, ''),
                    ))
        except Exception:
            raise HTTPError(500, "Error in reading repo from github.")
        finally:
            self.del_temp_files()

        return collected
Example #3
0
 def _get_repo_name(self, url):
     """Parse ``url`` with ``parse_gh_repo`` and remember the GitHub repo URL.

     On a truthy parse result, ``self._repo_url`` is set to
     ``'https://github.com/'`` followed by that result; otherwise a debug
     message is logged and ``self._repo_url`` is left untouched.

     :param url: URL to parse
     :return: whatever ``parse_gh_repo`` returned (falsy on failure)
     """
     repo_name = parse_gh_repo(url)
     if repo_name:
         self._repo_url = 'https://github.com/' + repo_name
         return repo_name
     self.log.debug('Could not parse Github repo URL %s', url)
     return repo_name
Example #4
0
 def _get_repo_name(self, url):
     """Parse ``url`` with ``parse_gh_repo`` and store the GitHub repo URL.

     A truthy parse result is prefixed with ``'https://github.com/'`` and
     saved in ``self._repo_url``; a falsy one only produces a debug log entry.

     :param url: URL to parse
     :return: the value returned by ``parse_gh_repo`` (falsy on failure)
     """
     repo_name = parse_gh_repo(url)
     if repo_name:
         self._repo_url = 'https://github.com/' + repo_name
         return repo_name
     logger.debug('Could not parse Github repo URL %s', url)
     return repo_name
Example #5
0
def isGhRepo(node_args, key):
    """Tell whether the value found under ``key`` is accepted by ``parse_gh_repo``.

    :param node_args: nested container that is indexed step by step
    :param key: a single key, or a list of keys describing the path to follow
    :return: ``True`` when ``parse_gh_repo`` returns a truthy value for the
             looked-up entry; ``False`` otherwise, including when the lookup
             or the parsing raises any ``Exception``
    """
    try:
        path = key if isinstance(key, list) else [key]
        value = node_args
        # Walk down the nested structure one key at a time.
        for step in path:
            value = value[step]
        return bool(parse_gh_repo(value))
    except Exception:
        # Best-effort predicate: any failure simply means "not a GitHub repo".
        return False
def isGhRepo(node_args, key):
    """Check whether the entry addressed by ``key`` passes ``parse_gh_repo``.

    :param node_args: nested container to look the value up in
    :param key: one key, or a list of keys applied in order
    :return: ``True`` if ``parse_gh_repo`` gives a truthy result for the
             entry, ``False`` if it does not or if any ``Exception`` is raised
    """
    try:
        steps = key if isinstance(key, list) else [key]
        target = reduce(lambda container, step: container[step], steps, node_args)
        return True if parse_gh_repo(target) else False
    except Exception:
        # Missing keys, wrong container types, parser errors: all mean "no".
        return False
    def _handle_java(self, data):
        """Handle data from pom.xml.

        :param data: dict that is expected to carry the parsed pom under the
                     ``'pom.xml'`` key
        :return: dict produced by ``self.transform_keys`` and extended with
                 ``dependencies``, ``devel_dependencies`` and, when the pom has
                 an ``scm_url``, ``code_repository``; ``None`` when ``data``
                 has no ``'pom.xml'`` entry
        """
        # we expect pom.xml to be there, since it's always downloaded to top level by InitTask
        pom = data.get('pom.xml')
        if pom is None:
            return None

        key_map = (('name', ), ('version', ), ('description', ),
                   ('url', 'homepage'), ('licenses', 'declared_licenses'))
        transformed = self.transform_keys(pom, key_map)
        # fall back to "groupId:artifactId" when the pom carries no name
        if transformed['name'] is None:
            transformed['name'] = "{}:{}".format(pom.get('groupId'),
                                                 pom.get('artifactId'))

        scoped_dependencies = pom.get('dependencies', {})
        # dependencies with scope 'compile' and 'runtime' are needed at runtime;
        # dependencies with scope 'provided' are not necessarily runtime dependencies,
        # but they are commonly used for example in web applications
        #
        # Merge into a fresh dict: updating the 'compile' dict in place would
        # silently modify the caller's pom data.
        dependencies_dict = {}
        for scope in ('compile', 'runtime', 'provided'):
            dependencies_dict.update(scoped_dependencies.get(scope, {}))
        # dependencies with scope 'test' are only needed for testing;
        dev_dependencies_dict = scoped_dependencies.get('test', {})

        transformed['dependencies'] = [
            k.rstrip(':') + ' ' + v for k, v in dependencies_dict.items()
        ]

        transformed['devel_dependencies'] = [
            k.rstrip(':') + ' ' + v for k, v in dev_dependencies_dict.items()
        ]

        # handle code_repository
        if 'scm_url' in pom:
            # TODO: there's no way we can tell 100 % what the type is, but we could
            #  try to handle at least some cases, e.g. github will always be git etc
            repo_type = 'git' if parse_gh_repo(pom['scm_url']) else 'unknown'
            transformed['code_repository'] = {
                'url': pom['scm_url'],
                'type': repo_type
            }

        return transformed
Example #8
0
    def normalize(self):
        """Normalize output from Mercator for pom.xml (Maven).

        Reads ``self._raw_data`` and fills ``self._data`` with a fallback
        ``name``, the ``dependencies`` and ``devel_dependencies`` lists and,
        when ``scm_url`` is present, a ``code_repository`` record.

        :return: ``self._data`` after normalization, or ``{}`` when
                 ``self._raw_data`` is empty
        """
        if not self._raw_data:
            return {}

        # fall back to "groupId:artifactId" when no name was provided
        if self._data['name'] is None:
            self._data['name'] = "{}:{}".format(
                self._raw_data.get('groupId'),
                self._raw_data.get('artifactId'))

        scoped_dependencies = self._raw_data.get('dependencies', {})
        # dependencies with scope 'compile' and 'runtime' are needed at runtime;
        # dependencies with scope 'provided' are not necessarily runtime dependencies,
        # but they are commonly used for example in web applications
        #
        # Merge into a fresh dict: updating the 'compile' dict in place would
        # leak 'runtime'/'provided' entries into self._raw_data.
        dependencies_dict = {}
        for scope in ('compile', 'runtime', 'provided'):
            dependencies_dict.update(scoped_dependencies.get(scope, {}))
        # dependencies with scope 'test' are only needed for testing;
        dev_dependencies_dict = scoped_dependencies.get('test', {})

        self._data['dependencies'] = [
            k.rstrip(':') + ' ' + v for k, v in dependencies_dict.items()
        ]

        self._data['devel_dependencies'] = [
            k.rstrip(':') + ' ' + v for k, v in dev_dependencies_dict.items()
        ]

        # handle code_repository
        if 'scm_url' in self._raw_data:
            # TODO: there's no way we can tell 100 % what the type is, but we could
            #  try to handle at least some cases, e.g. github will always be git etc
            repo_type = 'git' if parse_gh_repo(
                self._raw_data['scm_url']) else 'unknown'
            self._data['code_repository'] = {
                'url': self._raw_data['scm_url'],
                'type': repo_type
            }

        return self._data
    def _get_github_readme(self, url):
        """Fetch the first README that can be downloaded for a GitHub repository.

        Every extension listed in ``self.README_TYPES`` is tried in turn by
        formatting ``self._GITHUB_README_PATH`` and issuing a GET request.

        :param url: repository URL, parsed with ``parse_gh_repo``
        :return: ``{'type': <readme type>, 'content': <response text>}`` for
                 the first candidate answered with HTTP 200; ``None`` when the
                 URL cannot be parsed or no candidate is found
        """
        gh_repo = parse_gh_repo(url)
        if not gh_repo:
            return None
        project, repo = gh_repo.split('/')

        for readme_type, extensions in self.README_TYPES.items():
            for ext in extensions:
                # A non-empty extension gets a leading dot; an empty one is used as is.
                suffix = '.' + ext if ext else ext
                readme_url = self._GITHUB_README_PATH.format(project=project, repo=repo,
                                                             extension=suffix)
                response = requests.get(readme_url)
                if response.status_code == 200:
                    self.log.debug('README%s found for type "%s" at "%s"', suffix, readme_type,
                                   readme_url)
                    return {'type': readme_type, 'content': response.text}

                self.log.debug('No README%s found for type "%s" at "%s"', suffix,
                               readme_type, readme_url)

        return None
 def _identify_gh_repo(homepage):
     """Describe ``homepage`` as a git code repository if ``parse_gh_repo`` accepts it.

     :param homepage: URL handed to ``parse_gh_repo``
     :return: ``{'url': homepage, 'type': 'git'}`` on a truthy parse result,
              ``None`` otherwise
     """
     if not parse_gh_repo(homepage):
         return None
     return {'url': homepage, 'type': 'git'}
    def get_manifest_details(self, github_url):
        """Locate supported manifest files in a GitHub repository via the GitHub API.

        The sha that the ``master`` branch ref points to is looked up first,
        then the recursive tree for that sha is listed and filtered for
        ``requirements.txt``, ``pom.xml`` and ``package.json``.

        :param github_url: repository URL, parsed with ``parse_gh_repo``
        :return: list of dicts with keys ``filename``, ``download_url`` and
                 ``filepath`` (directory part of the path inside the repo);
                 ``None`` when the URL cannot be parsed, a request fails, a
                 response is not HTTP 200 or a payload lacks the expected keys
        """
        manifest_data = []
        supported_manifests = {
            'requirements.txt': True,
            'pom.xml': True,
            'package.json': True
        }
        repo_tuple = parse_gh_repo(github_url)
        if not repo_tuple:
            return None
        project, repo = repo_tuple.split('/')

        last_commit_url = 'https://api.github.com/repos/{project}/{repo}/git/refs/heads/' \
                          'master'.format(project=project, repo=repo)
        trees_url = 'https://api.github.com/repos/{project}/{repo}/git/trees/{sha}?recursive=1'
        raw_content_path = 'https://raw.githubusercontent.com/{project}/{repo}/master/{filename}'

        # Fetch the latest commit of the repo
        try:
            resp = requests.get(last_commit_url)
        except exceptions.RequestException as e:
            print(e)
            return None

        # Without a commit sha there is no tree to ask for, so stop here
        # instead of querying the trees endpoint with an empty sha.
        if resp.status_code != 200:
            return None
        try:
            last_commit = resp.json()['object']['sha']
        except (KeyError, TypeError, ValueError) as e:
            # unexpected payload shape or a body that is not valid JSON
            print(e)
            return None

        # Fetch the contents tree using the last commit sha
        try:
            resp = requests.get(trees_url.format(project=project, repo=repo, sha=last_commit))
        except exceptions.RequestException as e:
            print(e)
            return None

        # Bail out explicitly; previously a non-200 answer left ``tree``
        # undefined and the loop below failed with UnboundLocalError.
        if resp.status_code != 200:
            return None
        try:
            tree = resp.json()['tree']
        except (KeyError, TypeError, ValueError) as e:
            print(e)
            return None

        for entry in tree:
            path = entry.get('path')
            if path is None:
                continue
            filename = os.path.basename(path)
            # Plain lookup instead of raising (and printing) a KeyError for
            # every file that is not a supported manifest.
            if supported_manifests.get(filename):
                manifest_data.append({
                    'filename': filename,
                    'download_url': raw_content_path.format(
                        project=project, repo=repo, filename=path),
                    'filepath': os.path.dirname(path)
                })

        print(manifest_data)
        return manifest_data
Example #12
0
 def test_parse_gh_repo_nok(self, url):
     """Check that parse_gh_repo() returns None for the given URL."""
     parsed = parse_gh_repo(url)
     assert parsed is None
Example #13
0
 def test_parse_gh_repo_ok(self, url):
     """Check that parse_gh_repo() returns 'foo/bar' for the given URL."""
     parsed = parse_gh_repo(url)
     assert parsed == 'foo/bar'
 def test_parse_gh_repo_nok(self, url):
     """Verify that parse_gh_repo() yields None for the supplied URL."""
     outcome = parse_gh_repo(url)
     assert outcome is None
 def test_parse_gh_repo_ok(self, url):
     """Verify that parse_gh_repo() yields 'foo/bar' for the supplied URL."""
     outcome = parse_gh_repo(url)
     assert outcome == 'foo/bar'