Exemple #1
0
def api_zip_url_from_github_url(url: str,
                                branch: Optional[str] = None,
                                token: Optional[str] = None) -> str:
    """
    Get an API URL to download the repository as a zip archive
    https://docs.github.com/en/rest/reference/repos#download-a-repository-archive-zip
    @param url: Repository URL, optionally containing a branch spec
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @param token: Optional GitHub token; when given it is sent as an
        `Authorization` header with the validation requests
    @return: GitHub API URL to query for a zip archive (or the raw file URL
        when `url` points at a single downloadable file)
    @raise GithubInvalidUrl: if neither the raw file URL nor the zip archive
        URL answers successfully
    """
    # Only attach credentials when the caller actually supplied a token
    headers = {"Authorization": "token {t}".format(t=token)} if token else None

    # specific file
    try:
        url = blob2raw(url)
        if requests.get(url, headers=headers).ok:
            return url
    except GithubInvalidUrl:
        pass

    # full git repo
    branch = branch or get_branch_from_github_url(url)
    owner, repo = author_repo_from_github_url(url)
    url = GithubAPI.REPO_ZIP.format(owner=owner, branch=branch, repo=repo)
    if requests.get(url, headers=headers).status_code == 200:
        return url

    # report the URL that was tried, like download_url_from_github_url does
    raise GithubInvalidUrl(url)
def match_url_template(url: str, template: str, branch: str = None):
    """
    Fill `template` with the author, repo and branch of a GitHub URL and
    check that the resulting URL exists.
    @param url: Repository URL, optionally containing a branch spec
    @param template: format string using `{author}`, `{repo}` and `{branch}`
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @return: the matched URL converted with `blob2raw`
    @raise GithubHTTPRateLimited: if GitHub answered with its rate limit page
    @raise GithubInvalidUrl: if the formatted URL does not answer with HTTP 200
    """
    branch = branch or get_branch_from_github_url(url)
    author, repo = author_repo_from_github_url(url)
    url = template.format(author=author, branch=branch, repo=repo)
    # Fetch once and reuse the response for both the status and body checks
    response = requests.get(url)
    if response.status_code == 200:
        if "<title>Rate limit &middot; GitHub</title>" in response.text:
            raise GithubHTTPRateLimited
        return blob2raw(url)
    raise GithubInvalidUrl(url)
Exemple #3
0
def _parse_pling(skill):
    """
    Build skill metadata from a pling store entry.
    @param skill: store entry, either a JSON string or an already parsed dict
    @return: the JSON document downloaded from the entry's `downloadlink1`,
        augmented with store metadata; an empty dict if that download
        cannot be decoded as JSON
    """
    entry = json.loads(skill) if isinstance(skill, str) else skill

    # TODO is it a safe assumption downloadlink1 is always the skill.json ?
    # this can be made smarter
    download_url = entry["downloadlink1"]
    try:
        parsed = requests.get(download_url).json()
    except JSONDecodeError:
        return {}

    # rename
    parsed["skillname"] = parsed.pop("name")

    # useful data for skill.meta_info, followed by the appstore data
    # TODO also provide the appstore data from mycroft appstore
    parsed.update({
        "logo": entry["previewpic1"],
        "category": entry['typename'],
        "created": entry['created'],
        "modified": entry['changed'],
        "description": entry["description"],
        "tags": entry['tags'].split(","),
        "authorname": entry['personid'],
        "version": entry["version"],
        "appstore": "pling.opendesktop",
        "appstore_url": entry["detailpage"],
    })
    return parsed
Exemple #4
0
def match_url_template(url, template, branch=None):
    """
    Fill `template` with the author, repo and branch of a GitHub URL and
    check that the resulting URL exists.
    @param url: Repository URL, optionally containing a branch spec
    @param template: format string using `{author}`, `{repo}` and `{branch}`
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @return: the matched URL converted with `blob2raw`
    @raise GithubInvalidUrl: if the formatted URL does not answer with HTTP 200
    """
    resolved_branch = branch or get_branch_from_github_url(url)
    author, repo = author_repo_from_github_url(url)
    candidate = template.format(author=author,
                                branch=resolved_branch,
                                repo=repo)
    response = requests.get(candidate)
    if response.status_code != 200:
        raise GithubInvalidUrl
    return blob2raw(candidate)
Exemple #5
0
def get_readme_from_github_api(url, branch=None):
    """
    Get the readme file contents for the specified repository.
    @param url: Repository URL
    @param branch: Optional branch to query, sent as the `ref` parameter
    @return: readme contents, base64-decoded when the API reports that encoding
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIReadmeNotFound: if no readme could be retrieved
    """
    def _decoded(payload, text):
        # TODO Raise UnknownEncoding?
        if payload["encoding"] == "base64":
            return base64.b64decode(text).decode("utf-8")
        return text

    author, repo = author_repo_from_github_url(url)
    readme_endpoint = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        payload = requests.get(readme_endpoint,
                               params={"ref": branch}).json()
        if "API rate limit exceeded" in payload.get("message", ""):
            raise GithubAPIRateLimited
        return _decoded(payload, payload["content"])
    except GithubAPIRateLimited:
        raise
    except Exception:
        # best effort only: fall through to the per-file lookup below
        pass

    # check files individually
    for candidate in GITHUB_README_FILES:
        try:
            payload = get_file_from_github_api(url, candidate, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in payload.get("message", ""):
            raise GithubAPIRateLimited
        text = payload.get("content")
        if text:
            return _decoded(payload, text)
    raise GithubAPIReadmeNotFound
Exemple #6
0
def get_marketplace_json(branch=None):
    """
    Download the skill metadata JSON of the Mycroft marketplace.
    @param branch: Optional branch of the mycroft-skills-data repository,
        otherwise the value of `get_current_marketplace_branch()` is used
    @return: parsed JSON content of `skill-metadata.json`
    @raise GithubInvalidBranch: if the file does not answer with HTTP 200
    """
    selected = branch or get_current_marketplace_branch()
    template = "https://raw.githubusercontent.com/MycroftAI/mycroft-skills-data/{branch}/skill-metadata.json"
    response = requests.get(template.format(branch=selected))
    if response.status_code != 200:
        raise GithubInvalidBranch
    return response.json()
Exemple #7
0
def get_readme_url_from_github_api(url: str,
                                   branch: Optional[str] = None) -> str:
    """
    Get the readme file url for the specified repository
    https://docs.github.com/en/rest/reference/repos#get-a-repository-readme
    @param url: Repository URL
    @param branch: Optional branch to query, otherwise default branch will be used
    @return: url of repository README file
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIReadmeNotFound: if no readme url could be determined
    """
    author, repo = author_repo_from_github_url(url)
    default_url = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        data = requests.get(default_url, params={"ref": branch}).json()
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        return data["html_url"]
    except GithubAPIRateLimited:
        # must not be swallowed by the generic handler below, otherwise
        # the fallback keeps hitting an API that is already rate limited
        raise
    except Exception:
        pass  # check files individually

    for dst in GITHUB_README_FILES:
        try:
            data = get_file_from_github_api(url, dst, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        if data.get("html_url"):
            return data["html_url"]
    raise GithubAPIReadmeNotFound
Exemple #8
0
def get_repo_releases_from_github_api(url: str,
                                      branch: Optional[str] = None) -> list:
    """
    Get releases data for the repository at the specified URL and branch
    https://docs.github.com/en/rest/reference/repos#list-repository-tags
    @param url: Repository URL
    @param branch: Optional branch spec, otherwise default branch will be used
    @return: repo tag data, with `tarball_url`/`zipball_url` rewritten to
        direct download urls and `node_id` removed
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIReleasesNotFound: if the request fails or the API answers
        with an error object instead of a list
    """
    # TODO: There is a releases API, but this uses the tags API
    try:
        author, repo = author_repo_from_github_url(url)
        url = GithubAPI.REPO_RELEASES.format(owner=author, repo=repo)
        data = requests.get(url, params={"ref": branch}).json()
    except Exception as e:
        raise GithubAPIReleasesNotFound(str(e)) from e
    if isinstance(data, dict):
        # result is usually a list, unless api call fails
        message = data.get("message", "")
        if "API rate limit exceeded" in message:
            raise GithubAPIRateLimited
        # any other error object cannot be processed as a list of tags
        raise GithubAPIReleasesNotFound(message)
    # let's fix api urls
    for release in data:
        release["tarball_url"] = GithubUrls.DOWNLOAD_TARBALL.format(
            author=author, repo=repo, branch=release["name"])
        release["zipball_url"] = GithubUrls.DOWNLOAD.format(
            author=author, repo=repo, branch=release["name"])
        # tolerate entries that carry no node_id
        release.pop('node_id', None)
    return data
def download_url_from_github_url(url: str, branch: str = None):
    """
    Resolve a GitHub URL to something that can be downloaded.
    @param url: GitHub URL of a single file or of a repository
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @return: raw file URL when `url` points at an existing file, otherwise
        the repository download URL built from `GithubUrls.DOWNLOAD`
    @raise GithubInvalidUrl: if neither URL answers with HTTP 200
    """
    def _exists(candidate):
        return requests.get(candidate).status_code == 200

    # specific file
    try:
        url = blob2raw(url)
        if _exists(url):
            return url
    except GithubInvalidUrl:
        pass

    # full git repo
    branch = branch or get_branch_from_github_url(url)
    author, repo = author_repo_from_github_url(url)
    url = GithubUrls.DOWNLOAD.format(author=author, branch=branch, repo=repo)
    if not _exists(url):
        raise GithubInvalidUrl(url)
    return url
Exemple #10
0
def api_zip_url_from_github_url(url, branch=None, token=None):
    """
    Get an API URL to download the repository as a zip archive.
    @param url: Repository URL, optionally containing a branch spec
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @param token: Optional GitHub token; when given it is sent as an
        `Authorization` header with the validation requests
    @return: GitHub API URL to query for a zip archive (or the raw file URL
        when `url` points at a single downloadable file)
    @raise GithubInvalidUrl: if neither the raw file URL nor the zip archive
        URL answers with HTTP 200
    """
    # Only attach credentials when the caller actually supplied a token
    headers = {"Authorization": "token {t}".format(t=token)} if token else None

    # specific file
    try:
        url = blob2raw(url)
        if requests.get(url, headers=headers).status_code == 200:
            return url
    except GithubInvalidUrl:
        pass

    # full git repo
    branch = branch or get_branch_from_github_url(url)
    owner, repo = author_repo_from_github_url(url)
    url = GithubAPI.REPO_ZIP.format(owner=owner, branch=branch, repo=repo)
    if requests.get(url, headers=headers).status_code == 200:
        return url

    # report the URL that was tried, like download_url_from_github_url does
    raise GithubInvalidUrl(url)
Exemple #11
0
def get_file_from_github_api(url, filepath, branch=None):
    """
    Get information for a file in a repository.
    @param url: Repository URL
    @param filepath: path to a file in the repository
    @param branch: Optional branch to query, otherwise the result of
        `get_main_branch_from_github_api(url)` is used
    @return: parsed API data
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIFileNotFound: if the API answers with 'Not Found'
    """
    author, repo = author_repo_from_github_url(url)
    ref = branch or get_main_branch_from_github_api(url)
    endpoint = GithubAPI.REPO_FILE.format(owner=author,
                                          repo=repo,
                                          file=filepath)
    payload = requests.get(endpoint, params={"ref": ref}).json()
    if "API rate limit exceeded" in payload.get("message", ""):
        raise GithubAPIRateLimited
    if payload.get("message", "") == 'Not Found':
        raise GithubAPIFileNotFound
    return payload
Exemple #12
0
def get_mycroft_marketplace_skill_urls_from_submodules(branch=None):
    """
    Yield the skill urls listed in the `.gitmodules` file of the
    MycroftAI/mycroft-skills repository.
    @param branch: Optional branch to read, otherwise the value of
        `get_current_marketplace_branch()` is used
    @return: generator of url strings, one per submodule section
    @raise GithubInvalidBranch: if the file does not answer with HTTP 200
    """
    selected = branch or get_current_marketplace_branch()
    gitmodules_url = "https://raw.githubusercontent.com/MycroftAI/mycroft-skills/{branch}/.gitmodules".format(branch=selected)
    response = requests.get(gitmodules_url)
    if response.status_code != 200:
        raise GithubInvalidBranch
    # each section looks like '[submodule "x"] ... url = <skill url>'
    sections = response.text.split("[submodule ")
    yield from (section.split("url = ")[-1].strip()
                for section in sections if section)
Exemple #13
0
def get_neon_skills(parse_github=False, skiplist=None):
    """
    Yield skill entries from the Neon skill metadata file.
    @param parse_github: forwarded to `SkillEntry.from_json`
    @param skiplist: optional collection of skill urls to leave out
    @return: generator of `SkillEntry` objects
    """
    ignored = skiplist or []
    skills_url = "https://raw.githubusercontent.com/NeonGeckoCom/neon-skills-submodules/master/skill-metadata.json"
    catalog = requests.get(skills_url).json()
    for entry in catalog.values():
        if entry["url"] not in ignored:
            entry["appstore"] = "Neon"
            entry["appstore_url"] = skills_url
            yield SkillEntry.from_json(entry, parse_github=parse_github)
Exemple #14
0
def get_license_data_from_github_api(url, branch=None):
    """
    Get license data for the repository at the given URL.
    @param url: Repository URL
    @param branch: Optional branch spec, sent as the `ref` parameter
    @return: parsed API response
    @raise GithubAPILicenseNotFound: if the request or JSON parsing fails
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    """
    author, repo = author_repo_from_github_url(url)
    license_endpoint = GithubAPI.REPO_LICENSE.format(owner=author, repo=repo)
    try:
        response = requests.get(license_endpoint, params={"ref": branch})
        payload = response.json()
    except Exception:
        raise GithubAPILicenseNotFound
    rate_limited = "API rate limit exceeded" in payload.get("message", "")
    if rate_limited:
        raise GithubAPIRateLimited
    return payload
def blob2raw(url: str, validate: bool = False):
    """
    Convert a GitHub "blob" file URL to its raw.githubusercontent.com form.
    @param url: URL starting with https://github.com or
        https://raw.githubusercontent.com
    @param validate: if True, request the resulting URL and require HTTP 200
    @return: the raw content URL
    @raise GithubInvalidUrl: if `url` has neither of the accepted prefixes
    @raise GithubRawUrlNotFound: if `validate` is set and the raw URL does
        not answer with HTTP 200
    """
    github_prefix = "https://github.com"
    raw_prefix = "https://raw.githubusercontent.com"
    if url.startswith(raw_prefix):
        path = url[len(raw_prefix):]
    elif url.startswith(github_prefix):
        path = url[len(github_prefix):]
    else:
        raise GithubInvalidUrl(url)

    # The path looks like "/<owner>/<repo>/blob/<branch>/<file>". Only the
    # marker segment right after the repo is dropped, so repo names or file
    # paths that merely contain "blob" are left intact.
    segments = path.split("/")
    if len(segments) > 3 and segments[3] == "blob":
        del segments[3]
    url = raw_prefix + "/".join(segments)

    if validate and requests.get(url).status_code != 200:
        raise GithubRawUrlNotFound(url)
    return url
Exemple #16
0
def get_pling_skills(parse_github=False, skiplist=None):
    """
    Yield skill entries listed in the pling store.
    @param parse_github: forwarded to `SkillEntry.from_json`
    @param skiplist: optional collection of skill urls to leave out
    @return: generator of `SkillEntry` objects; entries without a `url`
        are skipped
    """
    skiplist = skiplist or []
    url = "https://api.kde-look.org/ocs/v1/content/data"

    def _fetch_page(number):
        params = {"categories": "608", "page": number}
        return xml2dict(requests.get(url, params=params).text)

    first_page = _fetch_page(0)
    meta = first_page["ocs"]["meta"]
    total_items = int(meta["totalitems"])
    per_page = int(meta["itemsperpage"])
    # ceiling division: a partially filled last page still counts, but no
    # page past the end is requested when the total is an exact multiple
    n_pages = -(-total_items // per_page)

    for n in range(n_pages):
        LOG.debug("Parsing pling page {i} out of {n}".format(i=n, n=n_pages))
        # page 0 was already downloaded to read the paging metadata
        page = first_page if n == 0 else _fetch_page(n)
        for skill in page["ocs"]["data"]["content"]:
            skill_json = _parse_pling(skill)
            skill_url = skill_json.get("url")
            if not skill_url or skill_url in skiplist:
                continue
            yield SkillEntry.from_json(skill_json, parse_github=parse_github)
def validate_github_skill_url(url:str, branch:str=None):
    """
    Check that a GitHub URL points at a skill repository.
    @param url: Repository URL, optionally containing a branch spec
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @return: True if the file matched by `GithubUrls.SKILL` contains a
        `create_skill` definition
    @raise GithubNotSkill: if the file is missing or has no such definition
    """
    branch = branch or get_branch_from_github_url(url)
    try:
        skill_file_url = match_url_template(url, GithubUrls.SKILL, branch)
        source = requests.get(skill_file_url).text
    except GithubInvalidUrl:
        # no matching file: treat it like a file without the marker
        source = ""
    if "def create_skill():" not in source:
        raise GithubNotSkill
    return True
Exemple #18
0
def get_neon_skills_from_api(parse_github=False, skiplist=None):
    """
    Yield skill entries from the Neon skill metadata file, read through
    the GitHub contents API.
    @param parse_github: forwarded to `SkillEntry.from_json`
    @param skiplist: optional collection of skill urls to leave out
    @return: generator of `SkillEntry` objects
    @raise AuthenticationError: if the API answers 'Not Found' or reports
        that its rate limit was exceeded
    """
    ignored = skiplist or []
    skills_url = "https://api.github.com/repos/NeonGeckoCom/neon-skills-submodules/contents/skill-metadata.json"
    payload = requests.get(skills_url).json()
    message = payload.get("message", "")
    if message == 'Not Found' or "API rate limit exceeded" in message:
        raise AuthenticationError

    # the contents API wraps the file body in a base64 envelope
    if payload.get("encoding") == "base64":
        decoded = base64.b64decode(payload["content"]).decode("utf-8")
        payload = json.loads(decoded)

    for entry in payload.values():
        if entry["url"] not in ignored:
            entry["appstore"] = "Neon"
            entry["appstore_url"] = skills_url
            yield SkillEntry.from_json(entry, parse_github=parse_github)
Exemple #19
0
def get_license_data_from_github_api(url: str,
                                     branch: Optional[str] = None) -> dict:
    """
    Get license data for the repository at the given URL and branch
    https://docs.github.com/en/rest/reference/licenses#get-the-license-for-a-repository
    @param url: Repository URL
    @param branch: Optional branch spec, sent as the `ref` parameter
    @return: dict license data
    @raise GithubAPILicenseNotFound: if the request or JSON parsing fails
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    """
    author, repo = author_repo_from_github_url(url)
    license_endpoint = GithubAPI.REPO_LICENSE.format(owner=author, repo=repo)
    try:
        response = requests.get(license_endpoint, params={"ref": branch})
        payload = response.json()
    except Exception:
        raise GithubAPILicenseNotFound
    rate_limited = "API rate limit exceeded" in payload.get("message", "")
    if rate_limited:
        raise GithubAPIRateLimited
    return payload
Exemple #20
0
def get_repo_releases_from_github_api(url, branch=None):
    """
    Get releases data for the repository at the specified URL.
    @param url: Repository URL
    @param branch: Optional branch spec, sent as the `ref` parameter
    @return: list of release entries, with `tarball_url`/`zipball_url`
        rewritten to direct download urls and `node_id` removed
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIReleasesNotFound: if the request fails or the API answers
        with an error object instead of a list
    """
    try:
        author, repo = author_repo_from_github_url(url)
        url = GithubAPI.REPO_RELEASES.format(owner=author, repo=repo)
        data = requests.get(url, params={"ref": branch}).json()
    except Exception as e:
        raise GithubAPIReleasesNotFound(str(e)) from e
    if isinstance(data, dict):
        # result is usually a list, unless api call fails
        message = data.get("message", "")
        if "API rate limit exceeded" in message:
            raise GithubAPIRateLimited
        # any other error object cannot be processed as a list of releases
        raise GithubAPIReleasesNotFound(message)
    # let's fix api urls
    for release in data:
        release["tarball_url"] = GithubUrls.DOWNLOAD_TARBALL.format(
            author=author, repo=repo, branch=release["name"])
        release["zipball_url"] = GithubUrls.DOWNLOAD.format(
            author=author, repo=repo, branch=release["name"])
        # tolerate entries that carry no node_id
        release.pop('node_id', None)
    return data
Exemple #21
0
def get_file_from_github_api(url: str,
                             filepath: str,
                             branch: Optional[str] = None) -> dict:
    """
    Get information for a file in a repository.
    https://docs.github.com/en/rest/reference/repos#get-repository-content
    @param url: Repository URL
    @param filepath: path to a file in the repository
    @param branch: Optional branch to query, otherwise the result of
        `get_main_branch_from_github_api(url)` is used
    @return: parsed API data
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIFileNotFound: if the API answers with 'Not Found'
    """
    author, repo = author_repo_from_github_url(url)
    ref = branch or get_main_branch_from_github_api(url)
    endpoint = GithubAPI.REPO_FILE.format(owner=author,
                                          repo=repo,
                                          file=filepath)
    payload = requests.get(endpoint, params={"ref": ref}).json()
    if "API rate limit exceeded" in payload.get("message", ""):
        raise GithubAPIRateLimited
    if payload.get("message", "") == 'Not Found':
        raise GithubAPIFileNotFound
    return payload
    def from_json(data: Union[str, dict], parse_github: bool = True):
        """
        Build a SkillEntry from JSON-like input.
        @param data: a dict, a JSON string, a path to a JSON file, or an
            http(s) url (github urls become `{"url": ...}`, other urls are
            downloaded and parsed as JSON)
        @param parse_github: if True and the data has a github `url`, merge
            in the result of `get_skill_data` and parse python requirements
        @return: SkillEntry wrapping the resulting dict
        @raise GithubFileNotFound: if a non-github url does not return JSON
        @raise ValueError: if the input does not resolve to a dict
        @raise GithubInvalidUrl: propagated unchanged from the github lookup
        """
        if isinstance(data, str):
            source = data
            if not source.startswith("http"):
                if isfile(source):
                    with open(source) as handle:
                        data = json.load(handle)
                else:
                    data = json.loads(source)
            elif "github" in source:
                # repo is parsed in github info step below,
                # branch detected when parsing data dict
                data = {"url": source}
            else:
                try:
                    data = json.loads(requests.get(source).text)
                except JSONDecodeError:
                    raise GithubFileNotFound

        if not isinstance(data, dict):
            # TODO new exception
            raise ValueError("unrecognized format")

        # augment with github info
        repo_url = data.get("url", "") if parse_github else ""
        if "github" in repo_url:
            github_data = get_skill_data(repo_url, data.get("branch"))
            data = merge_dict(github_data,
                              data,
                              merge_lists=True,
                              skip_empty=True,
                              no_dupes=True)
            python_requirements = data["requirements"].get("python")
            parse_python_dependencies(python_requirements,
                                      requests.headers.get("Authorization"))
        return SkillEntry(data)
Exemple #23
0
def get_readme_url_from_github_api(url, branch=None):
    """
    Get the readme file url for the specified repository.
    @param url: Repository URL
    @param branch: Optional branch to query, sent as the `ref` parameter
    @return: `html_url` of the repository README file
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIReadmeNotFound: if no readme url could be determined
    """
    author, repo = author_repo_from_github_url(url)
    default_url = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        data = requests.get(default_url, params={"ref": branch}).json()
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        return data["html_url"]
    except GithubAPIRateLimited:
        # must not be swallowed by the generic handler below, otherwise
        # the fallback keeps hitting an API that is already rate limited
        raise
    except Exception:
        pass  # check files individually

    for dst in GITHUB_README_FILES:
        try:
            data = get_file_from_github_api(url, dst, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        if data.get("html_url"):
            return data["html_url"]
    raise GithubAPIReadmeNotFound
Exemple #24
0
def get_readme_from_github_api(url: str, branch: Optional[str] = None) -> str:
    """
    Get the readme file contents for the specified repository
    @param url: Repository URL
    @param branch: Optional branch to query, otherwise default branch will be used
    @return: contents of repository README file, base64-decoded when the
        API reports that encoding
    @raise GithubAPIRateLimited: if the API reports its rate limit was exceeded
    @raise GithubAPIReadmeNotFound: if no readme could be retrieved
    """
    def _decoded(payload, text):
        # TODO Raise UnknownEncoding?
        if payload["encoding"] == "base64":
            return base64.b64decode(text).decode("utf-8")
        return text

    author, repo = author_repo_from_github_url(url)
    readme_endpoint = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        payload = requests.get(readme_endpoint,
                               params={"ref": branch}).json()
        if "API rate limit exceeded" in payload.get("message", ""):
            raise GithubAPIRateLimited
        return _decoded(payload, payload["content"])
    except GithubAPIRateLimited:
        raise
    except Exception:
        # best effort only: fall through to the per-file lookup below
        pass

    # check files individually
    for candidate in GITHUB_README_FILES:
        try:
            payload = get_file_from_github_api(url, candidate, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in payload.get("message", ""):
            raise GithubAPIRateLimited
        text = payload.get("content")
        if text:
            return _decoded(payload, text)
    raise GithubAPIReadmeNotFound
def get_andlos_list_skills(parse_github: bool = False, skiplist=None):
    """
    Yield skill entries from andlo's skill list.
    @param parse_github: forwarded to `SkillEntry.from_json`
    @param skiplist: optional collection of repository urls to leave out
    @return: generator of `SkillEntry` objects; skills whose parsing raises
        `GithubInvalidUrl` are logged and skipped
    """
    skiplist = skiplist or []
    url = "https://raw.githubusercontent.com/andlo/mycroft-skills-list-gitbook/master/_data/skills.json"
    andlos_list = requests.get(url).json()
    total = len(andlos_list)
    for idx, skill in enumerate(andlos_list):
        LOG.debug("Parsing skill {i} out of {n}".format(i=idx, n=total))
        info = skill['skill_info']
        if info['repo'] in skiplist:
            continue
        # only category names longer than two characters are kept
        categories = [c for c in info['categories'] if len(c) > 2]
        category = categories[0] if categories else None
        tags = list(set(info['tags'] + categories))
        license_info = skill.get('license') or {}
        data = {
            "created": skill['created_at'],
            'archived': skill['archived'],
            "license": license_info.get("key"),
            'modified': skill['updated_at'],
            "authorname": info['github_username'],
            "skillname": info['name'],
            "foldername": info['id'],
            "name": info['name'],
            "url": info['repo'],
            'category': category,
            "description": info['description'],
            "short_description": info['short_desc'],
            "branch": info['branch'],
            "examples": info['examples'],
            'tags': tags,
            'platforms': info['platforms'],
            'stars': skill['stargazers_count']
        }
        try:
            yield SkillEntry.from_json(data, parse_github)
        except GithubInvalidUrl:
            LOG.error("this skill does not seem to be valid! " + info['repo'])
    def install(self,
                folder=None,
                default_branch="master",
                platform=None,
                update=True):
        """
        Install this skill together with its declared requirements.

        Steps, in order: optionally switch to a platform specific branch,
        install required skills, system requirements and python
        requirements, download the skill, then optionally create a desktop
        entry and icon under `~/.local/share`.
        @param folder: passed to `is_previously_installed`, `download` and
            to the `install` call of every required skill
        @param default_branch: only forwarded to nested `install` calls
        @param platform: platform name used to look up `branch_overrides`;
            detected with `detect_enclosure()` when not given
        @param update: if False and the skill is already installed, nothing
            is done
        @return: False when skipped because already installed; the result of
            the branch specific install when a platform override applies;
            otherwise the value returned by `self.download(folder)`
        """
        if not update and self.is_previously_installed(folder):
            return False
        if self.branch_overrides:
            try:
                platform = platform or detect_enclosure()
            except Exception as e:
                LOG.error("Failed to detect platform")
                raise e
            if platform in self.branch_overrides:
                branch = self.branch_overrides[platform]
                if branch != self.branch:
                    LOG.info("Detected platform specific branch:" + branch)
                    skill = SkillEntry.from_github_url(self.url, branch)
                    # NOTE(review): `platform` and `update` are not forwarded
                    # to this nested call — confirm that is intended
                    return skill.install(folder, default_branch)

        LOG.info("Installing skill: {url} from branch: {branch}".format(
            url=self.url, branch=self.branch))

        # TODO: This is just patching a bug in requirements parsing DM
        # a bare list of requirements is treated as python requirements
        if isinstance(self.requirements, list):
            LOG.warning(self.requirements)
            self._data["requirements"] = {"python": self.requirements}

        # required skills are installed first, into the same folder
        skills = self.requirements.get("skill", [])
        if skills:
            LOG.info('Installing required skills')
        for s in skills:
            skill = SkillEntry.from_github_url(s)
            skill.install(folder, default_branch)

        system = self.requirements.get("system")
        if system:
            LOG.info('Installing system requirements')
            install_system_deps(system)

        pyth = self.requirements.get("python")
        if pyth:
            LOG.info('Running pip install')
            pip_install(pyth)

        LOG.info("Downloading " + self.url)
        updated = self.download(folder)
        if self.json.get("desktopFile"):
            LOG.info("Creating desktop entry")
            # TODO support system wide? /usr/local/XXX ?
            desktop_dir = expanduser("~/.local/share/applications")
            icon_dir = expanduser("~/.local/share/icons")

            # copy the files to a unique path, this way duplicate file names
            # dont overwrite each other, eg, several skills with "icon.png"
            base_name = ".".join([self.skill_folder,
                                  self.skill_author]).lower()

            # copy icon file
            # NOTE(review): the extension is appended without a "." separator
            # (unlike ".desktop" below) — confirm whether this is intended
            icon_file = join(icon_dir,
                             base_name + self.skill_icon.split(".")[-1])
            if self.skill_icon.startswith("http"):
                content = requests.get(self.skill_icon).content
                with open(icon_file, "wb") as f:
                    f.write(content)
            elif isfile(self.skill_icon):
                shutil.copyfile(self.skill_icon, icon_file)

            # copy .desktop file
            # NOTE(review): neither target directory is created here;
            # presumably they already exist — verify
            desktop_file = join(desktop_dir, base_name + ".desktop")
            with open(desktop_file, "w") as f:
                f.write(self.desktop_file)

        return updated
def validate_branch(branch:str, url:str):
    """
    Check whether a branch exists for a GitHub repository.
    @param branch: branch name to look for
    @param url: Repository URL
    @return: True if the repository's `/tree/<branch>` page answers with
        HTTP 200, False otherwise
    """
    tree_url = "{base}/tree/{branch}".format(base=normalize_github_url(url),
                                             branch=branch)
    response = requests.get(tree_url)
    return response.status_code == 200