def api_zip_url_from_github_url(url: str, branch: Optional[str] = None,
                                token: Optional[str] = None) -> str:
    """
    Get an API URL to download the repository as a zip archive
    https://docs.github.com/en/rest/reference/repos#download-a-repository-archive-zip
    @param url: Repository URL, optionally containing a branch spec
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @param token: Optional GitHub token to include with request
    @return: GitHub API URL to query for a zip archive
    @raises GithubInvalidUrl: if neither candidate url resolves
    """
    # send the token so private repos / higher rate limits work
    # (previously accepted but silently ignored)
    headers = {"Authorization": "token " + token} if token else {}
    # specific file
    try:
        url = blob2raw(url)
        if requests.get(url, headers=headers).ok:
            return url
    except GithubInvalidUrl:
        pass
    # full git repo
    branch = branch or get_branch_from_github_url(url)
    owner, repo = author_repo_from_github_url(url)
    url = GithubAPI.REPO_ZIP.format(owner=owner, branch=branch, repo=repo)
    if requests.get(url, headers=headers).status_code == 200:
        return url
    # include the failing url for easier debugging (matches sibling helpers)
    raise GithubInvalidUrl(url)
def match_url_template(url: str, template: str, branch: str = None):
    """
    Fill `template` with author/repo/branch parsed from `url` and validate it
    @param url: Repository URL, optionally containing a branch spec
    @param template: url template with {author}, {repo} and {branch} fields
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @return: raw url for the formatted template
    @raises GithubHTTPRateLimited: if github served its rate limit page
    @raises GithubInvalidUrl: if the formatted url does not resolve
    """
    branch = branch or get_branch_from_github_url(url)
    author, repo = author_repo_from_github_url(url)
    url = template.format(author=author, branch=branch, repo=repo)
    # fetch once and reuse the response; the original issued two
    # identical GETs (one for status, one for the body)
    response = requests.get(url)
    if response.status_code == 200:
        if "<title>Rate limit · GitHub</title>" in response.text:
            raise GithubHTTPRateLimited
        return blob2raw(url)
    raise GithubInvalidUrl(url)
def _parse_pling(skill):
    """
    Normalize a pling/opendesktop appstore entry into skill.json style data
    @param skill: appstore entry, as a dict or a JSON string
    @return: dict of skill metadata (empty dict if the download fails)
    """
    json_data = json.loads(skill) if isinstance(skill, str) else skill
    # TODO is it a safe assumption downloadlink1 is always the skill.json ?
    # this can be made smarter
    url = json_data["downloadlink1"]
    try:
        skill_json = requests.get(url).json()
    except JSONDecodeError:
        return {}
    # rename
    skill_json["skillname"] = skill_json.pop("name")
    # save useful data to skill.meta_info
    skill_json.update({
        "logo": json_data["previewpic1"],
        "category": json_data['typename'],
        "created": json_data['created'],
        "modified": json_data['changed'],
        "description": json_data["description"],
        "tags": json_data['tags'].split(","),
        "authorname": json_data['personid'],
        "version": json_data["version"],
        # appstore data
        # TODO also provide this from mycroft appstore
        "appstore": "pling.opendesktop",
        "appstore_url": json_data["detailpage"]
    })
    return skill_json
def match_url_template(url, template, branch=None):
    """
    Fill `template` with author/repo/branch parsed from `url` and validate it
    @param url: Repository URL, optionally containing a branch spec
    @param template: url template with {author}, {repo} and {branch} fields
    @param branch: Optional branch spec, otherwise branch from `url` will be used
    @return: raw url for the formatted template
    @raises GithubInvalidUrl: if the formatted url does not resolve
    """
    if not branch:
        branch = get_branch_from_github_url(url)
    author, repo = author_repo_from_github_url(url)
    candidate = template.format(author=author, branch=branch, repo=repo)
    if requests.get(candidate).status_code != 200:
        raise GithubInvalidUrl
    return blob2raw(candidate)
def get_readme_from_github_api(url, branch=None):
    """
    Get the readme file contents for the specified repository
    https://docs.github.com/en/rest/reference/repos#get-a-repository-readme
    @param url: Repository URL
    @param branch: Optional branch to query, otherwise default branch is used
    @return: contents of repository README file
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    @raises GithubAPIReadmeNotFound: if no readme file could be located
    """
    author, repo = author_repo_from_github_url(url)
    default_url = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        data = requests.get(default_url, params={"ref": branch}).json()
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        readme = data["content"]
        if data["encoding"] == "base64":
            return base64.b64decode(readme).decode("utf-8")
        # TODO Raise UnknownEncoding?
        return readme
    except GithubAPIRateLimited:
        raise
    except Exception:
        # best effort - fall back to checking well known filenames below
        pass
    # check files individually
    for dst in GITHUB_README_FILES:
        try:
            data = get_file_from_github_api(url, dst, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        readme = data.get("content")
        if readme:
            if data["encoding"] == "base64":
                return base64.b64decode(readme).decode("utf-8")
            # TODO Raise UnknownEncoding?
            return readme
    raise GithubAPIReadmeNotFound
def get_marketplace_json(branch=None):
    """
    Get the Mycroft Marketplace skill metadata json for a branch
    @param branch: branch of mycroft-skills-data to query,
        defaults to the current marketplace branch
    @return: parsed skill-metadata.json data
    @raises GithubInvalidBranch: if the branch does not resolve
    """
    branch = branch or get_current_marketplace_branch()
    url = "https://raw.githubusercontent.com/MycroftAI/mycroft-skills-data/{branch}/skill-metadata.json".format(branch=branch)
    r = requests.get(url)
    if r.status_code != 200:
        # include the branch for easier debugging
        raise GithubInvalidBranch(branch)
    return r.json()
def get_readme_url_from_github_api(url: str, branch: Optional[str] = None) -> str:
    """
    Get the readme file url for the specified repository
    https://docs.github.com/en/rest/reference/repos#get-a-repository-readme
    @param url: Repository URL
    @param branch: Optional branch to query, otherwise default branch will be used
    @return: url of repository README file
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    @raises GithubAPIReadmeNotFound: if no readme file could be located
    """
    author, repo = author_repo_from_github_url(url)
    default_url = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        data = requests.get(default_url, params={"ref": branch}).json()
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        return data["html_url"]
    except GithubAPIRateLimited:
        # previously swallowed by the generic handler below - rate
        # limiting must propagate, not look like a missing readme
        raise
    except Exception:
        # best effort - fall back to checking well known filenames below
        pass
    # check files individually
    for dst in GITHUB_README_FILES:
        try:
            data = get_file_from_github_api(url, dst, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        if data.get("html_url"):
            return data["html_url"]
    raise GithubAPIReadmeNotFound
def get_repo_releases_from_github_api(url: str, branch: Optional[str] = None) -> list:
    """
    Get releases data for the repository at the specified URL and branch
    https://docs.github.com/en/rest/reference/repos#list-repository-tags
    @param url: Repository URL
    @param branch: Optional branch spec, otherwise default branch will be used
    @return: repo tag data
    @raises GithubAPIReleasesNotFound: if the API call fails or errors
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    """
    # TODO: There is a releases API, but this uses the tags API
    try:
        author, repo = author_repo_from_github_url(url)
        url = GithubAPI.REPO_RELEASES.format(owner=author, repo=repo)
        data = requests.get(url, params={"ref": branch}).json()
    except Exception as e:
        raise GithubAPIReleasesNotFound(str(e))
    if isinstance(data, dict):
        # result is usually a list, unless api call fails
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        # any other dict payload is an API error response, not tag data;
        # previously this fell through and crashed iterating the dict
        raise GithubAPIReleasesNotFound(data.get("message", ""))
    # let's fix api urls
    for idx, r in enumerate(data):
        data[idx]["tarball_url"] = GithubUrls.DOWNLOAD_TARBALL.format(
            author=author, repo=repo, branch=r["name"])
        data[idx]["zipball_url"] = GithubUrls.DOWNLOAD.format(
            author=author, repo=repo, branch=r["name"])
        # tolerate payloads that lack node_id
        data[idx].pop('node_id', None)
    return data
def download_url_from_github_url(url: str, branch: str = None):
    """
    Get a direct download url from a github repository or file url
    @param url: Repository URL or url pointing at a specific file
    @param branch: Optional branch spec, otherwise branch from `url` is used
    @return: raw file url or repository archive download url
    @raises GithubInvalidUrl: if no candidate url resolves
    """
    # specific file?
    try:
        raw = blob2raw(url)
        if requests.get(raw).status_code == 200:
            return raw
    except GithubInvalidUrl:
        pass
    # full git repo
    branch = branch or get_branch_from_github_url(url)
    author, repo = author_repo_from_github_url(url)
    archive = GithubUrls.DOWNLOAD.format(author=author, branch=branch,
                                         repo=repo)
    if requests.get(archive).status_code == 200:
        return archive
    raise GithubInvalidUrl(archive)
def api_zip_url_from_github_url(url, branch=None, token=None):
    """
    Get a url to download the repository as a zip archive.

    @param url: Repository URL, optionally pointing at a specific file
    @param branch: Optional branch spec, otherwise branch from `url` is used
    @param token: TODO - accepted but never sent with any request
    @return: url that resolved with HTTP 200 (raw file url or API zip url)
    @raises GithubInvalidUrl: if neither candidate url resolves
    """
    # specific file
    try:
        url = blob2raw(url)
        if requests.get(url).status_code == 200:
            return url
    except GithubInvalidUrl:
        pass
    # full git repo
    branch = branch or get_branch_from_github_url(url)
    owner, repo = author_repo_from_github_url(url)
    url = GithubAPI.REPO_ZIP.format(owner=owner, branch=branch, repo=repo)
    if requests.get(url).status_code == 200:
        return url
    raise GithubInvalidUrl
def get_file_from_github_api(url, filepath, branch=None):
    """
    Query the github contents API for a file in the given repository
    @param url: Repository URL
    @param filepath: path to a file in the repository
    @param branch: Optional branch to query, otherwise the repo's main branch
    @return: parsed API data
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    @raises GithubAPIFileNotFound: if the API reports the file missing
    """
    author, repo = author_repo_from_github_url(url)
    if not branch:
        branch = get_main_branch_from_github_api(url)
    api_url = GithubAPI.REPO_FILE.format(owner=author, repo=repo,
                                         file=filepath)
    data = requests.get(api_url, params={"ref": branch}).json()
    message = data.get("message", "")
    if "API rate limit exceeded" in message:
        raise GithubAPIRateLimited
    if message == 'Not Found':
        raise GithubAPIFileNotFound
    return data
def get_mycroft_marketplace_skill_urls_from_submodules(branch=None):
    """
    Yield skill repository urls parsed from the mycroft-skills .gitmodules
    @param branch: branch of mycroft-skills to query,
        defaults to the current marketplace branch
    @return: generator of skill repository urls
    @raises GithubInvalidBranch: if the branch does not resolve
    """
    branch = branch or get_current_marketplace_branch()
    url = "https://raw.githubusercontent.com/MycroftAI/mycroft-skills/{branch}/.gitmodules".format(branch=branch)
    r = requests.get(url)
    if r.status_code != 200:
        # include the branch for easier debugging
        raise GithubInvalidBranch(branch)
    # each "[submodule " stanza ends with its "url = ..." line
    for module in r.text.split("[submodule "):
        if not module:
            continue
        yield module.split("url = ")[-1].strip()
def get_neon_skills(parse_github=False, skiplist=None):
    """
    Yield SkillEntry objects for skills in the Neon skill store
    @param parse_github: if True, augment entries with github data
    @param skiplist: optional list of skill urls to skip
    @return: generator of SkillEntry objects
    """
    skiplist = skiplist or []
    skills_url = "https://raw.githubusercontent.com/NeonGeckoCom/neon-skills-submodules/master/skill-metadata.json"
    for skill in requests.get(skills_url).json().values():
        if skill["url"] in skiplist:
            continue
        skill.update({"appstore": "Neon",
                      "appstore_url": skills_url})
        yield SkillEntry.from_json(skill, parse_github=parse_github)
def get_license_data_from_github_api(url, branch=None):
    """
    Get license data for the repository at the given URL and branch
    https://docs.github.com/en/rest/reference/licenses#get-the-license-for-a-repository
    @param url: Repository URL
    @param branch: Optional branch spec, otherwise default branch is used
    @return: dict license data
    @raises GithubAPILicenseNotFound: if the API call fails
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    """
    author, repo = author_repo_from_github_url(url)
    url = GithubAPI.REPO_LICENSE.format(owner=author, repo=repo)
    try:
        data = requests.get(url, params={"ref": branch}).json()
    except Exception as e:
        # preserve the original failure as the exception cause
        raise GithubAPILicenseNotFound from e
    if "API rate limit exceeded" in data.get("message", ""):
        raise GithubAPIRateLimited
    return data
def blob2raw(url: str, validate: bool = False):
    """
    Convert a github.com blob url to its raw.githubusercontent.com equivalent
    @param url: github.com url (optionally a /blob/ file url) or raw url
    @param validate: if True, GET the raw url and require HTTP 200
    @return: raw.githubusercontent.com url
    @raises GithubInvalidUrl: if `url` is not a github url
    @raises GithubRawUrlNotFound: if validate and the raw url is unreachable
    """
    if not url.startswith("https://github.com") and \
            not url.startswith("https://raw.githubusercontent.com"):
        raise GithubInvalidUrl(url)
    # only strip the single "/blob/" segment that separates repo from
    # branch; replacing every "/blob" substring (as before) mangled
    # repo or file names that contain "blob"
    url = url.replace("https://github.com",
                      "https://raw.githubusercontent.com")
    url = url.replace("/blob/", "/", 1)
    if validate:
        if requests.get(url).status_code != 200:
            raise GithubRawUrlNotFound(url)
    return url
def get_pling_skills(parse_github=False, skiplist=None):
    """
    Yield SkillEntry objects for skills listed in the pling appstore
    @param parse_github: if True, augment entries with github data
    @param skiplist: optional list of skill urls to skip
    @return: generator of SkillEntry objects
    """
    skiplist = skiplist or []
    url = "https://api.kde-look.org/ocs/v1/content/data"

    def _fetch_page(page):
        # one page of category "608" listings, parsed from xml
        resp = requests.get(url, params={"categories": "608", "page": page})
        return xml2dict(resp.text)

    meta = _fetch_page(0)["ocs"]["meta"]
    n_pages = int(meta["totalitems"]) // int(meta["itemsperpage"])
    for page in range(0, n_pages + 1):
        LOG.debug("Parsing pling page {i} out of {n}".format(i=page,
                                                             n=n_pages))
        for skill in _fetch_page(page)["ocs"]["data"]["content"]:
            skill_json = _parse_pling(skill)
            entry_url = skill_json.get("url", "")
            if not entry_url or entry_url in skiplist:
                continue
            yield SkillEntry.from_json(skill_json, parse_github=parse_github)
def validate_github_skill_url(url: str, branch: str = None):
    """
    Check that `url` points at a repository containing a mycroft skill,
    i.e. the file at the GithubUrls.SKILL template defines create_skill()
    @param url: Repository URL
    @param branch: Optional branch spec, otherwise branch from `url` is used
    @return: True if the repository looks like a valid skill
    @raises GithubNotSkill: if the url does not resolve to a skill
    """
    if not branch:
        branch = get_branch_from_github_url(url)
    try:
        init_url = match_url_template(url, GithubUrls.SKILL, branch)
        if "def create_skill():" in requests.get(init_url).text:
            return True
    except GithubInvalidUrl:
        pass
    raise GithubNotSkill
def get_neon_skills_from_api(parse_github=False, skiplist=None):
    """
    Yield SkillEntry objects for Neon skills via the github contents API
    @param parse_github: if True, augment entries with github data
    @param skiplist: optional list of skill urls to skip
    @return: generator of SkillEntry objects
    @raises AuthenticationError: if rate limited or the metadata is missing
    """
    skiplist = skiplist or []
    skills_url = "https://api.github.com/repos/NeonGeckoCom/neon-skills-submodules/contents/skill-metadata.json"
    skill_json = requests.get(skills_url).json()
    message = skill_json.get("message", "")
    if message == 'Not Found' or "API rate limit exceeded" in message:
        raise AuthenticationError
    if skill_json.get("encoding") == "base64":
        decoded = base64.b64decode(skill_json["content"]).decode("utf-8")
        skill_json = json.loads(decoded)
    for skill in skill_json.values():
        if skill["url"] in skiplist:
            continue
        skill["appstore"] = "Neon"
        skill["appstore_url"] = skills_url
        yield SkillEntry.from_json(skill, parse_github=parse_github)
def get_license_data_from_github_api(url: str, branch: Optional[str] = None) -> dict:
    """
    Get license data for the repository at the given URL and branch
    https://docs.github.com/en/rest/reference/licenses#get-the-license-for-a-repository
    @param url: Repository URL
    @param branch: Optional branch spec, otherwise default branch will be used
    @return: dict license data
    @raises GithubAPILicenseNotFound: if the API call fails
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    """
    author, repo = author_repo_from_github_url(url)
    url = GithubAPI.REPO_LICENSE.format(owner=author, repo=repo)
    try:
        data = requests.get(url, params={"ref": branch}).json()
    except Exception as e:
        # preserve the original failure as the exception cause
        raise GithubAPILicenseNotFound from e
    if "API rate limit exceeded" in data.get("message", ""):
        raise GithubAPIRateLimited
    return data
def get_repo_releases_from_github_api(url, branch=None):
    """
    Get releases data (via the tags API) for the repository at `url`
    https://docs.github.com/en/rest/reference/repos#list-repository-tags
    @param url: Repository URL
    @param branch: Optional branch spec, otherwise default branch is used
    @return: repo tag data
    @raises GithubAPIReleasesNotFound: if the API call fails or errors
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    """
    try:
        author, repo = author_repo_from_github_url(url)
        url = GithubAPI.REPO_RELEASES.format(owner=author, repo=repo)
        data = requests.get(url, params={"ref": branch}).json()
    except Exception as e:
        raise GithubAPIReleasesNotFound(str(e))
    if isinstance(data, dict):
        # result is usually a list, unless api call fails
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        # any other dict payload is an API error response, not tag data;
        # previously this fell through and crashed iterating the dict
        raise GithubAPIReleasesNotFound(data.get("message", ""))
    # let's fix api urls
    for idx, r in enumerate(data):
        data[idx]["tarball_url"] = GithubUrls.DOWNLOAD_TARBALL.format(
            author=author, repo=repo, branch=r["name"])
        data[idx]["zipball_url"] = GithubUrls.DOWNLOAD.format(
            author=author, repo=repo, branch=r["name"])
        # tolerate payloads that lack node_id
        data[idx].pop('node_id', None)
    return data
def get_file_from_github_api(url: str, filepath: str,
                             branch: Optional[str] = None) -> dict:
    """
    Get information for a file in a repository.
    https://docs.github.com/en/rest/reference/repos#get-repository-content
    @param url: Repository URL
    @param filepath: path to a file in the repository
    @param branch: Optional branch to query, otherwise branch from `url`
        will be used
    @return: parsed API data
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    @raises GithubAPIFileNotFound: if the API reports the file missing
    """
    author, repo = author_repo_from_github_url(url)
    ref = branch or get_main_branch_from_github_api(url)
    api_url = GithubAPI.REPO_FILE.format(owner=author, repo=repo,
                                         file=filepath)
    data = requests.get(api_url, params={"ref": ref}).json()
    msg = data.get("message", "")
    if "API rate limit exceeded" in msg:
        raise GithubAPIRateLimited
    if msg == 'Not Found':
        raise GithubAPIFileNotFound
    return data
def from_json(data: Union[str, dict], parse_github: bool = True):
    """
    Build a SkillEntry from flexible input.

    @param data: skill data as a dict, a JSON string, a path to a JSON
        file, or a url (github repo url, or url serving a JSON document)
    @param parse_github: if True and `data` references github, augment the
        data with info fetched from github
    @return: SkillEntry built from the (merged) data
    @raises GithubFileNotFound: if a non-github url does not return JSON
    @raises ValueError: if data cannot be interpreted as a dict
    @raises GithubInvalidUrl: if github parsing fails
    """
    if isinstance(data, str):
        if data.startswith("http"):
            url = data
            if "github" in url:
                data = {"url": url}
                # repo is parsed in github info step below,
                # branch detected when parsing data dict
            else:
                try:
                    res = requests.get(url).text
                    data = json.loads(res)
                except JSONDecodeError:
                    raise GithubFileNotFound
        elif isfile(data):
            # string that is an existing file path: load it as JSON
            with open(data) as f:
                data = json.load(f)
        else:
            # fall back to treating the string itself as JSON
            data = json.loads(data)
    if not isinstance(data, dict):
        # TODO new exception
        raise ValueError("unrecognized format")
    # augment with github info
    if parse_github:
        url = data.get("url", "")
        if "github" in url:
            try:
                github_data = get_skill_data(url, data.get("branch"))
                # github data fills gaps; explicit `data` values win
                data = merge_dict(github_data, data, merge_lists=True,
                                  skip_empty=True, no_dupes=True)
                # NOTE(review): `requests.headers` assumes `requests` here
                # is a session-like object with default headers - the stdlib
                # requests module has no `.headers` attribute; confirm the
                # import at the top of this file
                parse_python_dependencies(
                    data["requirements"].get("python"),
                    requests.headers.get("Authorization"))
            except GithubInvalidUrl as e:
                raise e
    return SkillEntry(data)
def get_readme_url_from_github_api(url, branch=None):
    """
    Get the readme file url for the specified repository
    https://docs.github.com/en/rest/reference/repos#get-a-repository-readme
    @param url: Repository URL
    @param branch: Optional branch to query, otherwise default branch is used
    @return: url of repository README file
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    @raises GithubAPIReadmeNotFound: if no readme file could be located
    """
    author, repo = author_repo_from_github_url(url)
    default_url = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        data = requests.get(default_url, params={"ref": branch}).json()
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        return data["html_url"]
    except GithubAPIRateLimited:
        # previously swallowed by the generic handler below - rate
        # limiting must propagate, not look like a missing readme
        raise
    except Exception:
        # best effort - fall back to checking well known filenames below
        pass
    # check files individually
    for dst in GITHUB_README_FILES:
        try:
            data = get_file_from_github_api(url, dst, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        if data.get("html_url"):
            return data["html_url"]
    raise GithubAPIReadmeNotFound
def get_readme_from_github_api(url: str, branch: Optional[str] = None) -> str:
    """
    Get the readme file contents for the specified repository
    @param url: Repository URL
    @param branch: Optional branch to query, otherwise default branch
        will be used
    @return: contents of repository README file
    @raises GithubAPIRateLimited: if the API rate limit was exceeded
    @raises GithubAPIReadmeNotFound: if no readme file could be located
    """
    author, repo = author_repo_from_github_url(url)
    default_url = GithubAPI.REPO_README.format(owner=author, repo=repo)
    try:
        data = requests.get(default_url, params={"ref": branch}).json()
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        readme = data["content"]
        if data["encoding"] == "base64":
            return base64.b64decode(readme).decode("utf-8")
        # TODO Raise UnknownEncoding?
        return readme
    except GithubAPIRateLimited:
        raise
    except Exception:
        # best effort - fall back to checking well known filenames below
        pass
    # check files individually
    for dst in GITHUB_README_FILES:
        try:
            data = get_file_from_github_api(url, dst, branch)
        except GithubAPIFileNotFound:
            continue
        if "API rate limit exceeded" in data.get("message", ""):
            raise GithubAPIRateLimited
        readme = data.get("content")
        if readme:
            if data["encoding"] == "base64":
                return base64.b64decode(readme).decode("utf-8")
            # TODO Raise UnknownEncoding?
            return readme
    raise GithubAPIReadmeNotFound
def get_andlos_list_skills(parse_github: bool = False, skiplist=None):
    """
    Yield SkillEntry objects parsed from andlo's mycroft skills list
    @param parse_github: if True, augment entries with github data
    @param skiplist: optional list of skill repo urls to skip
    @return: generator of SkillEntry objects
    """
    skiplist = skiplist or []
    url = "https://raw.githubusercontent.com/andlo/mycroft-skills-list-gitbook/master/_data/skills.json"
    andlos_list = requests.get(url).json()
    for idx, skill in enumerate(andlos_list):
        LOG.debug("Parsing skill {i} out of {n}".format(
            i=idx, n=len(andlos_list)))
        s = skill['skill_info']
        if s['repo'] in skiplist:
            continue
        # use a distinct loop variable; the original comprehension
        # shadowed `s` (the skill_info dict) with each category string
        cats = [c for c in s['categories'] if len(c) > 2]
        cat = cats[0] if len(cats) else None
        tags = list(set(s['tags'] + cats))
        # don't shadow the `license` builtin
        lic = skill.get('license') or {}
        data = {
            "created": skill['created_at'],
            'archived': skill['archived'],
            "license": lic.get("key"),
            'modified': skill['updated_at'],
            "authorname": s['github_username'],
            "skillname": s['name'],
            "foldername": s['id'],
            "name": s['name'],
            "url": s['repo'],
            'category': cat,
            "description": s['description'],
            "short_description": s['short_desc'],
            "branch": s['branch'],
            "examples": s['examples'],
            'tags': tags,
            'platforms': s['platforms'],
            'stars': skill['stargazers_count']
        }
        try:
            yield SkillEntry.from_json(data, parse_github)
        except GithubInvalidUrl:
            LOG.error("this skill does not seem to be valid! " + s['repo'])
def install(self, folder=None, default_branch="master", platform=None,
            update=True):
    """
    Install this skill and its dependencies.

    Resolves platform branch overrides, installs skill / system / python
    requirements, downloads the repository and optionally creates a
    desktop entry.

    @param folder: directory to install skills into
    @param default_branch: branch to fall back to when none is specified
    @param platform: platform identifier; auto-detected when overrides exist
    @param update: if False, skip a skill that is already installed
    @return: True if the skill was downloaded/updated, else False
    """
    if not update and self.is_previously_installed(folder):
        return False
    if self.branch_overrides:
        try:
            # platform only needs detecting when overrides exist
            platform = platform or detect_enclosure()
        except Exception as e:
            LOG.error("Failed to detect platform")
            raise e
        if platform in self.branch_overrides:
            branch = self.branch_overrides[platform]
            if branch != self.branch:
                LOG.info("Detected platform specific branch:" + branch)
                # re-enter install with the override branch
                skill = SkillEntry.from_github_url(self.url, branch)
                return skill.install(folder, default_branch)
    LOG.info("Installing skill: {url} from branch: {branch}".format(
        url=self.url, branch=self.branch))
    # TODO: This is just patching a bug in requirements parsing DM
    if isinstance(self.requirements, list):
        LOG.warning(self.requirements)
        self._data["requirements"] = {"python": self.requirements}
    skills = self.requirements.get("skill", [])
    if skills:
        LOG.info('Installing required skills')
        for s in skills:
            skill = SkillEntry.from_github_url(s)
            skill.install(folder, default_branch)
    system = self.requirements.get("system")
    if system:
        LOG.info('Installing system requirements')
        install_system_deps(system)
    pyth = self.requirements.get("python")
    if pyth:
        LOG.info('Running pip install')
        pip_install(pyth)
    LOG.info("Downloading " + self.url)
    updated = self.download(folder)
    if self.json.get("desktopFile"):
        LOG.info("Creating desktop entry")
        # TODO support system wide? /usr/local/XXX ?
        desktop_dir = expanduser("~/.local/share/applications")
        icon_dir = expanduser("~/.local/share/icons")
        # copy the files to a unique path, this way duplicate file names
        # dont overwrite each other, eg, several skills with "icon.png"
        base_name = ".".join([self.skill_folder, self.skill_author]).lower()
        # copy icon file
        # NOTE(review): base_name + extension has no "." separator, which
        # yields names like "foo.authorpng" - confirm this is intended
        icon_file = join(icon_dir,
                         base_name + self.skill_icon.split(".")[-1])
        if self.skill_icon.startswith("http"):
            content = requests.get(self.skill_icon).content
            with open(icon_file, "wb") as f:
                f.write(content)
        elif isfile(self.skill_icon):
            shutil.copyfile(self.skill_icon, icon_file)
        # copy .desktop file
        desktop_file = join(desktop_dir, base_name + ".desktop")
        with open(desktop_file, "w") as f:
            f.write(self.desktop_file)
    return updated
def validate_branch(branch: str, url: str):
    """
    Check that a branch exists for the repository at `url`
    @param branch: branch name to check
    @param url: Repository URL
    @return: True if the branch's tree page resolves with HTTP 200
    """
    tree_url = "{repo}/tree/{branch}".format(
        repo=normalize_github_url(url), branch=branch)
    return requests.get(tree_url).status_code == 200