def show(self, trans, **kwd):
    """
    GET /api/tool_shed/contents

    Display a list of categories in the selected toolshed.

    :param tool_shed_url: the url of the toolshed to get categories from
    """
    shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(
        trans.app, urlunquote(kwd.get('tool_shed_url', '')))
    listing_url = util.build_url(shed_url, pathspec=['api', 'categories'])
    try:
        # Annotate every category returned by the shed with a qualified
        # link back into this API's category endpoint.
        listing = json.loads(util.url_get(listing_url))
        for entry in listing:
            entry['url'] = web.url_for(controller='api/tool_shed',
                                       action='category',
                                       tool_shed_url=urlquote(shed_url),
                                       category_id=entry['id'],
                                       qualified=True)
    except Exception:
        raise exceptions.ObjectNotFound("Tool Shed %s is not responding." % shed_url)
    return listing
def get_url_straight_filename(url, strip=None, allowdir=False):
    """Get file/dir name of the last path component of the URL

    Parameters
    ----------
    strip: list, optional
      If provided, listed names will not be considered and their
      parent directory will be selected
    allowdir: bool, optional
      If url points to a "directory" (ends with /), empty string
      would be returned unless allowdir is True, in which case the
      name of the directory would be returned
    """
    components = urlunquote(urlsplit(url).path).split('/')
    if allowdir:
        # a trailing '/' leaves empty components at the end; drop them
        # (keeping at least one) so the directory's own name is last
        while len(components) > 1 and not components[-1]:
            components.pop()
    if strip:
        while components and components[-1] in strip:
            components.pop()
    return components[-1] if components else None
def category(self, trans, **kwd):
    """
    GET /api/tool_shed/category

    Display a list of repositories in the selected category.

    :param tool_shed_url: the url of the toolshed to get repositories from
    :param category_id: the category to get repositories from
    """
    shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(
        trans.app, urlunquote(kwd.get('tool_shed_url', '')))
    category_id = kwd.get('category_id', '')
    listing_url = util.build_url(shed_url,
                                 pathspec=['api', 'categories', category_id, 'repositories'],
                                 params=dict(installable=True))
    listing = json.loads(util.url_get(listing_url))
    # Attach a qualified link to this API's repository endpoint for
    # each repository the shed reported in this category.
    for repo in listing['repositories']:
        repo['url'] = web.url_for(controller='api/tool_shed',
                                  action='repository',
                                  tool_shed_url=urlquote(shed_url),
                                  repository_id=repo['id'],
                                  qualified=True)
    return listing
def is_url_quoted(url):
    """Return whether `url` appears to be already percent-quoted

    Heuristic: if unquoting changes the string, it must have contained at
    least one percent-escape, so it is considered quoted.
    """
    try:
        return url != urlunquote(url)
    except Exception:
        # Narrowed from a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt.
        # problem with unquoting -- then it must be wasn't quoted (correctly)
        return False
def get_extracted_file(self, archive, afile):
    """Return the (verified to exist) path of `afile` within `archive`."""
    lgr.debug("Requested file {afile} from archive {archive}".format(**locals()))
    # TODO: That could be a good place to provide "compatibility" layer if
    # filenames within archive are too obscure for local file system.
    # We could somehow adjust them while extracting and here channel back
    # "fixed" up names since they are only to point to the load
    extracted_root = self.get_extracted_archive(archive)
    path = opj(extracted_root, urlunquote(afile))
    # TODO: make robust
    lgr.log(1, "Verifying that %s exists" % abspath(path))
    assert exists(path), "%s must exist" % path
    return path
def __executeHostCommand(self, command, options, ssh=None, host=None):
    """Run an execute_batch command on the remote CE host over SSH.

    :param command: method name passed to the remote execute_batch script
    :param options: dict of parameters; augmented with CE configuration,
                    then JSON-encoded and URL-quoted for safe shell transport
    :param ssh: optional pre-built SSH object; created from `host` if absent
    :param host: host to connect to when `ssh` is not supplied
    :return: S_OK with the decoded remote result, or S_ERROR
    """
    if not ssh:
        ssh = SSH(host=host, parameters=self.ceParameters)
    options["BatchSystem"] = self.batchSystem.__class__.__name__
    options["Method"] = command
    options["SharedDir"] = self.sharedArea
    options["OutputDir"] = self.batchOutput
    options["ErrorDir"] = self.batchError
    options["WorkDir"] = self.workArea
    options["InfoDir"] = self.infoArea
    options["ExecutionContext"] = self.execution
    options["User"] = self.user
    options["Queue"] = self.queue
    # JSON + URL-quote so the payload survives shell quoting on the remote side
    options = json.dumps(options)
    options = urlquote(options)
    cmd = (
        "bash --login -c 'python %s/execute_batch %s || python3 %s/execute_batch %s || python2 %s/execute_batch %s'"
        % (self.sharedArea, options, self.sharedArea, options, self.sharedArea, options)
    )
    self.log.verbose("CE submission command: %s" % cmd)
    result = ssh.sshCall(120, cmd)
    if not result["OK"]:
        self.log.error("%s CE job submission failed" % self.ceType, result["Message"])
        return result
    sshStatus = result["Value"][0]
    sshStdout = result["Value"][1]
    sshStderr = result["Value"][2]
    # Examine results of the job submission
    if sshStatus != 0:
        return S_ERROR("\n".join([sshStdout, sshStderr]))
    output = sshStdout.strip().replace("\r", "").strip()
    # The remote script brackets its payload with this marker line; skip
    # past it.  Using len(marker) replaces the previous magic offset of 42,
    # which silently depended on the marker's exact length.
    marker = "============= Start output ==============="
    try:
        index = output.index(marker)
        output = output[index + len(marker):]
    except Exception:
        return S_ERROR("Invalid output from remote command: %s" % output)
    try:
        output = urlunquote(output)
        result = json.loads(output)
        # The remote side signals failure by returning an "Exception:" string
        if isinstance(result, six.string_types) and result.startswith("Exception:"):
            return S_ERROR(result)
        return S_OK(result)
    except Exception:
        return S_ERROR("Invalid return structure from job submission")
def __executeHostCommand(self, command, options, ssh=None, host=None):
    """Run an execute_batch command on the remote CE host over SSH.

    :param command: method name passed to the remote execute_batch script
    :param options: dict of parameters; augmented with CE configuration,
                    then JSON-encoded and URL-quoted for safe shell transport
    :param ssh: optional pre-built SSH object; created from `host` if absent
    :param host: host to connect to when `ssh` is not supplied
    :return: S_OK with the decoded remote result, or S_ERROR
    """
    if not ssh:
        ssh = SSH(host=host, parameters=self.ceParameters)
    options['BatchSystem'] = self.batchSystem
    options['Method'] = command
    options['SharedDir'] = self.sharedArea
    options['OutputDir'] = self.batchOutput
    options['ErrorDir'] = self.batchError
    options['WorkDir'] = self.workArea
    options['InfoDir'] = self.infoArea
    options['ExecutionContext'] = self.execution
    options['User'] = self.user
    options['Queue'] = self.queue
    # JSON + URL-quote so the payload survives shell quoting on the remote side
    options = json.dumps(options)
    options = urlquote(options)
    cmd = "bash --login -c 'python %s/execute_batch %s'" % (self.sharedArea, options)
    self.log.verbose('CE submission command: %s' % cmd)
    result = ssh.sshCall(120, cmd)
    if not result['OK']:
        self.log.error('%s CE job submission failed' % self.ceType, result['Message'])
        return result
    sshStatus = result['Value'][0]
    sshStdout = result['Value'][1]
    sshStderr = result['Value'][2]
    # Examine results of the job submission
    if sshStatus != 0:
        return S_ERROR('\n'.join([sshStdout, sshStderr]))
    output = sshStdout.strip().replace('\r', '').strip()
    # The remote script brackets its payload with this marker line; skip
    # past it.  Using len(marker) replaces the previous magic offset of 42.
    # Narrowed from `except BaseException`, which also swallowed
    # SystemExit/KeyboardInterrupt.
    marker = '============= Start output ==============='
    try:
        index = output.index(marker)
        output = output[index + len(marker):]
    except Exception:
        return S_ERROR("Invalid output from remote command: %s" % output)
    try:
        output = urlunquote(output)
        result = json.loads(output)
        # The remote side signals failure by returning an 'Exception:' string
        if isinstance(result, six.string_types) and result.startswith('Exception:'):
            return S_ERROR(result)
        return S_OK(result)
    except Exception:
        return S_ERROR('Invalid return structure from job submission')
def get_local_path_from_url(url):
    """If given a file:// URL, returns a local path, if possible.

    Raises `ValueError` if not possible, for example, if the URL scheme is
    different, or if the `host` isn't empty or 'localhost'

    The returned path is always absolute.
    """
    parts = urlsplit(url)
    if parts.scheme != 'file':
        raise ValueError(
            "Non 'file://' URL cannot be resolved to a local path")
    # only the local host (empty, 'localhost', IPv6 ::1, or any 127.* address)
    # can be mapped onto the local filesystem
    host_is_local = (parts.netloc in ('', 'localhost', '::1')
                     or parts.netloc.startswith('127.'))
    if not host_is_local:
        raise ValueError("file:// URL does not point to 'localhost'")
    return urlunquote(parts.path)
def _parse_url(cls, url, bucket_only=False):
    """Parses s3:// url and returns bucket name, prefix, additional query
    elements as a dict (such as VersionId)"""
    parsed = urlsplit(url)
    if bucket_only:
        return parsed.netloc
    assert(parsed.scheme == 's3')
    # URLs are often urlencoded so they can travel through git-annex's
    # text-based protocol.  Nothing marks whether encoding was applied, so
    # use a heuristic: '%' followed by two hex digits suggests an encoded
    # path (misfires only if a key literally contains such a sequence).
    key = parsed.path.lstrip('/')
    if re.search('%[0-9a-fA-F]{2}', key):
        lgr.debug("URL unquoting S3 URL filepath %s", key)
        key = urlunquote(key)
    # TODO: needs replacement to assure_ since it doesn't
    # deal with non key=value
    return parsed.netloc, key, assure_dict_from_str(parsed.query, sep='&') or {}
def _parse_url(cls, url, bucket_only=False):
    """Parses s3:// url and returns bucket name, prefix, additional query
    elements as a dict (such as VersionId)"""
    split_url = urlsplit(url)
    if bucket_only:
        return split_url.netloc
    assert(split_url.scheme == 's3')
    # URLs are often urlencoded so they can travel through git-annex's
    # text-based protocol.  Nothing marks whether encoding was applied, so
    # use a heuristic: '%' followed by two hex digits suggests an encoded
    # path (misfires only if a key literally contains such a sequence).
    s3_key = split_url.path.lstrip('/')
    if re.search('%[0-9a-fA-F]{2}', s3_key):
        lgr.debug("URL unquoting S3 URL filepath %s", s3_key)
        s3_key = urlunquote(s3_key)
    # TODO: needs replacement to assure_ since it doesn't
    # deal with non key=value
    return split_url.netloc, s3_key, assure_dict_from_str(split_url.query, sep='&') or {}
def prettify(value):
    """Recursively normalize `value` for display: prettify list elements,
    sort dict keys, and decode/parse strings that contain (possibly
    URL-encoded) JSON documents.
    """
    if isinstance(value, list):
        return [prettify(i) for i in value]
    if isinstance(value, dict):
        return {k: prettify(v) for k, v in sorted(value.items())}
    if isinstance(value, string_types):
        # URL encoded '{' -- percent-escape hex digits are case-insensitive,
        # so accept '%7b' as well (previously only '%7B' matched and
        # lowercase-encoded JSON was never decoded)
        if value.startswith(('%7B', '%7b')):
            decoded_value = urlunquote(value)
        else:
            decoded_value = value
        try:
            json_value = json.loads(decoded_value)
        except ValueError:
            # not JSON -- return the string unchanged
            return value
        else:
            return prettify(json_value)
    return value
def show(self, trans, **kwd):
    """
    GET /api/tool_shed/contents

    Display a list of categories in the selected toolshed.

    :param tool_shed_url: the url of the toolshed to get categories from
    """
    shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(
        trans.app, urlunquote(kwd.get('tool_shed_url', '')))
    listing = json.loads(
        util.url_get(util.build_url(shed_url, pathspec=['api', 'categories'])))
    # Annotate every category with a qualified link back into this API's
    # category endpoint.
    for entry in listing:
        entry['url'] = web.url_for(controller='api/tool_shed',
                                   action='category',
                                   tool_shed_url=urlquote(shed_url),
                                   category_id=entry['id'],
                                   qualified=True)
    return listing
def category(self, trans, **kwd):
    """
    GET /api/tool_shed/category

    Display a list of repositories in the selected category.

    :param tool_shed_url: the url of the toolshed to get repositories from
    :param category_id: the category to get repositories from
    :param sort_key: the field by which the repositories should be sorted
    :param sort_order: ascending or descending sort
    :param page: the page number to return
    """
    params = dict(installable=True,
                  sort_order=kwd.get('sort_order', 'asc'),
                  sort_key=kwd.get('sort_key', 'name'),
                  page=kwd.get('page', 1))
    shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(
        trans.app, urlunquote(kwd.get('tool_shed_url', '')))
    category_id = kwd.get('category_id', '')
    listing_url = util.build_url(
        shed_url,
        pathspec=['api', 'categories', category_id, 'repositories'],
        params=params)
    listing = json.loads(util.url_get(listing_url))
    # Attach a qualified link to this API's repository endpoint for
    # each repository the shed reported in this category.
    for repo in listing['repositories']:
        repo['url'] = web.url_for(controller='api/tool_shed',
                                  action='repository',
                                  tool_shed_url=urlquote(shed_url),
                                  repository_id=repo['id'],
                                  qualified=True)
    return listing
def _get_installationpath_from_url(url):
    """Returns a relative path derived from the trailing end of a URL

    This can be used to determine an installation path of a Dataset
    from a URL, analog to what `git clone` does.
    """
    ri = RI(url)
    if isinstance(ri, (URL, DataLadRI)):
        # decode only if URL; an empty path falls back to the hostname
        stripped = ri.path.rstrip('/')
        path = urlunquote(stripped) if stripped else ri.hostname
    else:
        path = url
    path = path.rstrip('/')
    if '/' in path:
        parts = path.split('/')
        # a URL ending in '/.git' names the repo by its parent component
        path = parts[-2] if parts[-1] == '.git' else parts[-1]
    if path.endswith('.git'):
        path = path[:-4]
    return path
def get_url_path(url):
    """Given a url, return the path component"""
    parsed = urlsplit(url)
    return urlunquote(parsed.path)
def _str_to_fields(cls, url_str):
    """Parse `url_str` into URL fields with a percent-decoded path."""
    parsed_fields = URL._pr_to_fields(urlparse(url_str))
    parsed_fields['path'] = urlunquote(parsed_fields['path'])
    return parsed_fields
def repository(self, trans, **kwd):
    """
    GET /api/tool_shed/repository

    Get details about the specified repository from its shed.

    :param repository_id: the tool_shed_repository_id
    :type repository_id: str
    :param tool_shed_url: the URL of the toolshed whence to retrieve repository details
    :type tool_shed_url: str
    :param tool_ids: (optional) comma-separated list of tool IDs
    :type tool_ids: str
    """
    # Accumulators keyed by changeset revision, filled in below.
    tool_dependencies = dict()
    tools = dict()
    tool_shed_url = urlunquote(kwd.get('tool_shed_url', ''))
    log.debug(tool_shed_url)
    repository_id = kwd.get('repository_id', None)
    tool_ids = kwd.get('tool_ids', None)
    if tool_ids is not None:
        tool_ids = util.listify(tool_ids)
    # Build the tool-panel-section choices sent back to the client.
    tool_panel_section_select_field = tool_util.build_tool_panel_section_select_field(trans.app)
    tool_panel_section_dict = {'name': tool_panel_section_select_field.name,
                               'id': tool_panel_section_select_field.field_id,
                               'sections': []}
    for name, id, _ in tool_panel_section_select_field.options:
        tool_panel_section_dict['sections'].append(dict(id=id, name=name))
    repository_data = dict()
    if tool_ids is not None:
        # Look the repository up by the tools it provides.
        if len(tool_shed_url) == 0:
            # By design, this list should always be from the same toolshed. If
            # this is ever not the case, this code will need to be updated.
            tool_shed_url = common_util.get_tool_shed_url_from_tool_shed_registry(self.app, tool_ids[0].split('/')[0])
        found_repository = json.loads(util.url_get(tool_shed_url, params=dict(tool_ids=','.join(tool_ids)), pathspec=['api', 'repositories']))
        # Any entry carries the repository_id; take the first key.
        fr_first_key = next(iter(found_repository.keys()))
        repository_id = found_repository[fr_first_key]['repository_id']
        repository_data['current_changeset'] = found_repository['current_changeset']
        repository_data['repository'] = json.loads(util.url_get(tool_shed_url, pathspec=['api', 'repositories', repository_id]))
        del found_repository['current_changeset']
        repository_data['tool_shed_url'] = tool_shed_url
    else:
        # Look the repository up directly by its id.
        repository_data['repository'] = json.loads(util.url_get(tool_shed_url, pathspec=['api', 'repositories', repository_id]))
    repository_data['repository']['metadata'] = json.loads(util.url_get(tool_shed_url, pathspec=['api', 'repositories', repository_id, 'metadata']))
    repository_data['shed_conf'] = tool_util.build_shed_tool_conf_select_field(trans.app).to_dict()
    repository_data['panel_section_dict'] = tool_panel_section_dict
    # Collect, per changeset, the tools and tool dependencies declared in the
    # repository metadata (including those of repository dependencies).
    for changeset, metadata in repository_data['repository']['metadata'].items():
        if changeset not in tool_dependencies:
            tool_dependencies[changeset] = []
        if metadata['includes_tools_for_display_in_tool_panel']:
            if changeset not in tools:
                tools[changeset] = []
            for tool_dict in metadata['tools']:
                # 'clean' is a panel-safe identifier derived from the tool name.
                tool_info = dict(clean=re.sub('[^a-zA-Z0-9]+', '_', tool_dict['name']).lower(),
                                 guid=tool_dict['guid'],
                                 name=tool_dict['name'],
                                 version=tool_dict['version'],
                                 description=tool_dict['description'])
                if tool_info not in tools[changeset]:
                    tools[changeset].append(tool_info)
            if metadata['has_repository_dependencies']:
                for repository_dependency in metadata['repository_dependencies']:
                    tools[changeset] = self.__get_tools(repository_dependency, tools[changeset])
            repository_data['tools'] = tools
        for key, dependency_dict in metadata['tool_dependencies'].items():
            # readme blobs are bulky and not needed by the client here
            if 'readme' in dependency_dict:
                del(dependency_dict['readme'])
            if dependency_dict not in tool_dependencies[changeset]:
                tool_dependencies[changeset].append(dependency_dict)
        if metadata['has_repository_dependencies']:
            for repository_dependency in metadata['repository_dependencies']:
                tool_dependencies[changeset] = self.__get_tool_dependencies(repository_dependency, tool_dependencies[changeset])
    repository_data['tool_dependencies'] = tool_dependencies
    return repository_data
def get_extracted_filename(self, afile):
    """Return full path to the `afile` within extracted `archive`

    It does not actually extract any archive
    """
    decoded_name = urlunquote(afile)
    return opj(self.path, decoded_name)
def get_url_path(url):
    """Given a file:// url, return the path itself"""
    split_result = urlsplit(url)
    return urlunquote(split_result.path)