def lookup_revision_message(rev_sha1_git):
    """Fetch the raw message attached to a revision.

    Args:
        rev_sha1_git: the revision's sha1_git, as an hexadecimal string.

    Returns:
        A dict of the form {'message': <the_message>}.

    Raises:
        NotFoundExc: if storage yields no revision for that identifier, or
            if the revision found carries no 'message' key.

    Any error raised by ``_to_sha1_bin`` while converting the identifier
    propagates to the caller unchanged.
    """
    candidates = storage.revision_get([_to_sha1_bin(rev_sha1_git)])
    revision = _first_element(candidates)

    if not revision:
        raise NotFoundExc('Revision with sha1_git %s not found.'
                          % rev_sha1_git)

    if 'message' in revision:
        return {'message': revision['message']}

    raise NotFoundExc('No message for revision with sha1_git %s.'
                      % rev_sha1_git)
def lookup_revision_with_context(sha1_git_root, sha1_git, limit=100):
    """Look up revision ``sha1_git`` within the history of ``sha1_git_root``.

    The revision log starting at ``sha1_git_root`` is walked (at most
    ``limit`` entries are requested from storage) and ``sha1_git`` must
    appear in it, i.e. it must be an ancestor of the root.

    Args:
        sha1_git_root: the most recent revision. Either its sha1_git as an
            hexadecimal string, or a dict holding the binary identifier
            under the 'id' key (in which case its existence is not checked).
        sha1_git: hexadecimal sha1_git of the revision to look up.
        limit: value forwarded to ``storage.revision_log`` to bound the
            number of revisions walked (defaults to 100).

    Returns:
        The converted revision, augmented with a 'children' entry listing
        the revisions of the walked log that have it as parent.

    Raises:
        NotFoundExc: if either revision is unknown, or if ``sha1_git`` does
            not show up in the walked log of ``sha1_git_root``.
    """
    revision = _first_element(storage.revision_get([_to_sha1_bin(sha1_git)]))
    if not revision:
        raise NotFoundExc('Revision %s not found' % sha1_git)

    if not isinstance(sha1_git_root, str):
        # Caller handed over an already fetched (non converted) revision.
        root_id = sha1_git_root['id']
    else:
        root_id = _to_sha1_bin(sha1_git_root)
        if not _first_element(storage.revision_get([root_id])):
            raise NotFoundExc('Revision root %s not found' % sha1_git_root)

    # Build both directions of the ancestry graph in a single pass.
    parents_of = {}
    children_of = defaultdict(list)
    for entry in storage.revision_log([root_id], limit):
        entry_id = entry['id']
        entry_parents = parents_of[entry_id] = []
        for parent_id in entry['parents']:
            entry_parents.append(parent_id)
            children_of[parent_id].append(entry_id)

    target_id = revision['id']
    if target_id not in parents_of:
        raise NotFoundExc('Revision %s is not an ancestor of %s' %
                          (sha1_git, sha1_git_root))

    revision['children'] = children_of[target_id]
    return converters.from_revision(revision)
def get_save_origin_requests(visit_type, origin_url):
    """
    List every save request recorded for a given software origin.

    Args:
        visit_type (str): the type of visit
        origin_url (str): the url of the origin

    Raises:
        BadInputExc: the visit type or origin url is invalid
        swh.web.common.exc.NotFoundExc: no save requests can be found for
            the given origin

    Returns:
        list: A list of save origin requests dict as described in
        :func:`swh.web.common.origin_save.create_save_origin_request`
    """
    # Both validators are expected to raise on invalid input.
    _check_visit_type_savable(visit_type)
    _check_origin_url_valid(origin_url)

    matching = SaveOriginRequest.objects.filter(visit_type=visit_type,
                                                origin_url=origin_url)
    if matching.count() != 0:
        return get_save_origin_requests_from_queryset(matching)

    raise NotFoundExc(
        "No save requests found for visit of type %s on origin with url %s."
        % (visit_type, origin_url))
def lookup_directory_with_path(sha1_git, path_string):
    """Resolve a relative path below a root directory to a directory entry.

    Args:
        sha1_git: hexadecimal sha1_git of the root directory from which
            the path is resolved.
        path_string: path of the wanted entry, relative to that root
            directory. Leading and trailing path separators are ignored.

    Returns:
        The converted directory entry found at that path.

    Raises:
        NotFoundExc: if no entry is found at that path (the preliminary
            ``_check_directory_exists`` call may raise as well).
    """
    root_id = _to_sha1_bin(sha1_git)
    _check_directory_exists(sha1_git, root_id)

    separator = os.path.sep
    # Storage is queried with one bytes component per path segment.
    segments = [name.encode('utf-8')
                for name in path_string.strip(separator).split(separator)]

    entry = storage.directory_entry_get_by_path(root_id, segments)
    if entry:
        return converters.from_directory_entry(entry)

    raise NotFoundExc('Directory entry with path %s from %s not found'
                      % (path_string, sha1_git))
def _lookup_revision_id_by(origin_id, branch_name, timestamp):
    """Resolve the revision targeted by a branch of an origin visit.

    The visit is selected through ``get_origin_visit`` using ``timestamp``;
    the branch is then searched in the snapshot of that visit. A branch of
    type 'alias' is followed once.

    Args:
        origin_id: identifier of the origin.
        branch_name: name of the branch to resolve.
        timestamp: forwarded as ``visit_ts`` to select the visit.

    Returns:
        The 'target' of the resolved branch, i.e. a revision identifier.

    Raises:
        NotFoundExc: if the branch does not exist or does not lead to a
            revision (directly or through one alias).
    """
    def _get_snapshot_branch(snapshot_id, name):
        # Use the snapshot id received as argument: the previous version
        # shadowed its parameter and silently read the enclosing `visit`.
        snapshot = lookup_snapshot(snapshot_id,
                                   branches_from=name,
                                   branches_count=10)
        return snapshot['branches'].get(name)

    visit = get_origin_visit({'id': origin_id}, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit['snapshot'], branch_name)

    rev_id = None
    if branch and branch['target_type'] == 'revision':
        rev_id = branch['target']
    elif branch and branch['target_type'] == 'alias':
        # Only one level of aliasing is followed.
        branch = _get_snapshot_branch(visit['snapshot'], branch['target'])
        if branch and branch['target_type'] == 'revision':
            rev_id = branch['target']

    if not rev_id:
        raise NotFoundExc('Revision for origin %s and branch %s not found.'
                          % (origin_id, branch_name))
    return rev_id
def lookup_snapshot(snapshot_id, branches_from='', branches_count=1000,
                    target_types=None):
    """Retrieve a snapshot, i.e. the named branches recorded during a
    specific visit of an origin.

    Args:
        snapshot_id (str): sha1 identifier of the snapshot
        branches_from (str): optional branch name from which to start;
            branches whose name is lesser are skipped
        branches_count (int): optional upper bound on the number of
            branches returned
        target_types (list): optional filter on the target types of the
            branches to return (possible values that can be contained in
            that list are `'content', 'directory', 'revision', 'release',
            'snapshot', 'alias'`)

    Returns:
        A dict filled with the snapshot content.

    Raises:
        NotFoundExc: if storage returns nothing for that snapshot.
    """
    found = storage.snapshot_get_branches(
        _to_sha1_bin(snapshot_id),
        branches_from.encode(),
        branches_count,
        target_types,
    )
    if found:
        return converters.from_snapshot(found)
    raise NotFoundExc('Snapshot with id %s not found!' % snapshot_id)
def get_origin_info(origin_url, origin_type=None):
    """
    Get info about a software origin, guessing its type when needed.

    When ``origin_type`` is not provided, every type listed in
    ``_swh_origin_types`` is tried in turn and the first successful lookup
    wins.

    Args:
        origin_url (str): complete url of a software origin
        origin_type (str): optional origin type

    Returns:
        A dict with the following entries:
            * type: the origin type
            * url: the origin url
            * id: the internal id of the origin

    Raises:
        NotFoundExc: if no candidate type yields an origin. When
            ``origin_type`` is given, whatever ``service.lookup_origin``
            raises is propagated instead.
    """
    if origin_type:
        return service.lookup_origin({'type': origin_type,
                                      'url': origin_url})

    for candidate_type in _swh_origin_types:
        try:
            return service.lookup_origin({'type': candidate_type,
                                          'url': origin_url})
        except Exception:
            # Best-effort probing: any failure just means "try next type".
            continue

    raise NotFoundExc('Origin with url %s not found!' % origin_url)
def lookup_directory_with_path(sha1_git: str, path: str) -> Dict[str, Any]:
    """Resolve ``path`` below the root directory ``sha1_git``.

    Args:
        sha1_git: hexadecimal sha1_git of the root directory the path is
            resolved from
        path: path of the wanted entry, relative to that root directory;
            leading and trailing path separators are ignored

    Returns:
        Directory entry information as dict.

    Raises:
        NotFoundExc if the directory entry is not found
    """
    root_id = _to_sha1_bin(sha1_git)
    _check_directory_exists(sha1_git, root_id)

    separator = os.path.sep
    segments = path.strip(separator).split(separator)
    # Storage is queried with one bytes component per path segment.
    encoded_segments = [segment.encode("utf-8") for segment in segments]

    entry = storage.directory_entry_get_by_path(root_id, encoded_segments)
    if entry:
        return converters.from_directory_entry(entry)

    raise NotFoundExc(
        f"Directory entry with path {path} from root directory {sha1_git} not found"
    )
def lookup_content_raw(q: str) -> Dict[str, Any]:
    """Fetch the raw bytes of the content designated by ``q``.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        The result of converting a dict with 'sha1' and 'data' keys, where
        'data' holds what storage returned for that content.

    Raises:
        NotFoundExc if the requested content is not found or if the content
        bytes are not available in the storage
    """
    content = lookup_content(q)
    sha1_bytes = hashutil.hash_to_bytes(content["checksums"]["sha1"])
    raw_data = storage.content_get_data(sha1_bytes)

    if raw_data is not None:
        return converters.from_content({"sha1": sha1_bytes, "data": raw_data})

    # The content is known but its bytes are missing: report it using the
    # checksum the caller actually asked for.
    algo, hash_ = query.parse_hash(q)
    hash_hex = hashutil.hash_to_hex(hash_)
    raise NotFoundExc(
        f"Bytes of content with {algo} checksum equals "
        f"to {hash_hex} are not available!")
def lookup_directory_through_revision(revision, path=None, limit=100,
                                      with_data=False):
    """Find a revision from criteria, then look up a path in its directory.

    Args:
        revision: dictionary of criteria identifying the revision to lookup
        path: directory's path to lookup.
        limit: optional bound on the revisions log walked while resolving
            the revision (default to 100). Note that this bound may prevent
            from concluding on ancestry between revisions.
        with_data: whether to retrieve the raw data when the path resolves
            to a content.

    Returns:
        A pair (revision id, result of ``lookup_directory_with_revision``
        for that revision and path).

    Raises:
        NotFoundExc: if no revision matches the criteria.
    """
    resolved = lookup_revision_through(revision, limit)
    if not resolved:
        raise NotFoundExc("Revision with criterion %s not found!" % revision)

    revision_id = resolved["id"]
    directory = lookup_directory_with_revision(revision_id, path, with_data)
    return (revision_id, directory)
def lookup_origin(origin: OriginInfo) -> OriginInfo:
    """Return information about the origin matching dict origin.

    Several spellings of the url are submitted to storage, in this order:
    the url as given, the url with its trailing slash toggled, and (when
    applicable) the url with a mangled ``scheme:/`` prefix restored to
    ``scheme://``. The first origin found is returned.

    Args:
        origin: origin's dict with 'url' key

    Returns:
        origin information as dict.

    Raises:
        NotFoundExc: if none of the url variants is known to storage.
    """
    url = origin["url"]
    candidate_urls = [url]

    if url:
        if url.endswith("/"):
            # user provided a trailing slash while the url in storage
            # does not have it (e.g. GitHub)
            candidate_urls.append(url[:-1])
        else:
            # user omitted the trailing slash while the url in storage
            # has it (e.g. Debian source package)
            candidate_urls.append(f"{url}/")

        try:
            # handle case where the "://" character sequence was mangled
            # into ":/"
            parsed = urlparse(url)
            mangled_prefix = f"{parsed.scheme}:/"
            proper_prefix = f"{parsed.scheme}://"
            looks_mangled = (
                parsed.scheme
                and not parsed.netloc
                and url.startswith(mangled_prefix)
                and not url.startswith(proper_prefix)
            )
            if looks_mangled:
                candidate_urls.append(
                    url.replace(mangled_prefix, proper_prefix))
        except Exception:
            # Best effort only: an unparsable url simply adds no variant.
            pass

    found = [o for o in storage.origin_get(candidate_urls) if o is not None]
    if found:
        return converters.from_origin(found[0].to_dict())

    raise NotFoundExc("Origin with url %s not found!" % url)
def test_browse_directory_snapshot_not_found(client, mocker, origin):
    """A NotFoundExc raised while building the snapshot context must be
    rendered as a 404 page using the error template."""
    error_message = "Snapshot not found"
    mocked_context = mocker.patch(
        "swh.web.browse.snapshot_context.get_snapshot_context",
        side_effect=NotFoundExc(error_message),
    )

    url = reverse(
        "browse-origin-directory",
        query_params={"origin_url": origin["url"]},
    )
    resp = check_html_get_response(
        client, url, status_code=404, template_used="error.html"
    )

    assert_contains(resp, error_message, status_code=404)
    # Make sure the failure really came from the patched function.
    assert mocked_context.called
def lookup_revision_message(rev_sha1_git) -> Dict[str, bytes]:
    """Fetch the raw message attached to a revision.

    Args:
        rev_sha1_git: the revision's sha1_git, as an hexadecimal string

    Returns:
        A dict of the form {'message': <the_message>}

    Raises:
        NotFoundExc if the revision is not found, or if its message is
        empty or missing. Errors raised by ``_to_sha1_bin`` on a malformed
        identifier propagate unchanged.
    """
    revisions = storage.revision_get([_to_sha1_bin(rev_sha1_git)])
    revision = revisions[0]

    if not revision:
        raise NotFoundExc(f"Revision with sha1_git {rev_sha1_git} not found.")

    if revision.message:
        return {"message": revision.message}

    raise NotFoundExc(
        f"No message for revision with sha1_git {rev_sha1_git}.")
def test_content_bytes_missing(client, archive_data, mocker, content):
    """Browsing a content whose bytes cannot be retrieved must answer 404
    while still rendering the content template."""
    content_sha1 = content["sha1"]

    archive = mocker.patch("swh.web.browse.utils.archive")
    # The content metadata is known...
    archive.lookup_content.return_value = archive_data.content_get(
        content_sha1)
    # ...but neither its file type nor its raw bytes can be obtained.
    archive.lookup_content_filetype.side_effect = Exception()
    archive.lookup_content_raw.side_effect = NotFoundExc(
        "Content bytes not available!")

    url = reverse("browse-content", url_args={"query_string": content_sha1})

    check_html_get_response(
        client,
        url,
        status_code=404,
        template_used="browse/content.html",
    )
def test_content_bytes_missing(self, mock_service, content):
    """Browsing a content whose bytes are unavailable must answer 404 and
    still render the content template.

    ``mock_service`` stands for the patched service module: the content
    metadata lookup succeeds (with no data) while the raw lookup raises
    NotFoundExc.
    """
    content_data = self.content_get_metadata(content['sha1'])
    content_data['data'] = None

    mock_service.lookup_content.return_value = content_data
    mock_service.lookup_content_raw.side_effect = NotFoundExc(
        'Content bytes not available!')

    url = reverse('browse-content',
                  url_args={'query_string': content['sha1']})

    resp = self.client.get(url)

    self.assertEqual(resp.status_code, 404)
    # The response must be passed explicitly: called with only a template
    # name, assertTemplateUsed returns a context manager and checks nothing.
    self.assertTemplateUsed(resp, 'browse/content.html')
def lookup_origin_visit(origin_id, visit_id):
    """Fetch a single visit of an origin.

    Args:
        origin_id: origin concerned by the visit
        visit_id: the visit identifier to lookup

    Returns:
        The converted origin visit.

    Raises:
        NotFoundExc: if storage returns nothing for that origin/visit pair.
    """
    found = storage.origin_visit_get_by(origin_id, visit_id)
    if found:
        return converters.from_origin_visit(found)

    raise NotFoundExc('Origin with id %s or its visit '
                      'with id %s not found!' % (origin_id, visit_id))
def lookup_content(q):
    """Look up the content designated by ``q``.

    Args:
        q: query string parsed by ``query.parse_hash`` into a hash
            algorithm and a hash value

    Returns:
        The converted content found in storage.

    Raises:
        NotFoundExc if the requested content is not found
    """
    algo, hash_value = query.parse_hash(q)
    found = storage.content_find({algo: hash_value})
    if found:
        return converters.from_content(found)

    raise NotFoundExc('Content with %s checksum equals to %s not found!' %
                      (algo, hashutil.hash_to_hex(hash_value)))
def api_lookup(
    lookup_fn: Callable[..., Any],
    *args: Any,
    notfound_msg: Optional[str] = "Object not found",
    enrich_fn: Optional[EnrichFunction] = None,
    request: Optional[HttpRequest] = None,
    **kwargs: Any,
):
    r"""
    Capture a redundant behavior of:
        - looking up the backend with a criteria (be it an identifier or
          checksum) passed to the function lookup_fn
        - if nothing is found, raise an NotFoundExc exception with error
          message notfound_msg.
        - Otherwise if something is returned:
            - either as list, map or generator, map the enrich_fn function
              to it and return the resulting data structure as list.
            - or as any other value (e.g. a dict), pass it to enrich_fn
              and return the enriched value.

    Args:
        - lookup_fn: function expects one criteria and optional
          supplementary \*args.
        - \*args: supplementary arguments to pass to lookup_fn.
        - notfound_msg: if nothing matching the criteria is found,
          raise NotFoundExc with this error message.
        - enrich_fn: Function to use to enrich the result returned by
          lookup_fn. Default to the identity function if not provided.
        - request: Input HTTP request that will be provided as parameter
          to enrich_fn.
        - \*\*kwargs: supplementary keyword arguments to pass to lookup_fn.

    Returns:
        A list of enriched items when lookup_fn returned a list, a map
        object or a generator; the single enriched result otherwise.

    Raises:
        NotFoundExc when lookup_fn returns None, or whatever `lookup_fn`
        raises.
    """

    def _enrich_fn_noop(x, request):
        return x

    if enrich_fn is None:
        enrich_fn = _enrich_fn_noop

    res = lookup_fn(*args, **kwargs)
    # Only None means "not found": empty containers are valid results.
    if res is None:
        raise NotFoundExc(notfound_msg)

    if isinstance(res, (list, GeneratorType, map)):
        return [enrich_fn(x, request=request) for x in res]
    return enrich_fn(res, request=request)
def lookup_content(q: str) -> Dict[str, Any]:
    """Look up the content designated by ``q``.

    Args:
        q: query string parsed by ``query.parse_hash`` into a hash
            algorithm and a hash value

    Returns:
        The converted dict form of the first content found in storage.

    Raises:
        NotFoundExc if the requested content is not found
    """
    algo, hash_ = query.parse_hash(q)
    matches = storage.content_find({algo: hash_})
    content = _first_element(matches)

    if content:
        return converters.from_content(content.to_dict())

    hhex = hashutil.hash_to_hex(hash_)
    raise NotFoundExc(
        f"Content with {algo} checksum equals to {hhex} not found!")
def _branch_not_found(branch_type, branch, branches, snapshot_id=None,
                      origin_info=None, timestamp=None, visit_id=None):
    """
    Utility function that always raises NotFoundExc for a branch or
    release that can not be found.

    The message depends on how the snapshot was designated (by snapshot
    id, by visit id, or by visit timestamp, checked in that order) and on
    whether the list of branches/releases is empty.

    Args:
        branch_type: 'branch' for a branch; any other value is reported
            as a release
        branch: name of the branch/release that was looked for
        branches: the branches/releases that were available
        snapshot_id: optional snapshot identifier
        origin_info: dict with 'type' and 'url' keys, read when no
            snapshot id is given
        timestamp: visit timestamp, reported when neither a snapshot id
            nor a visit id is given
        visit_id: optional visit identifier

    Raises:
        NotFoundExc: always.
    """
    if branch_type == 'branch':
        label, label_plural = 'Branch', 'branches'
    else:
        label, label_plural = 'Release', 'releases'

    nothing_listed = len(branches) == 0

    if snapshot_id:
        if nothing_listed:
            msg = ('Snapshot with id %s has an empty list'
                   ' of %s!' % (snapshot_id, label_plural))
        else:
            msg = ('%s %s for snapshot with id %s'
                   ' not found!' % (label, branch, snapshot_id))
    elif visit_id:
        if nothing_listed:
            msg = ('Origin with type %s and url %s'
                   ' for visit with id %s has an empty list'
                   ' of %s!' % (origin_info['type'], origin_info['url'],
                                visit_id, label_plural))
        else:
            msg = ('%s %s associated to visit with'
                   ' id %s for origin with type %s and url %s'
                   ' not found!' % (label, branch, visit_id,
                                    origin_info['type'],
                                    origin_info['url']))
    else:
        if nothing_listed:
            msg = ('Origin with type %s and url %s'
                   ' for visit with timestamp %s has an empty list'
                   ' of %s!' % (origin_info['type'], origin_info['url'],
                                timestamp, label_plural))
        else:
            msg = ('%s %s associated to visit with'
                   ' timestamp %s for origin with type %s'
                   ' and url %s not found!' % (label, branch, timestamp,
                                               origin_info['type'],
                                               origin_info['url']))

    raise NotFoundExc(msg)
def lookup_person(person_id):
    """Fetch the person identified by ``person_id``.

    Args:
        person_id: the person identifier; it is converted with ``int``
            before querying storage.

    Returns:
        person information as dict.

    Raises:
        NotFoundExc if there is no person with the provided id.
    """
    numeric_id = int(person_id)
    person = _first_element(storage.person_get([numeric_id]))
    if person:
        return converters.from_person(person)

    raise NotFoundExc('Person with id %s not found' % person_id)
def lookup_snapshot(
    snapshot_id: str,
    branches_from: str = "",
    branches_count: int = 1000,
    target_types: Optional[List[str]] = None,
    branch_name_include_substring: Optional[str] = None,
    branch_name_exclude_prefix: Optional[str] = "refs/pull/",
) -> Dict[str, Any]:
    """Retrieve a snapshot, i.e. the named branches recorded during a
    specific visit of an origin.

    Args:
        snapshot_id: sha1 identifier of the snapshot
        branches_from: optional branch name from which to start; branches
            whose name is lesser are skipped
        branches_count: optional upper bound on the number of branches
            returned
        target_types: optional filter on the target types of the branches
            to return (possible values that can be contained in that list
            are `'content', 'directory', 'revision', 'release',
            'snapshot', 'alias'`)
        branch_name_include_substring: if provided, only return branches
            whose name contains given substring
        branch_name_exclude_prefix: if provided, do not return branches
            whose name starts with given pattern

    Returns:
        A dict filled with the snapshot content.

    Raises:
        NotFoundExc: if storage returns nothing for that snapshot.
    """
    snapshot_id_bin = _to_sha1_bin(snapshot_id)
    from_bytes = branches_from.encode()

    # Empty or missing name filters are forwarded to storage as None.
    include_bytes = None
    if branch_name_include_substring:
        include_bytes = branch_name_include_substring.encode()

    exclude_bytes = None
    if branch_name_exclude_prefix:
        exclude_bytes = branch_name_exclude_prefix.encode()

    partial_branches = storage.snapshot_get_branches(
        snapshot_id_bin,
        from_bytes,
        branches_count,
        target_types,
        include_bytes,
        exclude_bytes,
    )
    if partial_branches:
        return converters.from_partial_branches(partial_branches)

    raise NotFoundExc(f"Snapshot with id {snapshot_id} not found!")
def api_content_raw(request, q):
    """
    .. http:get:: /api/1/content/[(hash_type):](hash)/raw/

        Get the raw content of a content object (aka a "blob"), as a byte
        sequence.

        :param string hash_type: optional parameter specifying which hashing
            algorithm has been used to compute the content checksum. It can
            be either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``.
            If that parameter is not provided, it is assumed that the
            hashing algorithm used is ``sha1``.
        :param string hash: hexadecimal representation of the checksum value
            computed with the specified hashing algorithm.
        :query string filename: if provided, the downloaded content will get
            that filename

        :resheader Content-Type: application/octet-stream

        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`,
        :http:method:`options`

        :statuscode 200: no error
        :statuscode 400: an invalid **hash_type** or **hash** has been
            provided
        :statuscode 404: requested content can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
    """ # noqa
    def _stream(raw_content):
        # Single-chunk generator handed over to HttpResponse.
        yield raw_content['data']

    content_raw = service.lookup_content_raw(q)
    if not content_raw:
        raise NotFoundExc('Content %s is not found.' % q)

    # Fall back on a name derived from the query when none is requested.
    filename = (request.query_params.get('filename')
                or 'content_%s_raw' % q.replace(':', '_'))

    response = HttpResponse(_stream(content_raw),
                            content_type='application/octet-stream')
    response['Content-disposition'] = 'attachment; filename=%s' % filename
    return response
def test_api_revision_directory_ko_not_found(self, mock_rev_dir):
    """A NotFoundExc raised by the patched lookup must be reported as a
    JSON 404 response, the lookup being called with the parsed url parts."""
    # given
    url = '/api/1/revision/999/directory/some/path/to/dir/'
    mock_rev_dir.side_effect = NotFoundExc('Not found')

    # when
    rv = self.client.get(url)

    # then
    expected_payload = {
        'exception': 'NotFoundExc',
        'reason': 'Not found',
    }
    self.assertEqual(rv.status_code, 404)
    self.assertEqual(rv['Content-Type'], 'application/json')
    self.assertEqual(rv.data, expected_payload)

    mock_rev_dir.assert_called_once_with(
        {'sha1_git': '999'},
        'some/path/to/dir',
        url,
        with_data=False)
def lookup_release(release_sha1_git: str) -> Dict[str, Any]:
    """Fetch the release identified by ``release_sha1_git``.

    Args:
        release_sha1_git: The release's sha1 as hexadecimal

    Returns:
        Release information as dict.

    Raises:
        NotFoundExc if there is no release with the provided sha1_git.
        Errors raised by ``_to_sha1_bin`` on a malformed identifier
        propagate unchanged.
    """
    releases = storage.release_get([_to_sha1_bin(release_sha1_git)])
    release = _first_element(releases)
    if release:
        return converters.from_release(release)

    raise NotFoundExc(
        f"Release with sha1_git {release_sha1_git} not found.")
def lookup_revision(rev_sha1_git) -> Dict[str, Any]:
    """Fetch the revision identified by ``rev_sha1_git``.

    Args:
        rev_sha1_git: The revision's sha1 as hexadecimal

    Returns:
        Revision information as dict.

    Raises:
        NotFoundExc if there is no revision with the provided sha1_git.
        Errors raised by ``_to_sha1_bin`` on a malformed identifier
        propagate unchanged.
    """
    revisions = storage.revision_get([_to_sha1_bin(rev_sha1_git)])
    revision = revisions[0]
    if revision:
        return converters.from_revision(revision)

    raise NotFoundExc(f"Revision with sha1_git {rev_sha1_git} not found.")
def lookup_revision(rev_sha1_git):
    """Fetch the revision identified by ``rev_sha1_git``.

    Args:
        rev_sha1_git: The revision's sha1 as hexadecimal

    Returns:
        Revision information as dict.

    Raises:
        NotFoundExc if there is no revision with the provided sha1_git.
        Errors raised by ``_to_sha1_bin`` on a malformed identifier
        propagate unchanged.
    """
    candidates = storage.revision_get([_to_sha1_bin(rev_sha1_git)])
    revision = _first_element(candidates)
    if revision:
        return converters.from_revision(revision)

    raise NotFoundExc('Revision with sha1_git %s not found.'
                      % rev_sha1_git)
def lookup_origin_visit(origin_url: str, visit_id: int) -> OriginVisitInfo:
    """Return information about visit ``visit_id`` of origin ``origin_url``.

    The result merges the latest status of the visit with the type of the
    visit itself.

    Args:
        origin_url: url of the origin concerned by the visit
        visit_id: the visit identifier to lookup

    Returns:
        The converted origin visit, built from the latest visit status
        dict extended with a "type" entry.

    Raises:
        NotFoundExc: if the visit, or its latest status, cannot be found.
    """
    not_found_msg = (
        f"Origin {origin_url} or its visit with id {visit_id} not found!")

    visit = storage.origin_visit_get_by(origin_url, visit_id)
    # Check the visit first: there is no point in querying its status
    # (nor in dereferencing it) when the visit itself is unknown.
    if not visit:
        raise NotFoundExc(not_found_msg)

    visit_status = storage.origin_visit_status_get_latest(origin_url, visit_id)
    # NOTE(review): assumes storage signals a missing status with None;
    # report it as not found instead of failing on `.to_dict()`.
    if visit_status is None:
        raise NotFoundExc(not_found_msg)

    return converters.from_origin_visit({
        **visit_status.to_dict(),
        "type": visit.type,
    })
def lookup_origin(origin):
    """Fetch the origin matching the given criteria.

    Args:
        origin: origin's dict with keys either 'id' or ('type' AND 'url')

    Returns:
        origin information as dict.

    Raises:
        NotFoundExc: if storage knows no such origin; the message mentions
            the id when a truthy 'id' was given, the type and url otherwise.
    """
    origin_info = storage.origin_get(origin)
    if origin_info:
        return converters.from_origin(origin_info)

    if origin.get('id'):
        msg = 'Origin with id %s not found!' % origin['id']
    else:
        msg = ('Origin with type %s and url %s not found!'
               % (origin['type'], origin['url']))
    raise NotFoundExc(msg)
def _lookup_revision_id_by(origin, branch_name, timestamp):
    """Resolve the revision targeted by a branch of an origin visit.

    The visit is selected through ``get_origin_visit`` using ``timestamp``;
    the branch is then searched in the snapshot of that visit. A branch of
    type "alias" is followed once.

    Args:
        origin: the origin, either as an int (its id) or a str (its url).
        branch_name: name of the branch to resolve.
        timestamp: forwarded as ``visit_ts`` to select the visit.

    Returns:
        The "target" of the resolved branch, i.e. a revision identifier.

    Raises:
        TypeError: if ``origin`` is neither an int nor a str.
        NotFoundExc: if the branch does not exist or does not lead to a
            revision (directly or through one alias).
    """
    def _get_snapshot_branch(snapshot_id, name):
        # Use the snapshot id received as argument: the previous version
        # shadowed its parameter and silently read the enclosing `visit`.
        snapshot = lookup_snapshot(
            snapshot_id,
            branches_from=name,
            branches_count=10,
            branch_name_exclude_prefix=None,
        )
        return snapshot["branches"].get(name)

    if isinstance(origin, int):
        origin = {"id": origin}
    elif isinstance(origin, str):
        origin = {"url": origin}
    else:
        raise TypeError('"origin" must be an int or a string.')

    # Kept as a function-level import, as in the original (presumably to
    # avoid a circular import -- TODO confirm).
    from swh.web.common.origin_visits import get_origin_visit

    visit = get_origin_visit(origin, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit["snapshot"], branch_name)

    rev_id = None
    if branch and branch["target_type"] == "revision":
        rev_id = branch["target"]
    elif branch and branch["target_type"] == "alias":
        # Only one level of aliasing is followed.
        branch = _get_snapshot_branch(visit["snapshot"], branch["target"])
        if branch and branch["target_type"] == "revision":
            rev_id = branch["target"]

    if not rev_id:
        # Report the url when known, the id otherwise (the message used to
        # read "origin None" for origins given by id).
        origin_ref = origin["url"] if "url" in origin else origin["id"]
        raise NotFoundExc("Revision for origin %s and branch %s not found." %
                          (origin_ref, branch_name))
    return rev_id