def lookup_release(release_sha1_git):
    """Fetch one release from storage and return it in converted form.

    Args:
        release_sha1_git: the release's sha1_git, as a hexadecimal string

    Returns:
        The release, as returned by ``converters.from_release``.

    Raises:
        NotFoundExc: if storage holds no release for that identifier.
        Errors raised by ``_to_sha1_bin`` while parsing the identifier are
        propagated unchanged.

    """
    release_id = _to_sha1_bin(release_sha1_git)
    found = _first_element(storage.release_get([release_id]))
    if found:
        return converters.from_release(found)
    raise NotFoundExc('Release with sha1_git %s not found.'
                      % release_sha1_git)
def api_content_raw(request, q):
    """
    .. http:get:: /api/1/content/[(hash_type):](hash)/raw/

        Get the raw content of a content object (aka a "blob"), as a byte
        sequence.

        :param string hash_type: optional parameter specifying which hashing
            algorithm has been used to compute the content checksum. It can be
            either ``sha1``, ``sha1_git``, ``sha256`` or ``blake2s256``. If that
            parameter is not provided, it is assumed that the hashing algorithm
            used is ``sha1``.
        :param string hash: hexadecimal representation of the checksum value
            computed with the specified hashing algorithm.
        :query string filename: if provided, the downloaded content will get
            that filename

        :resheader Content-Type: application/octet-stream

        :statuscode 200: no error
        :statuscode 400: an invalid **hash_type** or **hash** has been provided
        :statuscode 404: requested content can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
    """

    def _single_chunk(raw):
        # the whole payload is handed to the response as one chunk
        yield raw["data"]

    raw_content = archive.lookup_content_raw(q)
    if not raw_content:
        raise NotFoundExc("Content %s is not found." % q)

    # fall back to a name derived from the query when none was requested
    download_name = request.query_params.get("filename")
    if not download_name:
        download_name = "content_%s_raw" % q.replace(":", "_")

    response = HttpResponse(
        _single_chunk(raw_content), content_type="application/octet-stream"
    )
    response["Content-disposition"] = "attachment; filename=%s" % download_name
    return response
def lookup_origin_visit(origin_url: str, visit_id: int) -> OriginVisitInfo:
    """Return information about visit ``visit_id`` of origin ``origin_url``.

    Args:
        origin_url: url of the origin concerned by the visit
        visit_id: identifier of the visit to look up

    Returns:
        The result of ``converters.from_origin_visit`` applied to the fields
        of the visit's latest status completed with the visit ``type``.

    Raises:
        NotFoundExc: if the origin or the visit is not found, or if storage
            has no status for that visit.

    """
    visit = storage.origin_visit_get_by(origin_url, visit_id)
    if not visit:
        raise NotFoundExc(
            f"Origin {origin_url} or its visit with id {visit_id} not found!")

    # Only ask for the status once the visit is known to exist: it spares a
    # storage round-trip on the not-found path.
    visit_status = storage.origin_visit_status_get_latest(origin_url, visit_id)
    if visit_status is None:
        # Report a missing status as "not found" instead of failing with an
        # AttributeError on ``None.to_dict()``.
        raise NotFoundExc(
            f"No status found for visit with id {visit_id} "
            f"of origin {origin_url}!")

    return converters.from_origin_visit({
        **visit_status.to_dict(),
        "type": visit.type,
    })
def lookup_origin(origin):
    """Look up an origin in storage from a dict of criteria.

    Args:
        origin: dict describing the origin, holding either an 'id' key or
            both 'type' and 'url' keys

    Returns:
        The origin information, as returned by ``converters.from_origin``.

    Raises:
        NotFoundExc: if storage returns nothing for these criteria.

    """
    origin_info = storage.origin_get(origin)
    if origin_info:
        return converters.from_origin(origin_info)

    # build the message from whichever criteria the caller provided
    looked_up_by_id = 'id' in origin and origin['id']
    if looked_up_by_id:
        msg = 'Origin with id %s not found!' % origin['id']
    else:
        msg = 'Origin with type %s and url %s not found!' % \
            (origin['type'], origin['url'])
    raise NotFoundExc(msg)
def lookup_directory_through_revision(revision, path=None,
                                      limit=100, with_data=False):
    """Resolve a revision from criteria, then look up a path inside it.

    Args:
        revision: dictionary of criteria identifying the revision
        path: path to look up from the revision's directory
        limit: forwarded to ``lookup_revision_through`` (defaults to 100)
        with_data: forwarded to ``lookup_directory_with_revision``; asks for
            the raw data when the path resolves to a content

    Returns:
        A pair (revision id, result of ``lookup_directory_with_revision``
        for that revision and path).

    Raises:
        NotFoundExc: if no revision matches the criteria.

    """
    resolved = lookup_revision_through(revision, limit)
    if resolved:
        revision_id = resolved['id']
        directory = lookup_directory_with_revision(
            revision_id, path, with_data)
        return (revision_id, directory)
    raise NotFoundExc('Revision with criterion %s not found!' % revision)
def api_lookup(lookup_fn, *args,
               notfound_msg='Object not found',
               enrich_fn=None):
    """Call a lookup function and post-process what it returns.

    The pattern captured here is:

    - call ``lookup_fn`` with the supplied positional arguments
    - if it returns None, raise NotFoundExc with ``notfound_msg``
    - if it returns a list, a map object or a generator, apply ``enrich_fn``
      to every element and return the results as a list
    - otherwise apply ``enrich_fn`` to the returned value and return that

    Args:
        lookup_fn: function performing the lookup
        *args: positional arguments passed to ``lookup_fn``
        notfound_msg: message of the NotFoundExc raised when ``lookup_fn``
            returns None
        enrich_fn: function applied to the result (or to each of its
            elements); the identity function is used when not provided

    Raises:
        NotFoundExc or whatever ``lookup_fn`` raises.

    """
    result = lookup_fn(*args)
    if result is None:
        raise NotFoundExc(notfound_msg)

    enrich = enrich_fn if enrich_fn is not None else (lambda item: item)

    if isinstance(result, (map, list, GeneratorType)):
        return list(map(enrich, result))
    return enrich(result)
def _lookup_revision_id_by(origin, branch_name, timestamp):
    """Find the revision targeted by a branch of an origin's visit.

    Args:
        origin: the origin, given either as an int (its id) or as a str
            (its url)
        branch_name: name of the snapshot branch to resolve
        timestamp: passed as ``visit_ts`` to ``get_origin_visit`` to select
            the visit

    Returns:
        The target of the branch when it is of type "revision". A branch of
        type "alias" is followed once and the target of the aliased branch
        is returned when that one is a revision.

    Raises:
        TypeError: if ``origin`` is neither an int nor a str.
        NotFoundExc: if no revision could be resolved for the branch.

    """
    def _get_snapshot_branch(snapshot, branch_name):
        # Use the snapshot passed by the caller rather than reaching for the
        # enclosing ``visit`` variable, so the helper stands on its own.
        snapshot = lookup_snapshot(
            snapshot,
            branches_from=branch_name,
            branches_count=10,
            branch_name_exclude_prefix=None,
        )
        return snapshot["branches"].get(branch_name)

    if isinstance(origin, int):
        origin = {"id": origin}
    elif isinstance(origin, str):
        origin = {"url": origin}
    else:
        raise TypeError('"origin" must be an int or a string.')

    # imported at call time, as in the original code
    from swh.web.common.origin_visits import get_origin_visit

    visit = get_origin_visit(origin, visit_ts=timestamp)
    branch = _get_snapshot_branch(visit["snapshot"], branch_name)

    rev_id = None
    if branch and branch["target_type"] == "revision":
        rev_id = branch["target"]
    elif branch and branch["target_type"] == "alias":
        # aliases are followed one level only
        branch = _get_snapshot_branch(visit["snapshot"], branch["target"])
        if branch and branch["target_type"] == "revision":
            rev_id = branch["target"]

    if not rev_id:
        # ``origin`` holds either a "url" or an "id" key at this point; show
        # whichever was provided instead of printing "None" for id lookups.
        origin_ref = origin.get("url") or origin.get("id")
        raise NotFoundExc("Revision for origin %s and branch %s not found."
                          % (origin_ref, branch_name))

    return rev_id
def lookup_origin_intrinsic_metadata(origin_url: str) -> Dict[str, Any]:
    """Return the intrinsic metadata indexed for the origin with given url.

    Args:
        origin_url: origin url

    Raises:
        NotFoundExc when the origin is not found

    Returns:
        The ``metadata`` attribute of the first entry returned by the
        indexer storage, or an empty dict when there is none.

    """
    urls = [origin_url]
    if not storage.origin_get(urls)[0]:
        raise NotFoundExc(f"Origin with url {origin_url} not found!")

    indexed = _first_element(idx_storage.origin_intrinsic_metadata_get(urls))
    return indexed.metadata if indexed else {}
def lookup_content_raw(q):
    """Fetch the raw bytes of the content designated by q.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        The result of ``converters.from_content`` applied to the first
        entry storage returns for the content's sha1.

    Raises:
        NotFoundExc if storage has no bytes for the content; errors raised
        by ``lookup_content`` are propagated unchanged.

    """
    content_info = lookup_content(q)
    sha1 = hashutil.hash_to_bytes(content_info['checksums']['sha1'])
    raw = _first_element(storage.content_get([sha1]))
    if raw:
        return converters.from_content(raw)

    # report the failure with the checksum the caller asked for
    algo, checksum = query.parse_hash(q)
    raise NotFoundExc('Bytes of content with %s checksum equals to %s '
                      'are not available!' %
                      (algo, hashutil.hash_to_hex(checksum)))
def directory_browse(request, sha1_git, path=None):
    """Django view for browsing the content of a directory identified
    by its sha1_git value.

    The url that points to it is :http:get:`/browse/directory/(sha1_git)/[(path)/]`

    Args:
        request: the incoming request; its ``origin_type``, ``origin_url``
            and ``origin`` GET parameters are read
        sha1_git: identifier of the root directory to browse
        path: optional path, looked up from the root directory

    Returns:
        The rendered ``browse/directory.html`` template, a redirect to the
        raw content view when ``path`` resolves to a file, or the result of
        ``handle_view_exception`` when anything in the lookup phase raises.
    """ # noqa
    # keep the root id: sha1_git is rebound below when a path is given
    root_sha1_git = sha1_git
    try:
        if path:
            dir_info = service.lookup_directory_with_path(sha1_git, path)
            # some readme files can reference assets reachable from the
            # browsed directory, handle that special case in order to
            # correctly displayed them
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
            sha1_git = dir_info['target']

        dirs, files = get_directory_entries(sha1_git)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        # 'origin' is accepted as a fallback for 'origin_url'
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                # any failure here is reported as a broken origin, with a
                # link to the same directory without origin information
                raw_dir_url = reverse('browse-directory',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a directory '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the directory '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_dir_url)))

                raise NotFoundExc(error_message)
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path_info = gen_path_info(path)

    query_params = {'origin': origin_url}

    # first breadcrumb is the root directory, then one per path component
    breadcrumbs = []
    breadcrumbs.append({
        'name': root_sha1_git[:7],
        'url': reverse('browse-directory',
                       url_args={'sha1_git': root_sha1_git},
                       query_params=query_params)
    })
    for pi in path_info:
        breadcrumbs.append({
            'name': pi['name'],
            'url': reverse('browse-directory',
                           url_args={
                               'sha1_git': root_sha1_git,
                               'path': pi['path']
                           },
                           query_params=query_params)
        })

    # from here on, path is a prefix to prepend to entry names
    path = '' if path is None else (path + '/')

    for d in dirs:
        if d['type'] == 'rev':
            d['url'] = reverse('browse-revision',
                               url_args={'sha1_git': d['target']},
                               query_params=query_params)
        else:
            d['url'] = reverse('browse-directory',
                               url_args={
                                   'sha1_git': root_sha1_git,
                                   'path': path + d['name']
                               },
                               query_params=query_params)

    sum_file_sizes = 0

    # maps readme file names to their sha1 checksums
    readmes = {}

    for f in files:
        query_string = 'sha1_git:' + f['target']
        f['url'] = reverse('browse-content',
                           url_args={'query_string': query_string},
                           query_params={
                               'path': root_sha1_git + '/' + path + f['name'],
                               'origin': origin_url
                           })
        if f['length'] is not None:
            # accumulate the numeric length before it is replaced by its
            # human readable form
            sum_file_sizes += f['length']
            f['length'] = filesizeformat(f['length'])
        if f['name'].lower().startswith('readme'):
            readmes[f['name']] = f['checksums']['sha1']

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    dir_metadata = {
        'id': sha1_git,
        'number of regular files': len(files),
        'number of subdirectories': len(dirs),
        'sum of regular file sizes': sum_file_sizes
    }

    vault_cooking = {
        'directory_context': True,
        'directory_id': sha1_git,
        'revision_context': False,
        'revision_id': None
    }

    swh_ids = get_swh_persistent_ids([{'type': 'directory', 'id': sha1_git}])

    heading = 'Directory - %s' % sha1_git
    if breadcrumbs:
        dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/'
        heading += ' - %s' % dir_path

    return render(
        request, 'browse/directory.html', {
            'heading': heading,
            'swh_object_id': swh_ids[0]['swh_id'],
            'swh_object_name': 'Directory',
            'swh_object_metadata': dir_metadata,
            'dirs': dirs,
            'files': files,
            'breadcrumbs': breadcrumbs,
            'top_right_link': None,
            'readme_name': readme_name,
            'readme_url': readme_url,
            'readme_html': readme_html,
            'snapshot_context': snapshot_context,
            'vault_cooking': vault_cooking,
            'show_actions_menu': True,
            'swh_ids': swh_ids
        })
def release_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a release
    identified by its id.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.

    Args:
        request: the incoming request; its ``snapshot_id``, ``origin_type``,
            ``origin_url``, ``origin``, ``timestamp`` and ``visit_id`` GET
            parameters are read
        sha1_git: identifier of the release to display

    Returns:
        The rendered ``browse/release.html`` template, or the result of
        ``handle_view_exception`` when the release or its browsing context
        cannot be retrieved.
    """
    try:
        release = service.lookup_release(sha1_git)
        snapshot_context = None
        origin_info = None
        snapshot_id = request.GET.get('snapshot_id', None)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        # 'origin' is accepted as a fallback for 'origin_url'
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        if origin_url:
            try:
                snapshot_context = \
                    get_snapshot_context(snapshot_id, origin_type,
                                         origin_url, timestamp,
                                         visit_id)
            except Exception:
                # any failure here is reported as a broken origin, with a
                # link to the same release without origin information
                raw_rel_url = reverse('browse-release',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a release '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the release '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rel_url)))

                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    release_data = {}

    author_name = 'None'
    release_data['author'] = 'None'
    if release['author']:
        author_name = release['author']['name'] or \
            release['author']['fullname']
        release_data['author'] = \
            gen_person_link(release['author']['id'], author_name,
                            snapshot_context)
    release_data['date'] = format_utc_iso_date(release['date'])
    release_data['id'] = sha1_git
    release_data['name'] = release['name']
    release_data['synthetic'] = release['synthetic']
    release_data['target type'] = release['target_type']

    # metadata link to the release target, depending on its type
    if release['target_type'] == 'revision':
        release_data['target'] = \
            gen_revision_link(release['target'],
                              snapshot_context=snapshot_context)
    elif release['target_type'] == 'content':
        content_url = \
            reverse('browse-content',
                    url_args={'query_string': 'sha1_git:' + release['target']})
        release_data['target'] = gen_link(content_url, release['target'])
    elif release['target_type'] == 'directory':
        directory_url = \
            reverse('browse-directory',
                    url_args={'sha1_git': release['target']})
        release_data['target'] = gen_link(directory_url, release['target'])
    elif release['target_type'] == 'release':
        release_url = \
            reverse('browse-release',
                    url_args={'sha1_git': release['target']})
        release_data['target'] = gen_link(release_url, release['target'])

    release_note_lines = []
    if release['message']:
        release_note_lines = release['message'].split('\n')

    vault_cooking = None

    # a snapshot id takes precedence over the origin for target links
    query_params = {}
    if snapshot_id:
        query_params = {'snapshot_id': snapshot_id}
    elif origin_info:
        query_params = {'origin': origin_info['url']}

    target_url = ''
    if release['target_type'] == 'revision':
        target_url = reverse('browse-revision',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)
        try:
            revision = service.lookup_revision(release['target'])
            vault_cooking = {
                'directory_context': True,
                'directory_id': revision['directory'],
                'revision_context': True,
                'revision_id': release['target']
            }
        except Exception:
            # best effort: the page is still rendered, without vault
            # cooking actions, when the target revision cannot be fetched
            pass
    elif release['target_type'] == 'directory':
        target_url = reverse('browse-directory',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)
        try:
            # The lookup is only there to check the target can be retrieved
            # (presumably it raises otherwise; confirm against
            # service.lookup_directory). Its result is a directory listing,
            # not a revision, so it has no 'directory' key to read: the
            # directory id is the release target itself.
            service.lookup_directory(release['target'])
            vault_cooking = {
                'directory_context': True,
                'directory_id': release['target'],
                'revision_context': False,
                'revision_id': None
            }
        except Exception:
            # best effort, same as for revision targets
            pass
    elif release['target_type'] == 'content':
        # Prefix the target with its hash algorithm, as done for the
        # metadata link above, so both links designate the same content.
        target_url = reverse('browse-content',
                             url_args={'query_string':
                                       'sha1_git:' + release['target']},
                             query_params=query_params)
    elif release['target_type'] == 'release':
        target_url = reverse('browse-release',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)

    release['target_url'] = target_url

    if snapshot_context:
        release_data['snapshot id'] = snapshot_context['snapshot_id']

    if origin_info:
        release_url = reverse('browse-release',
                              url_args={'sha1_git': release['id']})
        release_data['context-independent release'] = \
            gen_link(release_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        release_data['origin id'] = origin_info['id']
        release_data['origin type'] = origin_info['type']
        release_data['origin url'] = gen_link(origin_info['url'],
                                              origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_context['snapshot_id'],
                              link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        release_data['snapshot'] = browse_snapshot_link

    swh_objects = [{'type': 'release', 'id': sha1_git}]

    # the snapshot of the browsing context overrides the requested one
    if snapshot_context:
        snapshot_id = snapshot_context['snapshot_id']

    if snapshot_id:
        swh_objects.append({'type': 'snapshot', 'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    # first line of the message is the note header, the rest its body
    note_header = 'None'
    if release_note_lines:
        note_header = release_note_lines[0]

    release['note_header'] = note_header
    release['note_body'] = '\n'.join(release_note_lines[1:])

    heading = 'Release - %s' % release['name']
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request, 'browse/release.html',
                  {'heading': heading,
                   'swh_object_id': swh_ids[0]['swh_id'],
                   'swh_object_name': 'Release',
                   'swh_object_metadata': release_data,
                   'release': release,
                   'snapshot_context': snapshot_context,
                   'show_actions_menu': True,
                   'breadcrumbs': None,
                   'vault_cooking': vault_cooking,
                   'top_right_link': None,
                   'swh_ids': swh_ids})
def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
    """Look up what a path designates inside the directory of a revision.

    Without ``dir_path``, the revision's top level directory is listed.
    Otherwise the entry found at ``dir_path`` is returned, whatever its
    kind (directory, file or revision).

    Args:
        sha1_git: revision's hash.
        dir_path: optional path inside the revision's directory.
        with_data: whether to also fetch the raw data when the path
            resolves to a content. Default to False (for the api)

    Returns:
        A dict with keys "type" ("dir", "file" or "rev"), "path" ("." when
        no path was given), "revision" and "content".

    Raises:
        BadInputExc in case of unknown algo_hash or bad hash.
        NotFoundExc either if the revision is not found, the path
        referenced does not exist, or the content of a file entry is
        not found.
        NotImplementedError in case of dir_path exists but do not reference
        a type 'dir', 'file' or 'rev'.

    """
    revision = storage.revision_get([_to_sha1_bin(sha1_git)])[0]
    if not revision:
        raise NotFoundExc(f"Revision {sha1_git} not found")
    root_dir_id = revision.directory

    if dir_path:
        segments = dir_path.strip(os.path.sep).split(os.path.sep)
        entity = storage.directory_entry_get_by_path(
            root_dir_id, [segment.encode("utf-8") for segment in segments])
        if not entity:
            raise NotFoundExc(
                "Directory or File '%s' pointed to by revision %s not found"
                % (dir_path, sha1_git))
    else:
        # no path given: the entity is the revision's root directory
        entity = {"type": "dir", "target": root_dir_id}

    shown_path = dir_path if dir_path else "."
    entity_type = entity["type"]

    if entity_type == "dir":
        entries = storage.directory_ls(entity["target"]) or []
        return {
            "type": "dir",
            "path": shown_path,
            "revision": sha1_git,
            "content": [converters.from_directory_entry(entry)
                        for entry in entries],
        }

    if entity_type == "file":  # content
        content = _first_element(
            storage.content_find({"sha1_git": entity["target"]}))
        if not content:
            raise NotFoundExc(f"Content not found for revision {sha1_git}")
        content_d = content.to_dict()
        if with_data:
            data = storage.content_get_data(content.sha1)
            if data:
                content_d["data"] = data
        return {
            "type": "file",
            "path": shown_path,
            "revision": sha1_git,
            "content": converters.from_content(content_d),
        }

    if entity_type == "rev":  # revision
        target_revision = storage.revision_get([entity["target"]])[0]
        converted = (converters.from_revision(target_revision)
                     if target_revision else None)
        return {
            "type": "rev",
            "path": shown_path,
            "revision": sha1_git,
            "content": converted,
        }

    raise NotImplementedError("Entity of type %s not implemented."
                              % entity["type"])
def lookup_revision_with_context(
        sha1_git_root: Union[str, Dict[str, Any], Revision],
        sha1_git: str,
        limit: int = 100) -> Dict[str, Any]:
    """Return revision sha1_git in the context of the history of
    sha1_git_root.

    The revision must appear in the log of sha1_git_root (bounded by
    ``limit``); the returned information is completed with the revisions
    of that log which have it as parent.

    Args:
        sha1_git_root: latest revision. Either a sha1 (as an hex string), a
            Revision object or a dict with an "id" key.
        sha1_git: one of sha1_git_root's ancestors
        limit: maximum number of revisions requested from the log of
            sha1_git_root (defaults to 100)

    Returns:
        Information on sha1_git, with a "children" entry listing its
        children found in the log of sha1_git_root

    Raises:
        BadInputExc in case of unknown algo_hash or bad hash
        NotFoundExc if either revision is not found or if sha1_git is not
        found in the log of sha1_git_root

    """
    revision = storage.revision_get([_to_sha1_bin(sha1_git)])[0]
    if not revision:
        raise NotFoundExc(f"Revision {sha1_git} not found")

    if isinstance(sha1_git_root, str):
        root_id = _to_sha1_bin(sha1_git_root)
        # only hex identifiers are checked for existence
        if not storage.revision_get([root_id])[0]:
            raise NotFoundExc(f"Revision root {sha1_git_root} not found")
    elif isinstance(sha1_git_root, Revision):
        root_id = sha1_git_root.id
    else:
        root_id = sha1_git_root["id"]

    # walk the log once, indexing it both ways
    parents: Dict[str, List[str]] = {}
    children = defaultdict(list)
    for entry in storage.revision_log([root_id], limit):
        entry_id = entry["id"]
        parent_ids = list(entry["parents"])
        parents[entry_id] = parent_ids
        for parent_id in parent_ids:
            children[parent_id].append(entry_id)

    if revision.id not in parents:
        raise NotFoundExc(
            f"Revision {sha1_git} is not an ancestor of {sha1_git_root}")

    revision_d = revision.to_dict()
    revision_d["children"] = children[revision.id]
    return converters.from_revision(revision_d)
def _check_directory_exists(sha1_git, sha1_git_bin):
    """Raise NotFoundExc when storage reports the directory as missing.

    Args:
        sha1_git: directory identifier, only used in the error message
        sha1_git_bin: directory identifier passed to
            ``storage.directory_missing``

    Raises:
        NotFoundExc: if ``storage.directory_missing`` returns anything for
            the identifier.

    """
    missing = list(storage.directory_missing([sha1_git_bin]))
    if missing:
        raise NotFoundExc("Directory with sha1_git %s not found" % sha1_git)
def _directory_browse(request, sha1_git, path=None):
    """Render the HTML page displaying the content of a directory.

    Args:
        request: the incoming request; its ``origin_url``, ``origin``,
            ``snapshot``, ``branch``, ``release`` and ``revision`` GET
            parameters are read
        sha1_git: identifier of the root directory to browse
        path: optional path, looked up from the root directory

    Returns:
        The rendered ``browse/directory.html`` template. When the path is
        not found in the root directory, the page is still rendered, with
        status code 404 and no entries.

    Raises:
        NotFoundExc: if the snapshot context cannot be retrieved.
    """
    # keep the root id: sha1_git is rebound below when a path is given
    root_sha1_git = sha1_git
    error_info = {"status_code": 200, "description": None}
    if path:
        try:
            dir_info = archive.lookup_directory_with_path(sha1_git, path)
            sha1_git = dir_info["target"]
        except NotFoundExc as e:
            # do not abort: record the error and render an empty listing
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
            sha1_git = None

    dirs, files = [], []
    if sha1_git is not None:
        dirs, files = get_directory_entries(sha1_git)
    # 'origin' is accepted as a fallback for 'origin_url'
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
            )
        except NotFoundExc as e:
            # errors whose message starts with "Origin" get a friendlier
            # message linking to the directory without origin information
            if str(e).startswith("Origin"):
                raw_dir_url = reverse(
                    "browse-directory", url_args={"sha1_git": sha1_git}
                )
                error_message = (
                    "The Software Heritage archive has a directory "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the directory "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_dir_url))
                )
                raise NotFoundExc(error_message)
            else:
                raise e

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    # first breadcrumb is the root directory, then one per path component
    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": root_sha1_git[:7],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": None},
            ),
        }
    )

    for pi in path_info:
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_sha1_git},
                    query_params={**query_params, "path": pi["path"],},
                ),
            }
        )

    # from here on, path is a prefix to prepend to entry names
    path = "" if path is None else (path + "/")

    for d in dirs:
        if d["type"] == "rev":
            d["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": d["target"]},
                query_params=query_params,
            )
        else:
            d["url"] = reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": path + d["name"],},
            )

    sum_file_sizes = 0

    # maps readme file names to their sha1 checksums
    readmes = {}

    for f in files:
        query_string = "sha1_git:" + f["target"]
        f["url"] = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path + f["name"],
            },
        )
        if f["length"] is not None:
            # accumulate the numeric length before it is replaced by its
            # human readable form
            sum_file_sizes += f["length"]
            f["length"] = filesizeformat(f["length"])
        if f["name"].lower().startswith("readme"):
            readmes[f["name"]] = f["checksums"]["sha1"]

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    # NOTE(review): sha1_git is None from here on when the path lookup
    # failed above, so object_id, directory_id and the heading carry None
    dir_metadata = DirectoryMetadata(
        object_type=DIRECTORY,
        object_id=sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path}" if path else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )

    vault_cooking = {
        "directory_context": True,
        "directory_id": sha1_git,
        "revision_context": False,
        "revision_id": None,
    }

    # the directory comes first: its entry is the one used for
    # swh_object_id below
    swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)]

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(
                object_type=REVISION, object_id=snapshot_context["revision_id"]
            )
        )
        swh_objects.append(
            SWHObjectInfo(
                object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"]
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=RELEASE, object_id=snapshot_context["release_id"]
                )
            )

    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)

    heading = "Directory - %s" % sha1_git
    if breadcrumbs:
        dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/"
        heading += " - %s" % dir_path

    # link to the revision log, only offered for a non-empty snapshot
    top_right_link = None
    if snapshot_context is not None and not snapshot_context["is_empty"]:
        history_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": snapshot_context["revision_id"]},
            query_params=query_params,
        )
        top_right_link = {
            "url": history_url,
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

    return render(
        request,
        "browse/directory.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Directory",
            "swh_object_metadata": dir_metadata,
            "dirs": dirs,
            "files": files,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "snapshot_context": snapshot_context,
            "vault_cooking": vault_cooking,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
def revision_browse(request, sha1_git, extra_path=None):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.

    Args:
        request: the incoming request; its ``origin_type``, ``origin_url``,
            ``origin``, ``timestamp``, ``visit_id``, ``snapshot_id`` and
            ``path`` GET parameters are read
        sha1_git: identifier of the revision to display
        extra_path: optional path looked up from the revision's directory;
            when it resolves to a file, the view redirects to the raw
            content view

    Returns:
        The rendered ``browse/revision.html`` template, a redirect (see
        ``extra_path``), or the result of ``handle_view_exception`` when
        anything in the lookup phase raises.
    """
    try:
        revision = service.lookup_revision(sha1_git)
        # some readme files can reference assets reachable from the
        # browsed directory, handle that special case in order to
        # correctly displayed them
        if extra_path:
            dir_info = \
                service.lookup_directory_with_path(revision['directory'],
                                                   extra_path)
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
        origin_info = None
        snapshot_context = None
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        # 'origin' is accepted as a fallback for 'origin_url'
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        snapshot_id = request.GET.get('snapshot_id', None)
        path = request.GET.get('path', None)
        dir_id = None
        dirs, files = None, None
        content_data = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url, timestamp, visit_id)
            except Exception:
                # any failure here is reported as a broken origin, with a
                # link to the same revision without origin information
                raw_rev_url = reverse('browse-revision',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a revision '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the revision '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rev_url)))
                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
            # the snapshot of the origin overrides the requested one
            snapshot_id = snapshot_context['snapshot_id']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
        if path:
            file_info = \
                service.lookup_directory_with_path(revision['directory'], path)
            if file_info['type'] == 'dir':
                dir_id = file_info['target']
            else:
                # query_string and file_info are reused further down, in
                # the branch displaying a content
                query_string = 'sha1_git:' + file_info['target']
                # NOTE(review): the request_content visible in this file
                # takes no raise_if_unavailable keyword; confirm which
                # helper this call resolves to
                content_data = request_content(query_string,
                                               raise_if_unavailable=False)
        else:
            dir_id = revision['directory']

        if dir_id:
            # from here on, path is a prefix to prepend to entry names
            path = '' if path is None else (path + '/')
            dirs, files = get_directory_entries(dir_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    revision_data = {}

    author_name = 'None'
    revision_data['author'] = 'None'
    if revision['author']:
        author_name = revision['author']['name'] or \
            revision['author']['fullname']
        revision_data['author'] = \
            gen_person_link(revision['author']['id'], author_name,
                            snapshot_context)
    revision_data['committer'] = 'None'
    if revision['committer']:
        # NOTE(review): unlike for the author, there is no fallback to
        # 'fullname' when the committer name is empty
        revision_data['committer'] = \
            gen_person_link(revision['committer']['id'],
                            revision['committer']['name'], snapshot_context)
    revision_data['committer date'] = \
        format_utc_iso_date(revision['committer_date'])
    revision_data['date'] = format_utc_iso_date(revision['date'])
    if snapshot_context:
        revision_data['snapshot id'] = snapshot_id
        revision_data['directory'] = \
            gen_snapshot_directory_link(snapshot_context, sha1_git,
                                        link_text='Browse',
                                        link_attrs={'class': 'btn btn-default btn-sm', # noqa
                                                    'role': 'button'})
    else:
        revision_data['directory'] = \
            gen_directory_link(revision['directory'], link_text='Browse',
                               link_attrs={'class': 'btn btn-default btn-sm',
                                           'role': 'button'})
    revision_data['id'] = sha1_git
    revision_data['merge'] = revision['merge']
    revision_data['metadata'] = escape(
        json.dumps(revision['metadata'], sort_keys=True,
                   indent=4, separators=(',', ': ')))

    if origin_info:
        revision_data['context-independent revision'] = \
            gen_revision_link(sha1_git, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['origin id'] = origin_info['id']
        revision_data['origin type'] = origin_info['type']
        revision_data['origin url'] = gen_link(origin_info['url'],
                                               origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_id, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['snapshot'] = browse_snapshot_link

    # parents are first rendered as links for the metadata ...
    parents = ''
    for p in revision['parents']:
        parent_link = gen_revision_link(p, snapshot_context=snapshot_context)
        parents += parent_link + '<br/>'

    revision_data['parents'] = mark_safe(parents)
    revision_data['synthetic'] = revision['synthetic']
    revision_data['type'] = revision['type']

    message_lines = ['None']
    if revision['message']:
        message_lines = revision['message'].split('\n')

    # ... then the same name is rebound to a list of dicts for the template
    parents = []
    for p in revision['parents']:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({'id': p, 'url': parent_url})

    path_info = gen_path_info(path)

    query_params = {
        'snapshot_id': snapshot_id,
        'origin_type': origin_type,
        'origin': origin_url,
        'timestamp': timestamp,
        'visit_id': visit_id
    }

    breadcrumbs = []
    breadcrumbs.append({
        'name': revision['directory'][:7],
        'url': reverse('browse-revision',
                       url_args={'sha1_git': sha1_git},
                       query_params=query_params)
    })
    for pi in path_info:
        # query_params is mutated in place: 'path' is set before each url
        # is generated
        query_params['path'] = pi['path']
        breadcrumbs.append({
            'name': pi['name'],
            'url': reverse('browse-revision',
                           url_args={'sha1_git': sha1_git},
                           query_params=query_params)
        })

    vault_cooking = {
        'directory_context': False,
        'directory_id': None,
        'revision_context': True,
        'revision_id': sha1_git
    }

    swh_objects = [{'type': 'revision', 'id': sha1_git}]

    # defaults for the template variables filled by only one of the two
    # branches below
    content = None
    content_size = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}
    error_code = 200
    error_message = ''
    error_description = ''

    if content_data:
        # the path designates a content: display it
        breadcrumbs[-1]['url'] = None
        content_size = content_data['length']
        mimetype = content_data['mimetype']
        if content_data['raw_data']:
            content_display_data = prepare_content_for_display(
                content_data['raw_data'], content_data['mimetype'], path)
            content = content_display_data['content_data']
            language = content_display_data['language']
            mimetype = content_display_data['mimetype']
        # query_params is rebound: only the filename is passed to the raw
        # content url
        query_params = {}
        if path:
            filename = path_info[-1]['name']
            query_params['filename'] = path_info[-1]['name']
            revision_data['filename'] = filename

        top_right_link = {
            'url': reverse('browse-content-raw',
                           url_args={'query_string': query_string},
                           query_params=query_params),
            'icon': swh_object_icons['content'],
            'text': 'Raw File'
        }

        swh_objects.append({'type': 'content',
                            'id': file_info['target']})

        error_code = content_data['error_code']
        error_message = content_data['error_message']
        error_description = content_data['error_description']
    else:
        # the path designates a directory: list its entries
        for d in dirs:
            if d['type'] == 'rev':
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': d['target']})
            else:
                query_params['path'] = path + d['name']
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': sha1_git},
                                   query_params=query_params)
        for f in files:
            query_params['path'] = path + f['name']
            f['url'] = reverse('browse-revision',
                               url_args={'sha1_git': sha1_git},
                               query_params=query_params)
            if f['length'] is not None:
                f['length'] = filesizeformat(f['length'])
            if f['name'].lower().startswith('readme'):
                readmes[f['name']] = f['checksums']['sha1']

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            'url': get_revision_log_url(sha1_git, snapshot_context),
            'icon': swh_object_icons['revisions history'],
            'text': 'History'
        }

        vault_cooking['directory_context'] = True
        vault_cooking['directory_id'] = dir_id

        swh_objects.append({'type': 'directory', 'id': dir_id})

    diff_revision_url = reverse('diff-revision',
                                url_args={'sha1_git': sha1_git},
                                query_params={
                                    'origin_type': origin_type,
                                    'origin': origin_url,
                                    'timestamp': timestamp,
                                    'visit_id': visit_id
                                })

    if snapshot_id:
        swh_objects.append({'type': 'snapshot', 'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    heading = 'Revision - %s - %s' %\
        (sha1_git[:7], textwrap.shorten(message_lines[0], width=70))
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request, 'browse/revision.html', {
        'heading': heading,
        'swh_object_id': swh_ids[0]['swh_id'],
        'swh_object_name': 'Revision',
        'swh_object_metadata': revision_data,
        'message_header': message_lines[0],
        'message_body': '\n'.join(message_lines[1:]),
        'parents': parents,
        'snapshot_context': snapshot_context,
        'dirs': dirs,
        'files': files,
        'content': content,
        'content_size': content_size,
        'max_content_size': content_display_max_size,
        'mimetype': mimetype,
        'language': language,
        'readme_name': readme_name,
        'readme_url': readme_url,
        'readme_html': readme_html,
        'breadcrumbs': breadcrumbs,
        'top_right_link': top_right_link,
        'vault_cooking': vault_cooking,
        'diff_revision_url': diff_revision_url,
        'show_actions_menu': True,
        'swh_ids': swh_ids,
        'error_code': error_code,
        'error_message': error_message,
        'error_description': error_description
    }, status=error_code)
def request_content(
    query_string, max_size=content_display_max_size, re_encode=True,
):
    """Retrieve a content from the archive along with its display metadata.

    The content metadata are looked up first, then the mime type, encoding
    and language recorded for it in the indexer db. The raw bytes are only
    retrieved when the content is smaller than ``max_size``. If no usable
    mime type was recorded, it is computed from the raw bytes.

    Args:
        query_string: a string of the form "[ALGO_HASH:]HASH" where
            optional ALGO_HASH can be either ``sha1``, ``sha1_git``,
            ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH
            the hexadecimal representation of the hash value
        max_size: the maximum size for a content to retrieve (defaults to
            ``content_display_max_size``, no size limit if None or 0)
        re_encode: whether to pass the mime type, encoding and raw bytes
            through ``_re_encode_content`` before returning them

    Returns:
        The dict returned by ``archive.lookup_content`` extended with the
        keys ``raw_data`` (None when the content is too large to be
        retrieved), ``mimetype``, ``encoding`` and ``language``.

    Raises:
        NotFoundExc: if the content bytes can not be retrieved from the
            archive

    """
    content_data = archive.lookup_content(query_string)
    filetype = None
    language = None
    # requests to the indexer db may fail so properly handle
    # those cases in order to avoid content display errors
    try:
        filetype = archive.lookup_content_filetype(query_string)
        language = archive.lookup_content_language(query_string)
    except Exception as exc:
        sentry_sdk.capture_exception(exc)

    mimetype = "unknown"
    encoding = "unknown"
    if filetype:
        mimetype = filetype["mimetype"]
        encoding = filetype["encoding"]
        # workaround when encountering corrupted data due to implicit
        # conversion from bytea to text in the indexer db (see T818)
        # TODO: Remove that code when all data have been correctly converted
        if mimetype.startswith("\\"):
            filetype = None
            # do not leak the corrupted values when the raw bytes are not
            # retrieved below (content larger than max_size): in that case
            # nothing recomputes them
            mimetype = "unknown"
            encoding = "unknown"

    if not max_size or content_data["length"] < max_size:
        try:
            content_raw = archive.lookup_content_raw(query_string)
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
            raise NotFoundExc(
                "The bytes of the content are currently not available "
                "in the archive."
            ) from exc
        else:
            content_data["raw_data"] = content_raw["data"]

            # no usable indexer information: detect from the bytes
            if not filetype:
                mimetype, encoding = get_mimetype_and_encoding_for_content(
                    content_data["raw_data"]
                )

            if re_encode:
                mimetype, encoding, raw_data = _re_encode_content(
                    mimetype, encoding, content_data["raw_data"]
                )
                content_data["raw_data"] = raw_data
    else:
        # content too large: callers must check raw_data for None
        content_data["raw_data"] = None

    content_data["mimetype"] = mimetype
    content_data["encoding"] = encoding
    content_data["language"] = language["lang"] if language else "not detected"

    return content_data
def release_browse(request, sha1_git):
    """
    Django view rendering the HTML page of the release whose identifier
    is ``sha1_git``.

    It is served at :http:get:`/browse/release/(sha1_git)/`.

    The optional ``origin_url``/``origin``, ``snapshot_id``/``snapshot``,
    ``timestamp`` and ``visit_id`` query parameters select the snapshot
    context the release is browsed from.
    """
    release = archive.lookup_release(sha1_git)
    snapshot_context = {}
    origin_info = None

    params = request.GET
    # each parameter has a legacy alias used as a fallback
    snapshot_id = params.get("snapshot_id") or params.get("snapshot")
    origin_url = params.get("origin_url") or params.get("origin")
    timestamp = params.get("timestamp")
    visit_id = params.get("visit_id")

    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id,
                origin_url,
                timestamp,
                visit_id,
                release_name=release["name"],
            )
        except NotFoundExc as e:
            raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git})
            error_template = (
                "The Software Heritage archive has a release "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the release "
                "without origin information: %s"
            )
            error_message = error_template % (
                gen_link(origin_url),
                gen_link(raw_rel_url),
            )
            # only errors about the origin itself get the friendlier message
            if not str(e).startswith("Origin"):
                raise e
            raise NotFoundExc(error_message)
        origin_info = snapshot_context["origin_info"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(
            snapshot_id, release_name=release["name"]
        )

    snapshot_id = snapshot_context.get("snapshot_id")

    author = release["author"]
    release_metadata = ReleaseMetadata(
        object_type=RELEASE,
        object_id=sha1_git,
        release=sha1_git,
        author=author["fullname"] if author else "None",
        author_url=gen_person_mail_link(author) if author else "None",
        date=format_utc_iso_date(release["date"]),
        name=release["name"],
        synthetic=release["synthetic"],
        target=release["target"],
        target_type=release["target_type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    message = release["message"]
    release_note_lines = message.split("\n") if message else []

    swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)]

    vault_cooking = None
    rev_directory = None
    target_link = None

    target = release["target"]
    target_type = release["target_type"]
    link_options = {
        "snapshot_context": snapshot_context,
        "link_text": None,
        "link_attrs": None,
    }

    if target_type == REVISION:
        target_link = gen_revision_link(target, **link_options)
        # best effort: a missing revision only disables the extra links
        try:
            rev_directory = archive.lookup_revision(target)["directory"]
            vault_cooking = {
                "directory_context": True,
                "directory_id": rev_directory,
                "revision_context": True,
                "revision_id": target,
            }
            swh_objects.append(SWHObjectInfo(object_type=REVISION, object_id=target))
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=rev_directory)
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif target_type == DIRECTORY:
        target_link = gen_directory_link(target, **link_options)
        try:
            # check directory exists
            archive.lookup_directory(target)
            vault_cooking = {
                "directory_context": True,
                "directory_id": target,
                "revision_context": False,
                "revision_id": None,
            }
            swh_objects.append(SWHObjectInfo(object_type=DIRECTORY, object_id=target))
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif target_type == CONTENT:
        target_link = gen_content_link(target, **link_options)
        swh_objects.append(SWHObjectInfo(object_type=CONTENT, object_id=target))
    elif target_type == RELEASE:
        target_link = gen_release_link(target, **link_options)

    # pick the most contextual directory view available
    rev_directory_url = None
    if rev_directory is not None:
        if origin_info:
            rev_directory_url = reverse(
                "browse-origin-directory",
                query_params={
                    "origin_url": origin_info["url"],
                    "release": release["name"],
                    "snapshot": snapshot_id,
                },
            )
        elif snapshot_id:
            rev_directory_url = reverse(
                "browse-snapshot-directory",
                url_args={"snapshot_id": snapshot_id},
                query_params={"release": release["name"]},
            )
        else:
            rev_directory_url = reverse(
                "browse-directory", url_args={"sha1_git": rev_directory}
            )

    release["directory_link"] = (
        gen_link(rev_directory_url, rev_directory)
        if rev_directory_url is not None
        else None
    )
    release["target_link"] = target_link

    if snapshot_context:
        snapshot_id = snapshot_context["snapshot_id"]

    if snapshot_id:
        swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context)

    # first line of the message is the header, the rest is the body
    release["note_header"] = release_note_lines[0] if release_note_lines else "None"
    release["note_body"] = "\n".join(release_note_lines[1:])

    heading = "Release - %s" % release["name"]
    if snapshot_context:
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        else:
            context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        heading += " - %s" % context_found

    template_context = {
        "heading": heading,
        "swh_object_id": swhids_info[0]["swhid"],
        "swh_object_name": "Release",
        "swh_object_metadata": release_metadata,
        "release": release,
        "snapshot_context": snapshot_context,
        "show_actions": True,
        "breadcrumbs": None,
        "vault_cooking": vault_cooking,
        "top_right_link": None,
        "swhids_info": swhids_info,
    }
    return render(request, "browse/release.html", template_context)
def content_display(request, query_string):
    """Django view rendering the HTML page of the content identified by
    ``query_string``.

    It is served at
    :http:get:`/browse/content/[(algo_hash):](hash)/`

    Any exception raised while validating the hash, fetching the content
    or resolving the optional origin is turned into an error response by
    ``handle_view_exception``.
    """
    try:
        # results are not needed: both calls only validate the query string
        _algo, raw_checksum = query.parse_hash(query_string)
        hash_to_hex(raw_checksum)
        content_data = request_content(query_string,
                                       raise_if_unavailable=False)
        origin_type = request.GET.get('origin_type', None)
        origin_url = (request.GET.get('origin_url', None)
                      or request.GET.get('origin', None))
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                raw_cnt_url = reverse(
                    'browse-content',
                    url_args={'query_string': query_string})
                error_template = (
                    'The Software Heritage archive has a content '
                    'with the hash you provided but the origin '
                    'mentioned in your request appears broken: %s. '
                    'Please check the URL and try again.\n\n'
                    'Nevertheless, you can still browse the content '
                    'without origin information: %s')
                raise NotFoundExc(
                    error_template
                    % (gen_link(origin_url), gen_link(raw_cnt_url)))
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path = request.GET.get('path', None)

    content = None
    language = None
    mimetype = None
    raw_data = content_data['raw_data']
    if raw_data is not None:
        display_data = prepare_content_for_display(
            raw_data, content_data['mimetype'], path)
        content = display_data['content_data']
        language = display_data['language']
        mimetype = display_data['mimetype']

    filename = None
    breadcrumbs = []
    dir_query_params = {'origin': origin_url}

    if path:
        # path is expected to look like <root directory id>/<dirs>/<file>
        segments = path.split('/')
        root_dir, filename = segments[0], segments[-1]
        if root_dir != path:
            parent_path = path.replace(root_dir + '/', '')[:-len(filename)]
            breadcrumbs.append({
                'name': root_dir[:7],
                'url': reverse('browse-directory',
                               url_args={'sha1_git': root_dir},
                               query_params=dir_query_params),
            })
            for entry in gen_path_info(parent_path):
                entry_url = reverse('browse-directory',
                                    url_args={'sha1_git': root_dir,
                                              'path': entry['path']},
                                    query_params=dir_query_params)
                breadcrumbs.append({'name': entry['name'],
                                    'url': entry_url})
        # the file itself closes the trail and is not a link
        breadcrumbs.append({'name': filename, 'url': None})

    content_raw_url = reverse('browse-content-raw',
                              url_args={'query_string': query_string},
                              query_params={'filename': filename})

    checksums = content_data['checksums']
    content_metadata = {
        'sha1 checksum': checksums['sha1'],
        'sha1_git checksum': checksums['sha1_git'],
        'sha256 checksum': checksums['sha256'],
        'blake2s256 checksum': checksums['blake2s256'],
        'mime type': content_data['mimetype'],
        'encoding': content_data['encoding'],
        'size': filesizeformat(content_data['length']),
        'language': content_data['language'],
        'licenses': content_data['licenses'],
        'filename': filename,
    }

    sha1_git = content_data['checksums']['sha1_git']
    swh_ids = get_swh_persistent_ids([{'type': 'content', 'id': sha1_git}])

    heading = 'Content - %s' % sha1_git
    if breadcrumbs:
        heading += ' - %s' % '/'.join([bc['name'] for bc in breadcrumbs])

    template_context = {
        'heading': heading,
        'swh_object_id': swh_ids[0]['swh_id'],
        'swh_object_name': 'Content',
        'swh_object_metadata': content_metadata,
        'content': content,
        'content_size': content_data['length'],
        'max_content_size': content_display_max_size,
        'mimetype': mimetype,
        'language': language,
        'breadcrumbs': breadcrumbs,
        'top_right_link': {
            'url': content_raw_url,
            'icon': swh_object_icons['content'],
            'text': 'Raw File',
        },
        'snapshot_context': snapshot_context,
        'vault_cooking': None,
        'show_actions_menu': True,
        'swh_ids': swh_ids,
        'error_code': content_data['error_code'],
        'error_message': content_data['error_message'],
        'error_description': content_data['error_description'],
    }
    return render(request, 'browse/content.html', template_context,
                  status=content_data['error_code'])
def lookup_directory_with_revision(sha1_git, dir_path=None, with_data=False):
    """Return information on directory pointed by revision with sha1_git.
    If dir_path is not provided, display top level directory.
    Otherwise, display the directory pointed by dir_path (if it exists).

    Args:
        sha1_git: revision's hash.
        dir_path: optional directory pointed to by that revision.
        with_data: boolean that indicates to retrieve the raw data if the path
            resolves to a content. Default to False (for the api)

    Returns:
        A dict with the keys ``type`` ('dir', 'file' or 'rev'), ``path``
        ('.' when dir_path is not provided), ``revision`` (the sha1_git
        given as input) and ``content`` (the converted directory entries,
        content or revision the path resolves to).

    Raises:
        BadInputExc in case of unknown algo_hash or bad hash.
        NotFoundExc either if the revision is not found or the path referenced
            does not exist, or if with_data is set and the content or its raw
            data can not be found.
        NotImplementedError in case of dir_path exists but do not reference a
            type 'dir', 'file' or 'rev'.

    """
    sha1_git_bin = _to_sha1_bin(sha1_git)

    revision = _first_element(storage.revision_get([sha1_git_bin]))
    if not revision:
        raise NotFoundExc('Revision %s not found' % sha1_git)

    dir_sha1_git_bin = revision['directory']

    if dir_path:
        paths = dir_path.strip(os.path.sep).split(os.path.sep)
        entity = storage.directory_entry_get_by_path(
            dir_sha1_git_bin, [p.encode('utf-8') for p in paths])

        if not entity:
            raise NotFoundExc(
                "Directory or File '%s' pointed to by revision %s not found"
                % (dir_path, sha1_git))
    else:
        # no path requested: describe the revision root directory
        entity = {'type': 'dir', 'target': dir_sha1_git_bin}

    entity_type = entity['type']

    if entity_type == 'dir':
        directory_entries = storage.directory_ls(entity['target']) or []
        result = [converters.from_directory_entry(entry)
                  for entry in directory_entries]
    elif entity_type == 'file':  # content
        content = storage.content_find({'sha1_git': entity['target']})
        if with_data:
            # report missing objects as NotFoundExc instead of failing on
            # a subscript of None
            if not content:
                raise NotFoundExc(
                    "Content of file '%s' pointed to by revision %s not found"
                    % (dir_path, sha1_git))
            raw = _first_element(storage.content_get([content['sha1']]))
            if not raw:
                raise NotFoundExc(
                    "Raw data of file '%s' pointed to by revision %s "
                    "not found" % (dir_path, sha1_git))
            content['data'] = raw['data']
        result = converters.from_content(content)
    elif entity_type == 'rev':  # revision
        # same lookup helper as for the main revision above, rather than a
        # bare next() which fails on an empty or non-iterator result
        target_revision = _first_element(
            storage.revision_get([entity['target']]))
        if not target_revision:
            raise NotFoundExc(
                "Revision '%s' pointed to by revision %s not found"
                % (dir_path, sha1_git))
        result = converters.from_revision(target_revision)
    else:
        raise NotImplementedError('Entity of type %s not implemented.'
                                  % entity_type)

    return {'type': entity_type,
            'path': dir_path or '.',
            'revision': sha1_git,
            'content': result}
def test_origin_request_errors(self, mock_get_origin_info,
                               mock_snapshot_service,
                               mock_origin_service,
                               mock_utils_service,
                               mock_get_origin_visit_snapshot,
                               mock_get_origin_visits,
                               mock_request_content):
    """Check that the origin views answer with a 404 error page when the
    origin, its visits, its snapshot, a directory or a content can not
    be found.

    The mocks are reconfigured step by step, each step building on the
    state left by the previous one, so the order of the scenarios matters.
    """

    def fetch_not_found(url):
        # the response must be passed to assertTemplateUsed: called with
        # the template name only, it just returns an unused context
        # manager and checks nothing
        resp = self.client.get(url)
        self.assertEqual(resp.status_code, 404)
        self.assertTemplateUsed(resp, 'error.html')
        return resp

    # factories: every scenario gets fresh objects, as if written inline
    def origin_args(**extra):
        return dict({'origin_type': 'foo', 'origin_url': 'bar'}, **extra)

    def origin():
        return {'type': 'foo', 'url': 'bar', 'id': 457}

    def full_visit():
        return {
            'date': '2015-09-26T09:30:52.373449+00:00',
            'metadata': {},
            'origin': 457,
            'snapshot': 'bdaf9ac436488a8c6cda927a0f44e172934d3f65',
            'status': 'full',
            'visit': 1
        }

    def head_branch():
        return {'directory': 'ae59ceecf46367e8e4ad800e231fc76adc3afffb',
                'name': 'HEAD',
                'revision': '7bc08e1aa0b08cb23e18715a32aa38517ad34672',
                'date': '04 May 2017, 13:27 UTC',
                'message': ''}

    # -- origin visits view: unknown origin
    mock_get_origin_info.side_effect = \
        NotFoundExc('origin not found')
    url = reverse('browse-origin-visits', url_args=origin_args())
    resp = fetch_not_found(url)
    self.assertContains(resp, 'origin not found', status_code=404)

    # -- origin directory view: origin without any visit
    mock_utils_service.lookup_origin.side_effect = None
    mock_utils_service.lookup_origin.return_value = origin()
    mock_get_origin_visits.return_value = []
    url = reverse('browse-origin-directory', url_args=origin_args())
    resp = fetch_not_found(url)
    self.assertContains(resp, "No visit", status_code=404)

    # -- origin directory view: unknown visit id
    mock_get_origin_visits.return_value = [{'visit': 1}]
    mock_get_origin_visit_snapshot.side_effect = \
        NotFoundExc('visit not found')
    url = reverse('browse-origin-directory', url_args=origin_args(),
                  query_params={'visit_id': 2})
    resp = fetch_not_found(url)
    self.assertRegex(resp.content.decode('utf-8'), 'Visit.*not found')

    # -- origin directory view: root directory missing
    mock_get_origin_visits.return_value = [full_visit()]
    mock_get_origin_visit_snapshot.side_effect = None
    mock_get_origin_visit_snapshot.return_value = ([head_branch()], [])
    mock_utils_service.lookup_snapshot_size.return_value = {
        'revision': 1,
        'release': 0
    }
    mock_utils_service.lookup_directory.side_effect = \
        NotFoundExc('Directory not found')
    url = reverse('browse-origin-directory', url_args=origin_args())
    resp = fetch_not_found(url)
    self.assertContains(resp, 'Directory not found', status_code=404)

    # -- origin directory view: snapshot missing
    with patch('swh.web.browse.views.utils.snapshot_context.'
               'get_snapshot_context') as mock_get_snapshot_context:
        mock_get_snapshot_context.side_effect = \
            NotFoundExc('Snapshot not found')
        url = reverse('browse-origin-directory', url_args=origin_args())
        resp = fetch_not_found(url)
        self.assertContains(resp, 'Snapshot not found', status_code=404)

    # -- origin content view: origin without any visit
    mock_origin_service.lookup_origin.side_effect = None
    mock_origin_service.lookup_origin.return_value = origin()
    mock_get_origin_visits.return_value = []
    url = reverse('browse-origin-content',
                  url_args=origin_args(path='foo'))
    resp = fetch_not_found(url)
    self.assertContains(resp, "No visit", status_code=404)

    # -- origin content view: unknown visit id
    mock_get_origin_visits.return_value = [{'visit': 1}]
    mock_get_origin_visit_snapshot.side_effect = \
        NotFoundExc('visit not found')
    url = reverse('browse-origin-content',
                  url_args=origin_args(path='foo'),
                  query_params={'visit_id': 2})
    resp = fetch_not_found(url)
    self.assertRegex(resp.content.decode('utf-8'), 'Visit.*not found')

    # -- origin content view: snapshot without branches
    mock_get_origin_visits.return_value = [full_visit()]
    mock_get_origin_visit_snapshot.side_effect = None
    mock_get_origin_visit_snapshot.return_value = ([], [])
    url = reverse('browse-origin-content',
                  url_args=origin_args(path='baz'))
    resp = fetch_not_found(url)
    self.assertRegex(resp.content.decode('utf-8'),
                     'Origin.*has an empty list of branches')

    # -- origin content view: content missing
    mock_get_origin_visit_snapshot.return_value = ([head_branch()], [])
    mock_snapshot_service.lookup_directory_with_path.return_value = \
        {'target': '5ecd9f37b7a2d2e9980d201acd6286116f2ba1f1'}
    mock_request_content.side_effect = \
        NotFoundExc('Content not found')
    url = reverse('browse-origin-content',
                  url_args=origin_args(path='baz'))
    resp = fetch_not_found(url)
    self.assertContains(resp, 'Content not found', status_code=404)
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`

    Query parameters read from the request: ``origin_url`` (or ``origin``),
    ``snapshot``, ``branch``, ``release``, ``revision``, ``path`` and
    ``language``.

    When the content can not be retrieved (NotFoundExc), the page is still
    rendered, with a 404 status and the error description in its context.

    Raises:
        NotFoundExc: if the snapshot context can not be built from the
            provided origin or snapshot
    """
    # NOTE(review): algo and checksum are not used below; presumably these
    # calls are kept to reject a malformed query string early -- confirm
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        # keep going with an empty content_data: every access below uses
        # .get() so the page can be rendered with the error details
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context=CONTENT,
            )
        except NotFoundExc as e:
            # errors whose message starts with "Origin" are replaced by a
            # message linking to the origin-less content page, others are
            # propagated unchanged
            if str(e).startswith("Origin"):
                raw_cnt_url = reverse("browse-content",
                                      url_args={"query_string": query_string})
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_cnt_url)))
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path)
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]

    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language

    available_languages = None

    # the list of languages is only provided for textual contents
    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()

    filename = None
    path_info = None
    directory_id = None

    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")

    # NOTE: when a snapshot context exists this is the very same dict as
    # snapshot_context["query_params"], so the pop/assignments below also
    # modify the snapshot context later passed to get_swhids_info and to
    # the template
    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []

    if path:
        split_path = path.split("/")
        # without a root directory from the snapshot context, the first
        # path component is used as the root directory
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            # NOTE(review): str.replace removes every occurrence of
            # "<root_dir>/", not only the leading one
            path = path.replace(root_dir + "/", "")
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        # last breadcrumb is the file itself, it has no link
        breadcrumbs.append({"name": filename, "url": None})

    # resolve the identifier of the directory containing the content; at
    # this point path holds the parent directory path (filename stripped)
    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None

    # from here on query_params is a new dict, only used for the raw url
    query_params = {"filename": filename}

    content_checksums = content_data.get("checksums", {})

    content_url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
    )

    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    content_metadata = ContentMetadata(
        object_type=CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=filesizeformat(content_data.get("length", 0)),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )

    # objects to compute identifiers for: the content first, then whatever
    # context objects are known
    swh_objects = [
        SWHObjectInfo(object_type=CONTENT,
                      object_id=content_checksums.get("sha1_git"))
    ]

    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id))

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(object_type=REVISION,
                          object_id=snapshot_context["revision_id"]))
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT,
                          object_id=snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(object_type=RELEASE,
                              object_id=snapshot_context["release_id"]))

    swhids_info = get_swhids_info(
        swh_objects, snapshot_context, extra_context=content_metadata,
    )

    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path

    # the HTTP status of the response mirrors the content retrieval outcome
    return render(
        request,
        "browse/content.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Content",
            "swh_object_metadata": content_metadata,
            "content": content,
            "content_size": content_data.get("length"),
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "available_languages": available_languages,
            "breadcrumbs": breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context": snapshot_context,
            "vault_cooking": None,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.

    Query parameters read from the request: ``origin_url`` (or ``origin``),
    ``timestamp``, ``visit_id``, ``snapshot_id`` (or ``snapshot``),
    ``branch``, ``release`` and ``path``. When ``path`` is provided, the
    directory or content it points to inside the revision root directory
    is displayed, otherwise the root directory is.

    Raises:
        NotFoundExc: if the snapshot context can not be built from the
            provided origin
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision", url_args={"sha1_git": sha1_git})
            error_message = ("The Software Heritage archive has a revision "
                             "with the hash you provided but the origin "
                             "mentioned in your request appears broken: %s. "
                             "Please check the URL and try again.\n\n"
                             "Nevertheless, you can still browse the revision "
                             "without origin information: %s"
                             % (gen_link(origin_url), gen_link(raw_rev_url)))
            # only errors whose message starts with "Origin" are replaced
            # by the message above, others are propagated unchanged
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)

    error_info = {"status_code": 200, "description": None}

    if path:
        try:
            file_info = archive.lookup_directory_with_path(
                revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            # NOTE: on failure neither dir_id nor content_data is set, so
            # the directory branch further below runs with dir_id None and
            # empty dirs/files; the page is rendered with a 404 status
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]

    if dir_id:
        # for directories, path becomes the prefix ("" or "<path>/") used
        # to build the path of each entry
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)

    revision_metadata = RevisionMetadata(
        object_type=REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"] if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"]
        else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"]
        else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"]
        else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(revision["metadata"], sort_keys=True, indent=4,
                            separators=(",", ": ")),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    # first line of the message is the header, remaining lines the body
    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")

    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})

    path_info = gen_path_info(path)

    # NOTE: when a snapshot context exists this is the very same dict as
    # snapshot_context["query_params"]; the "path" and "filename" entries
    # set below are therefore also written into the snapshot context
    query_params = snapshot_context["query_params"] if snapshot_context else {}

    # first breadcrumb is the revision root directory
    breadcrumbs = []
    breadcrumbs.append({
        "name": revision["directory"][:7],
        "url": reverse(
            "browse-revision",
            url_args={"sha1_git": sha1_git},
            query_params=query_params,
        ),
    })
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append({
            "name": pi["name"],
            "url": reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        })

    vault_cooking = {
        "directory_context": False,
        "directory_id": None,
        "revision_context": True,
        "revision_id": sha1_git,
    }

    swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)]

    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}

    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None

    if content_data:
        # the path points to a content: content_data is only filled in the
        # branch above that also binds file_info and query_string
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path)
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            # path of the parent directory, without the file name
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename

        top_right_link = {
            "url": reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon": swh_object_icons["content"],
            "text": "Raw File",
        }

        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]))
    else:
        # directory display: compute the url of every entry
        for d in dirs:
            if d["type"] == "rev":
                # entries of type "rev" link to the targeted revision itself
                d["url"] = reverse("browse-revision",
                                   url_args={"sha1_git": d["target"]})
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["length"] is not None:
                f["length"] = filesizeformat(f["length"])
            # collect candidate readme files of the directory
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

        vault_cooking["directory_context"] = True
        vault_cooking["directory_id"] = dir_id

        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id))

    # the diff url must not carry the path set while building entry urls
    query_params.pop("path", None)

    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )

    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)

    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    # the HTTP status of the response mirrors the outcome of the path lookup
    return render(
        request,
        "browse/revision.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Revision",
            "swh_object_metadata": revision_metadata,
            "message_header": message_lines[0],
            "message_body": "\n".join(message_lines[1:]),
            "parents": parents,
            "snapshot_context": snapshot_context,
            "dirs": dirs,
            "files": files,
            "content": content,
            "content_size": content_size,
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "vault_cooking": vault_cooking,
            "diff_revision_url": diff_revision_url,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
def get_origin_visit(origin_info, visit_ts=None, visit_id=None,
                     snapshot_id=None):
    """Select a single visit among those known for an origin.

    The visits list is fetched with ``get_origin_visits`` and the first
    applicable criterion below decides which visit is returned:

    1. ``snapshot_id`` is set: the first visit in the list whose
       ``snapshot`` equals it;
    2. ``visit_id`` is set: the first visit in the list whose ``visit``
       equals ``int(visit_id)``;
    3. ``visit_ts`` is not set: the last visit in the list whose status is
       ``full``, or simply the last visit of the list if none is full;
    4. ``visit_ts`` is set: the visit whose date is the closest to it.
       A timestamp lower than or equal to the first visit date selects the
       first visit, one greater than or equal to the last visit date
       selects the last visit. Between two consecutive visits the nearer
       one wins (the later one on a tie), and when several consecutive
       visits share the selected date the last of them is returned.

    Args:
        origin_info (dict): a dict filled with origin information; the
            ``type`` and ``url`` keys (or ``id`` when those are missing)
            are used to build error messages
        visit_ts (int or str): an ISO date string or Unix timestamp to
            parse (handed over to ``parse_timestamp``)
        visit_id: identifier of the visit to return, anything accepted
            by ``int()``
        snapshot_id: identifier of the snapshot the visit must reference

    Returns:
        A dict containing the visit info as described below::

            {'origin': 2,
             'date': '2017-10-08T11:54:25.582463+00:00',
             'metadata': {},
             'visit': 25,
             'status': 'full'}

    Raises:
        NotFoundExc: if the origin has no visit or if no visit matches
            the provided criterion
    """
    visits = get_origin_visits(origin_info)

    if not visits:
        if 'type' in origin_info and 'url' in origin_info:
            raise NotFoundExc('No visit associated to origin with'
                              ' type %s and url %s!' %
                              (origin_info['type'], origin_info['url']))
        raise NotFoundExc('No visit associated to origin with'
                          ' id %s!' % origin_info['id'])

    if snapshot_id:
        matching = [v for v in visits if v['snapshot'] == snapshot_id]
        if matching:
            return matching[0]
        if 'type' in origin_info and 'url' in origin_info:
            raise NotFoundExc(
                'Visit for snapshot with id %s for origin with type'
                ' %s and url %s not found!' %
                (snapshot_id, origin_info['type'], origin_info['url']))
        raise NotFoundExc('Visit for snapshot with id %s for origin with'
                          ' id %s not found!' %
                          (snapshot_id, origin_info['id']))

    if visit_id:
        matching = [v for v in visits if v['visit'] == int(visit_id)]
        if matching:
            return matching[0]
        if 'type' in origin_info and 'url' in origin_info:
            raise NotFoundExc('Visit with id %s for origin with type %s'
                              ' and url %s not found!' %
                              (visit_id, origin_info['type'],
                               origin_info['url']))
        raise NotFoundExc('Visit with id %s for origin with id %s'
                          ' not found!' % (visit_id, origin_info['id']))

    if not visit_ts:
        # no timestamp provided: prefer the most recent full visit and
        # fall back to the most recent visit whatever its status
        for candidate in reversed(visits):
            if candidate['status'] == 'full':
                return candidate
        return visits[-1]

    target_ts = math.floor(parse_timestamp(visit_ts).timestamp())
    last_idx = len(visits) - 1
    closest_idx = None

    for idx, candidate in enumerate(visits):
        candidate_ts = math.floor(
            parse_timestamp(candidate['date']).timestamp())

        # target located before (or on) the very first visit
        if idx == 0 and target_ts <= candidate_ts:
            return candidate

        if idx == last_idx:
            # target located after (or on) the very last visit
            if target_ts >= candidate_ts:
                return candidate
            continue

        following_ts = math.floor(
            parse_timestamp(visits[idx + 1]['date']).timestamp())
        if candidate_ts <= target_ts < following_ts:
            # target lies between two consecutive visits: keep the
            # nearer one, the later visit winning on a tie
            if (target_ts - candidate_ts) < (following_ts - target_ts):
                closest_idx = idx
            else:
                closest_idx = idx + 1
            break

    if closest_idx is None:
        if 'type' in origin_info and 'url' in origin_info:
            raise NotFoundExc(
                'Visit with timestamp %s for origin with type %s '
                'and url %s not found!' %
                (visit_ts, origin_info['type'], origin_info['url']))
        raise NotFoundExc('Visit with timestamp %s for origin with id %s '
                          'not found!' % (visit_ts, origin_info['id']))

    # several consecutive visits may share the selected date: move to the
    # last one of them
    while closest_idx < last_idx and \
            visits[closest_idx]['date'] == visits[closest_idx + 1]['date']:
        closest_idx += 1
    return visits[closest_idx]
def get_origin_visit(
    origin_info: OriginInfo,
    visit_ts: Optional[str] = None,
    visit_id: Optional[int] = None,
    snapshot_id: Optional[str] = None,
) -> OriginVisitInfo:
    """Function that returns information about a visit for a given origin.

    If no search hints are provided, return the most recent full visit with
    a valid snapshot or the most recent partial visit with a valid snapshot
    otherwise.

    When hints are provided, that latest visit is returned right away if its
    snapshot equals ``snapshot_id`` or its id equals ``visit_id``. Otherwise
    the whole visits list is fetched and searched, the hints being considered
    in that order:

    1. ``snapshot_id``: the first visit with that snapshot is returned;
    2. ``visit_id``: the first visit with that identifier is returned;
    3. ``visit_ts``: the visit whose date is the closest to that timestamp
       is returned.

    Args:
        origin_info: a dict filled with origin information (its ``url``
            entry is the one used here)
        visit_ts: an ISO 8601 datetime string to parse
        visit_id: a visit identifier
        snapshot_id: a snapshot identifier

    Returns:
        A dict containing the visit info.

    Raises:
        swh.web.common.exc.NotFoundExc: if no visit can be found
    """
    origin_url = origin_info["url"]

    # returns the latest full visit with a valid snapshot
    visit = archive.lookup_origin_visit_latest(
        origin_url, allowed_statuses=["full"], require_snapshot=True
    )
    if not visit:
        # or the latest partial visit with a valid snapshot otherwise
        visit = archive.lookup_origin_visit_latest(
            origin_url, allowed_statuses=["partial"], require_snapshot=True
        )

    if not visit_ts and not visit_id and not snapshot_id:
        if visit:
            return visit
        raise NotFoundExc(
            f"No valid visit for origin with url {origin_url} found!"
        )

    # no need to fetch all visits list and search in it if the latest
    # visit matches some criteria
    if visit and (visit["snapshot"] == snapshot_id
                  or visit["visit"] == visit_id):
        return visit

    visits = get_origin_visits(origin_info)
    if not visits:
        raise NotFoundExc(
            f"No visits associated to origin with url {origin_url}!"
        )

    if snapshot_id:
        matching = [v for v in visits if v["snapshot"] == snapshot_id]
        if not matching:
            raise NotFoundExc(
                ("Visit for snapshot with id %s for origin with"
                 " url %s not found!" % (snapshot_id, origin_url))
            )
        return matching[0]

    if visit_id:
        # convert once instead of once per visit
        wanted_id = int(visit_id)
        matching = [v for v in visits if v["visit"] == wanted_id]
        if not matching:
            raise NotFoundExc(
                ("Visit with id %s for origin with"
                 " url %s not found!" % (visit_id, origin_url))
            )
        return matching[0]

    # At this point neither snapshot_id nor visit_id is set, so visit_ts
    # necessarily is: the no-hint case has already been handled above.
    def epoch_seconds(date):
        return math.floor(parse_iso8601_date_to_utc(date).timestamp())

    target_visit_ts = epoch_seconds(visit_ts)

    # Find the visit with date closest to the target (in absolute value);
    # min() keeps the first such visit when several are equally close.
    # visits is not empty here so an index is always found.
    visit_idx = min(
        range(len(visits)),
        key=lambda i: abs(epoch_seconds(visits[i]["date"]) - target_visit_ts),
    )

    # If multiple consecutive visits have the same date, select the last
    # one of them (presumably the one with the largest id, assuming the
    # list is ordered by visit id).
    last_idx = len(visits) - 1
    while (visit_idx < last_idx
           and visits[visit_idx]["date"] == visits[visit_idx + 1]["date"]):
        visit_idx += 1
    return visits[visit_idx]
def get_snapshot_context(snapshot_id=None, origin_type=None,
                         origin_url=None, timestamp=None, visit_id=None):
    """
    Utility function to compute relevant information when navigating
    the archive in a snapshot context. The snapshot is either
    referenced by its id or it will be retrieved from an origin visit.

    When ``origin_url`` is provided, the visit is looked up with
    ``get_origin_visit`` (using ``timestamp``, ``visit_id`` and
    ``snapshot_id`` as search hints) and the snapshot of that visit is
    the one browsed. Otherwise ``snapshot_id`` is used directly.

    Args:
        snapshot_id (str): hexadecimal representation of a snapshot
            identifier
        origin_type (str): the origin type (git, svn, deposit, ...)
        origin_url (str): the origin_url
            (e.g. https://github.com/(user)/(repo)/)
        timestamp (str): a datetime string for retrieving the closest
            visit of the origin
        visit_id (int): optional visit id for disambiguation in case
            of several visits with the same timestamp

    Returns:
        A dict with the following entries:
            * swh_type: 'origin' if an origin url was provided,
              'snapshot' otherwise
            * swh_object_id: the persistent identifier of the snapshot
            * snapshot_id: the identifier of the browsed snapshot
            * snapshot_size: the result of the snapshot size lookup
            * is_empty: True if the values of snapshot_size sum to 0
            * origin_info: dict containing origin information
              (None without origin url)
            * origin_type: the origin type as provided by the caller
            * visit_info: dict containing visit information, extended
              with the 'fmt_date' and 'url' entries
              (None without origin url)
            * branches: the list of branches found in the snapshot
            * releases: the list of releases found in the snapshot,
              in reverse order of the one they were retrieved in
            * branch: always None
            * release: always None
            * browse_url: the url to browse the origin visits or
              the snapshot
            * branches_url: the url to browse the branches
            * releases_url: the url to browse the releases
            * url_args: dict containing url arguments to use when
              browsing in the context of the origin and its visit,
              or of the snapshot
            * query_params: dict containing query parameters to use
              along with url_args

    Raises:
        NotFoundExc: if no snapshot is found for the visit of an origin.
    """
    swh_type = 'origin' if origin_url else 'snapshot'

    origin_info = visit_info = url_args = None
    browse_url = branches_url = releases_url = None
    query_params = {}
    branches, releases = [], []

    if origin_url:
        origin_info = get_origin_info(origin_url, origin_type)
        visit_info = get_origin_visit(origin_info, timestamp, visit_id,
                                      snapshot_id)

        fmt_date = format_utc_iso_date(visit_info['date'])
        visit_info['fmt_date'] = fmt_date

        snapshot_id = visit_info['snapshot']
        if not snapshot_id:
            raise NotFoundExc('No snapshot associated to the visit of origin '
                              '%s on %s' % (origin_url, fmt_date))

        # the requested timestamp may differ from the date of the visit
        # that was found: switch to the exact one so that the urls
        # generated below target that visit
        if timestamp:
            timestamp = visit_info['date']

        branches, releases = get_origin_visit_snapshot(
            origin_info, timestamp, visit_id, snapshot_id)

        url_args = {
            'origin_type': origin_type,
            'origin_url': origin_info['url'],
        }
        query_params = {'visit_id': visit_id}

        # the visits listing url is computed before the timestamp gets
        # added to the url arguments
        browse_url = reverse('browse-origin-visits', url_args=url_args)

        if timestamp:
            url_args['timestamp'] = format_utc_iso_date(
                timestamp, '%Y-%m-%dT%H:%M:%S')

        visit_info['url'] = reverse('browse-origin-directory',
                                    url_args=url_args,
                                    query_params=query_params)
        branches_url = reverse('browse-origin-branches',
                               url_args=url_args,
                               query_params=query_params)
        releases_url = reverse('browse-origin-releases',
                               url_args=url_args,
                               query_params=query_params)
    elif snapshot_id:
        branches, releases = get_snapshot_content(snapshot_id)
        url_args = {'snapshot_id': snapshot_id}
        browse_url = reverse('browse-snapshot', url_args=url_args)
        branches_url = reverse('browse-snapshot-branches',
                               url_args=url_args)
        releases_url = reverse('browse-snapshot-releases',
                               url_args=url_args)

    snapshot_size = service.lookup_snapshot_size(snapshot_id)

    return {
        'swh_type': swh_type,
        'swh_object_id': persistent_identifier('snapshot', snapshot_id),
        'snapshot_id': snapshot_id,
        'snapshot_size': snapshot_size,
        'is_empty': sum(snapshot_size.values()) == 0,
        'origin_info': origin_info,
        # keep track if the origin type was provided as url argument
        'origin_type': origin_type,
        'visit_info': visit_info,
        'branches': branches,
        'releases': list(reversed(releases)),
        'branch': None,
        'release': None,
        'browse_url': browse_url,
        'branches_url': branches_url,
        'releases_url': releases_url,
        'url_args': url_args,
        'query_params': query_params,
    }