def test_get_swhids_info_revision_context(archive_data, revision):
    """Check SWHID qualifiers computed when browsing under a revision context:
    the revision itself carries no qualifiers, its root directory is anchored
    to the revision, and a picked file entry additionally carries a path.
    """
    rev_data = archive_data.revision_get(revision)
    root_dir = rev_data["directory"]
    entries = archive_data.directory_ls(root_dir)
    picked_entry = random.choice(entries)
    entry_is_file = picked_entry["type"] == "file"

    objects_info = [
        SWHObjectInfo(object_type=REVISION, object_id=revision),
        SWHObjectInfo(object_type=DIRECTORY, object_id=root_dir),
    ]
    context = {"revision": revision, "path": "/"}
    if entry_is_file:
        objects_info.append(
            SWHObjectInfo(
                object_type=CONTENT,
                object_id=picked_entry["checksums"]["sha1_git"],
            )
        )
        context["filename"] = picked_entry["name"]

    swhids = get_swhids_info(
        objects_info,
        snapshot_context=None,
        extra_context=context,
    )

    # the revision object itself gets no contextual qualifiers
    assert swhids[0]["context"] == {}

    expected_anchor = gen_swhid(REVISION, revision)

    # the root directory is anchored to the revision
    parsed_dir = get_swhid(swhids[1]["swhid_with_context"])
    assert parsed_dir.qualifiers() == {
        "anchor": expected_anchor,
    }

    if entry_is_file:
        # a file entry also carries its path inside the root directory
        parsed_cnt = get_swhid(swhids[2]["swhid_with_context"])
        assert parsed_cnt.qualifiers() == {
            "anchor": expected_anchor,
            "path": f'/{picked_entry["name"]}',
        }
def test_get_swhids_info_characters_and_url_escaping(archive_data, origin, directory):
    """Ensure reserved characters in SWHID qualifiers are percent-escaped once
    in the SWHID itself and a second time when the SWHID is embedded in a URL.
    """
    ctx = get_snapshot_context(origin_url=origin["url"])
    ctx["origin_info"]["url"] = "http://example.org/?project=abc;def%"
    tricky_path = "/foo;/bar%"

    info = get_swhids_info(
        [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)],
        snapshot_context=ctx,
        extra_context={"path": tricky_path},
    )[0]

    # special characters in the SWHID qualifiers must be escaped
    assert info["context"]["origin"] == "http://example.org/?project%3Dabc%3Bdef%25"
    assert info["context"]["path"] == "/foo%3B/bar%25"

    # ... and escaped again when the SWHID is rendered inside a URL
    # (strip the surrounding slashes before parsing)
    parsed_url_swhid = QualifiedSWHID.from_string(
        info["swhid_with_context_url"][1:-1]
    )
    assert (
        parsed_url_swhid.qualifiers()["origin"]
        == "http://example.org/%3Fproject%253Dabc%253Bdef%2525"
    )
    assert parsed_url_swhid.qualifiers()["path"] == "/foo%253B/bar%2525"
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    # "origin" is accepted as a fallback spelling of the "origin_url"
    # query parameter (same below for "snapshot" vs "snapshot_id")
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        # browsing in an origin context: resolve the snapshot context,
        # remapping origin lookup failures to a friendlier error message
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision",
                                  url_args={"sha1_git": sha1_git})
            error_message = ("The Software Heritage archive has a revision "
                             "with the hash you provided but the origin "
                             "mentioned in your request appears broken: %s. "
                             "Please check the URL and try again.\n\n"
                             "Nevertheless, you can still browse the revision "
                             "without origin information: %s"
                             % (gen_link(origin_url), gen_link(raw_rev_url)))
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)
    error_info = {"status_code": 200, "description": None}
    if path:
        # a path was requested: resolve it inside the revision's root
        # directory; a missing path is reported as a 404 in the rendered page
        try:
            file_info = archive.lookup_directory_with_path(
                revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]
    if dir_id:
        # normalize path so entry names can simply be appended to it below
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)
    revision_metadata = RevisionMetadata(
        object_type=REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"] if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"] else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"] else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"] else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(revision["metadata"], sort_keys=True,
                            indent=4, separators=(",", ": ")),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )
    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")
    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})
    path_info = gen_path_info(path)
    query_params = snapshot_context["query_params"] if snapshot_context else {}
    # breadcrumbs: root directory first, then one entry per path component
    breadcrumbs = []
    breadcrumbs.append({
        "name": revision["directory"][:7],
        "url": reverse(
            "browse-revision",
            url_args={"sha1_git": sha1_git},
            query_params=query_params,
        ),
    })
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append({
            "name": pi["name"],
            "url": reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        })
    vault_cooking = {
        "directory_context": False,
        "directory_id": None,
        "revision_context": True,
        "revision_id": sha1_git,
    }
    swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)]
    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}
    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None
    if content_data:
        # the requested path resolved to a file: prepare it for display
        breadcrumbs[-1]["url"] = None  # last breadcrumb is the current file
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path)
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename
        top_right_link = {
            "url": reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon": swh_object_icons["content"],
            "text": "Raw File",
        }
        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT,
                          object_id=file_info["target"]))
    else:
        # the requested path resolved to a directory: build entry links
        for d in dirs:
            if d["type"] == "rev":
                # submodule-like entry pointing to another revision
                d["url"] = reverse("browse-revision",
                                   url_args={"sha1_git": d["target"]})
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["length"] is not None:
                f["length"] = filesizeformat(f["length"])
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]
        readme_name, readme_url, readme_html = get_readme_to_display(readmes)
        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }
        vault_cooking["directory_context"] = True
        vault_cooking["directory_id"] = dir_id
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id))
    # the diff view does not take a path parameter
    query_params.pop("path", None)
    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )
    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))
    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)
    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found
    return render(
        request,
        "browse/revision.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Revision",
            "swh_object_metadata": revision_metadata,
            "message_header": message_lines[0],
            "message_body": "\n".join(message_lines[1:]),
            "parents": parents,
            "snapshot_context": snapshot_context,
            "dirs": dirs,
            "files": files,
            "content": content,
            "content_size": content_size,
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "vault_cooking": vault_cooking,
            "diff_revision_url": diff_revision_url,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
def release_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a release identified by its id.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.
    """
    release = archive.lookup_release(sha1_git)
    snapshot_context = {}
    origin_info = None
    # "snapshot" / "origin" are accepted as fallback spellings of the
    # "snapshot_id" / "origin_url" query parameters
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    if origin_url:
        # browsing in an origin context: resolve the snapshot context,
        # remapping origin lookup failures to a friendlier error message
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id,
                origin_url,
                timestamp,
                visit_id,
                release_name=release["name"],
            )
        except NotFoundExc as e:
            raw_rel_url = reverse("browse-release",
                                  url_args={"sha1_git": sha1_git})
            error_message = (
                "The Software Heritage archive has a release "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the release "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_rel_url))
            )
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(
            snapshot_id, release_name=release["name"]
        )
    snapshot_id = snapshot_context.get("snapshot_id", None)
    release_metadata = ReleaseMetadata(
        object_type=RELEASE,
        object_id=sha1_git,
        release=sha1_git,
        author=release["author"]["fullname"] if release["author"] else "None",
        author_url=gen_person_mail_link(release["author"])
        if release["author"]
        else "None",
        date=format_utc_iso_date(release["date"]),
        name=release["name"],
        synthetic=release["synthetic"],
        target=release["target"],
        target_type=release["target_type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )
    release_note_lines = []
    if release["message"]:
        release_note_lines = release["message"].split("\n")
    swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)]
    vault_cooking = None
    rev_directory = None
    target_link = None
    # dispatch on the type of object the release points to; lookup failures
    # for the target are reported to sentry but do not fail the page
    if release["target_type"] == REVISION:
        target_link = gen_revision_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        try:
            revision = archive.lookup_revision(release["target"])
            rev_directory = revision["directory"]
            vault_cooking = {
                "directory_context": True,
                "directory_id": rev_directory,
                "revision_context": True,
                "revision_id": release["target"],
            }
            swh_objects.append(
                SWHObjectInfo(object_type=REVISION, object_id=release["target"])
            )
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=rev_directory)
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif release["target_type"] == DIRECTORY:
        target_link = gen_directory_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        try:
            # check directory exists
            archive.lookup_directory(release["target"])
            vault_cooking = {
                "directory_context": True,
                "directory_id": release["target"],
                "revision_context": False,
                "revision_id": None,
            }
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=release["target"])
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif release["target_type"] == CONTENT:
        target_link = gen_content_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=release["target"])
        )
    elif release["target_type"] == RELEASE:
        target_link = gen_release_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
    # link to the root directory of the targeted revision, scoped to the
    # current browsing context (origin, snapshot or bare directory)
    rev_directory_url = None
    if rev_directory is not None:
        if origin_info:
            rev_directory_url = reverse(
                "browse-origin-directory",
                query_params={
                    "origin_url": origin_info["url"],
                    "release": release["name"],
                    "snapshot": snapshot_id,
                },
            )
        elif snapshot_id:
            rev_directory_url = reverse(
                "browse-snapshot-directory",
                url_args={"snapshot_id": snapshot_id},
                query_params={"release": release["name"]},
            )
        else:
            rev_directory_url = reverse(
                "browse-directory", url_args={"sha1_git": rev_directory}
            )
    directory_link = None
    if rev_directory_url is not None:
        directory_link = gen_link(rev_directory_url, rev_directory)
    release["directory_link"] = directory_link
    release["target_link"] = target_link
    if snapshot_context:
        snapshot_id = snapshot_context["snapshot_id"]
    if snapshot_id:
        swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT,
                                         object_id=snapshot_id))
    swhids_info = get_swhids_info(swh_objects, snapshot_context)
    # first line of the release message is shown as a header,
    # the remainder as the body
    note_header = "None"
    if len(release_note_lines) > 0:
        note_header = release_note_lines[0]
    release["note_header"] = note_header
    release["note_body"] = "\n".join(release_note_lines[1:])
    heading = "Release - %s" % release["name"]
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found
    return render(
        request,
        "browse/release.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Release",
            "swh_object_metadata": release_metadata,
            "release": release,
            "snapshot_context": snapshot_context,
            "show_actions": True,
            "breadcrumbs": None,
            "vault_cooking": vault_cooking,
            "top_right_link": None,
            "swhids_info": swhids_info,
        },
    )
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified by
    its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`
    """
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    # "origin" is accepted as a fallback spelling of "origin_url"
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    # a missing content is reported as a 404 in the rendered page
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        # browsing in an origin / snapshot context: resolve it, remapping
        # origin lookup failures to a friendlier error message
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context=CONTENT,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_cnt_url = reverse("browse-content",
                                      url_args={"query_string": query_string})
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_cnt_url)))
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path)
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]
    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language
    available_languages = None
    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()
    filename = None
    path_info = None
    directory_id = None
    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")
    query_params = snapshot_context["query_params"] if snapshot_context else {}
    breadcrumbs = []
    if path:
        # NOTE(review): when no snapshot context provides a root directory,
        # the first path component is treated as the root directory id
        split_path = path.split("/")
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            # strip the root dir prefix and the filename, keeping only the
            # intermediate directory part of the path
            path = path.replace(root_dir + "/", "")
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        # last breadcrumb is the current file, not a link
        breadcrumbs.append({"name": filename, "url": None})
    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None
    query_params = {"filename": filename}
    content_checksums = content_data.get("checksums", {})
    content_url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
    )
    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )
    content_metadata = ContentMetadata(
        object_type=CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=filesizeformat(content_data.get("length", 0)),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )
    swh_objects = [
        SWHObjectInfo(object_type=CONTENT,
                      object_id=content_checksums.get("sha1_git"))
    ]
    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id))
    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(object_type=REVISION,
                          object_id=snapshot_context["revision_id"]))
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT,
                          object_id=snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(object_type=RELEASE,
                              object_id=snapshot_context["release_id"]))
    swhids_info = get_swhids_info(
        swh_objects,
        snapshot_context,
        extra_context=content_metadata,
    )
    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path
    return render(
        request,
        "browse/content.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Content",
            "swh_object_metadata": content_metadata,
            "content": content,
            "content_size": content_data.get("length"),
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "available_languages": available_languages,
            "breadcrumbs": breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context": snapshot_context,
            "vault_cooking": None,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
def test_get_swhids_info_origin_snapshot_context(archive_data, origin):
    """
    Test SWHIDs with contextual info computation under a variety of
    origin / snapshot browsing contexts.
    """
    visits = archive_data.origin_visit_get(origin["url"])
    for visit in visits:
        snapshot = archive_data.snapshot_get(visit["snapshot"])
        snapshot_id = snapshot["id"]
        # split snapshot branches by target type
        branches = {
            k: v["target"]
            for k, v in snapshot["branches"].items()
            if v["target_type"] == "revision"
        }
        releases = {
            k: v["target"]
            for k, v in snapshot["branches"].items()
            if v["target_type"] == "release"
        }
        head_rev_id = archive_data.snapshot_get_head(snapshot)
        head_rev = archive_data.revision_get(head_rev_id)
        root_dir = head_rev["directory"]
        dir_content = archive_data.directory_ls(root_dir)
        dir_files = [e for e in dir_content if e["type"] == "file"]
        # pick random browsing targets among the available data
        dir_file = random.choice(dir_files)
        revision_log = [
            r["id"] for r in archive_data.revision_log(head_rev_id)
        ]
        branch_name = random.choice(list(branches))
        release = random.choice(list(releases))
        release_data = archive_data.release_get(releases[release])
        release_name = release_data["name"]
        revision_id = random.choice(revision_log)
        # each case pairs get_snapshot_context() parameters with the
        # expected anchor object for the computed SWHID qualifiers
        for snp_ctx_params, anchor_info in (
            (
                {
                    "snapshot_id": snapshot_id
                },
                {
                    "anchor_type": REVISION,
                    "anchor_id": head_rev_id
                },
            ),
            (
                {
                    "snapshot_id": snapshot_id,
                    "branch_name": branch_name
                },
                {
                    "anchor_type": REVISION,
                    "anchor_id": branches[branch_name]
                },
            ),
            (
                {
                    "snapshot_id": snapshot_id,
                    "release_name": release_name
                },
                {
                    "anchor_type": RELEASE,
                    "anchor_id": releases[release]
                },
            ),
            (
                {
                    "snapshot_id": snapshot_id,
                    "revision_id": revision_id
                },
                {
                    "anchor_type": REVISION,
                    "anchor_id": revision_id
                },
            ),
            (
                {
                    "origin_url": origin["url"],
                    "snapshot_id": snapshot_id
                },
                {
                    "anchor_type": REVISION,
                    "anchor_id": head_rev_id
                },
            ),
            (
                {
                    "origin_url": origin["url"],
                    "snapshot_id": snapshot_id,
                    "branch_name": branch_name,
                },
                {
                    "anchor_type": REVISION,
                    "anchor_id": branches[branch_name]
                },
            ),
            (
                {
                    "origin_url": origin["url"],
                    "snapshot_id": snapshot_id,
                    "release_name": release_name,
                },
                {
                    "anchor_type": RELEASE,
                    "anchor_id": releases[release]
                },
            ),
            (
                {
                    "origin_url": origin["url"],
                    "snapshot_id": snapshot_id,
                    "revision_id": revision_id,
                },
                {
                    "anchor_type": REVISION,
                    "anchor_id": revision_id
                },
            ),
        ):
            snapshot_context = get_snapshot_context(**snp_ctx_params)
            # resolve the revision the browsing context points to
            rev_id = head_rev_id
            if "branch_name" in snp_ctx_params:
                rev_id = branches[branch_name]
            elif "release_name" in snp_ctx_params:
                rev_id = release_data["target"]
            elif "revision_id" in snp_ctx_params:
                rev_id = revision_id
            swh_objects = [
                SWHObjectInfo(object_type=CONTENT,
                              object_id=dir_file["checksums"]["sha1_git"]),
                SWHObjectInfo(object_type=DIRECTORY, object_id=root_dir),
                SWHObjectInfo(object_type=REVISION, object_id=rev_id),
                SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id),
            ]
            if "release_name" in snp_ctx_params:
                swh_objects.append(
                    SWHObjectInfo(object_type=RELEASE,
                                  object_id=release_data["id"]))
            swhids = get_swhids_info(
                swh_objects,
                snapshot_context,
                extra_context={
                    "path": "/",
                    "filename": dir_file["name"]
                },
            )
            swhid_cnt_parsed = get_swhid(swhids[0]["swhid_with_context"])
            swhid_dir_parsed = get_swhid(swhids[1]["swhid_with_context"])
            swhid_rev_parsed = get_swhid(swhids[2]["swhid_with_context"])
            # the snapshot may have no contextual SWHID; fall back to the
            # plain one in that case
            swhid_snp_parsed = get_swhid(swhids[3]["swhid_with_context"]
                                         or swhids[3]["swhid"])
            swhid_rel_parsed = None
            if "release_name" in snp_ctx_params:
                swhid_rel_parsed = get_swhid(swhids[4]["swhid_with_context"])
            anchor = gen_swhid(
                object_type=anchor_info["anchor_type"],
                object_id=anchor_info["anchor_id"],
            )
            snapshot_swhid = gen_swhid(object_type=SNAPSHOT,
                                       object_id=snapshot_id)
            expected_cnt_context = {
                "visit": snapshot_swhid,
                "anchor": anchor,
                "path": f'/{dir_file["name"]}',
            }
            expected_dir_context = {
                "visit": snapshot_swhid,
                "anchor": anchor,
            }
            expected_rev_context = {"visit": snapshot_swhid}
            expected_snp_context = {}
            # an origin qualifier is expected only when browsing by origin
            if "origin_url" in snp_ctx_params:
                expected_cnt_context["origin"] = origin["url"]
                expected_dir_context["origin"] = origin["url"]
                expected_rev_context["origin"] = origin["url"]
                expected_snp_context["origin"] = origin["url"]
            assert swhid_cnt_parsed.qualifiers() == expected_cnt_context
            assert swhid_dir_parsed.qualifiers() == expected_dir_context
            assert swhid_rev_parsed.qualifiers() == expected_rev_context
            assert swhid_snp_parsed.qualifiers() == expected_snp_context
            if "release_name" in snp_ctx_params:
                assert swhid_rel_parsed.qualifiers() == expected_rev_context
def test_get_swhids_info_directory_context(archive_data, directory):
    """Check SWHID qualifiers computed when browsing under a directory
    context: no qualifiers without extra context, a "/" path dropped on the
    root directory, and anchor/path qualifiers for sub-entries.
    """
    # with no extra context, no contextual SWHID is produced
    swhid = get_swhids_info(
        [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)],
        snapshot_context=None,
    )[0]
    assert swhid["swhid_with_context"] is None

    # a "/" path qualifier is discarded for a root directory
    swhid = get_swhids_info(
        [SWHObjectInfo(object_type=DIRECTORY, object_id=directory)],
        snapshot_context=None,
        extra_context={"path": "/"},
    )[0]
    assert swhid["swhid_with_context"] is None

    entries = archive_data.directory_ls(directory)
    subdirs = [entry for entry in entries if entry["type"] == "dir"]
    subdir = random.choice(subdirs)
    subdir_path = f'/{subdir["name"]}/'
    subdir_entries = archive_data.directory_ls(subdir["target"])
    subdir_files = [
        entry for entry in subdir_entries if entry["type"] == "file"
    ]

    objects_info = [
        SWHObjectInfo(object_type=DIRECTORY, object_id=subdir["target"])
    ]
    context = {"root_directory": directory, "path": subdir_path}
    if subdir_files:
        picked_file = random.choice(subdir_files)
        context["filename"] = picked_file["name"]
        objects_info.append(
            SWHObjectInfo(
                object_type=CONTENT,
                object_id=picked_file["checksums"]["sha1_git"],
            )
        )

    swhids = get_swhids_info(
        objects_info,
        snapshot_context=None,
        extra_context=context,
    )

    expected_anchor = gen_swhid(DIRECTORY, directory)

    # a sub-directory is anchored to the root directory and keeps its path
    parsed_dir = get_swhid(swhids[0]["swhid_with_context"])
    assert parsed_dir.qualifiers() == {
        "anchor": expected_anchor,
        "path": subdir_path,
    }

    if subdir_files:
        # a file entry carries the full path from the root directory
        parsed_cnt = get_swhid(swhids[1]["swhid_with_context"])
        assert parsed_cnt.qualifiers() == {
            "anchor": expected_anchor,
            "path": f'{subdir_path}{picked_file["name"]}',
        }
def _directory_browse(request, sha1_git, path=None):
    """Render the HTML page for a directory identified by ``sha1_git``,
    optionally narrowed to a sub-``path`` inside it.

    A missing sub-path is reported as a 404 within the rendered page
    rather than as an HTTP error response.
    """
    root_sha1_git = sha1_git
    error_info = {"status_code": 200, "description": None}
    if path:
        # resolve the requested sub-path; sha1_git then points to the
        # sub-directory actually displayed
        try:
            dir_info = archive.lookup_directory_with_path(sha1_git, path)
            sha1_git = dir_info["target"]
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
            sha1_git = None
    dirs, files = [], []
    if sha1_git is not None:
        dirs, files = get_directory_entries(sha1_git)
    # "origin" is accepted as a fallback spelling of "origin_url"
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        # browsing in an origin / snapshot context: resolve it, remapping
        # origin lookup failures to a friendlier error message
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_dir_url = reverse(
                    "browse-directory", url_args={"sha1_git": sha1_git}
                )
                error_message = (
                    "The Software Heritage archive has a directory "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the directory "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_dir_url))
                )
                raise NotFoundExc(error_message)
            else:
                raise e
    path_info = gen_path_info(path)
    query_params = snapshot_context["query_params"] if snapshot_context else {}
    # breadcrumbs: root directory first, then one entry per path component
    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": root_sha1_git[:7],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": None},
            ),
        }
    )
    for pi in path_info:
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_sha1_git},
                    query_params={**query_params, "path": pi["path"],},
                ),
            }
        )
    # normalize path so entry names can simply be appended to it below
    path = "" if path is None else (path + "/")
    for d in dirs:
        if d["type"] == "rev":
            # submodule-like entry pointing to a revision
            d["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": d["target"]},
                query_params=query_params,
            )
        else:
            d["url"] = reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": path + d["name"],},
            )
    sum_file_sizes = 0
    readmes = {}
    for f in files:
        query_string = "sha1_git:" + f["target"]
        f["url"] = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path + f["name"],
            },
        )
        if f["length"] is not None:
            sum_file_sizes += f["length"]
            f["length"] = filesizeformat(f["length"])
        if f["name"].lower().startswith("readme"):
            readmes[f["name"]] = f["checksums"]["sha1"]
    readme_name, readme_url, readme_html = get_readme_to_display(readmes)
    sum_file_sizes = filesizeformat(sum_file_sizes)
    dir_metadata = DirectoryMetadata(
        object_type=DIRECTORY,
        object_id=sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path}" if path else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )
    vault_cooking = {
        "directory_context": True,
        "directory_id": sha1_git,
        "revision_context": False,
        "revision_id": None,
    }
    swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)]
    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(
                object_type=REVISION, object_id=snapshot_context["revision_id"]
            )
        )
        swh_objects.append(
            SWHObjectInfo(
                object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"]
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=RELEASE, object_id=snapshot_context["release_id"]
                )
            )
    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)
    heading = "Directory - %s" % sha1_git
    if breadcrumbs:
        dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/"
        heading += " - %s" % dir_path
    top_right_link = None
    if snapshot_context is not None and not snapshot_context["is_empty"]:
        history_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": snapshot_context["revision_id"]},
            query_params=query_params,
        )
        top_right_link = {
            "url": history_url,
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }
    return render(
        request,
        "browse/directory.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Directory",
            "swh_object_metadata": dir_metadata,
            "dirs": dirs,
            "files": files,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "snapshot_context": snapshot_context,
            "vault_cooking": vault_cooking,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )