Exemple #1
0
def _gen_diff_link(idx, diff_anchor, link_text):
    if idx < _max_displayed_file_diffs:
        return gen_link(diff_anchor, link_text)
    else:
        return link_text
Exemple #2
0
def release_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a release
    identified by its id.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.
    """
    release = archive.lookup_release(sha1_git)
    snapshot_context = {}
    origin_info = None
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id,
                origin_url,
                timestamp,
                visit_id,
                release_name=release["name"],
            )
        except NotFoundExc as e:
            raw_rel_url = reverse("browse-release", url_args={"sha1_git": sha1_git})
            error_message = (
                "The Software Heritage archive has a release "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: %s. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the release "
                "without origin information: %s"
                % (gen_link(origin_url), gen_link(raw_rel_url))
            )
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(
            snapshot_id, release_name=release["name"]
        )

    snapshot_id = snapshot_context.get("snapshot_id", None)

    release_metadata = ReleaseMetadata(
        object_type=RELEASE,
        object_id=sha1_git,
        release=sha1_git,
        author=release["author"]["fullname"] if release["author"] else "None",
        author_url=gen_person_mail_link(release["author"])
        if release["author"]
        else "None",
        date=format_utc_iso_date(release["date"]),
        name=release["name"],
        synthetic=release["synthetic"],
        target=release["target"],
        target_type=release["target_type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    release_note_lines = []
    if release["message"]:
        release_note_lines = release["message"].split("\n")

    swh_objects = [SWHObjectInfo(object_type=RELEASE, object_id=sha1_git)]

    vault_cooking = None

    rev_directory = None
    target_link = None
    if release["target_type"] == REVISION:
        target_link = gen_revision_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        try:
            revision = archive.lookup_revision(release["target"])
            rev_directory = revision["directory"]
            vault_cooking = {
                "directory_context": True,
                "directory_id": rev_directory,
                "revision_context": True,
                "revision_id": release["target"],
            }
            swh_objects.append(
                SWHObjectInfo(object_type=REVISION, object_id=release["target"])
            )
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=rev_directory)
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif release["target_type"] == DIRECTORY:
        target_link = gen_directory_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        try:
            # check directory exists
            archive.lookup_directory(release["target"])
            vault_cooking = {
                "directory_context": True,
                "directory_id": release["target"],
                "revision_context": False,
                "revision_id": None,
            }
            swh_objects.append(
                SWHObjectInfo(object_type=DIRECTORY, object_id=release["target"])
            )
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
    elif release["target_type"] == CONTENT:
        target_link = gen_content_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )
        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=release["target"])
        )
    elif release["target_type"] == RELEASE:
        target_link = gen_release_link(
            release["target"],
            snapshot_context=snapshot_context,
            link_text=None,
            link_attrs=None,
        )

    rev_directory_url = None
    if rev_directory is not None:
        if origin_info:
            rev_directory_url = reverse(
                "browse-origin-directory",
                query_params={
                    "origin_url": origin_info["url"],
                    "release": release["name"],
                    "snapshot": snapshot_id,
                },
            )
        elif snapshot_id:
            rev_directory_url = reverse(
                "browse-snapshot-directory",
                url_args={"snapshot_id": snapshot_id},
                query_params={"release": release["name"]},
            )
        else:
            rev_directory_url = reverse(
                "browse-directory", url_args={"sha1_git": rev_directory}
            )

    directory_link = None
    if rev_directory_url is not None:
        directory_link = gen_link(rev_directory_url, rev_directory)
    release["directory_link"] = directory_link
    release["target_link"] = target_link

    if snapshot_context:
        snapshot_id = snapshot_context["snapshot_id"]

    if snapshot_id:
        swh_objects.append(SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context)

    note_header = "None"
    if len(release_note_lines) > 0:
        note_header = release_note_lines[0]

    release["note_header"] = note_header
    release["note_body"] = "\n".join(release_note_lines[1:])

    heading = "Release - %s" % release["name"]
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse/release.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Release",
            "swh_object_metadata": release_metadata,
            "release": release,
            "snapshot_context": snapshot_context,
            "show_actions": True,
            "breadcrumbs": None,
            "vault_cooking": vault_cooking,
            "top_right_link": None,
            "swhids_info": swhids_info,
        },
    )
Exemple #3
0
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision",
                                  url_args={"sha1_git": sha1_git})
            error_message = ("The Software Heritage archive has a revision "
                             "with the hash you provided but the origin "
                             "mentioned in your request appears broken: %s. "
                             "Please check the URL and try again.\n\n"
                             "Nevertheless, you can still browse the revision "
                             "without origin information: %s" %
                             (gen_link(origin_url), gen_link(raw_rev_url)))
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)

    error_info = {"status_code": 200, "description": None}

    if path:
        try:
            file_info = archive.lookup_directory_with_path(
                revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]

    if dir_id:
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)

    revision_metadata = RevisionMetadata(
        object_type=REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"]
        if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"] else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"] else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"] else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(revision["metadata"],
                            sort_keys=True,
                            indent=4,
                            separators=(",", ": ")),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")

    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append({
        "name":
        revision["directory"][:7],
        "url":
        reverse(
            "browse-revision",
            url_args={"sha1_git": sha1_git},
            query_params=query_params,
        ),
    })
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append({
            "name":
            pi["name"],
            "url":
            reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        })

    vault_cooking = {
        "directory_context": False,
        "directory_id": None,
        "revision_context": True,
        "revision_id": sha1_git,
    }

    swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)]

    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}

    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None

    if content_data:
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path)
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename

        top_right_link = {
            "url":
            reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon":
            swh_object_icons["content"],
            "text":
            "Raw File",
        }

        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]))
    else:
        for d in dirs:
            if d["type"] == "rev":
                d["url"] = reverse("browse-revision",
                                   url_args={"sha1_git": d["target"]})
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["length"] is not None:
                f["length"] = filesizeformat(f["length"])
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

        vault_cooking["directory_context"] = True
        vault_cooking["directory_id"] = dir_id

        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id))

    query_params.pop("path", None)

    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )

    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)

    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse/revision.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Revision",
            "swh_object_metadata":
            revision_metadata,
            "message_header":
            message_lines[0],
            "message_body":
            "\n".join(message_lines[1:]),
            "parents":
            parents,
            "snapshot_context":
            snapshot_context,
            "dirs":
            dirs,
            "files":
            files,
            "content":
            content,
            "content_size":
            content_size,
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "readme_name":
            readme_name,
            "readme_url":
            readme_url,
            "readme_html":
            readme_html,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link":
            top_right_link,
            "vault_cooking":
            vault_cooking,
            "diff_revision_url":
            diff_revision_url,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
Exemple #4
0
def release_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a release
    identified by its id.

    The url that points to it is :http:get:`/browse/release/(sha1_git)/`.
    """
    try:
        release = service.lookup_release(sha1_git)
        snapshot_context = None
        origin_info = None
        snapshot_id = request.GET.get('snapshot_id', None)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        if origin_url:
            try:
                snapshot_context = \
                    get_snapshot_context(snapshot_id, origin_type,
                                         origin_url, timestamp,
                                         visit_id)
            except Exception:
                raw_rel_url = reverse('browse-release',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a release '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the release '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rel_url)))

                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    release_data = {}

    author_name = 'None'
    release_data['author'] = 'None'
    if release['author']:
        author_name = release['author']['name'] or \
                      release['author']['fullname']
        release_data['author'] = \
            gen_person_link(release['author']['id'], author_name,
                            snapshot_context)
    release_data['date'] = format_utc_iso_date(release['date'])
    release_data['id'] = sha1_git
    release_data['name'] = release['name']
    release_data['synthetic'] = release['synthetic']
    release_data['target type'] = release['target_type']

    if release['target_type'] == 'revision':
        release_data['target'] = \
            gen_revision_link(release['target'],
                              snapshot_context=snapshot_context)
    elif release['target_type'] == 'content':
        content_url = \
            reverse('browse-content',
                    url_args={'query_string': 'sha1_git:' + release['target']})
        release_data['target'] = gen_link(content_url, release['target'])
    elif release['target_type'] == 'directory':
        directory_url = \
            reverse('browse-directory',
                    url_args={'sha1_git': release['target']})
        release_data['target'] = gen_link(directory_url, release['target'])
    elif release['target_type'] == 'release':
        release_url = \
            reverse('browse-release',
                    url_args={'sha1_git': release['target']})
        release_data['target'] = gen_link(release_url, release['target'])

    release_note_lines = []
    if release['message']:
        release_note_lines = release['message'].split('\n')

    vault_cooking = None

    query_params = {}
    if snapshot_id:
        query_params = {'snapshot_id': snapshot_id}
    elif origin_info:
        query_params = {'origin': origin_info['url']}

    target_url = ''
    if release['target_type'] == 'revision':
        target_url = reverse('browse-revision',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)
        try:
            revision = service.lookup_revision(release['target'])
            vault_cooking = {
                'directory_context': True,
                'directory_id': revision['directory'],
                'revision_context': True,
                'revision_id': release['target']
            }
        except Exception:
            pass
    elif release['target_type'] == 'directory':
        target_url = reverse('browse-directory',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)
        try:
            revision = service.lookup_directory(release['target'])
            vault_cooking = {
                'directory_context': True,
                'directory_id': revision['directory'],
                'revision_context': False,
                'revision_id': None
            }
        except Exception:
            pass
    elif release['target_type'] == 'content':
        target_url = reverse('browse-content',
                             url_args={'query_string': release['target']},
                             query_params=query_params)
    elif release['target_type'] == 'release':
        target_url = reverse('browse-release',
                             url_args={'sha1_git': release['target']},
                             query_params=query_params)

    release['target_url'] = target_url

    if snapshot_context:
        release_data['snapshot id'] = snapshot_context['snapshot_id']

    if origin_info:
        release_url = reverse('browse-release',
                              url_args={'sha1_git': release['id']})
        release_data['context-independent release'] = \
            gen_link(release_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        release_data['origin id'] = origin_info['id']
        release_data['origin type'] = origin_info['type']
        release_data['origin url'] = gen_link(origin_info['url'],
                                              origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_context['snapshot_id'],
                              link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        release_data['snapshot'] = browse_snapshot_link

    swh_objects = [{'type': 'release',
                    'id': sha1_git}]

    if snapshot_context:
        snapshot_id = snapshot_context['snapshot_id']

    if snapshot_id:
        swh_objects.append({'type': 'snapshot',
                            'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    note_header = 'None'
    if len(release_note_lines) > 0:
        note_header = release_note_lines[0]

    release['note_header'] = note_header
    release['note_body'] = '\n'.join(release_note_lines[1:])

    heading = 'Release - %s' % release['name']
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request, 'browse/release.html',
                  {'heading': heading,
                   'swh_object_id': swh_ids[0]['swh_id'],
                   'swh_object_name': 'Release',
                   'swh_object_metadata': release_data,
                   'release': release,
                   'snapshot_context': snapshot_context,
                   'show_actions_menu': True,
                   'breadcrumbs': None,
                   'vault_cooking': vault_cooking,
                   'top_right_link': None,
                   'swh_ids': swh_ids})
Exemple #5
0
def directory_browse(request, sha1_git, path=None):
    """Django view for browsing the content of a directory identified
    by its sha1_git value.

    The url that points to it is :http:get:`/browse/directory/(sha1_git)/[(path)/]`
    """ # noqa
    root_sha1_git = sha1_git
    try:
        if path:
            dir_info = service.lookup_directory_with_path(sha1_git, path)
            # some readme files can reference assets reachable from the
            # browsed directory, handle that special case in order to
            # correctly displayed them
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
            sha1_git = dir_info['target']

        dirs, files = get_directory_entries(sha1_git)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                raw_dir_url = reverse('browse-directory',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a directory '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the directory '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_dir_url)))

                raise NotFoundExc(error_message)
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path_info = gen_path_info(path)

    query_params = {'origin': origin_url}

    breadcrumbs = []
    breadcrumbs.append({
        'name':
        root_sha1_git[:7],
        'url':
        reverse('browse-directory',
                url_args={'sha1_git': root_sha1_git},
                query_params=query_params)
    })
    for pi in path_info:
        breadcrumbs.append({
            'name':
            pi['name'],
            'url':
            reverse('browse-directory',
                    url_args={
                        'sha1_git': root_sha1_git,
                        'path': pi['path']
                    },
                    query_params=query_params)
        })

    path = '' if path is None else (path + '/')

    for d in dirs:
        if d['type'] == 'rev':
            d['url'] = reverse('browse-revision',
                               url_args={'sha1_git': d['target']},
                               query_params=query_params)
        else:
            d['url'] = reverse('browse-directory',
                               url_args={
                                   'sha1_git': root_sha1_git,
                                   'path': path + d['name']
                               },
                               query_params=query_params)

    sum_file_sizes = 0

    readmes = {}

    for f in files:
        query_string = 'sha1_git:' + f['target']
        f['url'] = reverse('browse-content',
                           url_args={'query_string': query_string},
                           query_params={
                               'path': root_sha1_git + '/' + path + f['name'],
                               'origin': origin_url
                           })
        if f['length'] is not None:
            sum_file_sizes += f['length']
            f['length'] = filesizeformat(f['length'])
        if f['name'].lower().startswith('readme'):
            readmes[f['name']] = f['checksums']['sha1']

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    dir_metadata = {
        'id': sha1_git,
        'number of regular files': len(files),
        'number of subdirectories': len(dirs),
        'sum of regular file sizes': sum_file_sizes
    }

    vault_cooking = {
        'directory_context': True,
        'directory_id': sha1_git,
        'revision_context': False,
        'revision_id': None
    }

    swh_ids = get_swh_persistent_ids([{'type': 'directory', 'id': sha1_git}])

    heading = 'Directory - %s' % sha1_git
    if breadcrumbs:
        dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/'
        heading += ' - %s' % dir_path

    return render(
        request, 'browse/directory.html', {
            'heading': heading,
            'swh_object_id': swh_ids[0]['swh_id'],
            'swh_object_name': 'Directory',
            'swh_object_metadata': dir_metadata,
            'dirs': dirs,
            'files': files,
            'breadcrumbs': breadcrumbs,
            'top_right_link': None,
            'readme_name': readme_name,
            'readme_url': readme_url,
            'readme_html': readme_html,
            'snapshot_context': snapshot_context,
            'vault_cooking': vault_cooking,
            'show_actions_menu': True,
            'swh_ids': swh_ids
        })
Exemple #6
0
def browse_snapshot_log(request,
                        snapshot_id=None,
                        origin_type=None,
                        origin_url=None,
                        timestamp=None):
    """
    Django view implementation for browsing a revision history in a
    snapshot context.
    """
    try:

        snapshot_context = _process_snapshot_request(
            request,
            snapshot_id,
            origin_type,
            origin_url,
            timestamp,
            browse_context='log')  # noqa

        revision_id = snapshot_context['revision_id']

        per_page = int(request.GET.get('per_page', PER_PAGE))
        offset = int(request.GET.get('offset', 0))
        revs_ordering = request.GET.get('revs_ordering', 'committer_date')
        session_key = 'rev_%s_log_ordering_%s' % (revision_id, revs_ordering)
        rev_log_session = request.session.get(session_key, None)
        rev_log = []
        revs_walker_state = None
        if rev_log_session:
            rev_log = rev_log_session['rev_log']
            revs_walker_state = rev_log_session['revs_walker_state']

        if len(rev_log) < offset + per_page:
            revs_walker = \
                service.get_revisions_walker(revs_ordering,
                                             revision_id,
                                             max_revs=offset+per_page+1,
                                             state=revs_walker_state)
            rev_log += list(revs_walker)
            revs_walker_state = revs_walker.export_state()

        revision_log = rev_log[offset:offset + per_page]

        request.session[session_key] = {
            'rev_log': rev_log,
            'revs_walker_state': revs_walker_state
        }

    except Exception as exc:
        return handle_view_exception(request, exc)

    swh_type = snapshot_context['swh_type']
    origin_info = snapshot_context['origin_info']
    visit_info = snapshot_context['visit_info']
    url_args = snapshot_context['url_args']
    query_params = snapshot_context['query_params']
    snapshot_id = snapshot_context['snapshot_id']

    query_params['per_page'] = per_page
    revs_ordering = request.GET.get('revs_ordering', '')
    query_params['revs_ordering'] = revs_ordering

    browse_view_name = 'browse-' + swh_type + '-log'

    prev_log_url = None
    if len(rev_log) > offset + per_page:
        query_params['offset'] = offset + per_page
        prev_log_url = reverse(browse_view_name,
                               url_args=url_args,
                               query_params=query_params)

    next_log_url = None
    if offset != 0:
        query_params['offset'] = offset - per_page
        next_log_url = reverse(browse_view_name,
                               url_args=url_args,
                               query_params=query_params)

    revision_log_data = format_log_entries(revision_log, per_page,
                                           snapshot_context)

    browse_log_link = \
        gen_revision_log_link(revision_id, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})

    revision_metadata = {
        'context-independent revision history': browse_log_link,
        'snapshot id': snapshot_id
    }

    if origin_info:
        revision_metadata['origin id'] = origin_info['id']
        revision_metadata['origin type'] = origin_info['type']
        revision_metadata['origin url'] = origin_info['url']
        revision_metadata['origin visit date'] = format_utc_iso_date(
            visit_info['date'])  # noqa
        revision_metadata['origin visit id'] = visit_info['visit']
        browse_snapshot_url = reverse('browse-snapshot-log',
                                      url_args={'snapshot_id': snapshot_id},
                                      query_params=request.GET)
        browse_snapshot_link = \
            gen_link(browse_snapshot_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        revision_metadata['snapshot context'] = browse_snapshot_link

    swh_objects = [{
        'type': 'revision',
        'id': revision_id
    }, {
        'type': 'snapshot',
        'id': snapshot_id
    }]

    release_id = snapshot_context['release_id']
    if release_id:
        swh_objects.append({'type': 'release', 'id': release_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
    if origin_info:
        context_found = 'origin: %s' % origin_info['url']
    heading = 'Revision history - %s - %s' %\
        (snapshot_context['branch'], context_found)

    return render(
        request, 'browse/revision-log.html', {
            'heading': heading,
            'swh_object_name': 'Revisions history',
            'swh_object_metadata': revision_metadata,
            'revision_log': revision_log_data,
            'revs_ordering': revs_ordering,
            'next_log_url': next_log_url,
            'prev_log_url': prev_log_url,
            'breadcrumbs': None,
            'top_right_link': None,
            'snapshot_context': snapshot_context,
            'vault_cooking': None,
            'show_actions_menu': True,
            'swh_ids': swh_ids
        })
Exemple #7
0
def test_gen_link():
    assert (gen_link(
        "https://www.softwareheritage.org/",
        "swh") == '<a href="https://www.softwareheritage.org/">swh</a>')
Exemple #8
0
def browse_snapshot_directory(request,
                              snapshot_id=None,
                              origin_type=None,
                              origin_url=None,
                              timestamp=None,
                              path=None):
    """
    Django view implementation for browsing a directory in a snapshot context.
    """
    try:

        snapshot_context = _process_snapshot_request(
            request,
            snapshot_id,
            origin_type,
            origin_url,
            timestamp,
            path,
            browse_context='directory')  # noqa

        root_sha1_git = snapshot_context['root_sha1_git']
        sha1_git = root_sha1_git
        if root_sha1_git and path:
            dir_info = service.lookup_directory_with_path(root_sha1_git, path)
            # some readme files can reference assets reachable from the
            # browsed directory, handle that special case in order to
            # correctly displayed them
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
            sha1_git = dir_info['target']

        dirs = []
        files = []
        if sha1_git:
            dirs, files = get_directory_entries(sha1_git)

    except Exception as exc:
        return handle_view_exception(request, exc)

    swh_type = snapshot_context['swh_type']
    origin_info = snapshot_context['origin_info']
    visit_info = snapshot_context['visit_info']
    url_args = snapshot_context['url_args']
    query_params = snapshot_context['query_params']
    revision_id = snapshot_context['revision_id']
    snapshot_id = snapshot_context['snapshot_id']

    path_info = gen_path_info(path)

    browse_view_name = 'browse-' + swh_type + '-directory'

    breadcrumbs = []
    if root_sha1_git:
        breadcrumbs.append({
            'name':
            root_sha1_git[:7],
            'url':
            reverse(browse_view_name,
                    url_args=url_args,
                    query_params=query_params)
        })
    for pi in path_info:
        bc_url_args = dict(url_args)
        bc_url_args['path'] = pi['path']
        breadcrumbs.append({
            'name':
            pi['name'],
            'url':
            reverse(browse_view_name,
                    url_args=bc_url_args,
                    query_params=query_params)
        })

    path = '' if path is None else (path + '/')

    for d in dirs:
        if d['type'] == 'rev':
            d['url'] = reverse('browse-revision',
                               url_args={'sha1_git': d['target']})
        else:
            bc_url_args = dict(url_args)
            bc_url_args['path'] = path + d['name']
            d['url'] = reverse(browse_view_name,
                               url_args=bc_url_args,
                               query_params=query_params)

    sum_file_sizes = 0

    readmes = {}

    browse_view_name = 'browse-' + swh_type + '-content'

    for f in files:
        bc_url_args = dict(url_args)
        bc_url_args['path'] = path + f['name']
        f['url'] = reverse(browse_view_name,
                           url_args=bc_url_args,
                           query_params=query_params)
        if f['length'] is not None:
            sum_file_sizes += f['length']
            f['length'] = filesizeformat(f['length'])
        if f['name'].lower().startswith('readme'):
            readmes[f['name']] = f['checksums']['sha1']

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    browse_view_name = 'browse-' + swh_type + '-log'

    history_url = None
    if snapshot_id != _empty_snapshot_id:
        history_url = reverse(browse_view_name,
                              url_args=url_args,
                              query_params=query_params)

    nb_files = None
    nb_dirs = None
    sum_file_sizes = None
    dir_path = None
    if root_sha1_git:
        nb_files = len(files)
        nb_dirs = len(dirs)
        sum_file_sizes = filesizeformat(sum_file_sizes)
        dir_path = '/' + path

    browse_dir_link = \
        gen_directory_link(sha1_git, link_text='Browse',
                           link_attrs={'class': 'btn btn-default btn-sm',
                                       'role': 'button'})

    browse_rev_link = \
        gen_revision_link(revision_id,
                          snapshot_context=snapshot_context,
                          link_text='Browse',
                          link_attrs={'class': 'btn btn-default btn-sm',
                                      'role': 'button'})

    dir_metadata = {
        'id': sha1_git,
        'context-independent directory': browse_dir_link,
        'number of regular files': nb_files,
        'number of subdirectories': nb_dirs,
        'sum of regular file sizes': sum_file_sizes,
        'path': dir_path,
        'revision id': revision_id,
        'revision': browse_rev_link,
        'snapshot id': snapshot_id
    }

    if origin_info:
        dir_metadata['origin id'] = origin_info['id']
        dir_metadata['origin type'] = origin_info['type']
        dir_metadata['origin url'] = origin_info['url']
        dir_metadata['origin visit date'] = format_utc_iso_date(
            visit_info['date'])  # noqa
        dir_metadata['origin visit id'] = visit_info['visit']
        snapshot_context_url = reverse('browse-snapshot-directory',
                                       url_args={'snapshot_id': snapshot_id},
                                       query_params=request.GET)
        browse_snapshot_link = \
            gen_link(snapshot_context_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        dir_metadata['snapshot context'] = browse_snapshot_link

    vault_cooking = {
        'directory_context': True,
        'directory_id': sha1_git,
        'revision_context': True,
        'revision_id': revision_id
    }

    swh_objects = [{
        'type': 'directory',
        'id': sha1_git
    }, {
        'type': 'revision',
        'id': revision_id
    }, {
        'type': 'snapshot',
        'id': snapshot_id
    }]

    release_id = snapshot_context['release_id']
    if release_id:
        swh_objects.append({'type': 'release', 'id': release_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    dir_path = '/'.join([bc['name'] for bc in breadcrumbs]) + '/'
    context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
    if origin_info:
        context_found = 'origin: %s' % origin_info['url']
    heading = 'Directory - %s - %s - %s' %\
        (dir_path, snapshot_context['branch'], context_found)

    return render(
        request, 'browse/directory.html', {
            'heading': heading,
            'swh_object_name': 'Directory',
            'swh_object_metadata': dir_metadata,
            'dirs': dirs,
            'files': files,
            'breadcrumbs': breadcrumbs if root_sha1_git else [],
            'top_right_link': {
                'url': history_url,
                'icon': swh_object_icons['revisions history'],
                'text': 'History'
            },
            'readme_name': readme_name,
            'readme_url': readme_url,
            'readme_html': readme_html,
            'snapshot_context': snapshot_context,
            'vault_cooking': vault_cooking,
            'show_actions_menu': True,
            'swh_ids': swh_ids
        })
Exemple #9
0
def browse_snapshot_content(request,
                            snapshot_id=None,
                            origin_type=None,
                            origin_url=None,
                            timestamp=None,
                            path=None):
    """
    Django view implementation for browsing a content in a snapshot context.
    """
    try:

        snapshot_context = _process_snapshot_request(request,
                                                     snapshot_id,
                                                     origin_type,
                                                     origin_url,
                                                     timestamp,
                                                     path,
                                                     browse_context='content')

        root_sha1_git = snapshot_context['root_sha1_git']
        sha1_git = None
        query_string = None
        content_data = None
        if root_sha1_git:
            content_info = service.lookup_directory_with_path(
                root_sha1_git, path)
            sha1_git = content_info['target']
            query_string = 'sha1_git:' + sha1_git
            content_data = request_content(query_string,
                                           raise_if_unavailable=False)

    except Exception as exc:
        return handle_view_exception(request, exc)

    swh_type = snapshot_context['swh_type']
    url_args = snapshot_context['url_args']
    query_params = snapshot_context['query_params']
    revision_id = snapshot_context['revision_id']
    origin_info = snapshot_context['origin_info']
    visit_info = snapshot_context['visit_info']
    snapshot_id = snapshot_context['snapshot_id']

    content = None
    language = None
    mimetype = None
    if content_data and content_data['raw_data'] is not None:
        content_display_data = prepare_content_for_display(
            content_data['raw_data'], content_data['mimetype'], path)
        content = content_display_data['content_data']
        language = content_display_data['language']
        mimetype = content_display_data['mimetype']

    filename = None
    path_info = None

    browse_view_name = 'browse-' + swh_type + '-directory'

    breadcrumbs = []

    split_path = path.split('/')
    filename = split_path[-1]
    path_info = gen_path_info(path[:-len(filename)])
    if root_sha1_git:
        breadcrumbs.append({
            'name':
            root_sha1_git[:7],
            'url':
            reverse(browse_view_name,
                    url_args=url_args,
                    query_params=query_params)
        })
    for pi in path_info:
        bc_url_args = dict(url_args)
        bc_url_args['path'] = pi['path']
        breadcrumbs.append({
            'name':
            pi['name'],
            'url':
            reverse(browse_view_name,
                    url_args=bc_url_args,
                    query_params=query_params)
        })

    breadcrumbs.append({'name': filename, 'url': None})

    browse_content_link = \
        gen_content_link(sha1_git, link_text='Browse',
                         link_attrs={'class': 'btn btn-default btn-sm',
                                     'role': 'button'})

    content_raw_url = None
    if query_string:
        content_raw_url = reverse('browse-content-raw',
                                  url_args={'query_string': query_string},
                                  query_params={'filename': filename})

    browse_rev_link = \
        gen_revision_link(revision_id,
                          snapshot_context=snapshot_context,
                          link_text='Browse',
                          link_attrs={'class': 'btn btn-default btn-sm',
                                      'role': 'button'})

    content_metadata = {
        'context-independent content': browse_content_link,
        'path': None,
        'filename': None,
        'revision id': revision_id,
        'revision': browse_rev_link,
        'snapshot id': snapshot_id
    }

    cnt_sha1_git = None
    content_size = None
    error_code = 200
    error_description = ''
    error_message = ''
    if content_data:
        content_metadata['sha1 checksum'] = \
            content_data['checksums']['sha1']
        content_metadata['sha1_git checksum'] = \
            content_data['checksums']['sha1_git']
        content_metadata['sha256 checksum'] = \
            content_data['checksums']['sha256']
        content_metadata['blake2s256 checksum'] = \
            content_data['checksums']['blake2s256']
        content_metadata['mime type'] = content_data['mimetype']
        content_metadata['encoding'] = content_data['encoding']
        content_metadata['size'] = filesizeformat(content_data['length'])
        content_metadata['language'] = content_data['language']
        content_metadata['licenses'] = content_data['licenses']
        content_metadata['path'] = '/' + path[:-len(filename)]
        content_metadata['filename'] = filename

        cnt_sha1_git = content_data['checksums']['sha1_git']
        content_size = content_data['length']
        error_code = content_data['error_code']
        error_message = content_data['error_message']
        error_description = content_data['error_description']

    if origin_info:
        content_metadata['origin id'] = origin_info['id']
        content_metadata['origin type'] = origin_info['type']
        content_metadata['origin url'] = origin_info['url']
        content_metadata['origin visit date'] = format_utc_iso_date(
            visit_info['date'])  # noqa
        content_metadata['origin visit id'] = visit_info['visit']
        browse_snapshot_url = reverse('browse-snapshot-content',
                                      url_args={
                                          'snapshot_id': snapshot_id,
                                          'path': path
                                      },
                                      query_params=request.GET)
        browse_snapshot_link = \
            gen_link(browse_snapshot_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        content_metadata['snapshot context'] = browse_snapshot_link

    swh_objects = [{
        'type': 'content',
        'id': cnt_sha1_git
    }, {
        'type': 'revision',
        'id': revision_id
    }, {
        'type': 'snapshot',
        'id': snapshot_id
    }]

    release_id = snapshot_context['release_id']
    if release_id:
        swh_objects.append({'type': 'release', 'id': release_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    content_path = '/'.join([bc['name'] for bc in breadcrumbs])
    context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
    if origin_info:
        context_found = 'origin: %s' % origin_info['url']
    heading = 'Content - %s - %s - %s' %\
        (content_path, snapshot_context['branch'], context_found)

    return render(request,
                  'browse/content.html', {
                      'heading': heading,
                      'swh_object_name': 'Content',
                      'swh_object_metadata': content_metadata,
                      'content': content,
                      'content_size': content_size,
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'breadcrumbs': breadcrumbs if root_sha1_git else [],
                      'top_right_link': {
                          'url': content_raw_url,
                          'icon': swh_object_icons['content'],
                          'text': 'Raw File'
                      },
                      'snapshot_context': snapshot_context,
                      'vault_cooking': None,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': error_code,
                      'error_message': error_message,
                      'error_description': error_description
                  },
                  status=error_code)
Exemple #10
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`
    """
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context=CONTENT,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_cnt_url = reverse("browse-content",
                                      url_args={"query_string": query_string})
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s" %
                    (gen_link(origin_url), gen_link(raw_cnt_url)))
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path)
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]

    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language

    available_languages = None

    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()

    filename = None
    path_info = None
    directory_id = None

    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []

    if path:
        split_path = path.split("/")
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            path = path.replace(root_dir + "/", "")
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        breadcrumbs.append({"name": filename, "url": None})

    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None

    query_params = {"filename": filename}

    content_checksums = content_data.get("checksums", {})

    content_url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
    )

    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    content_metadata = ContentMetadata(
        object_type=CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=filesizeformat(content_data.get("length", 0)),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )

    swh_objects = [
        SWHObjectInfo(object_type=CONTENT,
                      object_id=content_checksums.get("sha1_git"))
    ]

    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id))

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(object_type=REVISION,
                          object_id=snapshot_context["revision_id"]))
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT,
                          object_id=snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(object_type=RELEASE,
                              object_id=snapshot_context["release_id"]))

    swhids_info = get_swhids_info(
        swh_objects,
        snapshot_context,
        extra_context=content_metadata,
    )

    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path

    return render(
        request,
        "browse/content.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Content",
            "swh_object_metadata":
            content_metadata,
            "content":
            content,
            "content_size":
            content_data.get("length"),
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "available_languages":
            available_languages,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context":
            snapshot_context,
            "vault_cooking":
            None,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
Exemple #11
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/`
    """ # noqa
    try:
        algo, checksum = query.parse_hash(query_string)
        checksum = hash_to_hex(checksum)
        content_data = request_content(query_string,
                                       raise_if_unavailable=False)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                raw_cnt_url = reverse('browse-content',
                                      url_args={'query_string': query_string})
                error_message = \
                    ('The Software Heritage archive has a content '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the content '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_cnt_url)))

                raise NotFoundExc(error_message)
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path = request.GET.get('path', None)

    content = None
    language = None
    mimetype = None
    if content_data['raw_data'] is not None:
        content_display_data = prepare_content_for_display(
            content_data['raw_data'], content_data['mimetype'], path)
        content = content_display_data['content_data']
        language = content_display_data['language']
        mimetype = content_display_data['mimetype']

    root_dir = None
    filename = None
    path_info = None

    query_params = {'origin': origin_url}

    breadcrumbs = []

    if path:
        split_path = path.split('/')
        root_dir = split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            path = path.replace(root_dir + '/', '')
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            dir_url = reverse('browse-directory',
                              url_args={'sha1_git': root_dir},
                              query_params=query_params)
            breadcrumbs.append({'name': root_dir[:7], 'url': dir_url})
            for pi in path_info:
                dir_url = reverse('browse-directory',
                                  url_args={
                                      'sha1_git': root_dir,
                                      'path': pi['path']
                                  },
                                  query_params=query_params)
                breadcrumbs.append({'name': pi['name'], 'url': dir_url})
        breadcrumbs.append({'name': filename, 'url': None})

    query_params = {'filename': filename}

    content_raw_url = reverse('browse-content-raw',
                              url_args={'query_string': query_string},
                              query_params=query_params)

    content_metadata = {
        'sha1 checksum': content_data['checksums']['sha1'],
        'sha1_git checksum': content_data['checksums']['sha1_git'],
        'sha256 checksum': content_data['checksums']['sha256'],
        'blake2s256 checksum': content_data['checksums']['blake2s256'],
        'mime type': content_data['mimetype'],
        'encoding': content_data['encoding'],
        'size': filesizeformat(content_data['length']),
        'language': content_data['language'],
        'licenses': content_data['licenses'],
        'filename': filename
    }

    if filename:
        content_metadata['filename'] = filename

    sha1_git = content_data['checksums']['sha1_git']
    swh_ids = get_swh_persistent_ids([{'type': 'content', 'id': sha1_git}])

    heading = 'Content - %s' % sha1_git
    if breadcrumbs:
        content_path = '/'.join([bc['name'] for bc in breadcrumbs])
        heading += ' - %s' % content_path

    return render(request,
                  'browse/content.html', {
                      'heading': heading,
                      'swh_object_id': swh_ids[0]['swh_id'],
                      'swh_object_name': 'Content',
                      'swh_object_metadata': content_metadata,
                      'content': content,
                      'content_size': content_data['length'],
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'breadcrumbs': breadcrumbs,
                      'top_right_link': {
                          'url': content_raw_url,
                          'icon': swh_object_icons['content'],
                          'text': 'Raw File'
                      },
                      'snapshot_context': snapshot_context,
                      'vault_cooking': None,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': content_data['error_code'],
                      'error_message': content_data['error_message'],
                      'error_description': content_data['error_description']
                  },
                  status=content_data['error_code'])
Exemple #12
0
 def test_gen_link(self):
     self.assertEqual(
         utils.gen_link('https://www.softwareheritage.org/', 'swh'),
         '<a href="https://www.softwareheritage.org/">swh</a>')
Exemple #13
0
def _directory_browse(request, sha1_git, path=None):
    root_sha1_git = sha1_git
    error_info = {"status_code": 200, "description": None}
    if path:
        try:
            dir_info = archive.lookup_directory_with_path(sha1_git, path)
            sha1_git = dir_info["target"]
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
            sha1_git = None

    dirs, files = [], []
    if sha1_git is not None:
        dirs, files = get_directory_entries(sha1_git)
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_dir_url = reverse(
                    "browse-directory", url_args={"sha1_git": sha1_git}
                )
                error_message = (
                    "The Software Heritage archive has a directory "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the directory "
                    "without origin information: %s"
                    % (gen_link(origin_url), gen_link(raw_dir_url))
                )
                raise NotFoundExc(error_message)
            else:
                raise e

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": root_sha1_git[:7],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": None},
            ),
        }
    )

    for pi in path_info:
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_sha1_git},
                    query_params={**query_params, "path": pi["path"],},
                ),
            }
        )

    path = "" if path is None else (path + "/")

    for d in dirs:
        if d["type"] == "rev":
            d["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": d["target"]},
                query_params=query_params,
            )
        else:
            d["url"] = reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": path + d["name"],},
            )

    sum_file_sizes = 0

    readmes = {}

    for f in files:
        query_string = "sha1_git:" + f["target"]
        f["url"] = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path + f["name"],
            },
        )
        if f["length"] is not None:
            sum_file_sizes += f["length"]
            f["length"] = filesizeformat(f["length"])
        if f["name"].lower().startswith("readme"):
            readmes[f["name"]] = f["checksums"]["sha1"]

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    sum_file_sizes = filesizeformat(sum_file_sizes)

    dir_metadata = DirectoryMetadata(
        object_type=DIRECTORY,
        object_id=sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path}" if path else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )

    vault_cooking = {
        "directory_context": True,
        "directory_id": sha1_git,
        "revision_context": False,
        "revision_id": None,
    }

    swh_objects = [SWHObjectInfo(object_type=DIRECTORY, object_id=sha1_git)]

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(
                object_type=REVISION, object_id=snapshot_context["revision_id"]
            )
        )
        swh_objects.append(
            SWHObjectInfo(
                object_type=SNAPSHOT, object_id=snapshot_context["snapshot_id"]
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=RELEASE, object_id=snapshot_context["release_id"]
                )
            )

    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)

    heading = "Directory - %s" % sha1_git
    if breadcrumbs:
        dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/"
        heading += " - %s" % dir_path

    top_right_link = None
    if snapshot_context is not None and not snapshot_context["is_empty"]:
        history_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": snapshot_context["revision_id"]},
            query_params=query_params,
        )
        top_right_link = {
            "url": history_url,
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

    return render(
        request,
        "browse/directory.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Directory",
            "swh_object_metadata": dir_metadata,
            "dirs": dirs,
            "files": files,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "snapshot_context": snapshot_context,
            "vault_cooking": vault_cooking,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )
Exemple #14
0
def revision_browse(request, sha1_git, extra_path=None):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    try:
        revision = service.lookup_revision(sha1_git)
        # some readme files can reference assets reachable from the
        # browsed directory, handle that special case in order to
        # correctly displayed them
        if extra_path:
            dir_info = \
                service.lookup_directory_with_path(revision['directory'],
                                                   extra_path)
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
        origin_info = None
        snapshot_context = None
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        snapshot_id = request.GET.get('snapshot_id', None)
        path = request.GET.get('path', None)
        dir_id = None
        dirs, files = None, None
        content_data = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url, timestamp, visit_id)
            except Exception:
                raw_rev_url = reverse('browse-revision',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a revision '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the revision '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rev_url)))
                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
            snapshot_id = snapshot_context['snapshot_id']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
        if path:
            file_info = \
                service.lookup_directory_with_path(revision['directory'], path)
            if file_info['type'] == 'dir':
                dir_id = file_info['target']
            else:
                query_string = 'sha1_git:' + file_info['target']
                content_data = request_content(query_string,
                                               raise_if_unavailable=False)
        else:
            dir_id = revision['directory']

        if dir_id:
            path = '' if path is None else (path + '/')
            dirs, files = get_directory_entries(dir_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    revision_data = {}

    author_name = 'None'
    revision_data['author'] = 'None'
    if revision['author']:
        author_name = revision['author']['name'] or \
                      revision['author']['fullname']
        revision_data['author'] = \
            gen_person_link(revision['author']['id'], author_name,
                            snapshot_context)
    revision_data['committer'] = 'None'
    if revision['committer']:
        revision_data['committer'] = \
            gen_person_link(revision['committer']['id'],
                            revision['committer']['name'], snapshot_context)
    revision_data['committer date'] = \
        format_utc_iso_date(revision['committer_date'])
    revision_data['date'] = format_utc_iso_date(revision['date'])
    if snapshot_context:
        revision_data['snapshot id'] = snapshot_id
        revision_data['directory'] = \
            gen_snapshot_directory_link(snapshot_context, sha1_git,
                                        link_text='Browse',
                                        link_attrs={'class': 'btn btn-default btn-sm', # noqa
                                                    'role': 'button'})
    else:
        revision_data['directory'] = \
            gen_directory_link(revision['directory'], link_text='Browse',
                               link_attrs={'class': 'btn btn-default btn-sm',
                                           'role': 'button'})
    revision_data['id'] = sha1_git
    revision_data['merge'] = revision['merge']
    revision_data['metadata'] = escape(
        json.dumps(revision['metadata'],
                   sort_keys=True,
                   indent=4,
                   separators=(',', ': ')))

    if origin_info:
        revision_data['context-independent revision'] = \
            gen_revision_link(sha1_git, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['origin id'] = origin_info['id']
        revision_data['origin type'] = origin_info['type']
        revision_data['origin url'] = gen_link(origin_info['url'],
                                               origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_id, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['snapshot'] = browse_snapshot_link

    parents = ''
    for p in revision['parents']:
        parent_link = gen_revision_link(p, snapshot_context=snapshot_context)
        parents += parent_link + '<br/>'

    revision_data['parents'] = mark_safe(parents)
    revision_data['synthetic'] = revision['synthetic']
    revision_data['type'] = revision['type']

    message_lines = ['None']
    if revision['message']:
        message_lines = revision['message'].split('\n')

    parents = []
    for p in revision['parents']:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({'id': p, 'url': parent_url})

    path_info = gen_path_info(path)

    query_params = {
        'snapshot_id': snapshot_id,
        'origin_type': origin_type,
        'origin': origin_url,
        'timestamp': timestamp,
        'visit_id': visit_id
    }

    breadcrumbs = []
    breadcrumbs.append({
        'name':
        revision['directory'][:7],
        'url':
        reverse('browse-revision',
                url_args={'sha1_git': sha1_git},
                query_params=query_params)
    })
    for pi in path_info:
        query_params['path'] = pi['path']
        breadcrumbs.append({
            'name':
            pi['name'],
            'url':
            reverse('browse-revision',
                    url_args={'sha1_git': sha1_git},
                    query_params=query_params)
        })

    vault_cooking = {
        'directory_context': False,
        'directory_id': None,
        'revision_context': True,
        'revision_id': sha1_git
    }

    swh_objects = [{'type': 'revision', 'id': sha1_git}]

    content = None
    content_size = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}
    error_code = 200
    error_message = ''
    error_description = ''

    if content_data:
        breadcrumbs[-1]['url'] = None
        content_size = content_data['length']
        mimetype = content_data['mimetype']
        if content_data['raw_data']:
            content_display_data = prepare_content_for_display(
                content_data['raw_data'], content_data['mimetype'], path)
            content = content_display_data['content_data']
            language = content_display_data['language']
            mimetype = content_display_data['mimetype']
        query_params = {}
        if path:
            filename = path_info[-1]['name']
            query_params['filename'] = path_info[-1]['name']
            revision_data['filename'] = filename

        top_right_link = {
            'url':
            reverse('browse-content-raw',
                    url_args={'query_string': query_string},
                    query_params=query_params),
            'icon':
            swh_object_icons['content'],
            'text':
            'Raw File'
        }

        swh_objects.append({'type': 'content', 'id': file_info['target']})

        error_code = content_data['error_code']
        error_message = content_data['error_message']
        error_description = content_data['error_description']
    else:
        for d in dirs:
            if d['type'] == 'rev':
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': d['target']})
            else:
                query_params['path'] = path + d['name']
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': sha1_git},
                                   query_params=query_params)
        for f in files:
            query_params['path'] = path + f['name']
            f['url'] = reverse('browse-revision',
                               url_args={'sha1_git': sha1_git},
                               query_params=query_params)
            if f['length'] is not None:
                f['length'] = filesizeformat(f['length'])
            if f['name'].lower().startswith('readme'):
                readmes[f['name']] = f['checksums']['sha1']

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            'url': get_revision_log_url(sha1_git, snapshot_context),
            'icon': swh_object_icons['revisions history'],
            'text': 'History'
        }

        vault_cooking['directory_context'] = True
        vault_cooking['directory_id'] = dir_id

        swh_objects.append({'type': 'directory', 'id': dir_id})

    diff_revision_url = reverse('diff-revision',
                                url_args={'sha1_git': sha1_git},
                                query_params={
                                    'origin_type': origin_type,
                                    'origin': origin_url,
                                    'timestamp': timestamp,
                                    'visit_id': visit_id
                                })

    if snapshot_id:
        swh_objects.append({'type': 'snapshot', 'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    heading = 'Revision - %s - %s' %\
        (sha1_git[:7], textwrap.shorten(message_lines[0], width=70))
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request,
                  'browse/revision.html', {
                      'heading': heading,
                      'swh_object_id': swh_ids[0]['swh_id'],
                      'swh_object_name': 'Revision',
                      'swh_object_metadata': revision_data,
                      'message_header': message_lines[0],
                      'message_body': '\n'.join(message_lines[1:]),
                      'parents': parents,
                      'snapshot_context': snapshot_context,
                      'dirs': dirs,
                      'files': files,
                      'content': content,
                      'content_size': content_size,
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'readme_name': readme_name,
                      'readme_url': readme_url,
                      'readme_html': readme_html,
                      'breadcrumbs': breadcrumbs,
                      'top_right_link': top_right_link,
                      'vault_cooking': vault_cooking,
                      'diff_revision_url': diff_revision_url,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': error_code,
                      'error_message': error_message,
                      'error_description': error_description
                  },
                  status=error_code)