예제 #1
0
def content_raw(request, query_string):
    """Django view that produces a raw display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/raw/`
    """
    re_encode = bool(strtobool(request.GET.get("re_encode", "false")))
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    content_data = request_content(query_string,
                                   max_size=None,
                                   re_encode=re_encode)

    filename = request.GET.get("filename", None)
    if not filename:
        filename = "%s_%s" % (algo, checksum)

    if (content_data["mimetype"].startswith("text/")
            or content_data["mimetype"] == "inode/x-empty"):
        response = HttpResponse(content_data["raw_data"],
                                content_type="text/plain")
        response["Content-disposition"] = "filename=%s" % filename
    else:
        response = HttpResponse(content_data["raw_data"],
                                content_type="application/octet-stream")
        response["Content-disposition"] = "attachment; filename=%s" % filename
    return response
예제 #2
0
def content_raw(request, query_string):
    """Django view that produces a raw display of a content identified
    by its hash value.

    The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/raw/`
    """ # noqa
    try:
        reencode = bool(strtobool(request.GET.get('reencode', 'false')))
        algo, checksum = query.parse_hash(query_string)
        checksum = hash_to_hex(checksum)
        content_data = request_content(query_string,
                                       max_size=None,
                                       reencode=reencode)
    except Exception as exc:
        return handle_view_exception(request, exc)

    filename = request.GET.get('filename', None)
    if not filename:
        filename = '%s_%s' % (algo, checksum)

    if content_data['mimetype'].startswith('text/') or \
       content_data['mimetype'] == 'inode/x-empty':
        response = HttpResponse(content_data['raw_data'],
                                content_type="text/plain")
        response['Content-disposition'] = 'filename=%s' % filename
    else:
        response = HttpResponse(content_data['raw_data'],
                                content_type='application/octet-stream')
        response['Content-disposition'] = 'attachment; filename=%s' % filename
    return response
예제 #3
0
def revision_browse(request, sha1_git):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = request.GET.get("visit_id")
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
            )
        except NotFoundExc as e:
            raw_rev_url = reverse("browse-revision",
                                  url_args={"sha1_git": sha1_git})
            error_message = ("The Software Heritage archive has a revision "
                             "with the hash you provided but the origin "
                             "mentioned in your request appears broken: %s. "
                             "Please check the URL and try again.\n\n"
                             "Nevertheless, you can still browse the revision "
                             "without origin information: %s" %
                             (gen_link(origin_url), gen_link(raw_rev_url)))
            if str(e).startswith("Origin"):
                raise NotFoundExc(error_message)
            else:
                raise e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)

    error_info = {"status_code": 200, "description": None}

    if path:
        try:
            file_info = archive.lookup_directory_with_path(
                revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]

    if dir_id:
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)

    revision_metadata = RevisionMetadata(
        object_type=REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"]
        if revision["author"] else "None",
        author_url=gen_person_mail_link(revision["author"])
        if revision["author"] else "None",
        committer=revision["committer"]["fullname"]
        if revision["committer"] else "None",
        committer_url=gen_person_mail_link(revision["committer"])
        if revision["committer"] else "None",
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(revision["metadata"],
                            sort_keys=True,
                            indent=4,
                            separators=(",", ": ")),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")

    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append({
        "name":
        revision["directory"][:7],
        "url":
        reverse(
            "browse-revision",
            url_args={"sha1_git": sha1_git},
            query_params=query_params,
        ),
    })
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append({
            "name":
            pi["name"],
            "url":
            reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        })

    vault_cooking = {
        "directory_context": False,
        "directory_id": None,
        "revision_context": True,
        "revision_id": sha1_git,
    }

    swh_objects = [SWHObjectInfo(object_type=REVISION, object_id=sha1_git)]

    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}

    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None

    if content_data:
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path)
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename

        top_right_link = {
            "url":
            reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon":
            swh_object_icons["content"],
            "text":
            "Raw File",
        }

        swh_objects.append(
            SWHObjectInfo(object_type=CONTENT, object_id=file_info["target"]))
    else:
        for d in dirs:
            if d["type"] == "rev":
                d["url"] = reverse("browse-revision",
                                   url_args={"sha1_git": d["target"]})
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["length"] is not None:
                f["length"] = filesizeformat(f["length"])
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f["checksums"]["sha1"]

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

        vault_cooking["directory_context"] = True
        vault_cooking["directory_id"] = dir_id

        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=dir_id))

    query_params.pop("path", None)

    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )

    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT, object_id=snapshot_id))

    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)

    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse/revision.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Revision",
            "swh_object_metadata":
            revision_metadata,
            "message_header":
            message_lines[0],
            "message_body":
            "\n".join(message_lines[1:]),
            "parents":
            parents,
            "snapshot_context":
            snapshot_context,
            "dirs":
            dirs,
            "files":
            files,
            "content":
            content,
            "content_size":
            content_size,
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "readme_name":
            readme_name,
            "readme_url":
            readme_url,
            "readme_html":
            readme_html,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link":
            top_right_link,
            "vault_cooking":
            vault_cooking,
            "diff_revision_url":
            diff_revision_url,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
예제 #4
0
def browse_snapshot_content(request,
                            snapshot_id=None,
                            origin_type=None,
                            origin_url=None,
                            timestamp=None,
                            path=None):
    """
    Django view implementation for browsing a content in a snapshot context.
    """
    try:

        snapshot_context = _process_snapshot_request(request,
                                                     snapshot_id,
                                                     origin_type,
                                                     origin_url,
                                                     timestamp,
                                                     path,
                                                     browse_context='content')

        root_sha1_git = snapshot_context['root_sha1_git']
        sha1_git = None
        query_string = None
        content_data = None
        if root_sha1_git:
            content_info = service.lookup_directory_with_path(
                root_sha1_git, path)
            sha1_git = content_info['target']
            query_string = 'sha1_git:' + sha1_git
            content_data = request_content(query_string,
                                           raise_if_unavailable=False)

    except Exception as exc:
        return handle_view_exception(request, exc)

    swh_type = snapshot_context['swh_type']
    url_args = snapshot_context['url_args']
    query_params = snapshot_context['query_params']
    revision_id = snapshot_context['revision_id']
    origin_info = snapshot_context['origin_info']
    visit_info = snapshot_context['visit_info']
    snapshot_id = snapshot_context['snapshot_id']

    content = None
    language = None
    mimetype = None
    if content_data and content_data['raw_data'] is not None:
        content_display_data = prepare_content_for_display(
            content_data['raw_data'], content_data['mimetype'], path)
        content = content_display_data['content_data']
        language = content_display_data['language']
        mimetype = content_display_data['mimetype']

    filename = None
    path_info = None

    browse_view_name = 'browse-' + swh_type + '-directory'

    breadcrumbs = []

    split_path = path.split('/')
    filename = split_path[-1]
    path_info = gen_path_info(path[:-len(filename)])
    if root_sha1_git:
        breadcrumbs.append({
            'name':
            root_sha1_git[:7],
            'url':
            reverse(browse_view_name,
                    url_args=url_args,
                    query_params=query_params)
        })
    for pi in path_info:
        bc_url_args = dict(url_args)
        bc_url_args['path'] = pi['path']
        breadcrumbs.append({
            'name':
            pi['name'],
            'url':
            reverse(browse_view_name,
                    url_args=bc_url_args,
                    query_params=query_params)
        })

    breadcrumbs.append({'name': filename, 'url': None})

    browse_content_link = \
        gen_content_link(sha1_git, link_text='Browse',
                         link_attrs={'class': 'btn btn-default btn-sm',
                                     'role': 'button'})

    content_raw_url = None
    if query_string:
        content_raw_url = reverse('browse-content-raw',
                                  url_args={'query_string': query_string},
                                  query_params={'filename': filename})

    browse_rev_link = \
        gen_revision_link(revision_id,
                          snapshot_context=snapshot_context,
                          link_text='Browse',
                          link_attrs={'class': 'btn btn-default btn-sm',
                                      'role': 'button'})

    content_metadata = {
        'context-independent content': browse_content_link,
        'path': None,
        'filename': None,
        'revision id': revision_id,
        'revision': browse_rev_link,
        'snapshot id': snapshot_id
    }

    cnt_sha1_git = None
    content_size = None
    error_code = 200
    error_description = ''
    error_message = ''
    if content_data:
        content_metadata['sha1 checksum'] = \
            content_data['checksums']['sha1']
        content_metadata['sha1_git checksum'] = \
            content_data['checksums']['sha1_git']
        content_metadata['sha256 checksum'] = \
            content_data['checksums']['sha256']
        content_metadata['blake2s256 checksum'] = \
            content_data['checksums']['blake2s256']
        content_metadata['mime type'] = content_data['mimetype']
        content_metadata['encoding'] = content_data['encoding']
        content_metadata['size'] = filesizeformat(content_data['length'])
        content_metadata['language'] = content_data['language']
        content_metadata['licenses'] = content_data['licenses']
        content_metadata['path'] = '/' + path[:-len(filename)]
        content_metadata['filename'] = filename

        cnt_sha1_git = content_data['checksums']['sha1_git']
        content_size = content_data['length']
        error_code = content_data['error_code']
        error_message = content_data['error_message']
        error_description = content_data['error_description']

    if origin_info:
        content_metadata['origin id'] = origin_info['id']
        content_metadata['origin type'] = origin_info['type']
        content_metadata['origin url'] = origin_info['url']
        content_metadata['origin visit date'] = format_utc_iso_date(
            visit_info['date'])  # noqa
        content_metadata['origin visit id'] = visit_info['visit']
        browse_snapshot_url = reverse('browse-snapshot-content',
                                      url_args={
                                          'snapshot_id': snapshot_id,
                                          'path': path
                                      },
                                      query_params=request.GET)
        browse_snapshot_link = \
            gen_link(browse_snapshot_url, link_text='Browse',
                     link_attrs={'class': 'btn btn-default btn-sm',
                                 'role': 'button'})
        content_metadata['snapshot context'] = browse_snapshot_link

    swh_objects = [{
        'type': 'content',
        'id': cnt_sha1_git
    }, {
        'type': 'revision',
        'id': revision_id
    }, {
        'type': 'snapshot',
        'id': snapshot_id
    }]

    release_id = snapshot_context['release_id']
    if release_id:
        swh_objects.append({'type': 'release', 'id': release_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    content_path = '/'.join([bc['name'] for bc in breadcrumbs])
    context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
    if origin_info:
        context_found = 'origin: %s' % origin_info['url']
    heading = 'Content - %s - %s - %s' %\
        (content_path, snapshot_context['branch'], context_found)

    return render(request,
                  'browse/content.html', {
                      'heading': heading,
                      'swh_object_name': 'Content',
                      'swh_object_metadata': content_metadata,
                      'content': content,
                      'content_size': content_size,
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'breadcrumbs': breadcrumbs if root_sha1_git else [],
                      'top_right_link': {
                          'url': content_raw_url,
                          'icon': swh_object_icons['content'],
                          'text': 'Raw File'
                      },
                      'snapshot_context': snapshot_context,
                      'vault_cooking': None,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': error_code,
                      'error_message': error_message,
                      'error_description': error_description
                  },
                  status=error_code)
예제 #5
0
def _contents_diff(request, from_query_string, to_query_string):
    """
    Browse endpoint used to compute unified diffs between two contents.

    Diffs are generated only if the two contents are textual.
    By default, diffs whose size are greater than 20 kB will
    not be generated. To force the generation of large diffs,
    the 'force' boolean query parameter must be used.

    Args:
        request: input django http request
        from_query_string: a string of the form "[ALGO_HASH:]HASH" where
            optional ALGO_HASH can be either ``sha1``, ``sha1_git``,
            ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH
            the hexadecimal representation of the hash value identifying
            the first content
        to_query_string: same as above for identifying the second content

    Returns:
        A JSON object containing the unified diff.

    """
    diff_data = {}
    content_from = None
    content_to = None
    content_from_size = 0
    content_to_size = 0
    content_from_lines = []
    content_to_lines = []
    force = request.GET.get("force", "false")
    path = request.GET.get("path", None)
    language = "nohighlight"

    force = bool(strtobool(force))

    if from_query_string == to_query_string:
        diff_str = "File renamed without changes"
    else:
        try:
            text_diff = True
            if from_query_string:
                content_from = request_content(from_query_string,
                                               max_size=None)
                content_from_display_data = prepare_content_for_display(
                    content_from["raw_data"], content_from["mimetype"], path)
                language = content_from_display_data["language"]
                content_from_size = content_from["length"]
                if not (content_from["mimetype"].startswith("text/")
                        or content_from["mimetype"] == "inode/x-empty"):
                    text_diff = False

            if text_diff and to_query_string:
                content_to = request_content(to_query_string, max_size=None)
                content_to_display_data = prepare_content_for_display(
                    content_to["raw_data"], content_to["mimetype"], path)
                language = content_to_display_data["language"]
                content_to_size = content_to["length"]
                if not (content_to["mimetype"].startswith("text/")
                        or content_to["mimetype"] == "inode/x-empty"):
                    text_diff = False

            diff_size = abs(content_to_size - content_from_size)

            if not text_diff:
                diff_str = "Diffs are not generated for non textual content"
                language = "nohighlight"
            elif not force and diff_size > _auto_diff_size_limit:
                diff_str = "Large diffs are not automatically computed"
                language = "nohighlight"
            else:
                if content_from:
                    content_from_lines = (content_from["raw_data"].decode(
                        "utf-8").splitlines(True))
                    if content_from_lines and content_from_lines[-1][
                            -1] != "\n":
                        content_from_lines[-1] += "[swh-no-nl-marker]\n"

                if content_to:
                    content_to_lines = (content_to["raw_data"].decode(
                        "utf-8").splitlines(True))
                    if content_to_lines and content_to_lines[-1][-1] != "\n":
                        content_to_lines[-1] += "[swh-no-nl-marker]\n"

                diff_lines = difflib.unified_diff(content_from_lines,
                                                  content_to_lines)
                diff_str = "".join(list(diff_lines)[2:])
        except Exception as exc:
            sentry_sdk.capture_exception(exc)
            diff_str = str(exc)

    diff_data["diff_str"] = diff_str
    diff_data["language"] = language
    return JsonResponse(diff_data)
예제 #6
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is
    :http:get:`/browse/content/[(algo_hash):](hash)/`
    """
    algo, checksum = query.parse_hash(query_string)
    checksum = hash_to_hex(checksum)
    origin_url = request.GET.get("origin_url")
    selected_language = request.GET.get("language")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    content_data = {}
    error_info = {"status_code": 200, "description": None}
    try:
        content_data = request_content(query_string)
    except NotFoundExc as e:
        error_info["status_code"] = 404
        error_info["description"] = f"NotFoundExc: {str(e)}"

    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                origin_url=origin_url,
                snapshot_id=snapshot_id,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                browse_context=CONTENT,
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin"):
                raw_cnt_url = reverse("browse-content",
                                      url_args={"query_string": query_string})
                error_message = (
                    "The Software Heritage archive has a content "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: %s. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the content "
                    "without origin information: %s" %
                    (gen_link(origin_url), gen_link(raw_cnt_url)))
                raise NotFoundExc(error_message)
            else:
                raise e
    content = None
    language = None
    mimetype = None
    if content_data.get("raw_data") is not None:
        content_display_data = prepare_content_for_display(
            content_data["raw_data"], content_data["mimetype"], path)
        content = content_display_data["content_data"]
        language = content_display_data["language"]
        mimetype = content_display_data["mimetype"]

    # Override language with user-selected language
    if selected_language is not None:
        language = selected_language

    available_languages = None

    if mimetype and "text/" in mimetype:
        available_languages = highlightjs.get_supported_languages()

    filename = None
    path_info = None
    directory_id = None

    root_dir = None
    if snapshot_context:
        root_dir = snapshot_context.get("root_directory")

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []

    if path:
        split_path = path.split("/")
        root_dir = root_dir or split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            path = path.replace(root_dir + "/", "")
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            query_params.pop("path", None)
            dir_url = reverse(
                "browse-directory",
                url_args={"sha1_git": root_dir},
                query_params=query_params,
            )
            breadcrumbs.append({"name": root_dir[:7], "url": dir_url})
            for pi in path_info:
                query_params["path"] = pi["path"]
                dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_dir},
                    query_params=query_params,
                )
                breadcrumbs.append({"name": pi["name"], "url": dir_url})
        breadcrumbs.append({"name": filename, "url": None})

    if path and root_dir != path:
        dir_info = archive.lookup_directory_with_path(root_dir, path)
        directory_id = dir_info["target"]
    elif root_dir != path:
        directory_id = root_dir
    else:
        root_dir = None

    query_params = {"filename": filename}

    content_checksums = content_data.get("checksums", {})

    content_url = reverse(
        "browse-content",
        url_args={"query_string": query_string},
    )

    content_raw_url = reverse(
        "browse-content-raw",
        url_args={"query_string": query_string},
        query_params=query_params,
    )

    content_metadata = ContentMetadata(
        object_type=CONTENT,
        object_id=content_checksums.get("sha1_git"),
        sha1=content_checksums.get("sha1"),
        sha1_git=content_checksums.get("sha1_git"),
        sha256=content_checksums.get("sha256"),
        blake2s256=content_checksums.get("blake2s256"),
        content_url=content_url,
        mimetype=content_data.get("mimetype"),
        encoding=content_data.get("encoding"),
        size=filesizeformat(content_data.get("length", 0)),
        language=content_data.get("language"),
        root_directory=root_dir,
        path=f"/{path}" if path else None,
        filename=filename or "",
        directory=directory_id,
        revision=None,
        release=None,
        snapshot=None,
        origin_url=origin_url,
    )

    swh_objects = [
        SWHObjectInfo(object_type=CONTENT,
                      object_id=content_checksums.get("sha1_git"))
    ]

    if directory_id:
        swh_objects.append(
            SWHObjectInfo(object_type=DIRECTORY, object_id=directory_id))

    if snapshot_context:
        swh_objects.append(
            SWHObjectInfo(object_type=REVISION,
                          object_id=snapshot_context["revision_id"]))
        swh_objects.append(
            SWHObjectInfo(object_type=SNAPSHOT,
                          object_id=snapshot_context["snapshot_id"]))
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(object_type=RELEASE,
                              object_id=snapshot_context["release_id"]))

    swhids_info = get_swhids_info(
        swh_objects,
        snapshot_context,
        extra_context=content_metadata,
    )

    heading = "Content - %s" % content_checksums.get("sha1_git")
    if breadcrumbs:
        content_path = "/".join([bc["name"] for bc in breadcrumbs])
        heading += " - %s" % content_path

    return render(
        request,
        "browse/content.html",
        {
            "heading":
            heading,
            "swh_object_id":
            swhids_info[0]["swhid"],
            "swh_object_name":
            "Content",
            "swh_object_metadata":
            content_metadata,
            "content":
            content,
            "content_size":
            content_data.get("length"),
            "max_content_size":
            content_display_max_size,
            "filename":
            filename,
            "encoding":
            content_data.get("encoding"),
            "mimetype":
            mimetype,
            "language":
            language,
            "available_languages":
            available_languages,
            "breadcrumbs":
            breadcrumbs,
            "top_right_link": {
                "url": content_raw_url,
                "icon": swh_object_icons["content"],
                "text": "Raw File",
            },
            "snapshot_context":
            snapshot_context,
            "vault_cooking":
            None,
            "show_actions":
            True,
            "swhids_info":
            swhids_info,
            "error_code":
            error_info["status_code"],
            "error_message":
            http_status_code_message.get(error_info["status_code"]),
            "error_description":
            error_info["description"],
        },
        status=error_info["status_code"],
    )
예제 #7
0
def _contents_diff(request, from_query_string, to_query_string):
    """
    Browse endpoint used to compute unified diffs between two contents.

    Diffs are generated only if the two contents are textual.
    By default, diffs whose size are greater than 20 kB will
    not be generated. To force the generation of large diffs,
    the 'force' boolean query parameter must be used.

    Args:
        request: input django http request
        from_query_string: a string of the form "[ALGO_HASH:]HASH" where
            optional ALGO_HASH can be either ``sha1``, ``sha1_git``,
            ``sha256``, or ``blake2s256`` (default to ``sha1``) and HASH
            the hexadecimal representation of the hash value identifying
            the first content
        to_query_string: same as above for identifying the second content

    Returns:
        A JSON object containing the unified diff.

    """
    diff_data = {}
    content_from = None
    content_to = None
    content_from_size = 0
    content_to_size = 0
    content_from_lines = []
    content_to_lines = []
    force = request.GET.get('force', 'false')
    path = request.GET.get('path', None)
    language = 'nohighlight'

    force = bool(strtobool(force))

    if from_query_string == to_query_string:
        diff_str = 'File renamed without changes'
    else:
        try:
            text_diff = True
            if from_query_string:
                content_from = \
                    request_content(from_query_string, max_size=None)
                content_from_display_data = \
                    prepare_content_for_display(content_from['raw_data'],
                                                content_from['mimetype'], path)
                language = content_from_display_data['language']
                content_from_size = content_from['length']
                if not (content_from['mimetype'].startswith('text/')
                        or content_from['mimetype'] == 'inode/x-empty'):
                    text_diff = False

            if text_diff and to_query_string:
                content_to = request_content(to_query_string, max_size=None)
                content_to_display_data = prepare_content_for_display(
                    content_to['raw_data'], content_to['mimetype'], path)
                language = content_to_display_data['language']
                content_to_size = content_to['length']
                if not (content_to['mimetype'].startswith('text/')
                        or content_to['mimetype'] == 'inode/x-empty'):
                    text_diff = False

            diff_size = abs(content_to_size - content_from_size)

            if not text_diff:
                diff_str = 'Diffs are not generated for non textual content'
                language = 'nohighlight'
            elif not force and diff_size > _auto_diff_size_limit:
                diff_str = 'Large diffs are not automatically computed'
                language = 'nohighlight'
            else:
                if content_from:
                    content_from_lines = \
                        content_from['raw_data'].decode('utf-8')\
                                                .splitlines(True)
                    if content_from_lines and \
                            content_from_lines[-1][-1] != '\n':
                        content_from_lines[-1] += '[swh-no-nl-marker]\n'

                if content_to:
                    content_to_lines = content_to['raw_data'].decode('utf-8')\
                                                            .splitlines(True)
                    if content_to_lines and content_to_lines[-1][-1] != '\n':
                        content_to_lines[-1] += '[swh-no-nl-marker]\n'

                diff_lines = difflib.unified_diff(content_from_lines,
                                                  content_to_lines)
                diff_str = ''.join(list(diff_lines)[2:])
        except Exception as e:
            diff_str = str(e)

    diff_data['diff_str'] = diff_str
    diff_data['language'] = language
    diff_data_json = json.dumps(diff_data, separators=(',', ': '))
    return HttpResponse(diff_data_json, content_type='application/json')
예제 #8
0
def content_display(request, query_string):
    """Django view that produces an HTML display of a content identified
    by its hash value.

    The url that points to it is :http:get:`/browse/content/[(algo_hash):](hash)/`
    """ # noqa
    try:
        algo, checksum = query.parse_hash(query_string)
        checksum = hash_to_hex(checksum)
        content_data = request_content(query_string,
                                       raise_if_unavailable=False)
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        snapshot_context = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url)
            except Exception:
                raw_cnt_url = reverse('browse-content',
                                      url_args={'query_string': query_string})
                error_message = \
                    ('The Software Heritage archive has a content '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the content '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_cnt_url)))

                raise NotFoundExc(error_message)
        if snapshot_context:
            snapshot_context['visit_info'] = None
    except Exception as exc:
        return handle_view_exception(request, exc)

    path = request.GET.get('path', None)

    content = None
    language = None
    mimetype = None
    if content_data['raw_data'] is not None:
        content_display_data = prepare_content_for_display(
            content_data['raw_data'], content_data['mimetype'], path)
        content = content_display_data['content_data']
        language = content_display_data['language']
        mimetype = content_display_data['mimetype']

    root_dir = None
    filename = None
    path_info = None

    query_params = {'origin': origin_url}

    breadcrumbs = []

    if path:
        split_path = path.split('/')
        root_dir = split_path[0]
        filename = split_path[-1]
        if root_dir != path:
            path = path.replace(root_dir + '/', '')
            path = path[:-len(filename)]
            path_info = gen_path_info(path)
            dir_url = reverse('browse-directory',
                              url_args={'sha1_git': root_dir},
                              query_params=query_params)
            breadcrumbs.append({'name': root_dir[:7], 'url': dir_url})
            for pi in path_info:
                dir_url = reverse('browse-directory',
                                  url_args={
                                      'sha1_git': root_dir,
                                      'path': pi['path']
                                  },
                                  query_params=query_params)
                breadcrumbs.append({'name': pi['name'], 'url': dir_url})
        breadcrumbs.append({'name': filename, 'url': None})

    query_params = {'filename': filename}

    content_raw_url = reverse('browse-content-raw',
                              url_args={'query_string': query_string},
                              query_params=query_params)

    content_metadata = {
        'sha1 checksum': content_data['checksums']['sha1'],
        'sha1_git checksum': content_data['checksums']['sha1_git'],
        'sha256 checksum': content_data['checksums']['sha256'],
        'blake2s256 checksum': content_data['checksums']['blake2s256'],
        'mime type': content_data['mimetype'],
        'encoding': content_data['encoding'],
        'size': filesizeformat(content_data['length']),
        'language': content_data['language'],
        'licenses': content_data['licenses'],
        'filename': filename
    }

    if filename:
        content_metadata['filename'] = filename

    sha1_git = content_data['checksums']['sha1_git']
    swh_ids = get_swh_persistent_ids([{'type': 'content', 'id': sha1_git}])

    heading = 'Content - %s' % sha1_git
    if breadcrumbs:
        content_path = '/'.join([bc['name'] for bc in breadcrumbs])
        heading += ' - %s' % content_path

    return render(request,
                  'browse/content.html', {
                      'heading': heading,
                      'swh_object_id': swh_ids[0]['swh_id'],
                      'swh_object_name': 'Content',
                      'swh_object_metadata': content_metadata,
                      'content': content,
                      'content_size': content_data['length'],
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'breadcrumbs': breadcrumbs,
                      'top_right_link': {
                          'url': content_raw_url,
                          'icon': swh_object_icons['content'],
                          'text': 'Raw File'
                      },
                      'snapshot_context': snapshot_context,
                      'vault_cooking': None,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': content_data['error_code'],
                      'error_message': content_data['error_message'],
                      'error_description': content_data['error_description']
                  },
                  status=content_data['error_code'])
예제 #9
0
def revision_browse(request, sha1_git, extra_path=None):
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.
    """
    try:
        revision = service.lookup_revision(sha1_git)
        # some readme files can reference assets reachable from the
        # browsed directory, handle that special case in order to
        # correctly displayed them
        if extra_path:
            dir_info = \
                service.lookup_directory_with_path(revision['directory'],
                                                   extra_path)
            if dir_info and dir_info['type'] == 'file':
                file_raw_url = reverse(
                    'browse-content-raw',
                    url_args={'query_string': dir_info['checksums']['sha1']})
                return redirect(file_raw_url)
        origin_info = None
        snapshot_context = None
        origin_type = request.GET.get('origin_type', None)
        origin_url = request.GET.get('origin_url', None)
        if not origin_url:
            origin_url = request.GET.get('origin', None)
        timestamp = request.GET.get('timestamp', None)
        visit_id = request.GET.get('visit_id', None)
        snapshot_id = request.GET.get('snapshot_id', None)
        path = request.GET.get('path', None)
        dir_id = None
        dirs, files = None, None
        content_data = None
        if origin_url:
            try:
                snapshot_context = get_snapshot_context(
                    None, origin_type, origin_url, timestamp, visit_id)
            except Exception:
                raw_rev_url = reverse('browse-revision',
                                      url_args={'sha1_git': sha1_git})
                error_message = \
                    ('The Software Heritage archive has a revision '
                     'with the hash you provided but the origin '
                     'mentioned in your request appears broken: %s. '
                     'Please check the URL and try again.\n\n'
                     'Nevertheless, you can still browse the revision '
                     'without origin information: %s'
                        % (gen_link(origin_url), gen_link(raw_rev_url)))
                raise NotFoundExc(error_message)
            origin_info = snapshot_context['origin_info']
            snapshot_id = snapshot_context['snapshot_id']
        elif snapshot_id:
            snapshot_context = get_snapshot_context(snapshot_id)
        if path:
            file_info = \
                service.lookup_directory_with_path(revision['directory'], path)
            if file_info['type'] == 'dir':
                dir_id = file_info['target']
            else:
                query_string = 'sha1_git:' + file_info['target']
                content_data = request_content(query_string,
                                               raise_if_unavailable=False)
        else:
            dir_id = revision['directory']

        if dir_id:
            path = '' if path is None else (path + '/')
            dirs, files = get_directory_entries(dir_id)
    except Exception as exc:
        return handle_view_exception(request, exc)

    revision_data = {}

    author_name = 'None'
    revision_data['author'] = 'None'
    if revision['author']:
        author_name = revision['author']['name'] or \
                      revision['author']['fullname']
        revision_data['author'] = \
            gen_person_link(revision['author']['id'], author_name,
                            snapshot_context)
    revision_data['committer'] = 'None'
    if revision['committer']:
        revision_data['committer'] = \
            gen_person_link(revision['committer']['id'],
                            revision['committer']['name'], snapshot_context)
    revision_data['committer date'] = \
        format_utc_iso_date(revision['committer_date'])
    revision_data['date'] = format_utc_iso_date(revision['date'])
    if snapshot_context:
        revision_data['snapshot id'] = snapshot_id
        revision_data['directory'] = \
            gen_snapshot_directory_link(snapshot_context, sha1_git,
                                        link_text='Browse',
                                        link_attrs={'class': 'btn btn-default btn-sm', # noqa
                                                    'role': 'button'})
    else:
        revision_data['directory'] = \
            gen_directory_link(revision['directory'], link_text='Browse',
                               link_attrs={'class': 'btn btn-default btn-sm',
                                           'role': 'button'})
    revision_data['id'] = sha1_git
    revision_data['merge'] = revision['merge']
    revision_data['metadata'] = escape(
        json.dumps(revision['metadata'],
                   sort_keys=True,
                   indent=4,
                   separators=(',', ': ')))

    if origin_info:
        revision_data['context-independent revision'] = \
            gen_revision_link(sha1_git, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['origin id'] = origin_info['id']
        revision_data['origin type'] = origin_info['type']
        revision_data['origin url'] = gen_link(origin_info['url'],
                                               origin_info['url'])
        browse_snapshot_link = \
            gen_snapshot_link(snapshot_id, link_text='Browse',
                              link_attrs={'class': 'btn btn-default btn-sm',
                                          'role': 'button'})
        revision_data['snapshot'] = browse_snapshot_link

    parents = ''
    for p in revision['parents']:
        parent_link = gen_revision_link(p, snapshot_context=snapshot_context)
        parents += parent_link + '<br/>'

    revision_data['parents'] = mark_safe(parents)
    revision_data['synthetic'] = revision['synthetic']
    revision_data['type'] = revision['type']

    message_lines = ['None']
    if revision['message']:
        message_lines = revision['message'].split('\n')

    parents = []
    for p in revision['parents']:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({'id': p, 'url': parent_url})

    path_info = gen_path_info(path)

    query_params = {
        'snapshot_id': snapshot_id,
        'origin_type': origin_type,
        'origin': origin_url,
        'timestamp': timestamp,
        'visit_id': visit_id
    }

    breadcrumbs = []
    breadcrumbs.append({
        'name':
        revision['directory'][:7],
        'url':
        reverse('browse-revision',
                url_args={'sha1_git': sha1_git},
                query_params=query_params)
    })
    for pi in path_info:
        query_params['path'] = pi['path']
        breadcrumbs.append({
            'name':
            pi['name'],
            'url':
            reverse('browse-revision',
                    url_args={'sha1_git': sha1_git},
                    query_params=query_params)
        })

    vault_cooking = {
        'directory_context': False,
        'directory_id': None,
        'revision_context': True,
        'revision_id': sha1_git
    }

    swh_objects = [{'type': 'revision', 'id': sha1_git}]

    content = None
    content_size = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}
    error_code = 200
    error_message = ''
    error_description = ''

    if content_data:
        breadcrumbs[-1]['url'] = None
        content_size = content_data['length']
        mimetype = content_data['mimetype']
        if content_data['raw_data']:
            content_display_data = prepare_content_for_display(
                content_data['raw_data'], content_data['mimetype'], path)
            content = content_display_data['content_data']
            language = content_display_data['language']
            mimetype = content_display_data['mimetype']
        query_params = {}
        if path:
            filename = path_info[-1]['name']
            query_params['filename'] = path_info[-1]['name']
            revision_data['filename'] = filename

        top_right_link = {
            'url':
            reverse('browse-content-raw',
                    url_args={'query_string': query_string},
                    query_params=query_params),
            'icon':
            swh_object_icons['content'],
            'text':
            'Raw File'
        }

        swh_objects.append({'type': 'content', 'id': file_info['target']})

        error_code = content_data['error_code']
        error_message = content_data['error_message']
        error_description = content_data['error_description']
    else:
        for d in dirs:
            if d['type'] == 'rev':
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': d['target']})
            else:
                query_params['path'] = path + d['name']
                d['url'] = reverse('browse-revision',
                                   url_args={'sha1_git': sha1_git},
                                   query_params=query_params)
        for f in files:
            query_params['path'] = path + f['name']
            f['url'] = reverse('browse-revision',
                               url_args={'sha1_git': sha1_git},
                               query_params=query_params)
            if f['length'] is not None:
                f['length'] = filesizeformat(f['length'])
            if f['name'].lower().startswith('readme'):
                readmes[f['name']] = f['checksums']['sha1']

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            'url': get_revision_log_url(sha1_git, snapshot_context),
            'icon': swh_object_icons['revisions history'],
            'text': 'History'
        }

        vault_cooking['directory_context'] = True
        vault_cooking['directory_id'] = dir_id

        swh_objects.append({'type': 'directory', 'id': dir_id})

    diff_revision_url = reverse('diff-revision',
                                url_args={'sha1_git': sha1_git},
                                query_params={
                                    'origin_type': origin_type,
                                    'origin': origin_url,
                                    'timestamp': timestamp,
                                    'visit_id': visit_id
                                })

    if snapshot_id:
        swh_objects.append({'type': 'snapshot', 'id': snapshot_id})

    swh_ids = get_swh_persistent_ids(swh_objects, snapshot_context)

    heading = 'Revision - %s - %s' %\
        (sha1_git[:7], textwrap.shorten(message_lines[0], width=70))
    if snapshot_context:
        context_found = 'snapshot: %s' % snapshot_context['snapshot_id']
        if origin_info:
            context_found = 'origin: %s' % origin_info['url']
        heading += ' - %s' % context_found

    return render(request,
                  'browse/revision.html', {
                      'heading': heading,
                      'swh_object_id': swh_ids[0]['swh_id'],
                      'swh_object_name': 'Revision',
                      'swh_object_metadata': revision_data,
                      'message_header': message_lines[0],
                      'message_body': '\n'.join(message_lines[1:]),
                      'parents': parents,
                      'snapshot_context': snapshot_context,
                      'dirs': dirs,
                      'files': files,
                      'content': content,
                      'content_size': content_size,
                      'max_content_size': content_display_max_size,
                      'mimetype': mimetype,
                      'language': language,
                      'readme_name': readme_name,
                      'readme_url': readme_url,
                      'readme_html': readme_html,
                      'breadcrumbs': breadcrumbs,
                      'top_right_link': top_right_link,
                      'vault_cooking': vault_cooking,
                      'diff_revision_url': diff_revision_url,
                      'show_actions_menu': True,
                      'swh_ids': swh_ids,
                      'error_code': error_code,
                      'error_message': error_message,
                      'error_description': error_description
                  },
                  status=error_code)