def test_content_for_storage_data(tmpdir):
    """Check content_for_storage on a content dict that already holds its data."""
    # given: content metadata built from in-memory bytes, without "perms"
    payload = b"temp file for testing content storage conversion"
    obj = from_disk.Content.from_bytes(data=payload, mode=0o100644).get_data()
    obj.pop("perms")

    # the expected result is the same dict, flagged as visible
    expected_content = Content.from_dict({**obj, "status": "visible"})

    # when
    content = converters.content_for_storage(obj)

    # then
    assert content == expected_content
def test_content_for_storage_path(tmpdir):
    """Check content_for_storage on a content dict that points to a file."""
    # given: the payload written to a temporary file and described from disk
    payload = b"temp file for testing content storage conversion"
    tmp_path = tmpfile_with_content(tmpdir, payload)
    obj = from_disk.Content.from_file(path=os.fsdecode(tmp_path)).get_data()

    # expected: same metadata with the file bytes inlined, marked visible,
    # and stripped of the "path" and "perms" keys
    expected_dict = {**obj, "data": payload, "status": "visible"}
    for dropped_key in ("path", "perms"):
        expected_dict.pop(dropped_key)
    expected_content = Content.from_dict(expected_dict)

    # when
    content = converters.content_for_storage(obj)

    # then
    assert content == expected_content
Esempio n. 3
0
def content_for_storage(
    content: Dict,
    max_content_size: Optional[int] = None,
    origin_url: Optional[str] = None,
) -> BaseContent:
    """Prepare content to be ready for storage

    The input dict is left untouched: the conversion works on a shallow copy.

    Note:
    - 'data' is returned only if max_content_size is not reached.

    Args:
        content: dict describing the content. The "perms" key, if any, is
            always dropped. When a size limit is set, "length" is read (and
            "sha1_git" too, for logging, if the content is skipped). When
            the content is kept and has no "data" key, its bytes are read
            from the file at "path".
        max_content_size: contents whose "length" is strictly greater than
            this value are skipped. A falsy value (None or 0) disables the
            check.
        origin_url: recorded as the "origin" of a skipped content.

    Returns:
        content with added data (or reason for being missing): a
        SkippedContent with status "absent" when the content is too large,
        otherwise a Content with status "visible".

    Raises:
        KeyError: if the content is kept but has neither "data" nor "path".

    """
    ret = content.copy()
    ret.pop("perms", None)

    if max_content_size and ret["length"] > max_content_size:
        # Pass the values as logger arguments so the message is only
        # formatted when the record is actually emitted.
        logger.info(
            "Skipping content %s, too large (%s > %s)",
            hash_to_hex(content["sha1_git"]),
            ret["length"],
            max_content_size,
        )
        ret.pop("data", None)
        # NOTE(review): "path" is not removed on this branch, unlike the
        # visible branch below -- confirm SkippedContent.from_dict copes
        # with it when the input comes from a file on disk.
        ret.update({
            "status": "absent",
            "reason": "Content too large",
            "origin": origin_url
        })
        return SkippedContent.from_dict(ret)

    if "data" not in ret:
        with open(ret["path"], "rb") as f:
            ret["data"] = f.read()

    # Extra keys added by swh.model.from_disk, that are not accepted
    # by swh-storage
    ret.pop("path", None)

    ret["status"] = "visible"

    return Content.from_dict(ret)
Esempio n. 4
0
def _init_content_tests_data(data_path, data_dict, ext_key):
    """
    Walk a directory of test files, add every content found to the
    tests storage and record, for each of them, its hexadecimal sha1
    and the language returned by get_hljs_language_from_filename.

    Args:
        data_path (str): path to the directory, relative to the folder
            containing this module
        data_dict (dict): filled in place with one
            ``{"sha1": ..., "language": ...}`` entry per content
        ext_key (bool): if true, entries are keyed by the text after the
            last "." of the name (and the language is looked up for
            "test.<key>"); otherwise they are keyed by the last
            "/"-separated component of the name
    """
    base_dir = os.path.dirname(__file__)
    test_contents_dir = os.path.join(base_dir, data_path).encode("utf-8")
    directory = from_disk.Directory.from_disk(path=test_contents_dir)

    contents = []
    for raw_name, entry in directory.items():
        model_obj = entry.to_model()
        # Only content objects are of interest, skip anything else
        if model_obj.object_type not in (
            Content.object_type,
            DiskBackedContent.object_type,
        ):
            continue

        content_dict = model_obj.with_data().to_dict()
        content_dict["status"] = "visible"
        hex_sha1 = hash_to_hex(content_dict["sha1"])

        decoded_name = raw_name.decode("utf-8")
        if ext_key:
            key = decoded_name.rsplit(".", 1)[-1]
            filename = "test." + key
        else:
            key = filename = decoded_name.rsplit("/", 1)[-1]

        language = get_hljs_language_from_filename(filename)
        data_dict[key] = {"sha1": hex_sha1, "language": language}
        contents.append(Content.from_dict(content_dict))

    get_tests_data()["storage"].content_add(contents)