def test_content_for_storage_data(tmpdir):
    """Check content_for_storage on a dict that already carries its data.

    The input dict is built from in-memory bytes, so the converter is
    expected to return a ``Content`` equal to the one built from the same
    dict with ``status`` set to ``"visible"``.
    """
    # given: a content dict built from raw bytes, without its "perms" key
    raw = b"temp file for testing content storage conversion"
    disk_dict = from_disk.Content.from_bytes(data=raw, mode=0o100644).get_data()
    del disk_dict["perms"]

    expected = Content.from_dict({**disk_dict, "status": "visible"})

    # when
    actual = converters.content_for_storage(disk_dict)

    # then
    assert actual == expected
def test_content_for_storage_path(tmpdir):
    """Check content_for_storage on a dict that references a file by path.

    The input dict is built from a file on disk; the expected ``Content``
    carries the file's bytes as ``data``, has ``status`` set to
    ``"visible"`` and no longer has the ``path`` and ``perms`` keys.
    """
    # given: a content dict built from a temporary file holding known bytes
    raw = b"temp file for testing content storage conversion"
    file_path = tmpfile_with_content(tmpdir, raw)
    disk_dict = from_disk.Content.from_file(path=os.fsdecode(file_path)).get_data()

    expected_fields = disk_dict.copy()
    expected_fields.update({"data": raw, "status": "visible"})
    # Both keys must be present in the on-disk dict; a missing one is an error.
    for extra_key in ("path", "perms"):
        del expected_fields[extra_key]
    expected = Content.from_dict(expected_fields)

    # when
    actual = converters.content_for_storage(disk_dict)

    # then
    assert actual == expected
def content_for_storage(
    content: Dict,
    max_content_size: Optional[int] = None,
    origin_url: Optional[str] = None,
) -> BaseContent:
    """Prepare content to be ready for storage.

    The input dict is never mutated; all changes are made on a shallow copy.

    Note:
        - 'data' is returned only if max_content_size is not reached.

    Args:
        content: content dict; must hold 'length' and 'sha1_git' when a size
            limit applies, and either 'data' or a readable 'path' otherwise.
        max_content_size: size limit above which the content is skipped.
            A falsy value (None or 0) disables the limit.
        origin_url: stored as 'origin' on skipped contents.

    Returns:
        A ``SkippedContent`` (status "absent", reason "Content too large")
        when the content exceeds ``max_content_size``; otherwise a
        ``Content`` (status "visible") with its 'data' loaded.

    Raises:
        KeyError: if a required key is missing from ``content``.
        OSError: if 'data' is absent and the file at 'path' cannot be read.
    """
    ret = content.copy()
    ret.pop("perms", None)

    if max_content_size and ret["length"] > max_content_size:
        # Pass the values as logging arguments so the message is only
        # formatted when the record is actually emitted.
        logger.info(
            "Skipping content %s, too large (%s > %s)",
            hash_to_hex(content["sha1_git"]),
            ret["length"],
            max_content_size,
        )
        ret.pop("data", None)
        # NOTE(review): 'path' is not stripped on this branch, unlike the
        # visible-content branch below -- confirm SkippedContent accepts it.
        ret.update(
            {
                "status": "absent",
                "reason": "Content too large",
                "origin": origin_url,
            }
        )
        return SkippedContent.from_dict(ret)

    if "data" not in ret:
        # Only the path is known: load the bytes from disk.
        with open(ret["path"], "rb") as f:
            ret["data"] = f.read()

    # Extra keys added by swh.model.from_disk, that are not accepted
    # by swh-storage
    ret.pop("path", None)

    ret["status"] = "visible"

    return Content.from_dict(ret)
def _init_content_tests_data(data_path, data_dict, ext_key):
    """
    Load the contents found in a tests directory into the tests storage and
    record per-file metadata (sha1 and detected language) in a dict.

    Args:
        data_path (str): path to a directory, relative to the folder
            holding this module
        data_dict (dict): filled in place; each entry maps a key to
            ``{"sha1": ..., "language": ...}``
        ext_key (bool): if true, entries are keyed by the text after the
            last "." of the name; otherwise by the last "/"-separated
            component of the name
    """
    tests_root = os.path.dirname(__file__)
    contents_dir = os.path.join(tests_root, data_path).encode("utf-8")
    directory = from_disk.Directory.from_disk(path=contents_dir)

    collected = []
    for raw_name, entry in directory.items():
        model_obj = entry.to_model()
        # Only content-like objects are processed; anything else is skipped.
        if model_obj.object_type not in (
            Content.object_type,
            DiskBackedContent.object_type,
        ):
            continue

        content_dict = model_obj.with_data().to_dict()
        content_dict["status"] = "visible"
        hex_sha1 = hash_to_hex(content_dict["sha1"])

        decoded_name = raw_name.decode("utf-8")
        if ext_key:
            # Key by extension and synthesize a filename for language lookup.
            key = decoded_name.rsplit(".", 1)[-1]
            filename = "test." + key
        else:
            filename = decoded_name.rsplit("/", 1)[-1]
            key = filename

        data_dict[key] = {
            "sha1": hex_sha1,
            "language": get_hljs_language_from_filename(filename),
        }
        collected.append(Content.from_dict(content_dict))

    tests_storage = get_tests_data()["storage"]
    tests_storage.content_add(collected)