def test_from_swh_edge_cases_convert_invalid_utf8_bytes():
    """Non-UTF-8 byte values are backslash-escaped and the affected keys
    are reported under 'decoding_failures'."""
    data = {
        "a": "something",
        "b": "someone",
        "c": b"a name \xff",
        "d": b"an email \xff",
    }
    expected = {
        "a": "something",
        "b": "someone",
        "c": "a name \\xff",
        "d": "an email \\xff",
        "decoding_failures": ["c", "d"],
    }

    result = converters.from_swh(data, hashess={"a", "b"}, bytess={"c", "d"})

    for key in ("a", "b", "c", "d"):
        assert result[key] == expected[key]
    # Same number of failures, and every expected key is reported.
    assert len(result["decoding_failures"]) == len(expected["decoding_failures"])
    for key in expected["decoding_failures"]:
        assert key in result["decoding_failures"]
def lookup_content_license(q):
    """Return license information for a content identified by *q*.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        dict with the content id and its license facts, or None when the
        content or its license records are not found.
    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    licenses = list(idx_storage.content_fossology_license_get([sha1]))
    if not licenses:
        return None
    # Drop the per-fact 'id' field: the content id is carried at top level.
    facts = []
    for license in licenses:
        fact = license.to_dict()
        del fact["id"]
        facts.append(fact)
    return converters.from_swh({"id": sha1, "facts": facts}, hashess={"id"})
def content_get_license(cls, cnt_id):
    """Return fossology license facts for the content with hex id *cnt_id*."""
    id_bytes = hash_to_bytes(cnt_id)
    results = cls.idx_storage.content_fossology_license_get([id_bytes])
    first = next(results)
    payload = {'id': id_bytes, 'facts': first[id_bytes]}
    return converters.from_swh(payload, hashess={'id'})
def test_from_swh_edge_cases_convert_invalid_utf8_bytes(self):
    """Non-UTF-8 byte values are backslash-escaped and the affected keys
    are reported under 'decoding_failures'."""
    data = {
        'a': 'something',
        'b': 'someone',
        'c': b'a name \xff',
        'd': b'an email \xff',
    }
    expected = {
        'a': 'something',
        'b': 'someone',
        'c': 'a name \\xff',
        'd': 'an email \\xff',
        'decoding_failures': ['c', 'd'],
    }

    result = converters.from_swh(data, hashess={'a', 'b'}, bytess={'c', 'd'})

    for key in ('a', 'b', 'c', 'd'):
        self.assertEqual(result[key], expected[key])
    # Same number of failures, and every expected key is reported.
    self.assertEqual(len(result['decoding_failures']),
                     len(expected['decoding_failures']))
    for key in expected['decoding_failures']:
        self.assertIn(key, result['decoding_failures'])
def content_get(self, cnt_id: str) -> Dict[str, Any]:
    """Return the content with hex sha1 *cnt_id*, hashes hex-encoded.

    The storage-internal 'ctime' field is stripped before conversion.
    """
    id_bytes = hash_to_bytes(cnt_id)
    content = self.storage.content_get([id_bytes])[0]
    content_d = None
    if content:
        content_d = content.to_dict()
        content_d.pop("ctime", None)
    return converters.from_swh(
        content_d, hashess={"sha1", "sha1_git", "sha256", "blake2s256"}
    )
def lookup_snapshot_alias(
    snapshot_id: str, alias_name: str
) -> Optional[Dict[str, Any]]:
    """Try to resolve a branch alias in a snapshot.

    Args:
        snapshot_id: hexadecimal representation of a snapshot id
        alias_name: name of the branch alias to resolve

    Returns:
        Target branch information, or None if the alias does not exist or
        targets a dangling branch.
    """
    resolved = snapshot_resolve_alias(
        storage, _to_sha1_bin(snapshot_id), alias_name.encode()
    )
    if resolved is None:
        return None
    return converters.from_swh(resolved.to_dict(), hashess={"target"})
def lookup_content_language(q):
    """Return language information for a content identified by *q*.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        language information (dict) if the content is found, else None.
    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    results = list(idx_storage.content_language_get([sha1]))
    lang = _first_element(results)
    if not lang:
        return None
    return converters.from_swh(lang, hashess={'id'})
def lookup_content_license(q):
    """Return license information for a content identified by *q*.

    Args:
        q: query string of the form <hash_algo:hash>

    Returns:
        license information (dict) if the content is found, else None.
    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    lic = _first_element(idx_storage.content_fossology_license_get([sha1]))
    if not lic:
        return None
    payload = {'id': sha1, 'facts': lic[sha1]}
    return converters.from_swh(payload, hashess={'id'})
def lookup_content_ctags(q):
    """Yield ctags information for a content identified by *q*.

    Args:
        q: query string of the form <hash_algo:hash>

    Yields:
        ctags information (dict) entries if the content is found.

    NOTE(review): this function is a generator (it contains ``yield``), so
    the early ``return None`` paths yield an empty iterator rather than an
    actual None value — confirm callers expect that.
    """
    sha1 = _lookup_content_sha1(q)
    if not sha1:
        return None
    entries = list(idx_storage.content_ctags_get([sha1]))
    if not entries:
        return None
    for entry in entries:
        yield converters.from_swh(entry, hashess={"id"})
def test_from_swh_edge_cases_do_no_conversion_if_none_or_not_bytes():
    """Values that are None or already str pass through unchanged."""
    data = {"a": "something", "b": None, "c": "someone", "d": None, "e": None}

    result = converters.from_swh(
        data, hashess={"a", "b"}, bytess={"c", "d"}, dates={"e"}
    )

    assert result == {
        "a": "something",
        "b": None,
        "c": "someone",
        "d": None,
        "e": None,
    }
def test_from_swh_edge_cases_do_no_conversion_if_none_or_not_bytes(self):
    """Values that are None or already str pass through unchanged."""
    data = {'a': 'something', 'b': None, 'c': 'someone', 'd': None, 'e': None}

    result = converters.from_swh(
        data, hashess={'a', 'b'}, bytess={'c', 'd'}, dates={'e'}
    )

    self.assertEqual(
        {'a': 'something', 'b': None, 'c': 'someone', 'd': None, 'e': None},
        result,
    )
def lookup_expression(expression, last_sha1, per_page):
    """Lookup expression in raw content.

    Args:
        expression (str): An expression to lookup through raw indexed content
        last_sha1 (str): Last sha1 seen
        per_page (int): Number of results per page

    Yields:
        ctags whose content match the expression
    """
    # Cap the page size to the service-wide maximum.
    limit = min(per_page, MAX_LIMIT)
    matches = idx_storage.content_ctags_search(
        expression, last_sha1=last_sha1, limit=limit
    )
    for match in matches:
        entry = converters.from_swh(match, hashess={"id"})
        # Expose the identifier under 'sha1' instead of 'id'.
        entry["sha1"] = entry.pop("id")
        yield entry
def test_from_swh():
    # Exercise every conversion knob of converters.from_swh in one call:
    # hash hex-encoding, bytes decoding, date normalization, blacklisting,
    # empty-value substitution, removable keys, and custom metadata
    # conversion via convert_fn.
    some_input = {
        "a": "something",
        "b": "someone",
        "c": b"sharp-0.3.4.tgz",
        "d": hashutil.hash_to_bytes("b04caf10e9535160d90e874b45aa426de762f19f"),
        "e": b"sharp.html/doc_002dS_005fISREG.html",
        "g": [b"utf-8-to-decode", b"another-one"],
        "h": "something filtered",  # blacklisted below
        "i": {
            "e": b"something"
        },
        "j": {
            "k": {
                "l": [b"bytes thing", b"another thingy", b""],
                "n": "don't care either",  # blacklisted below
            },
            "m": "don't care",  # blacklisted below
        },
        "o": "something",  # blacklisted below
        "p": b"foo",
        "q": {
            "extra-headers": [["a", b"intact"]]
        },
        "w": None,
        "r": {
            "p": "also intact",
            "q": "bar"
        },
        # Three date shapes: flat timestamp, nested timestamp, datetime.
        "s": {
            "timestamp": 42,
            "offset": -420,
            "negative_utc": None,
        },
        "s1": {
            "timestamp": {
                "seconds": 42,
                "microseconds": 0
            },
            "offset": -420,
            "negative_utc": None,
        },
        "s2": datetime.datetime(2013, 7, 1, 20, 0, 0,
                                tzinfo=datetime.timezone.utc),
        "t": None,  # removable-if-empty
        "u": None,  # becomes {}
        "v": None,  # becomes []
        "x": None,  # hash key, None is preserved
    }
    expected_output = {
        "a": "something",
        "b": "someone",
        "c": "sharp-0.3.4.tgz",
        "d": "b04caf10e9535160d90e874b45aa426de762f19f",
        "e": "sharp.html/doc_002dS_005fISREG.html",
        "g": ["utf-8-to-decode", "another-one"],
        "i": {
            "e": "something"
        },
        "j": {
            "k": {
                "l": ["bytes thing", "another thingy", ""]
            }
        },
        "p": "foo",
        "q": {
            "extra-headers": [["a", "intact"]]
        },
        "w": {},
        "r": {
            "p": "also intact",
            "q": "bar"
        },
        "s": "1969-12-31T17:00:42-07:00",
        "s1": "1969-12-31T17:00:42-07:00",
        "s2": "2013-07-01T20:00:00+00:00",
        "u": {},
        "v": [],
        "x": None,
    }
    actual_output = converters.from_swh(
        some_input,
        hashess={"d", "o", "x"},
        bytess={"c", "e", "g", "l"},
        dates={"s", "s1", "s2"},
        blacklist={"h", "m", "n", "o"},
        removables_if_empty={"t"},
        empty_dict={"u"},
        empty_list={"v"},
        convert={"p", "q", "w"},
        convert_fn=converters.convert_revision_metadata,
    )
    assert expected_output == actual_output
def content_get_ctags(cls, cnt_id):
    """Yield ctags entries for the content with hex id *cnt_id*."""
    id_bytes = hash_to_bytes(cnt_id)
    for entry in cls.idx_storage.content_ctags_get([id_bytes]):
        yield converters.from_swh(entry, hashess={'id'})
def test_from_swh_none():
    """A None input is mapped to None."""
    result = converters.from_swh(None)
    assert result is None
def test_from_swh_empty():
    """An empty dict is mapped to an empty dict."""
    result = converters.from_swh({})
    assert result == {}
def test_from_swh(self):
    # Exercise every conversion knob of converters.from_swh in one call:
    # hash hex-encoding, bytes decoding, date normalization, blacklisting,
    # empty-value substitution, removable keys, and custom metadata
    # conversion via convert_fn.
    some_input = {
        'a': 'something',
        'b': 'someone',
        'c': b'sharp-0.3.4.tgz',
        'd': hashutil.hash_to_bytes('b04caf10e9535160d90e874b45aa426de762f19f'),
        'e': b'sharp.html/doc_002dS_005fISREG.html',
        'g': [b'utf-8-to-decode', b'another-one'],
        'h': 'something filtered',  # blacklisted below
        'i': {
            'e': b'something'
        },
        'j': {
            'k': {
                'l': [b'bytes thing', b'another thingy', b''],
                'n': 'dont care either'  # blacklisted below
            },
            'm': 'dont care'  # blacklisted below
        },
        'o': 'something',  # blacklisted below
        'p': b'foo',
        'q': {
            'extra-headers': [['a', b'intact']]
        },
        'w': None,
        'r': {
            'p': 'also intact',
            'q': 'bar'
        },
        # Three date shapes: flat timestamp, nested timestamp, datetime.
        's': {
            'timestamp': 42,
            'offset': -420,
            'negative_utc': None,
        },
        's1': {
            'timestamp': {
                'seconds': 42,
                'microseconds': 0
            },
            'offset': -420,
            'negative_utc': None,
        },
        's2': datetime.datetime(2013, 7, 1, 20, 0, 0,
                                tzinfo=datetime.timezone.utc),
        't': None,  # removable-if-empty
        'u': None,  # becomes {}
        'v': None,  # becomes []
        'x': None,  # hash key, None is preserved
    }
    expected_output = {
        'a': 'something',
        'b': 'someone',
        'c': 'sharp-0.3.4.tgz',
        'd': 'b04caf10e9535160d90e874b45aa426de762f19f',
        'e': 'sharp.html/doc_002dS_005fISREG.html',
        'g': ['utf-8-to-decode', 'another-one'],
        'i': {
            'e': 'something'
        },
        'j': {
            'k': {
                'l': ['bytes thing', 'another thingy', '']
            }
        },
        'p': 'foo',
        'q': {
            'extra-headers': [['a', 'intact']]
        },
        'w': {},
        'r': {
            'p': 'also intact',
            'q': 'bar'
        },
        's': '1969-12-31T17:00:42-07:00',
        's1': '1969-12-31T17:00:42-07:00',
        's2': '2013-07-01T20:00:00+00:00',
        'u': {},
        'v': [],
        'x': None,
    }
    actual_output = converters.from_swh(
        some_input,
        hashess={'d', 'o', 'x'},
        bytess={'c', 'e', 'g', 'l'},
        dates={'s', 's1', 's2'},
        blacklist={'h', 'm', 'n', 'o'},
        removables_if_empty={'t'},
        empty_dict={'u'},
        empty_list={'v'},
        convert={'p', 'q', 'w'},
        convert_fn=converters.convert_revision_metadata)
    self.assertEqual(expected_output, actual_output)
def test_from_swh_none(self):
    """A None input is mapped to None."""
    result = converters.from_swh(None)
    self.assertIsNone(result)
def test_from_swh_empty(self):
    """An empty dict is mapped to an empty dict."""
    result = converters.from_swh({})
    self.assertEqual(result, {})
def content_get_license(self, cnt_id):
    """Yield fossology license dicts for the content with hex id *cnt_id*."""
    id_bytes = hash_to_bytes(cnt_id)
    results = self.idx_storage.content_fossology_license_get([id_bytes])
    # 'entry' avoids shadowing the builtin name 'license'.
    for entry in results:
        yield converters.from_swh(entry.to_dict(), hashess={"id"})
def content_get_language(cls, cnt_id):
    """Return the detected language entry for the content with hex id *cnt_id*."""
    id_bytes = hash_to_bytes(cnt_id)
    lang = next(cls.idx_storage.content_language_get([id_bytes]))
    return converters.from_swh(lang, hashess={'id'})
def content_get_ctags(self, cnt_id):
    """Yield ctags entries for the content with hex id *cnt_id*."""
    id_bytes = hash_to_bytes(cnt_id)
    for entry in self.idx_storage.content_ctags_get([id_bytes]):
        yield converters.from_swh(entry, hashess={"id"})
def content_get_metadata(cls, cnt_id):
    """Return hash metadata (hex-encoded) for the content with hex id *cnt_id*."""
    id_bytes = hash_to_bytes(cnt_id)
    metadata = next(cls.storage.content_get_metadata([id_bytes]))
    hash_keys = {'sha1', 'sha1_git', 'sha256', 'blake2s256'}
    return converters.from_swh(metadata, hashess=hash_keys)