def test_contract(self, tmpdir, test_data, original_data):
    """Round-trip: expand() to disk, then recover the data via contract() and jsonref."""
    expected_wrapper = {"root": {"$ref": f"{tmpdir.basename}/root.json"}}

    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data, root_element="root", preserve=False
    )
    assert expanded == expected_wrapper

    # A JsonExpandOMatic pointed at the same path can load the expanded data
    # back from the filesystem. The result is the original data exactly; the
    # `root` wrapper is removed.
    loader = JsonExpandOMatic(path=tmpdir)
    contracted = loader.contract(root_element="root")
    assert contracted == original_data

    # jsonref.load() on root.json gives the same result.
    root_json = f"{tmpdir}/root.json"
    with open(root_json) as handle:
        via_jsonref = jsonref.load(handle, base_uri=f"file://{tmpdir}/")
        assert via_jsonref == original_data
def test_expand_preserve(self, tmpdir, test_data, original_data):
    """With preserve=True, expand() must leave the caller's data untouched."""
    expander = JsonExpandOMatic(path=tmpdir)
    expanded = expander.expand(test_data, root_element="root", preserve=True)

    # preserve=True prevents expand() from mangling test_data.
    assert test_data == original_data

    # What comes back is a new representation of the data: a single
    # reference to the root file.
    root_ref = {"$ref": f"{tmpdir.basename}/root.json"}
    assert expanded == {"root": root_ref}
def test_nested1_equivalency(self, tmpdir, test_data, original_data):
    """Check that a nested leaf-node expression matches its flattened form.

    In a nested leaf-node expression the dict key is treated as it
    would be in the non-nested case. The nested functionality takes the
    file written by that expression and feeds it back through
    JsonExpandOMatic with the dict's value as the new leaf_nodes
    parameter value.

    You can represent any of the nested expressions as non-nested but, IMO,
    nested expressions can be easier to follow in some cases.
    """
    import copy
    import glob

    def _expand_and_list(subdir, leaf_nodes):
        """Expand a fresh copy of the data under `subdir`; return its sorted relative listing."""
        base = f"{tmpdir}/{subdir}"
        JsonExpandOMatic(path=base).expand(
            # Each expansion gets its own copy so that the second run does not
            # start from data already rewritten by the first preserve=False run.
            copy.deepcopy(test_data),
            root_element="root",
            preserve=False,
            leaf_nodes=leaf_nodes,
        )
        # recursive=True only descends when the pattern contains "**";
        # without it glob returns just `base` and the comparison is vacuous.
        # Sorted because glob makes no ordering guarantee.
        return sorted(
            found.replace(base, "", 1)
            for found in glob.glob(f"{base}/**", recursive=True)
        )

    nested_files = _expand_and_list(
        "n",
        [{"/root/actors/.*": ["/[^/]+/movies/.*", "/[^/]+/filmography"]}],
    )
    flattened_files = _expand_and_list(
        "f",
        ["/root/actors/.*/movies/.*", "/root/actors/.*/filmography"],
    )

    # Guard against comparing two effectively empty listings.
    assert len(nested_files) > 1
    assert nested_files == flattened_files
def test_expand_mangle(self, tmpdir, test_data, original_data):
    """With preserve=False, expand() is allowed to rewrite the caller's data."""
    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data, root_element="root", preserve=False
    )

    # preserve=False allows mangling of test_data by expand().
    assert test_data != original_data

    # After expansion test_data holds the content of "{tmpdir.basename}/root.json".
    assert test_data == {"actors": {"$ref": "root/actors.json"}}
    root_json_text = tmpdir.join("root.json").read()
    assert test_data == json.loads(root_json_text)

    # expand() returns a new representation of `data`.
    expected_wrapper = {"root": {"$ref": f"{tmpdir.basename}/root.json"}}
    assert expanded == expected_wrapper
def _charlie_test(self, tmpdir, test_data, original_data, regex):
    """Expand with the single leaf-node `regex` and check the resulting layout.

    The expectation is that Charlie Chaplin is not recursed into while
    Dwayne Johnson is expanded as usual.
    """
    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data, root_element="root", preserve=False, leaf_nodes=[regex]
    )
    assert expanded == {"root": {"$ref": f"{tmpdir.basename}/root.json"}}

    self._assert_root(tmpdir)
    self._assert_actors(tmpdir)

    actors_dir = f"{tmpdir}/root/actors"

    # No recursion for Charlie Chaplin
    assert not os.path.exists(f"{actors_dir}/charlie_chaplin")

    # Typical recursion for Dwayne Johnson
    for relative in ("dwayne_johnson", "dwayne_johnson/movies"):
        assert os.path.exists(f"{actors_dir}/{relative}")
def test_nested1(self, tmpdir, test_data, original_data):
    """Test a simple leaf_nodes scenario."""
    nested_leaf_nodes = [
        {"/root/actors/.*": ["/[^/]+/movies/.*", "/[^/]+/filmography"]}
    ]
    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data,
        root_element="root",
        preserve=False,
        leaf_nodes=nested_leaf_nodes,
    )
    assert expanded == {"root": {"$ref": f"{tmpdir.basename}/root.json"}}

    # This is the same thing you would expect in the non-nested case.
    self._assert_root(tmpdir)
    self._assert_actors(tmpdir)

    # Unlike the non-nested case with regex "/root/actors/.*", the nested case
    # will have a directory per actor.
    # See the discussion in test_nested1_equivalency on why this is.
    self._assert_actor_dirs(tmpdir)

    actors_dir = f"{tmpdir}/root/actors"

    # The nested "/[^/]+/movies/.*" gives us a file-per-movie
    self._assert_movies(tmpdir)
    per_movie_files = (
        "charlie_chaplin/movies/modern_times.json",
        "dwayne_johnson/movies/0.json",
    )
    for relative in per_movie_files:
        assert os.path.exists(f"{actors_dir}/{relative}")

    # It is also worth noting that other dicts not explicitly mentioned in the
    # list of nested expressions are given no special treatment.
    normally_expanded = (
        "charlie_chaplin/spouses.json",
        "charlie_chaplin/spouses",
        "charlie_chaplin/spouses/oona_oneill.json",
        "charlie_chaplin/spouses/oona_oneill",
        "charlie_chaplin/spouses/oona_oneill/children.json",
        "dwayne_johnson/hobbies.json",
    )
    for relative in normally_expanded:
        assert os.path.exists(f"{actors_dir}/{relative}")
    assert not os.path.exists(f"{actors_dir}/dwayne_johnson/hobbies")
def test_nested2(self, tmpdir, test_data, original_data):
    """Test a targeted leaf_node example.

    The expressions listed in the dict value are relative to the
    element matched by the dict key expression.
    Our previous examples used a regex to ignore that but we can do
    interesting things with it if we want.

    In this example we will collapse all of Dwayne Johnson's movies and
    Charlie Chaplin's spouses.
    """
    targeted_leaf_nodes = [
        {"/root/actors/.*": ["/dwayne_johnson/movies", "/charlie_chaplin/spouses"]}
    ]
    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data,
        root_element="root",
        preserve=False,
        leaf_nodes=targeted_leaf_nodes,
    )
    assert expanded == {"root": {"$ref": f"{tmpdir.basename}/root.json"}}

    # This is the same thing you would expect in the non-nested case.
    self._assert_root(tmpdir)
    self._assert_actors(tmpdir)

    # Unlike the non-nested case with regex "/root/actors/.*", the nested case
    # will have a directory per actor.
    # See the discussion in test_nested1_equivalency on why this is.
    self._assert_actor_dirs(tmpdir)

    # (path relative to the actors directory, whether it should exist)
    expectations = (
        ("charlie_chaplin/movies.json", True),
        ("charlie_chaplin/movies", True),
        ("charlie_chaplin/spouses.json", True),
        ("charlie_chaplin/spouses", False),
        ("dwayne_johnson/movies.json", True),
        ("dwayne_johnson/movies", False),
    )
    actors_dir = f"{tmpdir}/root/actors"
    for relative, should_exist in expectations:
        assert os.path.exists(f"{actors_dir}/{relative}") is should_exist
def test_file_exixtence(self, tmpdir, test_data, original_data):
    """Check which files and directories a default expand() leaves on disk."""
    expanded = JsonExpandOMatic(path=tmpdir).expand(test_data, root_element="root")
    assert expanded == {"root": {"$ref": f"{tmpdir.basename}/root.json"}}

    # (path relative to tmpdir, whether it should exist), checked in order.
    expectations = (
        # This is the wrapper around the original data
        ("root.json", True),
        ("root", True),
        # Now we look at the original data's files
        ("root/actors.json", True),
        ("root/actors", True),
        # A file and directory for each actor
        ("root/actors/charlie_chaplin.json", True),
        ("root/actors/charlie_chaplin", True),
        ("root/actors/dwayne_johnson.json", True),
        ("root/actors/dwayne_johnson", True),
        # A file and directory for each actor's movies
        ("root/actors/charlie_chaplin/movies.json", True),
        ("root/actors/charlie_chaplin/movies", True),
        ("root/actors/dwayne_johnson/movies.json", True),
        ("root/actors/dwayne_johnson/movies", True),
        # A file and directory for Charlie Chaplin's filmography.
        ("root/actors/charlie_chaplin/filmography.json", True),
        ("root/actors/charlie_chaplin/filmography", True),
        # I didn't define filmography test data for Dwayne Johnson.
        ("root/actors/dwayne_johnson/filmography.json", False),
        ("root/actors/dwayne_johnson/filmography", False),
        # But I did define an empty hobbies directory for Dwayne Johnson so we
        # will have a file but not a directory (since there was nothing to
        # recurse into).
        ("root/actors/dwayne_johnson/hobbies.json", True),
        ("root/actors/dwayne_johnson/hobbies", False),
    )
    for relative, should_exist in expectations:
        assert os.path.exists(f"{tmpdir}/{relative}") is should_exist
def test_jsonref(self, tmpdir, test_data, original_data):
    """Load the expanded representation back with jsonref and inspect root.json."""
    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data, root_element="root", preserve=False
    )

    # We can use jsonref to load this new representation.
    # Note that loading in this way exposes the wrapping element `root`.
    # `tmpdir` must be a fully qualified path.
    serialized = json.dumps(expanded)
    loaded = jsonref.loads(serialized, base_uri=f"file://{tmpdir.dirname}/")
    assert loaded == {"root": original_data}
    assert loaded["root"] == original_data

    # A raw load of the wrapping document has references to the sub-elements.
    # This assertion assumes that the original data's elements are all dicts.
    expected_refs = {key: {"$ref": f"root/{key}.json"} for key in original_data}
    with open(f"{tmpdir}/root.json") as handle:
        assert json.load(handle) == expected_refs
def _actors_test(self, tmpdir, test_data, original_data, regex):
    """Expand with the single leaf-node `regex` and expect per-actor files only."""
    expanded = JsonExpandOMatic(path=tmpdir).expand(
        test_data, root_element="root", preserve=False, leaf_nodes=[regex]
    )

    # preserve=False allows mangling of test_data by expand().
    assert test_data != original_data

    # expand() returns a new representation of `data`.
    assert expanded == {"root": {"$ref": f"{tmpdir.basename}/root.json"}}

    def _negate(value):
        # Passed as `f` to the helpers below; presumably applied to each
        # existence check so that they assert absence — confirm in the helpers.
        return not value

    # We expect to have the root and actors elements fully represented.
    # Our leaf-node regex (/root/actors/.*) tells expand to create a
    # per-actor file but not the per-actor directory or anything below that.
    self._assert_root(tmpdir)
    self._assert_actors(tmpdir)
    self._assert_actor_dirs(tmpdir, f=_negate)
    self._assert_movies(tmpdir, f=_negate)
def xtest_enhanced_nested1(self, tmpdir, test_data, original_data):
    """Enhanced nested #1...

    But what if we want a single json file per actor to include everything
    about that actor _except_ movies and a separate movies.json for each
    actor with all of that actor's movie data?

    You might initially have thought that we would do:
        leaf_nodes=[{"/root/actors/.*": ["/[^/]+/movies/.*"]}]
    But we have already established that is equivalent to:
        leaf_nodes=["/root/actors/.*/movies/.*"]

    We will stop recursion at each movie but everything else will be done
    as normal (i.e. - file per dict/list).

    Or maybe you would consider:
        leaf_nodes=["/root/actors/.*", "/root/actors/.*/movies/.*"]
    or:
        leaf_nodes=["/root/actors/.*/movies/.*", "/root/actors/.*"]
    But that won't work because "/root/actors/.*" will stop recursion before
    paths matching "/root/actors/.*/movies/.*" are seen.
    Remember: All regexes are checked for each path & the first one matching
    stops recursion.

    This is what we will do:
        [
            {
                "/root/actors/.*": [
                    "/[^/]+/movies/.*",
                    "<A:/.*"
                ]
            }
        ]

    The key of the nested expression ("/root/actors/.*") tells expand to
    start a new JsonExpandOMatic recursion and save the resulting "mangled"
    data as {actor}.json when that recursion completes.

    That's normal nested behavior and during normal nested behavior of
    "/[^/]+/movies/.*" expand would create {movie}.json but expand any other
    dict/list found for the actor.

    The '<A:' prefix, however, alters the behavior for those paths that are
    matched by the expression "/.*". This expression will be applied after
    (A) recursion and the result included (<) in their parent.

    NOTE: the leading "x" in the method name keeps pytest from collecting
    this test.
    """
    JsonExpandOMatic(path=tmpdir).expand(
        test_data,
        root_element="root",
        preserve=False,
        leaf_nodes=[{"/root/actors/.*": ["/[^/]+/movies/.*", "<A:/.*"]}],
    )

    # This is the same thing you would expect in the non-nested case.
    self._assert_root(tmpdir)
    self._assert_actors(tmpdir)

    # Unlike the non-nested case with regex "/root/actors/.*", the nested case
    # will have a directory per actor.
    # See the discussion in test_nested1_equivalency on why this is.
    self._assert_actor_dirs(tmpdir)

    # The nested "/[^/]+/movies/.*" gives us a file-per-movie
    self._assert_movies(tmpdir)
    assert os.path.exists(
        f"{tmpdir}/root/actors/charlie_chaplin/movies/modern_times.json")
    assert os.path.exists(
        f"{tmpdir}/root/actors/dwayne_johnson/movies/0.json")

    # TODO: Explain these assertions
    assert not os.path.exists(
        f"{tmpdir}/root/actors/charlie_chaplin/spouses.json")
    assert not os.path.exists(
        f"{tmpdir}/root/actors/charlie_chaplin/spouses")
    assert not os.path.exists(
        f"{tmpdir}/root/actors/charlie_chaplin/spouses/lita_grey.json")
    assert not os.path.exists(
        f"{tmpdir}/root/actors/charlie_chaplin/spouses/lita_grey")
    assert not os.path.exists(
        f"{tmpdir}/root/actors/charlie_chaplin/spouses/lita_grey/children.json"
    )

    assert not os.path.exists(
        f"{tmpdir}/root/actors/dwayne_johnson/hobbies.json")
    assert not os.path.exists(
        f"{tmpdir}/root/actors/dwayne_johnson/hobbies")

    # The spouse data is expected inline in the per-actor file instead.
    with open(f"{tmpdir}/root/actors/charlie_chaplin.json") as f:
        data = json.load(f)
        assert data.get("spouses", None)
        # Index the dict itself: `data.get[...]` subscripts the bound method
        # and raises TypeError before anything is checked.
        assert data["spouses"].get("lita_grey", None)
        assert data["spouses"]["lita_grey"].get("children", None)
def test_expecations(self):
    """Just one big test.

    Walks through expanding data that contains a ``$ref``, contracting it
    again, resolving the reference with jsonrefkeeper, and then comparing
    how json, jsonref and jsonrefkeeper serialize the result with and
    without indentation.
    """
    # NOTE(review): "funk" is a relative path, so the expanded files are
    # written relative to the current working directory.

    # Save some data that contains a $ref
    data = JsonExpandOMatic(path="funk").expand(
        data=json.loads(TestJsonRefKeeper._raw_data1))
    assert data == {"root": {"$ref": "funk/root.json"}}

    # Load the previously expanded data
    data = JsonExpandOMatic(path="funk").contract(root_element="root")
    assert data == {
        "foo": {"bar": {"baz": 1234}},
        "stuff": {"$ref": "#/foo/bar"},
    }

    data["foo"]["bar"]["baz"] = -1  # Make a change
    assert data == {
        "foo": {"bar": {"baz": -1}},
        "stuff": {"$ref": "#/foo/bar"},
    }

    # Use jsonrefkeeper to resolve the $ref entry in data['stuff']
    data = jsonrefkeeper.parse(data)

    # Both jsonref and jsonrefkeeper resolve the $ref such that
    # data['stuff'] and data['foo']['bar'] point to the same object: {'baz': -1}
    # Changing either data['stuff']['baz'] or data['foo']['bar']['baz']
    # will have the same result.
    assert data == {"foo": {"bar": {"baz": -1}}, "stuff": {"baz": -1}}

    data["foo"]["bar"]["baz"] = -2
    assert data == {"foo": {"bar": {"baz": -2}}, "stuff": {"baz": -2}}

    # Expected serializations. indent=2 output uses two spaces per nesting
    # level, so the pretty-printed literals must be spaced accordingly.
    compact_with_ref = (
        '{"foo": {"bar": {"baz": -2}}, "stuff": {"$ref": "#/foo/bar"}}')
    pretty_resolved = (
        '{\n  "foo": {\n    "bar": {\n      "baz": -2\n    }\n  },\n'
        '  "stuff": {\n    "baz": -2\n  }\n}')
    pretty_with_ref = (
        '{\n  "foo": {\n    "bar": {\n      "baz": -2\n    }\n  },\n'
        '  "stuff": {\n    "$ref": "#/foo/bar"\n  }\n}')

    # Original author's note: json.dumps(data, indent=None) fails with
    #   TypeError: Object of type 'dict' is not JSON serializable
    assert json.dumps(data, indent=2) == pretty_resolved

    # Using jsonref to dump the data with no indent will preserve the original $ref.
    assert jsonref.dumps(data, indent=None) == compact_with_ref

    # However providing an indent will cause the underlying proxy object to
    # be resolved and we lose the $ref.
    assert jsonref.dumps(data, indent=2) == pretty_resolved

    # This is equivalent to jsonref.dumps(data, indent=None) & probably a
    # little slower for large data structures.
    assert jsonrefkeeper.dumps(data, indent=None) == compact_with_ref

    # This is what jsonrefkeeper was created for. We don't have to choose
    # between keeping the refs and having nicely indented output.
    assert jsonrefkeeper.dumps(data, indent=2) == pretty_with_ref