def test_04_same_key_different_types_with_list_and_dict(self):
    """Key "p" holds an int in one list element and a list of ints in another.

    The inspected "type" map must not depend on the order in which the two
    documents are fed, and "p" (found under the list of "d") must be
    reported as both ``int`` and ``list``.
    """
    scalar_only = {"id": "124", "d": [{"p": 123}, {"p": 456}]}
    scalar_and_list = {"id": "124", "d": [{"p": 123}, {"p": [456, 789]}]}

    forward = inspect_docs([scalar_only, scalar_and_list], mode="type")["type"]
    backward = inspect_docs([scalar_and_list, scalar_only], mode="type")["type"]

    # feeding order must not matter
    assert forward == backward
    # "p" is an integer or a list of integers
    p_types = forward["d"][list]["p"]
    assert p_types.keys() == {list, int}
def test_13_merge_with_splitstr(self):
    """Splittable strings win over plain strings when mappings are merged.

    Each scenario mixes a value containing a space ("a 0") with one that
    does not ("a0") and expects the resulting mapping for "f" to be "text".
    The last scenario only checks that no "errors" key shows up in the mapping.
    """
    text_mapping = {"type": "text"}

    # merge_scalar_list when str split involved (?) in list of list
    # NOTE: this fixture is built but never passed to inspect_docs
    doc = {"_id": "1", "f": ["b", ["a 0", "b 1"]]}

    # merge list of str and splitstr
    list_split = {"_id": "1", "f": ["a 0"]}
    list_plain = {"_id": "1", "f": ["a0"]}
    result = inspect_docs([list_split, list_plain], mode="mapping")
    assert result["mapping"]["f"] == text_mapping  # splitstr > str

    # same when strings (not list)
    scalar_split = {"_id": "1", "f": "a 0"}
    scalar_plain = {"_id": "1", "f": "a0"}
    result = inspect_docs([scalar_split, scalar_plain], mode="mapping")
    assert result["mapping"]["f"] == text_mapping  # splitstr > str

    # same when strings and list of strings
    mixed_docs = [
        {"_id": "1", "f": ["a 0"]},
        {"_id": "1", "f": ["a0"]},
        {"_id": "1", "f": "a 0"},
        {"_id": "1", "f": "a0"},
    ]
    result = inspect_docs(mixed_docs, mode="mapping")
    assert result["mapping"]["f"] == text_mapping  # splitstr > str

    # plain string first, splittable string second
    reversed_docs = [
        {"_id": "1", "f": ["a0"]},
        {"_id": "1", "f": ["a 0"]},
    ]
    result = inspect_docs(reversed_docs, mode="mapping")
    assert result["mapping"]["f"] == text_mapping  # splitstr > str

    # splitstr > str whatever the order they appear while inspected
    # (here: splitstr,str,str, in list,list,dict)
    d1 = {
        '_id': 'a',
        'r': {
            'k': [
                {'id': 'one', 'rel': 'is'},
                {'id': 'two', 'rel': 'simil to'},
            ],
        },
    }
    d2 = {
        '_id': 'b',
        'r': {
            'k': [
                {'id': 'three', 'rel': 'is'},
                {'id': 'four', 'rel': 'is'},
            ],
        },
    }
    d3 = {'_id': 'c', 'r': {'k': {'id': 'five', 'rel': 'is'}}}
    result = inspect_docs([d1, d2, d3], mode="mapping")
    assert "errors" not in result["mapping"]
def test_16_mapping_with_nan_inf(self):
    """NaN/Inf values must be reported as errors when mode is "mapping"."""
    docs = [
        {"_id": "a", "v1": "oula", "v2": math.nan},
        {"_id": "b", "v1": "arf", "v2": 13.4},
        {"_id": "c", "v1": "mak", "v2": math.nan, "v3": math.inf},
    ]
    result = inspect_docs(docs, mode="mapping")
    # NaN/Inf not allowed (if mode is mapping)
    assert "errors" in result["mapping"]
def test_11_stats_with_same_docs(self):
    """Run a "stats" inspection over two documents, each fed twice.

    NOTE(review): the code visible here only computes the stats map and
    makes no assertion on it -- confirm whether checks are missing.
    """
    single_bp = {
        'go': {
            'BP': {
                'term': 'skeletal muscle fiber development',
                'qualifier': 'NOT',
                'pubmed': 1234,
                'id': 'GO:0048741',
                'evidence': 'IBA',
            },
        },
        '_id': '101362076',
    }
    listed_bp = {
        'go': {
            'BP': [
                {
                    'term': 'ubiquitin-dependent protein catabolic process',
                    'pubmed': 5678,
                    'id': 'GO:0006511',
                    'evidence': 'IEA',
                },
                {
                    'term': 'protein deubiquitination',
                    'pubmed': [2222, 3333],
                    'id': 'GO:0016579',
                    'evidence': 'IEA',
                },
            ],
        },
        '_id': '101241878',
    }
    docs = [single_bp, single_bp, listed_bp, listed_bp]
    m = inspect_docs(docs, mode="stats")["stats"]
def test_09_mapping_scalar_or_list(self):
    """"bla" appears both as a scalar and inside a list: check how it merges.

    With ``mode="mapping"`` and ``pre_mapping=True`` the scalar occurrences
    of "bla" are expected to be folded under the ``list`` entry of "vals"
    (so "bla" must not remain directly under "vals"), keeping every type
    that was seen.
    """
    # ok, "bla" is either a scalar or in a list, test merge
    md1 = {
        "_id": "124",
        'vals': [
            {"oula": "this is great"},
            {"bla": "rs24543", "arf": "ENS355432"},
        ],
    }
    md2 = {
        "_id": "5678",
        'vals': {"bla": "I am splitable in a scalar", "void": 654},
    }
    # bla is a different type here
    md3 = {"_id": "5678", 'vals': {"bla": 1234}}

    # "mapping" implies merge=True
    m = inspect_docs([md1, md2], mode="mapping", pre_mapping=True)["mapping"]
    assert "bla" not in m["vals"]
    # splittable str from md2 merge to list
    assert m["vals"][list]["bla"] == {splitstr: {}}, m["vals"][list]["bla"]

    m = inspect_docs([md1, md3], mode="mapping", pre_mapping=True)["mapping"]
    assert "bla" not in m["vals"]
    # keep as both types
    assert m["vals"][list]["bla"] == {int: {}, str: {}}, m["vals"][list]["bla"]

    m = inspect_docs([md1, md2, md3], mode="mapping", pre_mapping=True)["mapping"]
    assert "bla" not in m["vals"]
    # splittable kept + merge int to keep both types
    assert m["vals"][list]["bla"] == {int: {}, splitstr: {}}, m["vals"][list]["bla"]
def test_01_not_order_specific(self):
    """The inspected "type" map must be the same whatever the document order."""
    shared_range = {"start": 134, "end": 5543}
    d1 = {
        "id": "124",
        'lofd': [{"val": 34.3}, {"ul": "bla"}],
        "d": dict(shared_range),
    }
    d2 = {
        "id": "5",
        'lofd': {"oula": "mak", "val": 34},
        "d": dict(shared_range),
    }
    # built but not inspected by this test
    d3 = {
        "id": "890",
        'lofd': [{"val": 34}],
        "d": dict(shared_range),
    }

    # merge either way must give the same result
    forward = inspect_docs([d1, d2])["type"]
    backward = inspect_docs([d2, d1])["type"]

    # if unordered list, then:
    failure_report = "\nm21=%s\n!=\nm12=%s" % (pformat(backward), pformat(forward))
    assert backward == forward, failure_report
def inspect_data(backend_provider, ids, mode, pre_mapping, **kwargs):
    """Inspect the documents of a backend's target collection matching *ids*.

    The backend is created from *backend_provider*; documents whose ``_id``
    is in *ids* are fetched through ``doc_feeder`` (step size ``len(ids)``,
    ``inbatch=False``) and handed to ``btinspect.inspect_docs`` with
    ``metadata=False``. Extra keyword arguments are forwarded unchanged.

    Returns whatever ``btinspect.inspect_docs`` returns.
    """
    collection = create_backend(backend_provider).target_collection
    id_filter = {'_id': {'$in': ids}}
    documents = doc_feeder(
        collection,
        step=len(ids),
        inbatch=False,
        query=id_filter,
    )
    return btinspect.inspect_docs(
        documents,
        mode=mode,
        pre_mapping=pre_mapping,
        metadata=False,
        **kwargs
    )
def test_08_mapping_with_list_of_list_of_integer(self):
    """A list of lists of integers must map to a plain "integer" field."""
    # mapping with type of type
    nested_gene_doc = {
        "_id": "123",
        "homologene": {
            "id": "bla",
            "gene": [[123, 456], [789, 102]],
        },
    }
    expected = {
        'homologene': {
            'properties': {
                'gene': {'type': 'integer'},
                'id': {
                    'normalizer': 'keyword_lowercase_normalizer',
                    'type': 'keyword',
                },
            },
        },
    }
    mapping = inspect_docs([nested_gene_doc], mode="mapping")["mapping"]
    assert mapping == expected, "mapping %s" % mapping
def test_05_stats(test):
    """Check the ``_stats`` (count / max / min) gathered in "stats" mode.

    The same map is fed repeatedly through ``inspect`` with two documents,
    checking after each pass how the counters evolve; a final run through
    ``inspect_docs`` over ten copies of one document checks that counts are 10.
    """

    def stats_at(mapt, *path):
        # walk the nested map along `path` and return its "_stats" entry
        node = mapt
        for key in path:
            node = node[key]
        return node["_stats"]

    def check(mapt, path, count, highest, lowest):
        # same three assertions, in the same order, for one "_stats" entry
        found = stats_at(mapt, *path)
        assert found["_count"] == count
        assert found["_max"] == highest
        assert found["_min"] == lowest

    id_str = ("id", str)
    lofd_list = ("lofd", list)
    list_val_float = ("lofd", list, "val", float)
    scalar_val_int = ("lofd", "val", int)

    d1 = {
        "id": "124",
        'lofd': [{"val": 34.3}, {"ul": "bla"}],
        "d": {"start": 134, "end": 5543},
    }
    d2 = {
        "id": "5",
        'lofd': {"oula": "mak", "val": 34},
        "d": {"start": 134, "end": 5543},
    }

    # stats
    m = {}
    inspect(d1, mapt=m, mode="stats")
    # some simple check
    assert set(m["id"].keys()) == {str}
    check(m, id_str, 1, 3, 3)
    assert m["lofd"].keys() == {list}
    # list's stats
    check(m, lofd_list, 1, 2, 2)
    # one list's elem stats
    check(m, list_val_float, 1, 34.3, 34.3)

    # again (should see increment in counts for instance)
    inspect(d1, mapt=m, mode="stats")
    check(m, id_str, 2, 3, 3)
    check(m, lofd_list, 2, 2, 2)
    check(m, list_val_float, 2, 34.3, 34.3)

    # mix with d2
    inspect(d2, mapt=m, mode="stats")
    check(m, id_str, 3, 3, 1)  # new min
    # count not incremented as in d2 it's not a list
    check(m, lofd_list, 2, 2, 2)
    # now float & int
    check(m, list_val_float, 2, 34.3, 34.3)
    # val{int} wasn't merged
    check(m, scalar_val_int, 1, 34, 34)

    # d2 again
    inspect(d2, mapt=m, mode="stats")
    check(m, id_str, 4, 3, 1)
    check(m, lofd_list, 2, 2, 2)
    check(m, list_val_float, 2, 34.3, 34.3)
    check(m, scalar_val_int, 2, 34, 34)

    # all counts should be 10
    m = inspect_docs([d1] * 10, mode="stats")["stats"]
    assert stats_at(m, "d", "end", int)["_count"] == 10
    assert stats_at(m, "d", "start", int)["_count"] == 10
    assert stats_at(m, *id_str)["_count"] == 10
    assert stats_at(m, *lofd_list)["_count"] == 10
    assert stats_at(m, "lofd", list, "ul", str)["_count"] == 10
    assert stats_at(m, *list_val_float)["_count"] == 10
def test_03_same_key_different_types_with_list(self):
    """A "val" inside a top-level list is kept apart from a top-level "val"."""
    # even if val is in a list
    plain_doc = {"val": 34}
    listed_doc = [{"val": 1.2}]
    type_map = inspect_docs([plain_doc, listed_doc])["type"]
    # list and val not merged
    top_level_keys = set(type_map.keys())
    assert top_level_keys == {'val', list}
def test_02_same_key_different_types(self):
    """The same key seen as int and as float must report both types."""
    # val can be an int and a float
    docs = [{"val": 34}, {"val": 1.2}]
    type_map = inspect_docs(docs)["type"]
    # set: types can be in any order
    seen_types = set(type_map["val"])
    assert seen_types == {int, float}
def test_15_mapping_with_int_float(self):
    """Int and float values for one field are allowed; the mapping keeps float."""
    # allow int & float in mapping (keep float)
    int_doc = {"_id": "a", "f": [1, 2]}
    float_doc = {"_id": "a", "f": [1.1, 2.2]}
    result = inspect_docs([int_doc, float_doc], mode="mapping")
    field_mapping = result["mapping"]["f"]
    assert field_mapping["type"] == "float"
def test():
    """Self-check for ``generate_json_schema``.

    For a series of small documents, the "type" inspection map is turned into
    a JSON schema and compared (order-insensitively, through jsondiff) with
    the expected schema. Three stored maps (cgi, clinvar, mygene) are then
    loaded from ``biothings/tests`` and compared with their stored schemas.

    Side effect: sets ``biothings.utils.jsondiff.UNORDERED_LIST`` to True.
    The JSON fixtures are opened with paths relative to the current working
    directory. Raises AssertionError on the first mismatch; prints
    "All test OK" when everything passes.
    """
    # TODO: Move these test to tests folder, or maybe already moved over? Chunlei
    from biothings.utils.inspect import typify_inspect_doc, inspect_docs
    # can't use assert directly, as we can't ensure the order of types (for instance)
    import biothings.utils.jsondiff
    biothings.utils.jsondiff.UNORDERED_LIST = True
    jsondiff = biothings.utils.jsondiff.make

    def check_schema(docs, expected):
        # inspect docs -> generate schema -> must match `expected` exactly
        inspected = inspect_docs(docs, mode="type")["type"]
        generated = generate_json_schema(inspected)
        assert jsondiff(generated, expected) == [], "%s !=\n%s" % (generated, expected)

    def load_json(path):
        # context manager so the file handle is closed deterministically
        with open(path) as handle:
            return json.load(handle)

    # object
    td1 = {"i": {"a": 456}}
    s1 = {
        'properties': {
            'i': {
                'properties': {'a': {'type': 'integer'}},
                'type': 'object'
            }
        },
        'type': 'object'
    }
    check_schema([td1], s1)

    # array of scalars
    td5 = {"i": [1, 2, 3]}
    s5 = {
        'properties': {
            'i': {
                'items': {'type': 'integer'},
                'type': 'array'
            }
        },
        'type': 'object'
    }
    check_schema([td5], s5)

    # array of object
    td2 = {"i": [{"a": 123}]}
    s2 = {
        'properties': {
            'i': {
                'items': {
                    'properties': {'a': {'type': 'integer'}},
                    'type': 'object'
                },
                'type': 'array'
            }
        },
        'type': 'object'
    }
    check_schema([td2], s2)

    # object in object
    td3 = {"i": {"a": {"b": 123}}}
    s3 = {
        'properties': {
            'i': {
                'properties': {
                    'a': {
                        'properties': {'b': {'type': 'integer'}},
                        'type': 'object'
                    }
                },
                'type': 'object'
            }
        },
        'type': 'object'
    }
    check_schema([td3], s3)

    # mixed int/str in array
    td6 = {"i": [1, 2, "a"]}
    s6 = {
        'properties': {
            'i': {
                'items': {'type': ['integer', 'string']},
                'type': 'array'
            }
        },
        'type': 'object'
    }
    check_schema([td6], s6)

    # mixed array/object
    td1 = {"i": {"a": 456}}
    td2 = {"i": [{"a": 123}]}
    s12 = {
        'properties': {
            'i': {
                'items': {
                    'properties': {'a': {'type': 'integer'}},
                    'type': 'object'
                },
                'properties': {'a': {'type': 'integer'}},
                'type': ['array', 'object']
            }
        },
        'type': 'object'
    }
    check_schema([td1, td2], s12)

    # list of integer (list of things which are not objects)
    td4 = {'a': [5, 5, 3]}
    s4 = {
        'properties': {
            'a': {
                'items': {'type': 'integer'},
                'type': 'array'
            }
        },
        'type': 'object'
    }
    check_schema([td4], s4)

    # object with several scalar fields
    td7 = {"i": {"a": 1, "b": 2}}
    s7 = {
        'type': 'object',
        'properties': {
            'i': {
                'type': 'object',
                'properties': {
                    'a': {'type': 'integer'},
                    'b': {'type': 'integer'}
                }
            }
        }
    }
    check_schema([td7], s7)

    # int or list of int (not a list of dict, testing scalar there)
    td81 = {"i": 1}
    td82 = {"i": [2, 3]}
    s812 = {
        'properties': {
            'i': {
                'items': {'type': 'integer'},
                'type': ['array', 'integer']
            }
        },
        'type': 'object'
    }
    check_schema([td81, td82], s812)

    # run from app folder, biothings as symlink
    # small real-life collections
    for name in ("cgi", "clinvar", "mygene"):
        expected = load_json("biothings/tests/%s_schema.json" % name)
        inspected = typify_inspect_doc(load_json("biothings/tests/%s_map.json" % name))
        generated = generate_json_schema(inspected)
        assert jsondiff(expected, generated) == [], "schema mismatch for %s" % name

    print("All test OK")