Ejemplo n.º 1
0
 def test_04_same_key_different_types_with_list_and_dict(self):
     """Inspect "p" when it is an integer in one place and a list elsewhere.

     Both documents are inspected in "type" mode, in both orders, and the
     resulting type maps must be equal.
     """
     # another example with a mix of dict and list (see "p")
     scalar_only = {"id": "124", "d": [{"p": 123}, {"p": 456}]}
     scalar_and_list = {"id": "124", "d": [{"p": 123}, {"p": [456, 789]}]}
     forward = inspect_docs([scalar_only, scalar_and_list],
                            mode="type")["type"]
     backward = inspect_docs([scalar_and_list, scalar_only],
                             mode="type")["type"]
     # the order in which documents are inspected must not matter
     assert forward == backward
     # "p" is an integer or a list of integers
     p_types = forward["d"][list]["p"]
     assert p_types.keys() == {list, int}
Ejemplo n.º 2
0
 def test_13_merge_with_splitstr(self):
     """Check that splittable strings win over plain strings in mappings.

     Each batch below mixes a value containing a space ("a 0") with one
     that does not ("a0"), as scalars and/or inside lists; the generated
     mapping for "f" is expected to be ``{"type": "text"}`` every time
     (splitstr > str). The last part checks that a nested list-of-dicts /
     dict mix produces no "errors" entry in the mapping.
     """
     expected = {"type": "text"}  # splitstr > str
     batches = [
         # merge list of str and list of splitstr
         [{"_id": "1", "f": ["a 0"]}, {"_id": "1", "f": ["a0"]}],
         # same when strings (not list)
         [{"_id": "1", "f": "a 0"}, {"_id": "1", "f": "a0"}],
         # same when strings and list of strings
         [
             {"_id": "1", "f": ["a 0"]},
             {"_id": "1", "f": ["a0"]},
             {"_id": "1", "f": "a 0"},
             {"_id": "1", "f": "a0"},
         ],
         # lists again, plain str inspected before the splittable one
         [{"_id": "1", "f": ["a0"]}, {"_id": "1", "f": ["a 0"]}],
     ]
     for docs in batches:
         m = inspect_docs(docs, mode="mapping")
         assert m["mapping"]["f"] == expected, \
             "docs=%s produced %s" % (docs, m["mapping"]["f"])

     # splitstr > str whatever the order they appear while inspected
     # (here: splitstr,str,str, in list,list,dict)
     d1 = {
         '_id': 'a',
         'r': {
             'k': [
                 {'id': 'one', 'rel': 'is'},
                 {'id': 'two', 'rel': 'simil to'},
             ]
         }
     }
     d2 = {
         '_id': 'b',
         'r': {
             'k': [
                 {'id': 'three', 'rel': 'is'},
                 {'id': 'four', 'rel': 'is'},
             ]
         }
     }
     d3 = {'_id': 'c', 'r': {'k': {'id': 'five', 'rel': 'is'}}}
     m = inspect_docs([d1, d2, d3], mode="mapping")
     assert "errors" not in m["mapping"], m["mapping"]
Ejemplo n.º 3
0
 def test_16_mapping_with_nan_inf(self):
     """Expect an "errors" entry when NaN/Inf values are mapped.

     NaN/Inf are not allowed when the inspection mode is "mapping".
     """
     docs = [
         {"_id": "a", "v1": "oula", "v2": math.nan},
         {"_id": "b", "v1": "arf", "v2": 13.4},
         {"_id": "c", "v1": "mak", "v2": math.nan, "v3": math.inf},
     ]
     result = inspect_docs(docs, mode="mapping")
     assert "errors" in result["mapping"]
Ejemplo n.º 4
0
 def test_11_stats_with_same_docs(self):
     """Run "stats" inspection on a batch where each document appears twice.

     No assertion follows the call in this block: as written it only
     verifies that the inspection runs and returns a "stats" entry.
     """
     # "BP" holds a single annotation (a dict) in this document ...
     single_annotation = {
         'go': {
             'BP': {
                 'term': 'skeletal muscle fiber development',
                 'qualifier': 'NOT',
                 'pubmed': 1234,
                 'id': 'GO:0048741',
                 'evidence': 'IBA',
             }
         },
         '_id': '101362076',
     }
     # ... and a list of annotations here, one with a list of pubmed ids
     annotation_list = {
         'go': {
             'BP': [
                 {
                     'term': 'ubiquitin-dependent protein catabolic process',
                     'pubmed': 5678,
                     'id': 'GO:0006511',
                     'evidence': 'IEA',
                 },
                 {
                     'term': 'protein deubiquitination',
                     'pubmed': [2222, 3333],
                     'id': 'GO:0016579',
                     'evidence': 'IEA',
                 },
             ]
         },
         '_id': '101241878',
     }
     batch = [single_annotation, single_annotation,
              annotation_list, annotation_list]
     m = inspect_docs(batch, mode="stats")["stats"]
Ejemplo n.º 5
0
 def test_09_mapping_scalar_or_list(self):
     """Check how "bla" merges when it is a scalar or sits inside a list.

     "bla" appears inside a list of dicts in one document and directly in
     a dict in the others; the pre-mapping result is expected to keep it
     only under the ``list`` key of "vals".
     """
     def pre_mapping_of(docs):
         # "mapping" implies merge=True
         return inspect_docs(docs, mode="mapping",
                             pre_mapping=True)["mapping"]

     # ok, "bla" is either a scalar or in a list, test merge
     in_list = {
         "_id": "124",
         'vals': [
             {"oula": "this is great"},
             {"bla": "rs24543", "arf": "ENS355432"},
         ],
     }
     splittable = {
         "_id": "5678",
         'vals': {"bla": "I am splitable in a scalar", "void": 654},
     }
     # bla is a different type here
     as_int = {"_id": "5678", 'vals': {"bla": 1234}}

     # splittable str from the scalar document is merged into the list
     m = pre_mapping_of([in_list, splittable])
     assert "bla" not in m["vals"]
     merged = m["vals"][list]["bla"]
     assert merged == {splitstr: {}}, m["vals"][list]["bla"]

     # str and int: keep as both types
     m = pre_mapping_of([in_list, as_int])
     assert "bla" not in m["vals"]
     assert m["vals"][list]["bla"] == {int: {}, str: {}}

     # splittable kept + merge int to keep both types
     m = pre_mapping_of([in_list, splittable, as_int])
     assert "bla" not in m["vals"]
     merged = m["vals"][list]["bla"]
     assert merged == {int: {}, splitstr: {}}, m["vals"][list]["bla"]
Ejemplo n.º 6
0
    def test_01_not_order_specific(self):
        """Check that inspecting two documents gives the same result in
        either order.

        ``inspect_docs`` is called without an explicit mode and the "type"
        entry of its result is compared.
        """
        span = {"start": 134, "end": 5543}
        d1 = {
            "id": "124",
            'lofd': [{"val": 34.3}, {"ul": "bla"}],
            "d": dict(span),
        }
        d2 = {
            "id": "5",
            'lofd': {"oula": "mak", "val": 34},
            "d": dict(span),
        }
        # NOTE(review): d3 is built but not used by the assertions below
        d3 = {
            "id": "890",
            'lofd': [{"val": 34}],
            "d": dict(span),
        }

        # merging either way must give the same result
        forward = inspect_docs([d1, d2])["type"]
        backward = inspect_docs([d2, d1])["type"]
        # lists are expected to compare as unordered here
        assert backward == forward, "\nm21=%s\n!=\nm12=%s" % (
            pformat(backward), pformat(forward))
Ejemplo n.º 7
0
def inspect_data(backend_provider, ids, mode, pre_mapping, **kwargs):
    """Inspect the documents whose ``_id`` is listed in *ids*.

    A backend is created from *backend_provider*, its target collection is
    queried for the given ids through ``doc_feeder``, and the resulting
    documents are handed to ``btinspect.inspect_docs`` with
    ``metadata=False``. Extra keyword arguments are forwarded unchanged.

    Returns whatever ``btinspect.inspect_docs`` returns.
    """
    collection = create_backend(backend_provider).target_collection
    id_query = {'_id': {'$in': ids}}
    # step is the number of requested ids; presumably so that all of them
    # fit in one fetch -- confirm against doc_feeder
    documents = doc_feeder(collection,
                           step=len(ids),
                           inbatch=False,
                           query=id_query)
    inspect_options = dict(mode=mode, pre_mapping=pre_mapping,
                           metadata=False)
    return btinspect.inspect_docs(documents, **inspect_options, **kwargs)
Ejemplo n.º 8
0
 def test_08_mapping_with_list_of_list_of_integer(self):
     """Check the mapping generated for a list of lists of integers.

     The nested integer lists under "gene" are expected to map to a plain
     'integer' field, and "id" to a lowercase-normalized keyword.
     """
     # mapping with type of type
     doc = {
         "_id": "123",
         "homologene": {"id": "bla", "gene": [[123, 456], [789, 102]]},
     }
     expected = {
         'homologene': {
             'properties': {
                 'gene': {'type': 'integer'},
                 'id': {
                     'normalizer': 'keyword_lowercase_normalizer',
                     'type': 'keyword',
                 },
             }
         }
     }
     m = inspect_docs([doc], mode="mapping")["mapping"]
     assert m == expected, "mapping %s" % m
Ejemplo n.º 9
0
    def test_05_stats(test):
        """Check the counters kept in "stats" mode as documents accumulate.

        ``inspect`` is called repeatedly on the same map with two documents,
        and the ``_count`` / ``_max`` / ``_min`` values stored under
        ``_stats`` are verified after each call. A final run through
        ``inspect_docs`` with ten copies of one document checks that every
        count is 10.

        NOTE(review): the first parameter is named ``test`` rather than the
        conventional ``self``; it is not used in the body.
        """
        def check(node, count, highest, lowest):
            # one node's three counters, checked in count/max/min order
            stats = node["_stats"]
            assert stats["_count"] == count
            assert stats["_max"] == highest
            assert stats["_min"] == lowest

        d1 = {
            "id": "124",
            'lofd': [{"val": 34.3}, {"ul": "bla"}],
            "d": {"start": 134, "end": 5543},
        }
        d2 = {
            "id": "5",
            'lofd': {"oula": "mak", "val": 34},
            "d": {"start": 134, "end": 5543},
        }

        # stats
        m = {}
        inspect(d1, mapt=m, mode="stats")
        # some simple check
        assert set(m["id"].keys()) == {str}
        check(m["id"][str], 1, 3, 3)
        assert m["lofd"].keys() == {list}
        # list's stats
        check(m["lofd"][list], 1, 2, 2)
        # one list's elem stats
        check(m["lofd"][list]["val"][float], 1, 34.3, 34.3)

        # again (should see increment in counts for instance)
        inspect(d1, mapt=m, mode="stats")
        check(m["id"][str], 2, 3, 3)
        check(m["lofd"][list], 2, 2, 2)
        check(m["lofd"][list]["val"][float], 2, 34.3, 34.3)

        # mix with d2
        inspect(d2, mapt=m, mode="stats")
        check(m["id"][str], 3, 3, 1)  # new min
        # count not incremented as in d2 it's not a list
        check(m["lofd"][list], 2, 2, 2)
        # now float & int
        check(m["lofd"][list]["val"][float], 2, 34.3, 34.3)
        # val{int} wasn't merged
        check(m["lofd"]["val"][int], 1, 34, 34)

        # d2 again
        inspect(d2, mapt=m, mode="stats")
        check(m["id"][str], 4, 3, 1)
        check(m["lofd"][list], 2, 2, 2)
        check(m["lofd"][list]["val"][float], 2, 34.3, 34.3)
        check(m["lofd"]["val"][int], 2, 34, 34)

        # all counts should be 10
        m = inspect_docs([d1] * 10, mode="stats")["stats"]
        ten_times = (
            m["d"]["end"][int],
            m["d"]["start"][int],
            m["id"][str],
            m["lofd"][list],
            m["lofd"][list]["ul"][str],
            m["lofd"][list]["val"][float],
        )
        for node in ten_times:
            assert node["_stats"]["_count"] == 10
Ejemplo n.º 10
0
 def test_03_same_key_different_types_with_list(self):
     """Check that a top-level list is kept apart from a top-level dict.

     "val" is found once in a plain dict and once in a dict wrapped in a
     list; the resulting type map must expose both 'val' and ``list``.
     """
     # even if val is in a list
     plain = {"val": 34}
     wrapped = [{"val": 1.2}]
     type_map = inspect_docs([plain, wrapped])["type"]
     # list and val not merged
     top_level = set(type_map.keys())
     assert top_level == {'val', list}
Ejemplo n.º 11
0
 def test_02_same_key_different_types(self):
     """Check that a key seen as int and as float records both types."""
     # val can be an int and a float
     docs = [{"val": 34}, {"val": 1.2}]
     type_map = inspect_docs(docs)["type"]
     # set: types can be in any order
     seen_types = set(type_map["val"])
     assert seen_types == {int, float}
Ejemplo n.º 12
0
 def test_15_mapping_with_int_float(self):
     """Check that mixing int and float values maps the field to "float"."""
     # allow int & float in mapping (keep float)
     ints = {"_id": "a", "f": [1, 2]}
     floats = {"_id": "a", "f": [1.1, 2.2]}
     result = inspect_docs([ints, floats], mode="mapping")
     field_mapping = result["mapping"]["f"]
     assert field_mapping["type"] == "float"
Ejemplo n.º 13
0
def test():
    """Self-check for ``generate_json_schema``.

    First, small hand-written documents are inspected in "type" mode and
    the JSON schema generated from the result is compared with the expected
    schema. Then three stored inspection maps (cgi, clinvar, mygene) are
    loaded from ``biothings/tests`` and their generated schemas are compared
    with the stored reference schemas.

    Side effects: sets ``biothings.utils.jsondiff.UNORDERED_LIST`` to True
    and prints "All test OK" on success. The fixture paths are relative, so
    this must be run from the app folder (biothings as symlink).

    Raises:
        AssertionError: when a generated schema differs from the expected one.
    """
    # TODO: Move these test to tests folder, or maybe already moved over? Chunlei
    from biothings.utils.inspect import typify_inspect_doc, inspect_docs

    # can't use assert directly, as we can't ensure the order of types (for instance)
    import biothings.utils.jsondiff
    biothings.utils.jsondiff.UNORDERED_LIST = True
    jsondiff = biothings.utils.jsondiff.make

    def load_json(path):
        # Context manager so the file handle is closed right after parsing
        # instead of being left to the garbage collector.
        with open(path) as handle:
            return json.load(handle)

    # (documents to inspect, expected JSON schema), checked in this order
    cases = [
        # object
        (
            [{"i": {"a": 456}}],
            {
                'properties': {
                    'i': {
                        'properties': {'a': {'type': 'integer'}},
                        'type': 'object'
                    }
                },
                'type': 'object'
            },
        ),
        # array of integers
        (
            [{"i": [1, 2, 3]}],
            {
                'properties': {
                    'i': {
                        'items': {'type': 'integer'},
                        'type': 'array'
                    }
                },
                'type': 'object'
            },
        ),
        # array of object
        (
            [{"i": [{"a": 123}]}],
            {
                'properties': {
                    'i': {
                        'items': {
                            'properties': {'a': {'type': 'integer'}},
                            'type': 'object'
                        },
                        'type': 'array'
                    }
                },
                'type': 'object'
            },
        ),
        # object in object
        (
            [{"i": {"a": {"b": 123}}}],
            {
                'properties': {
                    'i': {
                        'properties': {
                            'a': {
                                'properties': {'b': {'type': 'integer'}},
                                'type': 'object'
                            }
                        },
                        'type': 'object'
                    }
                },
                'type': 'object'
            },
        ),
        # mixed int/str in array
        (
            [{"i": [1, 2, "a"]}],
            {
                'properties': {
                    'i': {
                        'items': {'type': ['integer', 'string']},
                        'type': 'array'
                    }
                },
                'type': 'object'
            },
        ),
        # mixed array/object
        (
            [{"i": {"a": 456}}, {"i": [{"a": 123}]}],
            {
                'properties': {
                    'i': {
                        'items': {
                            'properties': {'a': {'type': 'integer'}},
                            'type': 'object'
                        },
                        'properties': {'a': {'type': 'integer'}},
                        'type': ['array', 'object']
                    }
                },
                'type': 'object'
            },
        ),
        # list of integer (list of things which are not objects)
        (
            [{'a': [5, 5, 3]}],
            {
                'properties': {
                    'a': {
                        'items': {'type': 'integer'},
                        'type': 'array'
                    }
                },
                'type': 'object'
            },
        ),
        # object with several integer properties
        (
            [{"i": {"a": 1, "b": 2}}],
            {
                'type': 'object',
                'properties': {
                    'i': {
                        'type': 'object',
                        'properties': {
                            'a': {'type': 'integer'},
                            'b': {'type': 'integer'}
                        }
                    }
                }
            },
        ),
        # int or list of int (not a list of dict, testing scalar there)
        (
            [{"i": 1}, {"i": [2, 3]}],
            {
                'properties': {
                    'i': {
                        'items': {'type': 'integer'},
                        'type': ['array', 'integer']
                    }
                },
                'type': 'object'
            },
        ),
    ]
    for docs, expected in cases:
        m = inspect_docs(docs, mode="type")["type"]
        gs = generate_json_schema(m)
        assert jsondiff(gs, expected) == [], "%s  !=\n%s" % (gs, expected)

    # run from app folder, biothings as symlink

    # small real-life collections: stored inspection map vs. stored schema
    for name in ("cgi", "clinvar", "mygene"):
        reference = load_json("biothings/tests/%s_schema.json" % name)
        inspected = typify_inspect_doc(
            load_json("biothings/tests/%s_map.json" % name))
        schema = generate_json_schema(inspected)
        assert jsondiff(reference, schema) == [], \
            "generated schema differs from reference for %s" % name

    print("All test OK")