Exemple #1
0
def stringify_inspect_doc(dmap):
    def stringify(val):
        if type(val) == type:
            return "__type__:%s" % val.__name__  # prevent having dots in the field (not storable in mongo)
        else:
            return str(val)

    return dict_walk(dmap, stringify)
Exemple #2
0
def typify_inspect_doc(dmap):
    """
    dmap is an inspect which was converted to be stored in a database,
    namely actual python types were stringify to be storabled. This function
    does the oposite and restore back python types within the inspect doc
    """
    def typify(val):
        if type(val) != type and val.startswith("__type__:"):
            return eval(val.replace("__type__:", ""))
        else:
            return val

    return dict_walk(dmap, typify)
Exemple #3
0
def typify_inspect_doc(dmap):
    """
    dmap is an inspect which was converted to be stored in a database,
    namely actual python types were stringify to be storabled. This function
    does the oposite and restore back python types within the inspect doc
    """
    def typify(val):
        if type(val) != type and val.startswith("__type__:"):
            typ = val.replace("__type__:", "")
            # special cases
            if typ == "NoneType":
                return None
            elif typ == "Int64":  # bson's Int64
                return bson.int64.Int64
            else:
                return eval(val.replace("__type__:", ""))
        else:
            return val

    return dict_walk(dmap, typify)
Exemple #4
0
def generate_es_mapping(inspect_doc, init=True, level=0):
    """Generate an ES mapping according to "inspect_doc", which is
    produced by biothings.utils.inspect module"""
    map_tpl = {
        int: {
            "type": "integer"
        },
        bool: {
            "type": "boolean"
        },
        float: {
            "type": "float"
        },
        str: {
            "type": "keyword",
            "normalizer": "keyword_lowercase_normalizer"
        },  # not splittable (like an ID for instance)
        splitstr: {
            "type": "text"
        },
    }
    # inspect_doc, if it's been jsonified, contains keys with type as string,
    # such as "<class 'str'>". This is not a real type and we need to convert them
    # back to actual types. This is transparent if inspect_doc isalready in proper format
    pat = re.compile(r"<class '(\w+)'>")

    def str2type(k):
        if isinstance(k, str):
            mat = pat.findall(k)
            if mat:
                return eval(mat[0])  # actual type
            else:
                return k
        else:
            return k

    inspect_doc = dict_walk(inspect_doc, str2type)

    mapping = {}
    errors = []
    none_type = type(None)
    if init and "_id" not in inspect_doc:
        errors.append(
            "No _id key found, document won't be indexed. (doc: %s)" %
            inspect_doc)
    for rootk in inspect_doc:
        if rootk == "_id":
            keys = list(inspect_doc[rootk].keys())
            if str in keys and splitstr in keys:
                keys.remove(str)
            if not len(keys) == 1 or (keys[0] != str and keys[0] != splitstr):
                errors.append(
                    "_id fields should all be a string type (got: %s)" % keys)
            # it was just a check, it's not part of the mapping
            continue
        if rootk == "_stats":
            continue
        if isinstance(rootk, type(None)):  # if rootk == type(None):
            # value can be null, just skip it
            continue
        # some inspect report have True as value, others have dict (will all have dict eventually)
        if inspect_doc[rootk] is True:
            inspect_doc[rootk] = {}
        keys = list(inspect_doc[rootk].keys())
        # if dict, it can be a dict containing the type (no explore needed) or a dict
        # containing more keys (explore needed)
        if list in keys:
            # we explore directly the list w/ inspect_doc[rootk][list] as param.
            # (similar to skipping list type, as there's no such list type in ES mapping)
            # carefull: there could be list of list, if which case we move further into the structure
            # to skip them
            toexplore = inspect_doc[rootk][list]
            while list in toexplore:
                toexplore = toexplore[list]
            if len(toexplore) > 1:
                # we want to make sure that, whatever the structure, the types involved were the same
                # Exception: None is allowed with other types (translates to 'null' in ES)
                # other_types = set([k for k in toexplore.keys() if k != list and isinstance(k, type) and k is not type(None)])    # TODO: Confirm this line
                other_types = {
                    k
                    for k in toexplore.keys() if k != list
                    and isinstance(k, type) and not isinstance(k, none_type)
                }
                # some mixes are allowed by ES
                if {int, float}.issubset(other_types):
                    other_types.discard(int)  # float > int
                    toexplore.pop(int)
                if len(other_types) > 1:
                    raise Exception("Mixing types for key '%s': %s" %
                                    (rootk, other_types))
            res = generate_es_mapping(toexplore, init=False, level=level + 1)
            # is it the only key or do we have more ? (ie. some docs have data as "x", some
            # others have list("x")
            # list was either a list of values (end of tree) or a list of dict. Depending
            # on that, we add "properties" (when list of dict) or not (when list of values)
            if type in set(map(type, inspect_doc[rootk][list])):
                mapping[rootk] = res
            else:
                mapping[rootk] = {"properties": {}}
                mapping[rootk]["properties"] = res
        elif set(map(type, keys)) == {type}:
            # it's a type declaration, no explore
            # typs = list(map(type, [k for k in keys if k is not type(None)]))    # TODO: Confirm this line
            typs = list(
                map(type, [k for k in keys if not isinstance(k, none_type)]))
            if len(typs) > 1:
                errors.append("More than one type (key:%s,types:%s)" %
                              (repr(rootk), repr(keys)))
            try:
                typ = list(inspect_doc[rootk].keys())
                # ther can still be more than one type, if we have a None combined with
                # the "correct" one. We allow None as a combined type, but we want to ignore
                # it when we want to find the mapping
                if len(typ) == 1:
                    typ = typ[0]
                else:
                    # typ = [t for t in typ if t is not type(None)][0]      # TODO: Confirm this line
                    typ = [t for t in typ if not isinstance(t, none_type)][0]
                if typ is nan or typ is inf:
                    raise TypeError(typ)
                mapping[rootk] = map_tpl[typ]
            except KeyError:
                errors.append("Can't find map type %s for key %s" %
                              (inspect_doc[rootk], rootk))
            except TypeError:
                errors.append(
                    "Type %s for key %s isn't allowed in ES mapping" %
                    (typ, rootk))
        elif inspect_doc[rootk] == {}:
            typ = rootk
            return map_tpl[typ]
        else:
            mapping[rootk] = {"properties": {}}
            mapping[rootk]["properties"] = generate_es_mapping(
                inspect_doc[rootk], init=False, level=level + 1)
    if errors:
        raise MappingError("Error while generating mapping", errors)
    return mapping