def stringify_inspect_doc(dmap):
    """Return a copy of the inspect doc ``dmap`` in which python types are
    replaced by strings, so the document can be stored in a database.

    The per-item conversion is handed to ``dict_walk`` (defined elsewhere in
    the project; presumably it applies the callback to every key of the
    nested dict -- confirm against its definition).
    """
    def to_storable(item):
        if type(item) != type:
            return str(item)
        # The "__type__:<name>" form is used to prevent having dots in the
        # field (not storable in mongo).
        return "__type__:%s" % item.__name__

    return dict_walk(dmap, to_storable)
def typify_inspect_doc(dmap):
    """
    dmap is an inspect doc which was converted to be stored in a database,
    namely actual python types were stringified to be storable. This function
    does the opposite and restores python types within the inspect doc.

    NOTE(review): a second ``typify_inspect_doc`` is defined right after this
    one in the same module and rebinds the name, so this version is shadowed
    once the module is loaded. Consider removing one of the two.
    """
    prefix = "__type__:"

    def typify(val):
        # Only strings can carry the type marker. Anything else (actual
        # types, None, numbers, ...) is passed through untouched instead of
        # failing on a missing ``startswith`` attribute.
        if isinstance(val, str) and val.startswith(prefix):
            # NOTE(security): eval() resolves the stored type name in this
            # module's namespace. Only use on inspect docs coming from a
            # trusted database.
            # Strip the leading marker only, not every occurrence.
            return eval(val[len(prefix):])
        return val

    return dict_walk(dmap, typify)
def typify_inspect_doc(dmap):
    """
    dmap is an inspect doc which was converted to be stored in a database,
    namely actual python types were stringified to be storable. This function
    does the opposite and restores python types within the inspect doc.

    Items which are not marked strings (actual types, None, numbers, ...) are
    returned unchanged, so running this function on an already typified
    document is harmless.
    """
    prefix = "__type__:"

    def typify(val):
        # Only strings can carry the type marker. Checking for ``str``
        # explicitly avoids an AttributeError on None or numeric items.
        if not (isinstance(val, str) and val.startswith(prefix)):
            return val
        # Strip the leading marker only, not every occurrence.
        typ = val[len(prefix):]
        # special cases
        if typ == "NoneType":
            return None
        if typ == "Int64":
            # bson's Int64
            return bson.int64.Int64
        # NOTE(security): eval() resolves the stored type name in this
        # module's namespace. Only use on inspect docs coming from a trusted
        # database.
        return eval(typ)

    return dict_walk(dmap, typify)
def generate_es_mapping(inspect_doc, init=True, level=0):
    """Generate an ES mapping according to "inspect_doc", which is
    produced by biothings.utils.inspect module.

    Args:
        inspect_doc: nested dict describing the documents. Keys are field
            names, python types (``str``, ``int``, ``list``, ...) or their
            jsonified form (``"<class 'str'>"``). Values are dicts (or
            ``True``, treated as an empty dict).
        init: True for the top-level call. In that case a missing "_id" key
            is reported as an error.
        level: recursion depth. It is only incremented and forwarded to
            recursive calls.

    Returns:
        A dict mapping each field name to its ES mapping. When the dict being
        explored is a bare type leaf (e.g. ``{str: {}}``), the mapping
        template of that type is returned directly instead.

    Raises:
        MappingError: when at least one error was collected during the walk
            (missing/invalid "_id", unknown or disallowed type, ...).
        Exception: when a list mixes types ES cannot combine.
    """
    map_tpl = {
        int: {"type": "integer"},
        bool: {"type": "boolean"},
        float: {"type": "float"},
        # not splittable (like an ID for instance)
        str: {"type": "keyword", "normalizer": "keyword_lowercase_normalizer"},
        splitstr: {"type": "text"},
    }
    # inspect_doc, if it's been jsonified, contains keys with type as string,
    # such as "<class 'str'>". This is not a real type and we need to convert
    # them back to actual types. This is transparent if inspect_doc is already
    # in proper format.
    pat = re.compile(r"<class '(\w+)'>")

    def str2type(k):
        if isinstance(k, str):
            mat = pat.findall(k)
            if mat:
                # NOTE(security): eval() on a name taken from the document.
                # The pattern restricts it to a bare identifier, but the doc
                # should still come from a trusted source.
                return eval(mat[0])  # actual type
        return k

    none_type = type(None)

    def is_null_key(k):
        # "null" can be recorded either as the None instance (this is what
        # typify_inspect_doc restores for "NoneType") or as the NoneType
        # class itself (what type(None) yields). Both are accepted.
        return k is None or k is none_type

    inspect_doc = dict_walk(inspect_doc, str2type)
    mapping = {}
    errors = []
    if init and "_id" not in inspect_doc:
        errors.append(
            "No _id key found, document won't be indexed. (doc: %s)" % inspect_doc)
    for rootk in inspect_doc:
        if rootk == "_id":
            keys = list(inspect_doc[rootk].keys())
            if str in keys and splitstr in keys:
                keys.remove(str)
            if len(keys) != 1 or keys[0] not in (str, splitstr):
                errors.append(
                    "_id fields should all be a string type (got: %s)" % keys)
            # it was just a check, it's not part of the mapping
            continue
        if rootk == "_stats":
            continue
        if is_null_key(rootk):
            # value can be null, just skip it
            continue
        # some inspect reports have True as value, others have dict
        # (will all have dict eventually)
        if inspect_doc[rootk] is True:
            inspect_doc[rootk] = {}
        keys = list(inspect_doc[rootk].keys())
        # if dict, it can be a dict containing the type (no explore needed)
        # or a dict containing more keys (explore needed)
        if list in keys:
            # we explore directly the list w/ inspect_doc[rootk][list] as
            # param (similar to skipping list type, as there's no such list
            # type in ES mapping).
            # careful: there could be list of list, in which case we move
            # further into the structure to skip them
            toexplore = inspect_doc[rootk][list]
            while list in toexplore:
                toexplore = toexplore[list]
            if len(toexplore) > 1:
                # we want to make sure that, whatever the structure, the
                # types involved were the same.
                # Exception: None is allowed with other types (translates to
                # 'null' in ES)
                other_types = {
                    k for k in toexplore
                    if k != list and isinstance(k, type) and not is_null_key(k)
                }
                # some mixes are allowed by ES
                if {int, float}.issubset(other_types):
                    other_types.discard(int)  # float > int
                    toexplore.pop(int)
                if len(other_types) > 1:
                    raise Exception("Mixing types for key '%s': %s" %
                                    (rootk, other_types))
            res = generate_es_mapping(toexplore, init=False, level=level + 1)
            # is it the only key or do we have more? (ie. some docs have data
            # as "x", some others have list("x"))
            # list was either a list of values (end of tree) or a list of
            # dict. Depending on that, we add "properties" (when list of
            # dict) or not (when list of values)
            if type in set(map(type, inspect_doc[rootk][list])):
                mapping[rootk] = res
            else:
                mapping[rootk] = {"properties": {}}
                mapping[rootk]["properties"] = res
        elif {type(k) for k in keys if k is not None} == {type}:
            # it's a type declaration, no explore. A None instance sitting
            # next to the type(s) is ignored when deciding that, so that
            # "type + null" is mapped as a plain value and not explored as a
            # sub-document.
            declared = [k for k in keys if not is_null_key(k)]
            if len(declared) > 1:
                errors.append("More than one type (key:%s,types:%s)" %
                              (repr(rootk), repr(keys)))
            # there can still be more than one key, if we have a None
            # combined with the "correct" type. We allow None as a combined
            # type, but we want to ignore it when we want to find the
            # mapping. If only null keys are present, the first key is kept
            # so it gets reported as an unknown map type below.
            typ = declared[0] if declared else keys[0]
            try:
                if typ is nan or typ is inf:
                    raise TypeError(typ)
                # copy the template so fields don't share one dict object
                mapping[rootk] = dict(map_tpl[typ])
            except KeyError:
                errors.append("Can't find map type %s for key %s" %
                              (inspect_doc[rootk], rootk))
            except TypeError:
                errors.append(
                    "Type %s for key %s isn't allowed in ES mapping" %
                    (typ, rootk))
        elif inspect_doc[rootk] == {}:
            # bare leaf: the key itself is the type, return its template
            # (a copy, so callers can't alter the template by accident)
            typ = rootk
            return dict(map_tpl[typ])
        else:
            mapping[rootk] = {"properties": {}}
            mapping[rootk]["properties"] = generate_es_mapping(
                inspect_doc[rootk], init=False, level=level + 1)
    if errors:
        raise MappingError("Error while generating mapping", errors)
    return mapping