def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    Decode a unicode JSON string into a wrapped Python value.

    On a decode failure the handler tries to locate the offending line and
    column from the decoder's "Expecting ... delimiter" message and reports
    a short sample with a caret under the position; otherwise it reports a
    character/hex dump of (at most) the first 1000 characters.
    Failures are reported through Log.error (presumably raising - the code
    after each call relies on it).

    :param json_string: THE JSON (byte strings are rejected)
    :param params: STANDARD JSON PARAMS, EXPANDED INTO THE STRING BEFORE DECODING
    :param flexible: REMOVE COMMENTS AND TRAILING COMMAS BEFORE DECODING
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
            # NOTE(review): re.MULTILINE ONLY AFFECTS ^ AND $, WHICH THIS PATTERN
            # DOES NOT USE, SO A TRIPLE-QUOTED COMMENT SPANNING SEVERAL LINES IS
            # NOT REMOVED; re.DOTALL MAY HAVE BEEN INTENDED - CONFIRM
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(row) for row in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception as cause:
            Log.error("can not decode\n{{content}}", content=json_string, cause=cause)

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        # WALK DOWN THE CAUSE CHAIN TO THE DEEPEST "Expecting ... delimiter" ERROR
        # THE `c.cause and` GUARD STOPS AT THE END OF THE CHAIN INSTEAD OF
        # FAILING ON A MISSING CAUSE (WHICH WOULD MASK THE REAL ERROR)
        c = e
        while c.cause and "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            # THE DECODER REPORTS ONE-BASED LINE AND COLUMN
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                # SHOW ONLY THE 20 CHARACTERS LEADING UP TO THE ERROR
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            # PASS THE TEXT AS PARAMETERS SO "{{" IN THE INPUT IS NOT
            # INTERPRETED AS TEMPLATE MARKUP
            Log.error(
                "Can not decode JSON at:\n\t{{sample}}\n\t{{pointer}}\n",
                sample=sample,
                pointer=pointer,
            )

        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            # latin1 MAPS EVERY BYTE TO THE CHARACTER WITH THE SAME ORDINAL;
            # CONTROL CHARACTERS ARE SHOWN AS "."
            char_str = " " + " ".join(
                (ch if ord(ch) >= 32 else ".") for ch in base_str.decode("latin1")
            )
        except Exception:
            # BEST EFFORT: THE HEX DUMP IS STILL REPORTED
            char_str = " "
        Log.error(
            "Can not decode JSON:\n{{char_str}}\n{{hexx_str}}\n",
            char_str=char_str,
            hexx_str=hexx_str,
            cause=e,
        )
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    Decode a JSON string into a data value.

    On a decode failure the handler tries to locate the offending line and
    column from the decoder's "Expecting ... delimiter" message and reports
    a short sample with a caret under the position; otherwise it reports a
    character/hex dump of (at most) the first 1000 characters.
    Failures are reported through Log.error (presumably raising - the code
    after each call relies on it).

    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS, EXPANDED INTO THE STRING BEFORE DECODING
    :param flexible: REMOVE COMMENTS (DECODE WITH hjson2value)
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    json_string = text(json_string)
    # NOTE(review): THIS CHECK RUNS AFTER THE text() CONVERSION ABOVE, SO IT
    # PRESUMABLY CAN NOT FAIL ANYMORE - CONFIRM WHETHER THE ORDER IS INTENDED
    if not is_text(json_string) and json_string.__class__.__name__ != "FileString":
        Log.error("only unicode json accepted")

    try:
        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        if flexible:
            value = hjson2value(json_string)
        else:
            value = to_data(json_decoder(text(json_string)))

        if leaves:
            value = leaves_to_data(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        # WALK DOWN THE CAUSE CHAIN TO THE DEEPEST "Expecting ... delimiter" ERROR
        c = e
        while c.cause and "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            # THE DECODER REPORTS ONE-BASED LINE AND COLUMN
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                # SHOW ONLY THE 20 CHARACTERS LEADING UP TO THE ERROR
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error(
                CAN_NOT_DECODE_JSON + " at:\n\t{{sample}}\n\t{{pointer}}\n",
                sample=sample,
                pointer=pointer,
            )

        base_str = strings.limit(json_string, 1000).encode("utf8")
        hexx_str = bytes2hex(base_str, " ")
        try:
            # ITERATING bytes YIELDS int ON PYTHON 3, WHERE ord(c) WOULD RAISE AND
            # THE DUMP WOULD SILENTLY COME OUT BLANK. DECODE ONCE INSTEAD:
            # latin1 MAPS EVERY BYTE TO THE CHARACTER WITH THE SAME ORDINAL;
            # CONTROL CHARACTERS ARE SHOWN AS "."
            char_str = " " + " ".join(
                (ch if ord(ch) >= 32 else ".") for ch in base_str.decode("latin1")
            )
        except Exception:
            # BEST EFFORT: THE HEX DUMP IS STILL REPORTED
            char_str = " "
        Log.error(
            CAN_NOT_DECODE_JSON + ":\n{{char_str}}\n{{hexx_str}}\n",
            char_str=char_str,
            hexx_str=hexx_str,
            cause=e,
        )
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    Decode a JSON string into a wrapped Python value.

    On a decode failure the handler tries to locate the offending line and
    column from the decoder's "Expecting ... delimiter" message and reports
    a short sample with a caret under the position; otherwise it reports a
    character/hex dump of (at most) the first 1000 characters.
    Failures are reported through Log.error (presumably raising - the code
    after each call relies on it).

    :param json_string: THE JSON (text, or an instance of a class named FileString)
    :param params: STANDARD JSON PARAMS, EXPANDED INTO THE STRING BEFORE DECODING
    :param flexible: REMOVE COMMENTS AND TRAILING COMMAS BEFORE DECODING
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if not is_text(json_string) and json_string.__class__.__name__ != "FileString":
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
            # NOTE(review): re.MULTILINE ONLY AFFECTS ^ AND $, WHICH THIS PATTERN
            # DOES NOT USE, SO A TRIPLE-QUOTED COMMENT SPANNING SEVERAL LINES IS
            # NOT REMOVED; re.DOTALL MAY HAVE BEEN INTENDED - CONFIRM
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(row) for row in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(text(json_string)))
        except Exception as cause:
            Log.error("can not decode\n{{content}}", content=json_string, cause=cause)

        if leaves:
            value = wrap_leaves(value)

        return value

    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        # WALK DOWN THE CAUSE CHAIN TO THE DEEPEST "Expecting ... delimiter" ERROR
        # THE `c.cause and` GUARD STOPS AT THE END OF THE CHAIN INSTEAD OF
        # FAILING ON A MISSING CAUSE (WHICH WOULD MASK THE REAL ERROR)
        c = e
        while c.cause and "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            # THE DECODER REPORTS ONE-BASED LINE AND COLUMN
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                # SHOW ONLY THE 20 CHARACTERS LEADING UP TO THE ERROR
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error(
                CAN_NOT_DECODE_JSON + " at:\n\t{{sample}}\n\t{{pointer}}\n",
                sample=sample,
                pointer=pointer,
            )

        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            # ITERATING bytes YIELDS int ON PYTHON 3, WHERE ord(c) WOULD RAISE AND
            # THE DUMP WOULD SILENTLY COME OUT BLANK. DECODE ONCE INSTEAD:
            # latin1 MAPS EVERY BYTE TO THE CHARACTER WITH THE SAME ORDINAL;
            # CONTROL CHARACTERS ARE SHOWN AS "."
            char_str = " " + " ".join(
                (ch if ord(ch) >= 32 else ".") for ch in base_str.decode("latin1")
            )
        except Exception:
            # BEST EFFORT: THE HEX DUMP IS STILL REPORTED
            char_str = " "
        Log.error(
            CAN_NOT_DECODE_JSON + ":\n{{char_str}}\n{{hexx_str}}\n",
            char_str=char_str,
            hexx_str=hexx_str,
            cause=e,
        )
def _scrub(value, is_done, stack, scrub_text, scrub_number):
    """
    Recursively reduce `value` to plain values (None, bool, text, numbers,
    dict, list) by dispatching on its class.

    :param value: THE VALUE TO CONVERT
    :param is_done: SET OF id() FOR THE DATA MAPPINGS CURRENTLY BEING CONVERTED
                    (USED TO DETECT SELF-REFERENCING STRUCTURES)
    :param stack: LIST OF id() FOR THE ANCESTORS OF value (ONLY USED WHEN FIND_LOOPS)
    :param scrub_text: FUNCTION APPLIED TO EVERY TEXT VALUE
    :param scrub_number: FUNCTION APPLIED TO EVERY NUMERIC VALUE
    :return: THE CONVERTED VALUE; None FOR NULL TYPES, NaN/INFINITE FLOATS,
             AND CODE/FRAME-LIKE OBJECTS
    """
    if FIND_LOOPS:
        _id = id(value)
        # TEST THE TYPE OF value, NOT OF _id: id() ALWAYS RETURNS AN int, SO
        # TESTING type(_id) MADE THIS CONDITION ALWAYS FALSE AND NO LOOP WAS
        # EVER REPORTED
        if _id in stack and type(value).__name__ not in ["int"]:
            Log.error("loop in JSON")
        # NEW LIST, SO SIBLINGS DO NOT SEE EACH OTHER'S IDS
        stack = stack + [_id]
    type_ = value.__class__

    if type_ in null_types:
        return None
    elif type_ is text:
        return scrub_text(value)
    elif type_ is float:
        if math.isnan(value) or math.isinf(value):
            return None
        return scrub_number(value)
    elif type_ is bool:
        return value
    elif type_ in integer_types:
        return scrub_number(value)
    elif type_ in (date, datetime):
        return scrub_number(datetime2unix(value))
    elif type_ is timedelta:
        return value.total_seconds()
    elif type_ is Date:
        return scrub_number(value.unix)
    elif type_ is Duration:
        return scrub_number(value.seconds)
    elif type_ is str:
        # NOTE(review): IF text IS str (PYTHON 3) THIS BRANCH IS UNREACHABLE,
        # BECAUSE THE text BRANCH ABOVE MATCHES FIRST - CONFIRM HOW bytes
        # SHOULD BE HANDLED THERE
        return value.decode("utf8")
    elif type_ is Decimal:
        return scrub_number(value)
    elif type_ is Data:
        # UNWRAP AND CONVERT THE UNDERLYING VALUE
        return _scrub(_get(value, SLOT), is_done, stack, scrub_text, scrub_number)
    elif is_data(value):
        _id = id(value)
        if _id in is_done:
            Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(_id)

        output = {}
        for k, v in value.items():
            if is_text(k):
                pass
            elif is_binary(k):
                k = k.decode("utf8")
            else:
                Log.error("keys must be strings")
            v = _scrub(v, is_done, stack, scrub_text, scrub_number)
            # `!= None` (NOT `is not None`) IS KEPT ON PURPOSE: IT IS AN EQUALITY
            # TEST, SO NULL-LIKE VALUES ARE DROPPED TOO; MAPPINGS ARE ALWAYS KEPT
            if v != None or is_data(v):
                output[k] = v

        # ONLY ANCESTORS ARE TRACKED; THE SAME MAPPING MAY APPEAR AGAIN ELSEWHERE
        is_done.discard(_id)
        return output
    elif type_ in (tuple, list, FlatList):
        output = []
        for v in value:
            v = _scrub(v, is_done, stack, scrub_text, scrub_number)
            output.append(v)
        return output  # if output else None
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":
        # DEAR ME! Numpy has its own booleans (value==False could be used, but 0==False in Python. DOH!)
        if value == False:
            return False
        else:
            return True
    elif not isinstance(value, Except) and isinstance(value, Exception):
        return _scrub(Except.wrap(value), is_done, stack, scrub_text, scrub_number)
    elif hasattr(value, "__json__"):
        try:
            j = value.__json__()
            # __json__() MAY RETURN TEXT, OR AN ITERABLE OF TEXT FRAGMENTS
            if is_text(j):
                data = json_decoder(j)
            else:
                data = json_decoder("".join(j))
            return _scrub(data, is_done, stack, scrub_text, scrub_number)
        except Exception as cause:
            Log.error("problem with calling __json__()", cause)
    elif hasattr(value, "__data__"):
        try:
            return _scrub(value.__data__(), is_done, stack, scrub_text, scrub_number)
        except Exception as cause:
            Log.error("problem with calling __data__()", cause)
    elif hasattr(value, "co_code") or hasattr(value, "f_locals"):
        # CODE-LIKE AND FRAME-LIKE OBJECTS ARE NOT SERIALIZED
        return None
    elif hasattr(value, "__iter__"):
        output = []
        for v in value:
            v = _scrub(v, is_done, stack, scrub_text, scrub_number)
            output.append(v)
        return output
    elif hasattr(value, "__call__"):
        return text(repr(value))
    elif is_number(value):  # for numpy values
        return scrub_number(value)
    else:
        # LAST RESORT: CONVERT THE OBJECT THROUGH A DataObject WRAPPER
        return _scrub(DataObject(value), is_done, stack, scrub_text, scrub_number)