def do_rephrase(data, do_schema_check=True, linecnt=0):
    """
    Modify the provided data dictionary in place to rephrase
    certain pieces of data for easy loading to BigQuery

    The record's "event" is serialised to a JSON string, a parsed copy is
    kept in "event_struct" for selected event types, fields that are not in
    the schema are folded into "context.agent" / "mongoid" as JSON strings,
    and key names containing "-" or "." are renamed.

    :TODO: Move the inner functions outside this function.

    :type: dict
    :param data: A tracking log record from the edX nightly data files
    :type: bool
    :param do_schema_check: Whether or not the provided record should be
        checked against the target schema (check_schema with coerce=True).
        When False the schema check is skipped.
    :type: int
    :param linecnt: Some line count value; passed to check_schema and
        included in the message written when the schema check fails
    :rtype: None
    :return: Nothing is returned since the data parameter is modified in place
    """
    try:
        string_types = (str, unicode)  # Python 2: byte strings and unicode strings
    except NameError:
        string_types = (str,)  # interpreter without a separate unicode type

    # add course_id?
    if 'course_id' not in data:
        cid = data.get('context', {}).get('course_id', '')
        if cid:
            data['course_id'] = cid

    # add module_id?
    if 'module_id' not in data:
        add_module_id(data)

    if 'event' not in data:
        data['event'] = ""

    # ensure event is dict when possible
    if 'event_js' not in data:
        event = data.get('event')
        try:
            if not isinstance(event, dict):
                event = json.loads(event)
            event_js = True
        except Exception:
            # note - do not erase event even if it can't be loaded as JSON:
            # see how it becomes JSONified below
            event_js = False
        data['event'] = event
        data['event_js'] = event_js

    # ----------------------------------------
    # "event" is used for too many kinds of data, with colliding types for fields.
    # thus, in general, we can't store it as a fixed schema field.
    #
    # so we turn "event" into a JSON string.
    #
    # for specific kinds of events ("event_type"), we keep a parsed version of
    # "event", e.g. for problem_* event types.
    #
    # store that parsed version as "event_struct"
    event = data['event']  # always present: defaulted to "" above
    data['event'] = json.dumps(event)

    # now the real rephrasing

    # an explicit null event_type is treated like a missing one so that the
    # substring tests below cannot fail
    event_type = data.get('event_type') or ''

    # ----------------------------------------
    # types to keep
    KNOWN_TYPES = (
        'play_video', 'seq_goto', 'seq_next', 'seq_prev',
        'seek_video', 'load_video',
        'save_problem_success',
        'save_problem_fail',
        'reset_problem_success',
        'reset_problem_fail',
        'show_answer',
        'edx.course.enrollment.activated',
        'edx.course.enrollment.deactivated',
        'edx.course.enrollment.mode_changed',
        'edx.course.enrollment.upgrade.succeeded',
        'speed_change_video',
        'problem_check',
        'problem_save',
        'problem_reset',
    )

    if isinstance(event, dict):
        outs = ('video_embedded', 'harvardx.button', 'harvardx.')
        out_conds = not any(k in event_type for k in outs)
        in_conds = 'problem_' in event_type or event_type in KNOWN_TYPES
        if in_conds and out_conds:
            data['event_struct'] = event
        else:
            # default to always including GET, POST and query
            data['event_struct'] = {
                'GET': json.dumps(event.get('GET')),
                'POST': json.dumps(event.get('POST')),
                'query': event.get('query'),
            }
    elif 'event_struct' in data:
        data.pop('event_struct')

    # ----------------------------------------
    # special cases

    if '_id' in data:
        data['mongoid'] = data['_id']['$oid']
        data.pop('_id')

    if isinstance(event, dict):
        for http_key in ('POST', 'GET'):
            if http_key in event:
                event[http_key] = json.dumps(event[http_key])

    # String and list events are wrapped as {'data': <json>}.  This also covers
    # the former problem_check / problem_save / problem_reset browser special
    # case, which performed the identical wrap but required "event_source".
    if isinstance(event, string_types) or isinstance(event, list):
        event = {'data': json.dumps(event)}

    # Only dict events have sub-fields to rewrite; scalar JSON events
    # (numbers, booleans, null) are left alone.
    event_dict = event if isinstance(event, dict) else {}

    state = event_dict.get('state')
    if isinstance(state, dict):
        for key in ('input_state', 'correct_map', 'student_answers'):
            if key in state:
                state[key] = json.dumps(state[key])

    for key in ('correct_map', 'answers', 'submission', 'old_state',
                'new_state', 'permutation', 'options_selected', 'corrections'):
        if key in event_dict:
            event_dict[key] = json.dumps(event_dict[key])

    # when the record has no "context" this is a scratch dict and the
    # changes made to it below are discarded
    context = data.get('context', {})
    if 'course_user_tags' in context:
        context['course_user_tags'] = json.dumps(context['course_user_tags'])

    # needed to handle new fields from mobile client
    # 31-Jan-15: handle new "module.usage_key" field in context, e.g.:
    #
    #    "module": {
    #        "display_name": "Radiation Exposure",
    #        "usage_key": "i4x://MITx/6.00.1x_5/problem/ps03:ps03-Radiation-Exposure"
    #    },
    # 28-May-16: context.asides
    mobile_api_context_fields = [
        'application',
        'client',
        'received_at',
        'component',
        "open_in_browser_url",
        "module.usage_key",
        "module.original_usage_version",
        "module.original_usage_key",
        "asides",
    ]
    agent = {'oldagent': context.get('agent', "")}
    for key in mobile_api_context_fields:
        if '.' in key:
            (prefix, subkey) = key.split('.', 1)
            if prefix in context:
                subcontext = context[prefix]
                if subkey in subcontext:
                    agent[key] = subcontext[subkey]
                    subcontext.pop(subkey)
        elif key in context:
            agent[key] = context[key]
            context.pop(key)
    context['agent'] = json.dumps(agent)

    # ----------------------------------------
    # new fields which are not in schema get moved as JSON strings to the pre-existing "mongoid" field,
    # which is unused except in very old records
    # do this, for example, for the "referer" and "accept_language" fields

    def move_fields_to_mongoid(field_paths):
        '''
        field_path is a list of lists which gives the recursive dict keys to traverse to get to the field to move.
        Move that field, with the path intact, into the mongoid field.
        '''
        mongoid = data.get('mongoid')
        if not isinstance(mongoid, dict):
            mongoid = {'old_mongoid': mongoid}

        def move_field_value(ddict, vdict, fp):
            '''recursively traverse dict to get and move value for specified field path'''
            if not isinstance(ddict, dict):
                # the path runs through a non-dict value: nothing to move
                return None
            key = fp[0]
            if len(fp) == 1:  # base case
                if key in ddict:
                    fval = ddict.pop(key)  # remove key from current path within data dict
                    vdict[key] = fval  # copy to new values dict
                    return fval
                return None
            if key not in vdict:
                vdict[key] = {}
            return move_field_value(ddict.get(key, {}), vdict[key], fp[1:])

        for field_path in field_paths:
            move_field_value(data, mongoid, field_path)
        data['mongoid'] = json.dumps(mongoid)

    # 16-Mar-15: remove event_struct.requested_skip_interval
    move_fields_to_mongoid([
        ['referer'],
        ['accept_language'],
        ['event_struct', 'requested_skip_interval'],
        ['event_struct', 'submitted_answer'],
        ['event_struct', 'num_attempts'],
        ['event_struct', 'task_id'],  # 05oct15
        ['event_struct', 'content'],  # 11jan16
        ['nonInteraction'],  # 24aug15
        ['label'],  # 24aug15
        ['event_struct', 'widget_placement'],  # 08may16
        ['event_struct', 'tab_count'],  # 08may16
        ['event_struct', 'current_tab'],  # 08may16
        ['event_struct', 'target_tab'],  # 08may16
        ['event_struct', 'state', 'has_saved_answers'],  # 06dec2016
        ['context', 'label'],  # 24aug15
        ['roles'],  # 06sep2017 rp
        ['environment'],  # 06sep2017 rp
        ['minion_id'],  # 06sep2017 rp
        ['event_struct', 'duration'],  # 22nov2017 ic
        ['event_struct', 'play_medium'],
    ])

    # ----------------------------------------
    # general checks

    if 'child-id' in event_dict:
        event_dict['child_id'] = event_dict.pop('child-id')

    def check_empty(node, *keys):
        '''remove the value at the given key path when it is an empty string'''
        key = keys[0]
        if isinstance(node, dict) and key in node:
            if len(keys) == 1:
                if node[key] in ["", u'']:
                    node.pop(key)
            else:
                check_empty(node[key], *keys[1:])

    check_empty(data, 'context', "user_id")

    data.pop('event_js')  # leftover from mongo import script

    # -----------------------------------------
    # check for null values in speed_change_video
    # Error encountered parsing LAC data from Oct. 2013
    # Requires that we also be able to convert the value to a float
    def string_is_float(s):
        try:
            float(s)
        except (TypeError, ValueError):
            # TypeError: value is not a string/number at all (e.g. None)
            return False
        return True

    if data.get('event_type') == 'speed_change_video':
        if 'event_struct' in data and 'new_speed' in data['event_struct']:
            new_speed = data['event_struct']['new_speed']
            # First check if string is float, second check if value is NaN
            if string_is_float(new_speed) and isnan(float(new_speed)):
                data['event_struct'].pop('new_speed')

    # check for any funny keys, recursively
    def check_for_funny_keys(entry, name='toplevel'):
        '''
        Return True when entry holds a key that cannot be repaired (the caller
        then stores that section as a JSON string); rename keys containing
        "-" or "." in place.
        '''
        # Iterate over a snapshot: keys are renamed (insert + pop) inside the
        # loop, which is not safe while iterating the live dict.
        for key, val in list(entry.items()):
            if key.startswith(('i4x-', 'xblock.')):
                sys.stderr.write(
                    "[rephrase] oops, funny key at %s in entry: %s, data=%s\n" % (name, entry, ''))
                return True
            if len(key) > 25:
                sys.stderr.write(
                    "[rephrase] suspicious key at %s in entry: %s, data=%s\n" % (name, entry, ''))
            if key[:1].isdigit():  # slice so that an empty key does not raise
                sys.stderr.write(
                    "[rephrase] oops, funny key at %s in entry: %s, data=%s\n" % (name, entry, ''))
                return True
            if '-' in key or '.' in key:
                # bad key name!  rename it, changing "-" to "_" and "." to "__"
                newkey = key.replace('-', '_').replace('.', '__')
                sys.stderr.write(
                    "[rephrase] oops, bad keyname at %s in entry: %s newkey+%s\n" % (name, entry, newkey))
                entry[newkey] = val
                entry.pop(key)
                key = newkey
            if isinstance(val, dict):
                ret = check_for_funny_keys(val, name + '/' + key)
                if ret is True:
                    sys.stderr.write(
                        " coercing section %s to become a string\n" % (name + "/" + key))
                    entry[key] = json.dumps(val)
        return False

    check_for_funny_keys(data)

    if do_schema_check:
        try:
            check_schema(linecnt, data, coerce=True)
        except Exception as err:
            # best effort: report the failing record and carry on
            sys.stderr.write('[%d] oops, err=%s, failed in check_schema %s\n' %
                             (linecnt, str(err), json.dumps(data, indent=4)))
            sys.stderr.write(traceback.format_exc())
def do_rephrase(data, do_schema_check=True, linecnt=0):
    """
    Rephrase a tracking log record, in place, for loading to BigQuery.

    The record's "event" is serialised to a JSON string, a parsed copy is
    kept in "event_struct" for selected event types, fields that are not in
    the schema are folded into "context.agent" / "mongoid" as JSON strings,
    and key names containing "-" or "." are renamed.

    :param data: tracking log record (dict); modified in place
    :param do_schema_check: when true (the default) the rephrased record is
        passed to check_schema with coerce=True; when false that step is skipped
    :param linecnt: line count value; passed to check_schema and included in
        the message written when the schema check fails
    :return: None
    """
    try:
        string_types = (str, unicode)  # Python 2: byte strings and unicode strings
    except NameError:
        string_types = (str,)  # interpreter without a separate unicode type

    # add course_id?
    if 'course_id' not in data:
        cid = data.get('context', {}).get('course_id', '')
        if cid:
            data['course_id'] = cid

    # add module_id?
    if 'module_id' not in data:
        add_module_id(data)

    # ensure event is dict when possible
    if 'event_js' not in data:
        # a record without "event" is treated as having an empty-string event
        # (previously the unbound local raised here)
        event = data.get('event', '')
        try:
            if not isinstance(event, dict):
                event = json.loads(event)
            event_js = True
        except Exception:
            # keep the raw value when it cannot be parsed; it is JSONified below
            event_js = False
        data['event'] = event
        data['event_js'] = event_js

    # ----------------------------------------
    # "event" is used for too many kinds of data, with colliding types for fields.
    # thus, in general, we can't store it as a fixed schema field.
    #
    # so we turn "event" into a JSON string.
    #
    # for specific kinds of events ("event_type"), we keep a parsed version of
    # "event", e.g. for problem_* event types.
    #
    # store that parsed version as "event_struct"
    event = None
    if 'event' in data:
        event = data['event']
        data['event'] = json.dumps(event)

    # now the real rephrasing

    # a missing or null event_type becomes '' so the substring tests below
    # cannot raise TypeError
    event_type = data.get('event_type') or ''

    # ----------------------------------------
    # types to keep
    KNOWN_TYPES = (
        'play_video', 'seq_goto', 'seq_next', 'seq_prev',
        'seek_video', 'load_video',
        'save_problem_success',
        'save_problem_fail',
        'reset_problem_success',
        'reset_problem_fail',
        'show_answer',
        'edx.course.enrollment.activated',
        'edx.course.enrollment.deactivated',
        'edx.course.enrollment.mode_changed',
        'edx.course.enrollment.upgrade.succeeded',
        'speed_change_video',
        'problem_check',
        'problem_save',
        'problem_reset',
    )

    if isinstance(event, dict):
        excluded = ('video_embedded', 'harvardx.button', 'harvardx.')
        keep_parsed = (
            ('problem_' in event_type or event_type in KNOWN_TYPES)
            and not any(k in event_type for k in excluded)
        )
        if keep_parsed:
            data['event_struct'] = event
        else:
            # default to always including GET and POST when available
            data['event_struct'] = {
                'GET': json.dumps(event.get('GET')),
                'POST': json.dumps(event.get('POST')),
            }
            data['event_struct']['query'] = event.get('query')
    elif 'event_struct' in data:
        data.pop('event_struct')

    # ----------------------------------------
    # special cases

    if '_id' in data:
        data['mongoid'] = data['_id']['$oid']
        data.pop('_id')

    if isinstance(event, dict):
        for http_key in ('POST', 'GET'):
            if http_key in event:
                event[http_key] = json.dumps(event[http_key])

    # String and list events are wrapped as {'data': <json>}.  This also covers
    # the former problem_check / problem_save / problem_reset browser special
    # case, which performed the identical wrap but required "event_source".
    if isinstance(event, string_types) or isinstance(event, list):
        event = {'data': json.dumps(event)}

    # Only dict events have sub-fields to rewrite; scalar JSON events
    # (numbers, booleans, null) are left alone.
    event_dict = event if isinstance(event, dict) else {}

    state = event_dict.get('state')
    if isinstance(state, dict):
        for key in ('input_state', 'correct_map', 'student_answers'):
            if key in state:
                state[key] = json.dumps(state[key])

    for key in ('correct_map', 'answers', 'submission', 'old_state',
                'new_state', 'permutation', 'options_selected', 'corrections'):
        if key in event_dict:
            event_dict[key] = json.dumps(event_dict[key])

    # when the record has no "context" this is a scratch dict and the
    # changes made to it below are discarded
    context = data.get('context', {})
    if 'course_user_tags' in context:
        context['course_user_tags'] = json.dumps(context['course_user_tags'])

    # needed to handle new fields from mobile client
    # 31-Jan-15: handle new "module.usage_key" field in context, e.g.:
    #
    #    "module": {
    #        "display_name": "Radiation Exposure",
    #        "usage_key": "i4x://MITx/6.00.1x_5/problem/ps03:ps03-Radiation-Exposure"
    #    },
    # 28-May-16: context.asides
    mobile_api_context_fields = [
        'application',
        'client',
        'received_at',
        'component',
        "open_in_browser_url",
        "module.usage_key",
        "module.original_usage_version",
        "module.original_usage_key",
        "asides",
    ]
    agent = {'oldagent': context.get('agent', "")}
    for key in mobile_api_context_fields:
        if '.' in key:
            (prefix, subkey) = key.split('.', 1)
            if prefix in context:
                subcontext = context[prefix]
                if subkey in subcontext:
                    agent[key] = subcontext[subkey]
                    subcontext.pop(subkey)
        elif key in context:
            agent[key] = context[key]
            context.pop(key)
    context['agent'] = json.dumps(agent)

    # ----------------------------------------
    # new fields which are not in schema get moved as JSON strings to the pre-existing "mongoid" field,
    # which is unused except in very old records
    # do this, for example, for the "referer" and "accept_language" fields

    def move_fields_to_mongoid(field_paths):
        '''
        field_path is a list of lists which gives the recursive dict keys to traverse to get to the field to move.
        Move that field, with the path intact, into the mongoid field.
        '''
        mongoid = data.get('mongoid')
        if not isinstance(mongoid, dict):
            mongoid = {'old_mongoid': mongoid}

        def move_field_value(ddict, vdict, fp):
            '''recursively traverse dict to get and move value for specified field path'''
            if not isinstance(ddict, dict):
                # the path runs through a non-dict value: nothing to move
                return None
            key = fp[0]
            if len(fp) == 1:  # base case
                if key in ddict:
                    fval = ddict.pop(key)  # remove key from current path within data dict
                    vdict[key] = fval  # copy to new values dict
                    return fval
                return None
            if key not in vdict:
                vdict[key] = {}
            return move_field_value(ddict.get(key, {}), vdict[key], fp[1:])

        for field_path in field_paths:
            move_field_value(data, mongoid, field_path)
        data['mongoid'] = json.dumps(mongoid)

    # 16-Mar-15: remove event_struct.requested_skip_interval
    move_fields_to_mongoid([
        ['referer'],
        ['accept_language'],
        ['event_struct', 'requested_skip_interval'],
        ['event_struct', 'submitted_answer'],
        ['event_struct', 'num_attempts'],
        ['event_struct', 'task_id'],  # 05oct15
        ['event_struct', 'content'],  # 11jan16
        ['nonInteraction'],  # 24aug15
        ['label'],  # 24aug15
        ['event_struct', 'widget_placement'],  # 08may16
        ['event_struct', 'tab_count'],  # 08may16
        ['event_struct', 'current_tab'],  # 08may16
        ['event_struct', 'target_tab'],  # 08may16
    ])

    # ----------------------------------------
    # general checks

    if 'child-id' in event_dict:
        event_dict['child_id'] = event_dict.pop('child-id')

    def check_empty(node, *keys):
        '''remove the value at the given key path when it is an empty string'''
        key = keys[0]
        if isinstance(node, dict) and key in node:
            if len(keys) == 1:
                if node[key] in ["", u'']:
                    node.pop(key)
            else:
                check_empty(node[key], *keys[1:])

    check_empty(data, 'context', "user_id")

    data.pop('event_js')  # leftover from mongo import script

    # -----------------------------------------
    # check for null values in speed_change_video
    # Error encountered parsing LAC data from Oct. 2013
    # Requires that we also be able to convert the value to a float
    def string_is_float(s):
        try:
            float(s)
        except (TypeError, ValueError):
            # TypeError: value is not a string/number at all (e.g. None)
            return False
        return True

    # .get: a record without "event_type" simply skips this check
    if data.get('event_type') == 'speed_change_video':
        if 'event_struct' in data and 'new_speed' in data['event_struct']:
            new_speed = data['event_struct']['new_speed']
            # First check if string is float, second check if value is NaN
            if string_is_float(new_speed) and isnan(float(new_speed)):
                data['event_struct'].pop('new_speed')

    # check for any funny keys, recursively
    def check_for_funny_keys(entry, name='toplevel'):
        '''
        Return True when entry holds a key that cannot be repaired (the caller
        then stores that section as a JSON string); rename keys containing
        "-" or "." in place.
        '''
        # Iterate over a snapshot: keys are renamed (insert + pop) inside the
        # loop, which is not safe while iterating the live dict.
        for key, val in list(entry.items()):
            if key.startswith(('i4x-', 'xblock.')):
                sys.stderr.write("[rephrase] oops, funny key at %s in entry: %s, data=%s\n" % (name, entry, ''))
                return True
            if len(key) > 25:
                sys.stderr.write("[rephrase] suspicious key at %s in entry: %s, data=%s\n" % (name, entry, ''))
            if key and key[0] in '0123456789':  # "key and": an empty key must not raise
                sys.stderr.write("[rephrase] oops, funny key at %s in entry: %s, data=%s\n" % (name, entry, ''))
                return True
            if '-' in key or '.' in key:
                # bad key name!  rename it, changing "-" to "_" and "." to "__"
                newkey = key.replace('-', '_').replace('.', '__')
                sys.stderr.write("[rephrase] oops, bad keyname at %s in entry: %s newkey+%s\n" % (name, entry, newkey))
                entry[newkey] = val
                entry.pop(key)
                key = newkey
            if isinstance(val, dict):
                ret = check_for_funny_keys(val, name + '/' + key)
                if ret is True:
                    sys.stderr.write(" coercing section %s to become a string\n" % (name + "/" + key))
                    entry[key] = json.dumps(val)
        return False

    check_for_funny_keys(data)

    if do_schema_check:
        try:
            check_schema(linecnt, data, coerce=True)
        except Exception as err:
            # best effort: report the failing record and carry on
            sys.stderr.write('[%d] oops, err=%s, failed in check_schema %s\n' % (linecnt, str(err), json.dumps(data, indent=4)))
            sys.stderr.write(traceback.format_exc())
def do_rephrase(data, do_schema_check=True, linecnt=0):
    """
    Rephrase a tracking log record, in place, for loading to BigQuery.

    The record's "event" is serialised to a JSON string, a parsed copy is
    kept in "event_struct" for selected event types, fields that are not in
    the schema are folded into "context.agent" / "mongoid" as JSON strings,
    and key names containing "-" or "." are renamed.

    :param data: tracking log record (dict); modified in place
    :param do_schema_check: when true (the default) the rephrased record is
        passed to check_schema with coerce=True; when false that step is skipped
    :param linecnt: line count value; passed to check_schema and included in
        the message written when the schema check fails
    :return: None
    """
    try:
        string_types = (str, unicode)  # Python 2: byte strings and unicode strings
    except NameError:
        string_types = (str,)  # interpreter without a separate unicode type

    # add course_id?
    if "course_id" not in data:
        cid = data.get("context", {}).get("course_id", "")
        if cid:
            data["course_id"] = cid

    # add module_id?
    if "module_id" not in data:
        add_module_id(data)

    # ensure event is dict when possible
    if "event_js" not in data:
        # a record without "event" is treated as having an empty-string event
        # (previously the unbound local raised here)
        event = data.get("event", "")
        try:
            if not isinstance(event, dict):
                event = json.loads(event)
            event_js = True
        except Exception:
            # keep the raw value when it cannot be parsed; it is JSONified below
            event_js = False
        data["event"] = event
        data["event_js"] = event_js

    # ----------------------------------------
    # "event" is used for too many kinds of data, with colliding types for fields.
    # thus, in general, we can't store it as a fixed schema field.
    #
    # so we turn "event" into a JSON string.
    #
    # for specific kinds of events ("event_type"), we keep a parsed version of
    # "event", e.g. for problem_* event types.
    #
    # store that parsed version as "event_struct"
    event = None
    if "event" in data:
        event = data["event"]
        data["event"] = json.dumps(event)

    # now the real rephrasing

    # a missing or null event_type becomes "" so the substring tests below
    # cannot raise TypeError
    event_type = data.get("event_type") or ""

    # ----------------------------------------
    # types to keep
    KNOWN_TYPES = (
        "play_video",
        "seq_goto",
        "seq_next",
        "seq_prev",
        "seek_video",
        "load_video",
        "save_problem_success",
        "save_problem_fail",
        "reset_problem_success",
        "reset_problem_fail",
        "show_answer",
        "edx.course.enrollment.activated",
        "edx.course.enrollment.deactivated",
        "edx.course.enrollment.mode_changed",
        "edx.course.enrollment.upgrade.succeeded",
        "speed_change_video",
        "problem_check",
        "problem_save",
        "problem_reset",
    )

    if isinstance(event, dict):
        excluded = ("video_embedded", "harvardx.button", "harvardx.")
        keep_parsed = (
            ("problem_" in event_type or event_type in KNOWN_TYPES)
            and not any(k in event_type for k in excluded)
        )
        if keep_parsed:
            data["event_struct"] = event
        else:
            # default to always including GET and POST when available
            data["event_struct"] = {
                "GET": json.dumps(event.get("GET")),
                "POST": json.dumps(event.get("POST")),
            }
            data["event_struct"]["query"] = event.get("query")
    elif "event_struct" in data:
        data.pop("event_struct")

    # ----------------------------------------
    # special cases

    if "_id" in data:
        data["mongoid"] = data["_id"]["$oid"]
        data.pop("_id")

    if isinstance(event, dict):
        for http_key in ("POST", "GET"):
            if http_key in event:
                event[http_key] = json.dumps(event[http_key])

    # String and list events are wrapped as {"data": <json>}.  This also covers
    # the former problem_check / problem_save / problem_reset browser special
    # case, which performed the identical wrap but required "event_source".
    if isinstance(event, string_types) or isinstance(event, list):
        event = {"data": json.dumps(event)}

    # Only dict events have sub-fields to rewrite; scalar JSON events
    # (numbers, booleans, null) are left alone.
    event_dict = event if isinstance(event, dict) else {}

    state = event_dict.get("state")
    if isinstance(state, dict):
        for key in ("input_state", "correct_map", "student_answers"):
            if key in state:
                state[key] = json.dumps(state[key])

    for key in (
        "correct_map",
        "answers",
        "submission",
        "old_state",
        "new_state",
        "permutation",
        "options_selected",
        "corrections",
    ):
        if key in event_dict:
            event_dict[key] = json.dumps(event_dict[key])

    # when the record has no "context" this is a scratch dict and the
    # changes made to it below are discarded
    context = data.get("context", {})
    if "course_user_tags" in context:
        context["course_user_tags"] = json.dumps(context["course_user_tags"])

    # needed to handle new fields from mobile client
    # 31-Jan-15: handle new "module.usage_key" field in context, e.g.:
    #
    #    "module": {
    #        "display_name": "Radiation Exposure",
    #        "usage_key": "i4x://MITx/6.00.1x_5/problem/ps03:ps03-Radiation-Exposure"
    #    },
    mobile_api_context_fields = [
        "application",
        "client",
        "received_at",
        "component",
        "open_in_browser_url",
        "module.usage_key",
        "module.original_usage_version",
        "module.original_usage_key",
    ]
    agent = {"oldagent": context.get("agent", "")}
    for key in mobile_api_context_fields:
        if "." in key:
            (prefix, subkey) = key.split(".", 1)
            if prefix in context:
                subcontext = context[prefix]
                if subkey in subcontext:
                    agent[key] = subcontext[subkey]
                    subcontext.pop(subkey)
        elif key in context:
            agent[key] = context[key]
            context.pop(key)
    context["agent"] = json.dumps(agent)

    # ----------------------------------------
    # new fields which are not in schema get moved as JSON strings to the pre-existing "mongoid" field,
    # which is unused except in very old records
    # do this, for example, for the "referer" and "accept_language" fields

    def move_fields_to_mongoid(field_paths):
        """
        field_path is a list of lists which gives the recursive dict keys to traverse to get to the field to move.
        Move that field, with the path intact, into the mongoid field.
        """
        mongoid = data.get("mongoid")
        if not isinstance(mongoid, dict):
            mongoid = {"old_mongoid": mongoid}

        def move_field_value(ddict, vdict, fp):
            """recursively traverse dict to get and move value for specified field path"""
            if not isinstance(ddict, dict):
                # the path runs through a non-dict value: nothing to move
                return None
            key = fp[0]
            if len(fp) == 1:  # base case
                if key in ddict:
                    fval = ddict.pop(key)  # remove key from current path within data dict
                    vdict[key] = fval  # copy to new values dict
                    return fval
                return None
            if key not in vdict:
                vdict[key] = {}
            return move_field_value(ddict.get(key, {}), vdict[key], fp[1:])

        for field_path in field_paths:
            move_field_value(data, mongoid, field_path)
        data["mongoid"] = json.dumps(mongoid)

    # 16-Mar-15: remove event_struct.requested_skip_interval
    move_fields_to_mongoid(
        [
            ["referer"],
            ["accept_language"],
            ["event_struct", "requested_skip_interval"],
            ["event_struct", "submitted_answer"],
            ["event_struct", "num_attempts"],
            ["event_struct", "task_id"],  # 05oct15
            ["nonInteraction"],  # 24aug15
            ["label"],  # 24aug15
        ]
    )

    # ----------------------------------------
    # general checks

    if "child-id" in event_dict:
        event_dict["child_id"] = event_dict.pop("child-id")

    def check_empty(node, *keys):
        """remove the value at the given key path when it is an empty string"""
        key = keys[0]
        if isinstance(node, dict) and key in node:
            if len(keys) == 1:
                if node[key] in ["", u""]:
                    node.pop(key)
            else:
                check_empty(node[key], *keys[1:])

    check_empty(data, "context", "user_id")

    data.pop("event_js")  # leftover from mongo import script

    # -----------------------------------------
    # check for null values in speed_change_video
    # Error encountered parsing LAC data from Oct. 2013
    # Requires that we also be able to convert the value to a float
    def string_is_float(s):
        try:
            float(s)
        except (TypeError, ValueError):
            # TypeError: value is not a string/number at all (e.g. None)
            return False
        return True

    # .get: a record without "event_type" simply skips this check
    if data.get("event_type") == "speed_change_video":
        if "event_struct" in data and "new_speed" in data["event_struct"]:
            new_speed = data["event_struct"]["new_speed"]
            # First check if string is float, second check if value is NaN
            if string_is_float(new_speed) and isnan(float(new_speed)):
                data["event_struct"].pop("new_speed")

    # check for any funny keys, recursively
    def check_for_funny_keys(entry, name="toplevel"):
        """
        Return True when entry holds a key that cannot be repaired (the caller
        then stores that section as a JSON string); rename keys containing
        "-" or "." in place.
        """
        # Iterate over a snapshot: keys are renamed (insert + pop) inside the
        # loop, which is not safe while iterating the live dict.
        for key, val in list(entry.items()):
            if key.startswith(("i4x-", "xblock.")):
                sys.stderr.write("[rephrase] oops, funny key at %s in entry: %s, data=%s\n" % (name, entry, ""))
                return True
            if len(key) > 25:
                sys.stderr.write("[rephrase] suspicious key at %s in entry: %s, data=%s\n" % (name, entry, ""))
            if key and key[0] in "0123456789":  # "key and": an empty key must not raise
                sys.stderr.write("[rephrase] oops, funny key at %s in entry: %s, data=%s\n" % (name, entry, ""))
                return True
            if "-" in key or "." in key:
                # bad key name!  rename it, changing "-" to "_" and "." to "__"
                newkey = key.replace("-", "_").replace(".", "__")
                sys.stderr.write("[rephrase] oops, bad keyname at %s in entry: %s newkey+%s\n" % (name, entry, newkey))
                entry[newkey] = val
                entry.pop(key)
                key = newkey
            if isinstance(val, dict):
                ret = check_for_funny_keys(val, name + "/" + key)
                if ret is True:
                    sys.stderr.write(" coercing section %s to become a string\n" % (name + "/" + key))
                    entry[key] = json.dumps(val)
        return False

    check_for_funny_keys(data)

    if do_schema_check:
        try:
            check_schema(linecnt, data, coerce=True)
        except Exception as err:
            # best effort: report the failing record and carry on
            sys.stderr.write(
                "[%d] oops, err=%s, failed in check_schema %s\n" % (linecnt, str(err), json.dumps(data, indent=4))
            )
            sys.stderr.write(traceback.format_exc())