def update_entries_from_real_tables(modified_pks):
    """Propagate modified alyx records into the tables listed in TABLES_TO_UPDATE.

    For each table spec, restrict the real-schema table to entries whose uuid
    field matches one of the (importance-filtered) modified primary keys, then
    push the updated values through ``update_fields`` — first for the table
    itself, then for each of its member (part) tables.

    Args:
        modified_pks (list): uuids of records reported as modified by the
            alyx dump comparison.
    """
    # Hoisted out of the loop: the filtering does not depend on the table.
    # NOTE(review): assumes get_important_pks is pure/deterministic — confirm.
    pks_important = get_important_pks(modified_pks)

    for table_spec in TABLES_TO_UPDATE:
        print('Updating {}...'.format(table_spec['table_name']))
        t = table_spec.copy()
        # Resolve the DataJoint table class on the real schema.
        real_table = getattr(t['real_schema'], t['table_name'])

        if t['table_name'] == 'Subject':
            uuid_field = 'subject_uuid'
        else:
            # First secondary attribute that is a uuid but not the subject's.
            # Raises IndexError if the table has no such field (unchanged
            # behavior from the original).
            uuid_field = [
                f for f in real_table.heading.secondary_attributes
                if '_uuid' in f and 'subject' not in f][0]

        query = real_table & [{uuid_field: pk} for pk in pks_important]

        if query:
            members = t.pop('members')
            update_fields(**t, pks=query.fetch('KEY'), insert_to_table=True)

            if members:
                for m in members:
                    sub_t = getattr(t['real_schema'], m)
                    if sub_t & query:
                        update_fields(
                            t['real_schema'], t['shadow_schema'], m,
                            (sub_t & query).fetch('KEY'),
                            insert_to_table=True)
def compare_json_dumps(previous_dump='/data/alyxfull.json',
                       latest_dump='/data/alyxfull.json.last',
                       create_files=True, insert_to_table=True,
                       filter_pks_for_unused_models=True,
                       filter_pks_for_unused_session_fields=True):
    """Compare two json dumps from alyx and created files with the added,
    deleted, modified fields.

    Args:
        previous_dump (json filepath, optional): filepath of alyx json dump of
            the last ingestion. Defaults to '/data/alyxfull.json'.
        latest_dump (json filepath, optional): filepath of alyx json dump of
            the current ingestion. Defaults to '/data/alyxfull.json.last'.
        create_files (bool, optional): whether to create files saving the
            created, deleted, modified keys. Defaults to True.
        insert_to_table (bool, optional): whether to insert the result to
            the DataJoint job table. Defaults to True.
        filter_pks_for_unused_models (bool, optional): filter modified pks in
            models of interest. Defaults to True.
        filter_pks_for_unused_session_fields (bool, optional): only keep the
            modified keys when there is a change in fields of interest.
            Defaults to True.
    """
    print("Loading first JSON dump...")
    with open(previous_dump, 'r') as f:
        data0 = json.load(f)
    print("Loading second JSON dump...")
    with open(latest_dump, 'r') as f:
        data1 = json.load(f)
    print("Finished loading JSON dumps.")

    print("Computing differences...")
    modified_pks = get_modified_pks(data0, data1)
    print("Finished creating modified keys.")

    print("Computing created and deleted_keys...")
    created_pks, deleted_pks = get_created_deleted_pks(data0, data1)
    print("Finished creating created_pks and deleted_pks.")

    if filter_pks_for_unused_session_fields:
        print(
            'Filtering modified sessions that does not have a change in fields of interest...'
        )
        modified_pks = filter_modified_keys_session(data0, data1, modified_pks)

    if filter_pks_for_unused_models:
        print(
            'Remove modified entries in tables data.filerecord and jobs.task')
        modified_pks_important = get_important_pks(modified_pks)
    else:
        # BUG FIX: the original left this name unbound when filtering was
        # disabled, so create_files=True raised NameError at the
        # "Important modified objects" print. Defining it here also lets the
        # final insert use it directly.
        modified_pks_important = None

    # Figure out job date and timezone from the dump files' mtimes.
    latest_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(latest_dump))
    d = latest_modified_time.date()
    t = latest_modified_time.time()
    previous_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(previous_dump))
    timezone = get_timezone(t)

    if create_files:
        # NOTE(review): suffix already starts with '_', so most filenames get
        # a double underscore (created_pks__...) while modified_pks_important
        # does not — kept as-is since downstream readers may rely on the
        # exact paths.
        suffix = f'_{latest_modified_time.strftime("%Y-%m-%d")}_{timezone}'
        root_dir = '/data/daily_increments/'
        print(f"New objects: {len(created_pks)}")
        with open(f"{root_dir}created_pks_{suffix}.json", "w") as f:
            json.dump(created_pks, f)
        print(f"Deleted objects: {len(deleted_pks)}")
        with open(f"{root_dir}deleted_pks_{suffix}.json", "w") as f:
            json.dump(deleted_pks, f)
        print(f"Modified objects: {len(modified_pks)}")
        with open(f"{root_dir}modified_pks_{suffix}.json", "w") as f:
            json.dump(modified_pks, f)
        if filter_pks_for_unused_models:
            # BUG FIX: this print was unconditional in the original and
            # crashed when filtering was off; it only makes sense here.
            print(f"Important modified objects: {len(modified_pks_important)}")
            with open(f"{root_dir}modified_pks_important{suffix}.json", "w") as f:
                json.dump(modified_pks_important, f)

    if insert_to_table:
        entry = dict(job_date=d, job_timezone=timezone,
                     alyx_current_timestamp=latest_modified_time,
                     alyx_previous_timestamp=previous_modified_time,
                     created_pks=created_pks,
                     modified_pks=modified_pks,
                     deleted_pks=deleted_pks,
                     session_prefiltered=filter_pks_for_unused_session_fields)
        # modified_pks_important is None when filtering was disabled (set above).
        job.Job.insert1(dict(**entry,
                             modified_pks_important=modified_pks_important),
                        skip_duplicates=True)