import datetime
import json
import os

# Note: get_modified_pks, get_created_deleted_pks, filter_modified_keys_session,
# get_important_pks, get_timezone and the `job` schema are assumed to be defined
# or imported elsewhere in this module.


def compare_json_dumps(previous_dump='/data/alyxfull.json',
                       latest_dump='/data/alyxfull.json.last',
                       create_files=True, insert_to_table=True,
                       filter_pks_for_unused_models=True,
                       filter_pks_for_unused_session_fields=True):
    """Compare two json dumps from alyx and create files with the added, deleted and modified keys.

    Args:
        previous_dump (json filepath, optional): filepath of the alyx json dump from the last ingestion.
            Defaults to '/data/alyxfull.json'.
        latest_dump (json filepath, optional): filepath of the alyx json dump for the current ingestion.
            Defaults to '/data/alyxfull.json.last'.
        create_files (bool, optional): whether to create files saving the created, deleted and modified keys.
            Defaults to True.
        insert_to_table (bool, optional): whether to insert the result into the DataJoint job table.
            Defaults to True.
        filter_pks_for_unused_models (bool, optional): filter modified pks down to models of interest.
            Defaults to True.
        filter_pks_for_unused_session_fields (bool, optional): only keep modified keys when there is a
            change in fields of interest. Defaults to True.
    """
    print("Loading first JSON dump...")
    with open(previous_dump, 'r') as f:
        data0 = json.load(f)
    print("Loading second JSON dump...")
    with open(latest_dump, 'r') as f:
        data1 = json.load(f)
    print("Finished loading JSON dumps.")

    print("Computing differences...")
    modified_pks = get_modified_pks(data0, data1)
    print("Finished creating modified keys.")

    print("Computing created and deleted keys...")
    created_pks, deleted_pks = get_created_deleted_pks(data0, data1)
    print("Finished creating created_pks and deleted_pks.")

    if filter_pks_for_unused_session_fields:
        print('Filtering out modified sessions that do not have a change in fields of interest...')
        modified_pks = filter_modified_keys_session(data0, data1, modified_pks)

    if filter_pks_for_unused_models:
        print('Removing modified entries in tables data.filerecord and jobs.task...')
        modified_pks_important = get_important_pks(modified_pks)

    # figure out job date and timezone
    latest_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(latest_dump))
    d = latest_modified_time.date()
    t = latest_modified_time.time()
    previous_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(previous_dump))
    timezone = get_timezone(t)

    if create_files:
        suffix = f'_{latest_modified_time.strftime("%Y-%m-%d")}_{timezone}'
        root_dir = '/data/daily_increments/'
        print(f"New objects: {len(created_pks)}")
        with open(f"{root_dir}created_pks_{suffix}.json", "w") as f:
            json.dump(created_pks, f)
        print(f"Deleted objects: {len(deleted_pks)}")
        with open(f"{root_dir}deleted_pks_{suffix}.json", "w") as f:
            json.dump(deleted_pks, f)
        print(f"Modified objects: {len(modified_pks)}")
        with open(f"{root_dir}modified_pks_{suffix}.json", "w") as f:
            json.dump(modified_pks, f)
        # only report and save the filtered keys if they were actually computed,
        # otherwise modified_pks_important is undefined here
        if filter_pks_for_unused_models:
            print(f"Important modified objects: {len(modified_pks_important)}")
            with open(f"{root_dir}modified_pks_important{suffix}.json", "w") as f:
                json.dump(modified_pks_important, f)

    if insert_to_table:
        entry = dict(
            job_date=d,
            job_timezone=timezone,
            alyx_current_timestamp=latest_modified_time,
            alyx_previous_timestamp=previous_modified_time,
            created_pks=created_pks,
            modified_pks=modified_pks,
            deleted_pks=deleted_pks,
            session_prefiltered=filter_pks_for_unused_session_fields)
        if not filter_pks_for_unused_models:
            modified_pks_important = None
        job.Job.insert1(
            dict(**entry, modified_pks_important=modified_pks_important),
            skip_duplicates=True)
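# Usage sketch (an illustration, not part of the original pipeline code): run the
# comparison on two explicit dump files without writing increment files or
# inserting into the DataJoint job table. The paths below are hypothetical
# placeholders, not the production defaults.
compare_json_dumps(
    previous_dump='/data/alyxfull_previous.json',  # hypothetical path
    latest_dump='/data/alyxfull_latest.json',      # hypothetical path
    create_files=False,
    insert_to_table=False)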
from ibl_pipeline.process import autoprocess, get_timezone, process_histology, process_qc, populate_wheel

autoprocess.process_new(timezone=get_timezone())
process_histology.main()
process_qc.main()
populate_wheel.main()
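# A minimal sketch of a combined daily routine, assuming the dump comparison runs
# before the processing chain (an assumption based on the default dump paths above,
# not a documented entry point of ibl_pipeline):
def daily_ingest():
    # record created / deleted / modified pks for the default pair of dumps
    compare_json_dumps()
    # ingest new entries and run the downstream processing steps
    autoprocess.process_new(timezone=get_timezone())
    process_histology.main()
    process_qc.main()
    populate_wheel.main()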