Code example #1
def compare_json_dumps(previous_dump='/data/alyxfull.json',
                       latest_dump='/data/alyxfull.json.last',
                       create_files=True,
                       insert_to_table=True,
                       filter_pks_for_unused_models=True,
                       filter_pks_for_unused_session_fields=True):
    """Compare two alyx json dumps and record the added, deleted, and modified keys.

    Optionally writes the key sets to files under /data/daily_increments/ and
    inserts a summary entry into the DataJoint job table.

    Args:
        previous_dump (str, optional): filepath of the alyx json dump from the
            last ingestion. Defaults to '/data/alyxfull.json'.
        latest_dump (str, optional): filepath of the alyx json dump for the
            current ingestion. Defaults to '/data/alyxfull.json.last'.
        create_files (bool, optional): whether to create files saving the
            created, deleted, and modified keys. Defaults to True.
        insert_to_table (bool, optional): whether to insert the result into the
            DataJoint job table. Defaults to True.
        filter_pks_for_unused_models (bool, optional): filter modified pks down
            to models of interest. Defaults to True.
        filter_pks_for_unused_session_fields (bool, optional): only keep the
            modified keys when there is a change in fields of interest.
            Defaults to True.
    """

    print("Loading first JSON dump...")
    with open(previous_dump, 'r') as f:
        data0 = json.load(f)
    print("Loading second JSON dump...")
    with open(latest_dump, 'r') as f:
        data1 = json.load(f)
    print("Finished loading JSON dumps.")

    print("Computing differences...")
    modified_pks = get_modified_pks(data0, data1)

    print("Finished creating modified keys.")
    print("Computing created and deleted_keys...")

    created_pks, deleted_pks = get_created_deleted_pks(data0, data1)

    print("Finished creating created_pks and deleted_pks.")

    if filter_pks_for_unused_session_fields:
        print(
            'Filtering modified sessions that does not have a change in fields of interest...'
        )
        modified_pks = filter_modified_keys_session(data0, data1, modified_pks)

    # BUG FIX: modified_pks_important was previously only assigned inside the
    # filter branch, so create_files=True with filter_pks_for_unused_models=False
    # raised NameError at the "Important modified objects" print. Initialize it
    # unconditionally; None is also the value inserted into the job table when
    # filtering is disabled.
    modified_pks_important = None
    if filter_pks_for_unused_models:
        print(
            'Remove modified entries in tables data.filerecord and jobs.task')
        modified_pks_important = get_important_pks(modified_pks)

    # Job date and timezone are derived from the dump files' modification times.
    latest_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(latest_dump))
    d = latest_modified_time.date()
    t = latest_modified_time.time()
    previous_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(previous_dump))

    timezone = get_timezone(t)

    if create_files:
        # suffix already carries a leading underscore, e.g. "_2021-01-01_EST".
        suffix = f'_{latest_modified_time.strftime("%Y-%m-%d")}_{timezone}'
        root_dir = '/data/daily_increments/'
        print(f"New objects: {len(created_pks)}")
        with open(f"{root_dir}created_pks_{suffix}.json", "w") as f:
            json.dump(created_pks, f)
        print(f"Deleted objects: {len(deleted_pks)}")
        with open(f"{root_dir}deleted_pks_{suffix}.json", "w") as f:
            json.dump(deleted_pks, f)
        print(f"Modified objects: {len(modified_pks)}")
        with open(f"{root_dir}modified_pks_{suffix}.json", "w") as f:
            json.dump(modified_pks, f)

        if filter_pks_for_unused_models:
            # Print moved inside the guard: modified_pks_important is None
            # (not a list) when filtering is disabled.
            print(f"Important modified objects: {len(modified_pks_important)}")
            with open(f"{root_dir}modified_pks_important{suffix}.json",
                      "w") as f:
                json.dump(modified_pks_important, f)

    if insert_to_table:
        entry = dict(job_date=d,
                     job_timezone=timezone,
                     alyx_current_timestamp=latest_modified_time,
                     alyx_previous_timestamp=previous_modified_time,
                     created_pks=created_pks,
                     modified_pks=modified_pks,
                     deleted_pks=deleted_pks,
                     session_prefiltered=filter_pks_for_unused_session_fields)
        job.Job.insert1(dict(**entry,
                             modified_pks_important=modified_pks_important),
                        skip_duplicates=True)
Code example #2
"""Daily ingestion entry point: run the standard alyx processing steps in order."""
# One import per line; order matches the original single-line import so any
# module-level side effects fire in the same sequence.
from ibl_pipeline.process import autoprocess
from ibl_pipeline.process import get_timezone
from ibl_pipeline.process import process_histology
from ibl_pipeline.process import process_qc
from ibl_pipeline.process import populate_wheel

# Ingest newly arrived alyx data, stamped with the current timezone label,
# then run the downstream steps: histology, quality control, wheel data.
autoprocess.process_new(timezone=get_timezone())
process_histology.main()
process_qc.main()
populate_wheel.main()