Example #1
import datetime
import json
import os

# Helpers such as TABLES_TO_UPDATE, get_important_pks, update_fields,
# get_modified_pks, get_created_deleted_pks, filter_modified_keys_session,
# get_timezone, and the `job` schema are assumed to be defined elsewhere
# in this module.


def update_entries_from_real_tables(modified_pks):
    """Update table entries for the records whose uuids appear in modified_pks."""
    for table in TABLES_TO_UPDATE:
        print('Updating {}...'.format(table['table_name']))
        t = table.copy()
        # Resolve the table class on the real schema under a new name so the
        # loop variable `table` is not shadowed.
        real_table = getattr(t['real_schema'], t['table_name'])

        # Subject is keyed on subject_uuid; every other table is expected to
        # have exactly one non-subject uuid among its secondary attributes.
        if t['table_name'] == 'Subject':
            uuid_field = 'subject_uuid'
        else:
            uuid_field = [
                f for f in real_table.heading.secondary_attributes
                if '_uuid' in f and 'subject' not in f
            ][0]

        # Drop keys belonging to models the pipeline does not ingest.
        pks_important = get_important_pks(modified_pks)

        # Restrict the real table to the entries whose uuids were modified.
        query = real_table & [{uuid_field: pk} for pk in pks_important]

        if query:
            members = t.pop('members')
            # Update the parent table first...
            update_fields(**t, pks=query.fetch('KEY'), insert_to_table=True)

            # ...then update any member (part) tables with matching entries.
            if members:
                for m in members:
                    sub_t = getattr(t['real_schema'], m)
                    if sub_t & query:
                        update_fields(t['real_schema'],
                                      t['shadow_schema'],
                                      m, (sub_t & query).fetch('KEY'),
                                      insert_to_table=True)
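
For reference, a minimal sketch of the shape each entry of TABLES_TO_UPDATE is assumed to take, inferred from how update_entries_from_real_tables reads it; the schema objects here are illustrative placeholders, not the pipeline's actual configuration:

    from types import SimpleNamespace

    # Illustrative placeholders only: in the pipeline these would be the
    # DataJoint schema modules holding the real and shadow tables.
    real_schema = SimpleNamespace()
    shadow_schema = SimpleNamespace()

    TABLES_TO_UPDATE = [
        dict(real_schema=real_schema,      # schema with the live tables
             shadow_schema=shadow_schema,  # schema with the shadow copies
             table_name='Subject',         # name of the DataJoint table class
             members=[]),                  # part tables updated alongside it
    ]
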
def compare_json_dumps(previous_dump='/data/alyxfull.json',
                       latest_dump='/data/alyxfull.json.last',
                       create_files=True,
                       insert_to_table=True,
                       filter_pks_for_unused_models=True,
                       filter_pks_for_unused_session_fields=True):
    """Compare two json dumps from alyx and created files with the added, deleted, modified fields.

    Args:
        previous_dump (json filepath, optional): filepath of alyx json dump of the last ingestion Defaults to /data/alyxfull.json.
        latest_dump (json filepath, optional): filepath of alyx json dump of the current ingestion. Defaults to '/data/alyxfull.json.last'
        create_files (bool, optional): whether to create files saving the created, deleted, modified keys. Defaults to True.
        insert_to_table (bool, optional): whether to insert the result to DataJoint job table. Defaults to True.
        filter_pks_for_unused_models (bool, optional): filter modified pks in models of interest. Defaults to True.
        filter_pks_for_unused_session_fields (bool, optional): only keep the modified keys when there is a change in fields of interest. Defaults to True.

    """

    print("Loading first JSON dump...")
    with open(previous_dump, 'r') as f:
        data0 = json.load(f)
    print("Loading second JSON dump...")
    with open(latest_dump, 'r') as f:
        data1 = json.load(f)
    print("Finished loading JSON dumps.")

    print("Computing differences...")
    modified_pks = get_modified_pks(data0, data1)

    print("Finished creating modified keys.")
    print("Computing created and deleted_keys...")

    created_pks, deleted_pks = get_created_deleted_pks(data0, data1)

    print("Finished creating created_pks and deleted_pks.")

    if filter_pks_for_unused_session_fields:
        print(
            'Filtering out modified sessions that do not have a change in fields of interest...'
        )
        modified_pks = filter_modified_keys_session(data0, data1, modified_pks)

    if filter_pks_for_unused_models:
        print(
            'Removing modified entries in tables data.filerecord and jobs.task...')
        modified_pks_important = get_important_pks(modified_pks)

    # Figure out the job date and timezone from the dump files' modification times.
    latest_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(latest_dump))
    d = latest_modified_time.date()  # job date
    t = latest_modified_time.time()  # time of day, used to infer the timezone label
    previous_modified_time = datetime.datetime.fromtimestamp(
        os.path.getmtime(previous_dump))

    timezone = get_timezone(t)

    if create_files:
        # Filenames are suffixed with the dump date and timezone label; the
        # suffix already carries a leading underscore.
        suffix = f'_{latest_modified_time.strftime("%Y-%m-%d")}_{timezone}'
        root_dir = '/data/daily_increments/'
        print(f"New objects: {len(created_pks)}")
        with open(f"{root_dir}created_pks{suffix}.json", "w") as f:
            json.dump(created_pks, f)
        print(f"Deleted objects: {len(deleted_pks)}")
        with open(f"{root_dir}deleted_pks{suffix}.json", "w") as f:
            json.dump(deleted_pks, f)
        print(f"Modified objects: {len(modified_pks)}")
        with open(f"{root_dir}modified_pks{suffix}.json", "w") as f:
            json.dump(modified_pks, f)
        print(f"Important modified objects: {len(modified_pks_important)}")

        if filter_pks_for_unused_models:
            with open(f"{root_dir}modified_pks_important{suffix}.json",
                      "w") as f:
                json.dump(modified_pks_important, f)

    if insert_to_table:
        entry = dict(job_date=d,
                     job_timezone=timezone,
                     alyx_current_timestamp=latest_modified_time,
                     alyx_previous_timestamp=previous_modified_time,
                     created_pks=created_pks,
                     modified_pks=modified_pks,
                     deleted_pks=deleted_pks,
                     session_prefiltered=filter_pks_for_unused_session_fields)
        if not filter_pks_for_unused_models:
            # No filtering was done, so there is no "important" subset to record.
            modified_pks_important = None

        job.Job.insert1(dict(**entry,
                             modified_pks_important=modified_pks_important),
                        skip_duplicates=True)
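
A minimal usage sketch: run the comparison as a dry run, computing the diff without writing increment files or inserting into the job table. The paths are simply the function's defaults made explicit:

    # Dry run: compute created/deleted/modified keys only; no files, no insert.
    compare_json_dumps(previous_dump='/data/alyxfull.json',
                       latest_dump='/data/alyxfull.json.last',
                       create_files=False,
                       insert_to_table=False)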