Example #1
def ingest_all():

    ib_brainregion = QueryBuffer(reference_ingest.BrainRegion)

    for key in tqdm(keys, position=0):
        fields = key['fields']
        graph_order = atlas[atlas['id'] == key['pk']]['graph_order'].to_list()[0]

        if np.isnan(graph_order):
            graph_order = None
        else:
            graph_order = int(graph_order)

        ib_brainregion.add_to_queue1(
            dict(brain_region_pk=key['pk'],
                 acronym=fields['acronym'],
                 brain_region_name=fields['name'],
                 parent=fields['parent'],
                 brain_region_level=fields['level'],
                 graph_order=graph_order))
        if ib_brainregion.flush_insert(skip_duplicates=True, chunksz=1000):
            print('Inserted 1000 raw tuples.')

    if ib_brainregion.flush_insert(skip_duplicates=True):
        print('Inserted all remaining raw field tuples')
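
All of the examples in this collection rely on the same chunked-write pattern: rows are staged with add_to_queue1() and only written once the queue reaches chunksz, followed by a final unconditional flush. The class below is a minimal sketch of that pattern for readers unfamiliar with the helper; it is not the real ibl_pipeline QueryBuffer, and it only assumes a DataJoint-style table object exposing insert(rows, ...).

class SimpleInsertBuffer:
    """Minimal illustrative insert buffer (a sketch, not the real QueryBuffer)."""

    def __init__(self, table):
        self._table = table   # assumed to be a DataJoint-style table with .insert()
        self._queue = []

    def add_to_queue1(self, entry):
        # stage one row (a dict) for a later bulk insert
        self._queue.append(entry)

    def flush_insert(self, chunksz=1, **insert_kwargs):
        # write the staged rows only once the queue has reached `chunksz`;
        # a final call with the default chunksz=1 pushes whatever is left.
        # Returns the number of rows written (falsy when nothing was flushed),
        # which is why the examples use it inside `if ...:` to log progress.
        if len(self._queue) < chunksz:
            return 0
        self._table.insert(self._queue, **insert_kwargs)
        flushed = len(self._queue)
        self._queue = []
        return flushed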
Example #2
def get_important_pks(pks, return_original_dict=False):
    '''
    Filter out modified keys that belong to ignored models such as data.filerecord and jobs.task
    :param pks: list of pks (uuid strings)
    :param return_original_dict: boolean, optional; if True, also return the pks as a list of dictionaries keyed by uuid
    :returns pks_important: list of filtered pks
    :returns pks_dict: list of dictionaries with uuid as the key, only returned if return_original_dict is True
    '''

    pks = [pk for pk in pks if is_valid_uuid(pk)]
    pks_dict = [{'uuid': pk} for pk in pks]

    models_ignored = '"data.dataset", "data.filerecord", "jobs.task", "actions.wateradministration", "experiments.trajectoryestimate", "experiments.channel"'

    if len(pks) < 1000:
        pks_unimportant = [
            str(pk['uuid'])
            for pk in (alyxraw.AlyxRaw &
                       f'model in ({models_ignored})' &
                       pks_dict).fetch('KEY')]
    else:
        buffer = QueryBuffer(
            alyxraw.AlyxRaw & f'model in ({models_ignored})')
        for pk in tqdm(pks_dict):
            buffer.add_to_queue1(pk)
            buffer.flush_fetch('KEY', chunksz=200)

        buffer.flush_fetch('KEY')
        pks_unimportant = [str(pk['uuid']) for pk in buffer.fetched_results]

    pks_important = list(set(pks) - set(pks_unimportant))

    if return_original_dict:
        return pks_important, pks_dict
    else:
        return pks_important
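
A short usage sketch for get_important_pks(); the UUID strings are made up, and alyxraw must already contain entries for the query against AlyxRaw to return anything meaningful.

# hypothetical primary keys; malformed values are dropped by is_valid_uuid()
candidate_pks = [
    'a5d8cfa8-32b2-4a8c-9a2b-0f1d2e3c4b5a',
    'not-a-valid-uuid',
]

pks_important = get_important_pks(candidate_pks)
pks_important, pks_dict = get_important_pks(candidate_pks, return_original_dict=True)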
Example #3
def insert_alyx_entries_model(alyx_model, backtrack_days=None):
    """Insert alyx entries for a particular alyx model

    Args:
        alyx_model (django.model object): alyx model
        backtrack_days (int, optional): if given, only ingest entries whose auto datetime falls within this many days before the Alyx download date.
    """
    if backtrack_days:
        # only ingest the latest data
        date_cut = datetime.datetime.strptime(
                os.getenv('ALYX_DL_DATE'), '%Y-%m-%d').date() - \
            datetime.timedelta(days=backtrack_days)
        if alyx_model in get_tables_with_auto_datetime():
            entries = alyx_model.objects.filter(
                auto_datetime__date__gte=date_cut)
        elif alyx_model == data.models.FileRecord:
            entries = alyx_model.objects.filter(
                dataset__auto_datetime__date__gte=date_cut, exists=True)
        else:
            entries = alyx_model.objects.all()
    elif alyx_model == data.models.FileRecord:
        entries = alyx_model.objects.filter(exists=True)
    else:
        entries = alyx_model.objects.all()

    # ingest into main table
    model_name = alyx_model._meta.db_table.replace('_', '.')
    pk_list = entries.values_list('id', flat=True)

    alyxraw.AlyxRaw.insert([dict(uuid=s, model=model_name) for s in pk_list],
                           skip_duplicates=True)

    # ingest into part table
    ib_part = QueryBuffer(alyxraw.AlyxRaw.Field)
    for r in tqdm(entries.values()):
        try:
            field_entry = dict(uuid=r['id'])
            for field_name, field_value in r.items():
                if field_name == 'id':
                    continue
                field_entry['fname'] = field_name
                if field_name == 'json' and field_value:
                    field_entry['value_idx'] = 0
                    field_entry['fvalue'] = json.dumps(field_value)
                    if len(field_entry['fvalue']) < 10000:
                        ib_part.add_to_queue1(field_entry)
                    else:
                        continue
                elif field_name == 'narrative' and field_value is not None:
                    # filter out emoji
                    emoji_pattern = re.compile(
                        "["
                        u"\U0001F600-\U0001F64F"  # emoticons
                        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                        u"\U0001F680-\U0001F6FF"  # transport & map symbols
                        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                        u"\U00002702-\U000027B0"
                        u"\U000024C2-\U0001F251"
                        "]+",
                        flags=re.UNICODE)

                    field_entry['value_idx'] = 0
                    field_entry['fvalue'] = emoji_pattern.sub(r'', field_value)
                elif (not field_value) or \
                        (isinstance(field_value, float) and math.isnan(field_value)):
                    field_entry['value_idx'] = 0
                    field_entry['fvalue'] = 'None'
                    ib_part.add_to_queue1(field_entry)
                elif isinstance(field_value, list) and \
                        (isinstance(field_value[0], dict) or isinstance(field_value[0], str)):
                    for value_idx, value in enumerate(field_value):
                        field_entry['value_idx'] = value_idx
                        field_entry['fvalue'] = str(value)
                        ib_part.add_to_queue1(field_entry)
                else:
                    field_entry['value_idx'] = 0
                    field_entry['fvalue'] = str(field_value)
                    ib_part.add_to_queue1(field_entry)

                if ib_part.flush_insert(skip_duplicates=True, chunksz=10000):
                    logger.log(25, 'Inserted 10000 raw field tuples')

        except Exception as e:
            logger.log(
                25, 'Problematic entry {} of model {} with error {}'.format(
                    r['id'], model_name, str(e)))

    if ib_part.flush_insert(skip_duplicates=True):
        logger.log(25, 'Inserted all remaining raw field tuples')
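
To make the field flattening above easier to follow, here is a small, database-free illustration of the mapping into AlyxRaw.Field-style rows: the 'json' field is serialised, empty values become the string 'None', list-valued fields get one row per element via value_idx, and everything else is stored as str(). The function and the record are made up for illustration only.

import json


def flatten_record(record):
    # toy version of the branching above, not the ingest code itself
    rows = []
    for fname, fvalue in record.items():
        if fname == 'id':
            continue
        base = dict(uuid=record['id'], fname=fname)
        if fname == 'json' and fvalue:
            rows.append(dict(base, value_idx=0, fvalue=json.dumps(fvalue)))
        elif not fvalue:
            rows.append(dict(base, value_idx=0, fvalue='None'))
        elif isinstance(fvalue, list):
            # one row per list element, indexed by value_idx
            rows.extend(dict(base, value_idx=i, fvalue=str(v))
                        for i, v in enumerate(fvalue))
        else:
            rows.append(dict(base, value_idx=0, fvalue=str(fvalue)))
    return rows


# a made-up record: one scalar field, one list field, one empty field, one json field
example = dict(id='0001', name='spikes.times.npy',
               users=['alice', 'bob'], md5=None, json={'n_trials': 312})
for row in flatten_record(example):
    print(row)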
Example #4
def delete_non_published_records():

    with dj.config(safemode=False):

        logger.log(25, 'Deleting non-published probe insertions...')
        probe_insertion_table = QueryBuffer(ephys.ProbeInsertion)
        for key in tqdm(
                (ephys.ProbeInsertion - public.PublicProbeInsertion - ephys.DefaultCluster).fetch('KEY')):
            probe_insertion_table.add_to_queue1(key)
            if probe_insertion_table.flush_delete(quick=False, chunksz=100):
                logger.log(25, 'Deleted 100 probe insertions')

        probe_insertion_table.flush_delete(quick=False)
        logger.log(25, 'Deleted the rest of the probe insertions')

        logger.log(25, 'Deleting non-published sessions...')
        session_table = QueryBuffer(acquisition.Session)
        for key in tqdm(
                (acquisition.Session - public.PublicSession - behavior.TrialSet).fetch('KEY')):
            session_table.add_to_queue1(key)
            if session_table.flush_delete(quick=False, chunksz=100):
                logger.log(25, 'Deleted 100 sessions')

        session_table.flush_delete(quick=False)
        logger.log(25, 'Deleted the rest of the sessions')

        logger.log(25, 'Deleting non-published subjects...')
        subjs = subject.Subject & acquisition.Session

        for key in tqdm(
                (subject.Subject - public.PublicSubjectUuid - subjs.proj()).fetch('KEY')):
            (subject.Subject & key).delete()
Example #5
if len(sys.argv) < 2:
    # if no argument given, assume a canonical file location and name
    filename = path.join('/data', 'alyxfull.json')
else:
    filename = path.join(dir_name, sys.argv[1])

with open(filename, 'r') as fid:
    keys = json.load(fid)

# remove invalid uuid from unused tables
keys = [
    key for key in keys if key['model'] not in
    ['auth.group', 'sessions.session', 'authtoken.token']
]

# use insert buffer to speed up the insertion process
ib_main = QueryBuffer(alyxraw.AlyxRaw)
ib_part = QueryBuffer(alyxraw.AlyxRaw.Field)

# insert into AlyxRaw table
for key in keys:
    ib_main.add_to_queue1(dict(uuid=uuid.UUID(key['pk']), model=key['model']))
    if ib_main.flush_insert(skip_duplicates=True, chunksz=10000):
        logger.debug('Inserted 10000 raw tuples.')

if ib_main.flush_insert(skip_duplicates=True):
    logger.debug('Inserted remaining raw tuples')

# insert into the part table AlyxRaw.Field
for key in keys:
    key_field = dict(uuid=uuid.UUID(key['pk']))
    for field_name, field_value in key['fields'].items():
Example #6
from ibl_pipeline.ingest import alyxraw, data, reference, acquisition, QueryBuffer
from ibl_pipeline.ingest import get_raw_field as grf
import uuid
from tqdm import tqdm

# ingest dataset entries
key_source = (alyxraw.AlyxRaw & 'model="data.dataset"').proj(
    dataset_uuid="uuid") - data.DataSet

data_set = QueryBuffer(data.DataSet)

for key in tqdm(key_source.fetch('KEY'), position=0):
    key_ds = key.copy()
    key['uuid'] = key['dataset_uuid']

    session = grf(key, 'session')
    if not len(acquisition.Session & dict(session_uuid=uuid.UUID(session))):
        print('Session {} is not in the table acquisition.Session'.format(
            session))
        print('dataset_uuid: {}'.format(str(key['uuid'])))
        continue

    key_ds['subject_uuid'], key_ds['session_start_time'] = \
        (acquisition.Session &
            dict(session_uuid=uuid.UUID(session))).fetch1(
            'subject_uuid', 'session_start_time')

    key_ds['dataset_name'] = grf(key, 'name')

    dt = grf(key, 'dataset_type')
    key_ds['dataset_type_name'] = \
Example #7
            the table acquisition.Session'.format(parent_session))
        continue
    key_cs['parent_session_start_time'] = \
        (acquisition.Session &
            dict(session_uuid=uuid.UUID(parent_session))).fetch1(
                'session_start_time')
    acquisition.ChildSession.insert1(key_cs, skip_duplicates=True)

# acquisition.SessionUser
print('Ingesting acquisition.SessionUser...')
sessions = alyxraw.AlyxRaw & 'model="actions.session"'
sessions_with_users = alyxraw.AlyxRaw.Field & sessions & \
    'fname="users"' & 'fvalue!="None"'
keys = (alyxraw.AlyxRaw & sessions_with_users).proj(session_uuid='uuid')

session_user = QueryBuffer(acquisition.SessionUser)

for key in tqdm(keys, position=0):

    key['uuid'] = key['session_uuid']

    if not len(acquisition.Session & key):
        print('Session {} is not in the table acquisition.Session'.format(
            key['session_uuid']))
        continue

    key_s = dict()
    key_s['subject_uuid'], key_s['session_start_time'] = \
        (acquisition.Session & key).fetch1(
            'subject_uuid', 'session_start_time')
Example #8
def ingest_membership_table(dj_current_table,
                            alyx_parent_model,
                            alyx_field,
                            dj_parent_table,
                            dj_other_table,
                            dj_parent_fields,
                            dj_other_field,
                            dj_parent_uuid_name,
                            dj_other_uuid_name,
                            renamed_other_field_name=None,
                            new_pks=None):
    '''
    Ingest shadow membership table.
    This function works for the pattern in which an alyx parent model contains one or multiple
    entries of one field that carry the information for the membership table.

    Arguments:  dj_current_table : datajoint table object, current membership table to ingest
                alyx_parent_model: string, model name inside alyx that contains information of the current table.
                alyx_field       : field of alyx that contains information of the current table
                dj_parent_table  : datajoint parent table, corresponding to the alyx parent model
                dj_other_table   : datajoint other table to fetch the field from
                dj_parent_fields : string or list of strings, field names to be fetched from the parent table
                dj_other_field   : string, the field name to be fetched from the other table
                dj_parent_uuid_name: string, uuid field name of the parent table
                dj_other_uuid_name: string, uuid field name of the other table
                renamed_other_field_name: string, name under which the other field is stored in the real table;
                                        the default is None if the field is not renamed
                new_pks          : list of strings of valid uuids, the new entries to process; the
                                default is None, in which case all entries are processed.
    '''
    if new_pks:
        restr = [{'uuid': pk} for pk in new_pks if is_valid_uuid(pk)]
    else:
        restr = {}

    if len(restr) > 1000:
        print('More than 1000 entries to insert, using buffer...')
        buffer = QueryBuffer(alyxraw.AlyxRaw & {'model': alyx_parent_model})
        for r in tqdm(restr):
            buffer.add_to_queue1(r)
            buffer.flush_fetch('KEY', chunksz=200)
        buffer.flush_fetch('KEY')
        alyxraw_to_insert = buffer.fetched_results

    else:
        alyxraw_to_insert = (alyxraw.AlyxRaw & restr & {
            'model': alyx_parent_model
        }).fetch('KEY')

    if not alyxraw_to_insert:
        return

    alyx_field_entries = alyxraw.AlyxRaw.Field & alyxraw_to_insert & \
        {'fname': alyx_field} & 'fvalue!="None"'

    keys = (alyxraw.AlyxRaw
            & alyx_field_entries).proj(**{dj_parent_uuid_name: 'uuid'})

    if isinstance(dj_parent_fields, str):
        dj_parent_fields = [dj_parent_fields]

    insert_buffer = QueryBuffer(dj_current_table)

    for key in keys:

        if not dj_parent_table & key:
            print(
                f'The entry {key} is not in parent table {dj_parent_table.__name__}'
            )
            continue

        entry_base = (dj_parent_table & key).fetch(*dj_parent_fields,
                                                   as_dict=True)[0]

        key['uuid'] = key[dj_parent_uuid_name]
        uuids = grf(key,
                    alyx_field,
                    multiple_entries=True,
                    model=alyx_parent_model)
        if len(uuids):
            for uuid in uuids:
                if uuid == 'None':
                    continue
                else:
                    if not dj_other_table & {dj_other_uuid_name: uuid}:
                        print(
                            f'The uuid {uuid} is not in datajoint table {dj_other_table.__name__}'
                        )
                        continue
                    entry = entry_base.copy()
                    field_value = (dj_other_table & {
                        dj_other_uuid_name: uuid
                    }).fetch1(dj_other_field)
                    if renamed_other_field_name:
                        entry[renamed_other_field_name] = field_value
                    else:
                        entry[dj_other_field] = field_value

                    insert_buffer.add_to_queue1(entry)
                    insert_buffer.flush_insert(skip_duplicates=True,
                                               chunksz=1000)

    insert_buffer.flush_insert(skip_duplicates=True)
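
A usage sketch for ingest_membership_table(), mirroring the acquisition.SessionUser pattern shown in Example #7. The table classes and field names here are assumptions chosen for illustration, not taken from the original calling script.

# hypothetical call; argument values are assumptions based on Example #7
ingest_membership_table(
    dj_current_table=acquisition.SessionUser,
    alyx_parent_model='actions.session',
    alyx_field='users',
    dj_parent_table=acquisition.Session,
    dj_other_table=reference.LabMember,
    dj_parent_fields=['subject_uuid', 'session_start_time'],
    dj_other_field='user_name',
    dj_parent_uuid_name='session_uuid',
    dj_other_uuid_name='user_uuid')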
Example #9
def main(excluded_tables=[], modified_pks=None):

    kwargs = dict(display_progress=True, suppress_errors=True)

    for t in SHADOW_TABLES:
        if t.__name__ in excluded_tables:
            continue
        print(f'Ingesting shadow table {t.__name__}...')

        # If a session entry is modified, replace the entry without deleting it.
        # This keeps the session entry when the uuid is unchanged but the start time
        # shifted by one second; start_time is not updated in alyxraw in this case.
        if t.__name__ == 'Session' and modified_pks:
            modified_session_keys = [{
                'session_uuid': pk
            } for pk in modified_pks]
            sessions = acquisition.Session & modified_session_keys
            if sessions:
                modified_session_entries = []
                for key in sessions.fetch('KEY'):
                    try:
                        entry = acquisition.Session.create_entry(key)
                        modified_session_entries.append(entry)
                    except Exception:
                        print("Error creating entry for key: {}".format(key))
                if modified_session_entries:
                    try:
                        t.insert(modified_session_entries,
                                 allow_direct_insert=True,
                                 replace=True)
                    except DataJointError:
                        for entry in modified_session_entries:
                            t.insert1(entry,
                                      allow_direct_insert=True,
                                      replace=True)

        t.populate(**kwargs)

    if 'DataSet' not in excluded_tables:

        print('Ingesting dataset entries...')
        key_source = (alyxraw.AlyxRaw & 'model="data.dataset"').proj(
            dataset_uuid="uuid") - data.DataSet

        data_set = QueryBuffer(data.DataSet)

        for key in tqdm(key_source.fetch('KEY'), position=0):
            key_ds = key.copy()
            key['uuid'] = key['dataset_uuid']

            session = grf(key, 'session')
            if not len(acquisition.Session
                       & dict(session_uuid=uuid.UUID(session))):
                print('Session {} is not in the table acquisition.Session'.
                      format(session))
                print('dataset_uuid: {}'.format(str(key['uuid'])))
                continue

            key_ds['subject_uuid'], key_ds['session_start_time'] = \
                (acquisition.Session &
                    dict(session_uuid=uuid.UUID(session))).fetch1(
                    'subject_uuid', 'session_start_time')

            key_ds['dataset_name'] = grf(key, 'name')

            dt = grf(key, 'dataset_type')
            key_ds['dataset_type_name'] = \
                (data.DataSetType & dict(dataset_type_uuid=uuid.UUID(dt))).fetch1(
                    'dataset_type_name')

            user = grf(key, 'created_by')

            if user != 'None':
                try:
                    key_ds['dataset_created_by'] = \
                        (reference.LabMember & dict(user_uuid=uuid.UUID(user))).fetch1(
                            'user_name')
                except Exception:
                    print('Could not look up user {}'.format(user))
            else:
                key_ds['dataset_created_by'] = None

            format = grf(key, 'data_format')
            key_ds['format_name'] = \
                (data.DataFormat & dict(format_uuid=uuid.UUID(format))).fetch1(
                    'format_name')

            key_ds['created_datetime'] = grf(key, 'created_datetime')

            software = grf(key, 'generating_software')
            if software != 'None':
                key_ds['generating_software'] = software
            else:
                key_ds['generating_software'] = None

            directory = grf(key, 'provenance_directory')
            if directory != 'None':
                key_ds['provenance_directory'] = directory
            else:
                key_ds['provenance_directory'] = None

            md5 = grf(key, 'md5')
            if md5 != 'None':
                key_ds['md5'] = md5
            else:
                key_ds['md5'] = None

            file_size = grf(key, 'file_size')
            if file_size != 'None':
                key_ds['file_size'] = file_size
            else:
                key_ds['file_size'] = None

            data_set.add_to_queue1(key_ds)

            if data_set.flush_insert(skip_duplicates=True,
                                     allow_direct_insert=True,
                                     chunksz=100):
                print('Inserted 100 dataset tuples')

        if data_set.flush_insert(skip_duplicates=True,
                                 allow_direct_insert=True):
            print('Inserted all remaining dataset tuples')

    if 'FileRecord' not in excluded_tables:
        print('Ingesting file record entries...')
        records = alyxraw.AlyxRaw & 'model="data.filerecord"'
        repos = (data.DataRepository
                 & 'repo_name LIKE "flatiron%"').fetch('repo_uuid')
        records_flatiron = alyxraw.AlyxRaw.Field & records & \
            'fname = "data_repository"' & [{'fvalue': str(repo)} for repo in repos]
        record_exists = alyxraw.AlyxRaw.Field & records & \
            'fname = "exists"' & 'fvalue="True"'
        key_source = (alyxraw.AlyxRaw & record_exists & records_flatiron).proj(
            record_uuid='uuid') - data.FileRecord

        file_record = QueryBuffer(data.FileRecord)

        for key in tqdm(key_source.fetch('KEY'), position=0):
            key_fr = key.copy()
            key['uuid'] = key['record_uuid']
            key_fr['exists'] = True

            dataset = grf(key, 'dataset')
            if not len(data.DataSet & dict(dataset_uuid=uuid.UUID(dataset))):
                print('Dataset {} is not in the table data.DataSet'.format(dataset))
                print('Record_uuid: {}'.format(str(key['uuid'])))
                continue

            key_fr['subject_uuid'], key_fr['session_start_time'], \
                key_fr['dataset_name'] = \
                (data.DataSet & dict(dataset_uuid=uuid.UUID(dataset))).fetch1(
                    'subject_uuid', 'session_start_time', 'dataset_name')

            repo = grf(key, 'data_repository')
            key_fr['repo_name'] = \
                (data.DataRepository & dict(repo_uuid=uuid.UUID(repo))).fetch1(
                    'repo_name')

            key_fr['relative_path'] = grf(key, 'relative_path')

            file_record.add_to_queue1(key_fr)

            if file_record.flush_insert(skip_duplicates=True,
                                        allow_direct_insert=True,
                                        chunksz=1000):
                print('Inserted 1000 raw field tuples')

        if file_record.flush_insert(skip_duplicates=True,
                                    allow_direct_insert=True):
            print('Inserted all remaining file record tuples')
Example #10
def insert_to_alyxraw(keys):

    # use insert buffer to speed up the insertion process
    ib_main = QueryBuffer(alyxraw.AlyxRaw)
    ib_part = QueryBuffer(alyxraw.AlyxRaw.Field)

    # insert into AlyxRaw table
    for key in tqdm(keys, position=0):
        try:
            pk = uuid.UUID(key['pk'])
        except Exception:
            print('Error for key: {}'.format(key))
            continue

        ib_main.add_to_queue1(dict(uuid=pk, model=key['model']))
        if ib_main.flush_insert(skip_duplicates=True, chunksz=10000):
            logger.debug('Inserted 10000 raw tuples.')
            # print('Inserted 10000 raw tuples.')

    if ib_main.flush_insert(skip_duplicates=True):
        logger.debug('Inserted remaining raw tuples')
        # print('Inserted remaining raw tuples')

    # insert into the part table AlyxRaw.Field
    for ikey, key in tqdm(enumerate(keys), position=0):
        try:
            try:
                pk = uuid.UUID(key['pk'])
            except ValueError:
                print('Error for key: {}'.format(key))
                continue

            key_field = dict(uuid=pk)
            for field_name, field_value in key['fields'].items():
                key_field = dict(key_field, fname=field_name)

                if field_name == 'json' and field_value is not None:

                    key_field['value_idx'] = 0
                    key_field['fvalue'] = json.dumps(field_value)
                    if len(key_field['fvalue']) < 10000:
                        ib_part.add_to_queue1(key_field)
                    else:
                        continue
                elif field_name == 'narrative' and field_value is not None:
                    # filter out emoji
                    emoji_pattern = re.compile(
                        "["
                        u"\U0001F600-\U0001F64F"  # emoticons
                        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                        u"\U0001F680-\U0001F6FF"  # transport & map symbols
                        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                        u"\U00002702-\U000027B0"
                        u"\U000024C2-\U0001F251"
                        "]+",
                        flags=re.UNICODE)

                    key_field['value_idx'] = 0
                    key_field['fvalue'] = emoji_pattern.sub(r'', field_value)

                elif field_value is None or field_value == '' or field_value == [] or \
                        (isinstance(field_value, float) and math.isnan(field_value)):
                    key_field['value_idx'] = 0
                    key_field['fvalue'] = 'None'
                    ib_part.add_to_queue1(key_field)

                elif isinstance(field_value, list) and \
                        isinstance(field_value[0], (dict, str)):
                    for value_idx, value in enumerate(field_value):
                        key_field['value_idx'] = value_idx
                        key_field['fvalue'] = str(value)
                        ib_part.add_to_queue1(key_field)
                else:
                    key_field['value_idx'] = 0
                    key_field['fvalue'] = str(field_value)
                    ib_part.add_to_queue1(key_field)

                if ib_part.flush_insert(skip_duplicates=True, chunksz=10000):
                    logger.debug('Inserted 10000 raw field tuples')
                    # print('Inserted 10000 raw field tuples')
        except Exception as e:
            print('Problematic entry:{}'.format(ikey))
            raise

    if ib_part.flush_insert(skip_duplicates=True):
        logger.debug('Inserted all remaining raw field tuples')
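
The keys argument consumed above (and in Example #5) is expected to look like the raw Alyx JSON dump: a list of dicts, each carrying a 'model' name, a 'pk' UUID string and a 'fields' mapping. The record below is made up purely to show that shape.

# made-up example of the expected input shape
keys = [
    {
        'model': 'actions.session',
        'pk': 'b1c2d3e4-f5a6-4b7c-8d9e-0f1a2b3c4d5e',
        'fields': {
            'narrative': 'good session',
            'users': ['alice', 'bob'],
            'json': {'weight': 22.5},
        },
    },
]

insert_to_alyxraw(keys)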
Example #11
def delete_histology_alyx_shadow(verbose=False):

    CHANNEL_TABLES = [
        histology_ingest.ChannelBrainLocationTemp,
        alyxraw.AlyxRaw.Field,
        alyxraw.AlyxRaw
    ]

    channel_loc_keys = update_utils.get_deleted_keys('experiments.channel')
    for t in CHANNEL_TABLES:
        print(f'Deleting from table {t.__name__}')
        uuid_name = t.heading.primary_key[0]
        keys = [{uuid_name: k['uuid']} for k in tqdm(channel_loc_keys)]
        table = QueryBuffer(t)

        for k in tqdm(keys, position=0):
            table.add_to_queue1(k)
            if table.flush_delete(chunksz=1000, quick=True) and verbose:
                print(f'Deleted 1000 entries from {t.__name__}')

        table.flush_delete(quick=True)

    traj_keys = update_utils.get_deleted_keys('experiments.trajectoryestimate') + \
        update_utils.get_updated_keys('experiments.trajectoryestimate')

    TRAJ_TABLES = [
        histology_ingest.ProbeTrajectoryTemp,
        alyxraw.AlyxRaw.Field,
        alyxraw.AlyxRaw
    ]

    for t in TRAJ_TABLES:
        uuid_name = t.heading.primary_key[0]
        keys = [{uuid_name: k['uuid']} for k in traj_keys]
        table = QueryBuffer(t)
        for k in tqdm(keys, position=0):
            table.add_to_queue1(k)
            if table.flush_delete(chunksz=1000, quick=True) and verbose:
                print(f'Deleted 1000 entries from {t.__name__}')
        table.flush_delete(quick=True)
Example #12
def delete_entries_from_alyxraw(pks_to_be_deleted=[], modified_pks_important=[]):

    '''
    Delete entries from alyxraw and shadow tables, excluding the membership tables.
    '''

    print('Deleting alyxraw entries corresponding to file records...')

    if pks_to_be_deleted:
        if len(pks_to_be_deleted) > 5000:
            file_record_fields = alyxraw.AlyxRaw.Field & \
                'fname = "exists"' & 'fvalue = "false"'
        else:
            file_record_fields = alyxraw.AlyxRaw.Field & \
                'fname = "exists"' & 'fvalue = "false"' & \
                [{'uuid': pk} for pk in pks_to_be_deleted]

        for key in tqdm(file_record_fields):
            (alyxraw.AlyxRaw.Field & key).delete_quick()

    if modified_pks_important:
        pk_list = [{'uuid': pk} for pk in modified_pks_important
                   if is_valid_uuid(pk)]
        if len(pk_list) > 1000:

            print('Long pk list, deleting from alyxraw.AlyxRaw ...')
            alyxraw_buffer = QueryBuffer(alyxraw.AlyxRaw & 'model != "actions.session"')
            for pk in tqdm(pk_list):
                alyxraw_buffer.add_to_queue1(pk)
                alyxraw_buffer.flush_delete(chunksz=50, quick=False)

            alyxraw_buffer.flush_delete(quick=False)

            # Delete session fields without deleting the AlyxRaw entries or the start_time field.
            # This handles the case where the uuid is unchanged but the start time shifted by 1 sec.
            print('Long pk list, deleting from alyxraw.AlyxRaw.Field ...')
            alyxraw_field_buffer = QueryBuffer(
                alyxraw.AlyxRaw.Field & 'fname!="start_time"' &
                (alyxraw.AlyxRaw & 'model="actions.session"'))

            for pk in tqdm(pk_list):
                alyxraw_field_buffer.add_to_queue1(pk)
                alyxraw_field_buffer.flush_delete(chunksz=50, quick=True)
            alyxraw_field_buffer.flush_delete(quick=True)