import uuid

from ibl_pipeline.ingest import alyxraw, reference, subject, action, acquisition, data
from ibl_pipeline.ingest import get_raw_field as grf


if __name__ == '__main__':
    # reference.ProjectLabMember
    # Link each project to its lab members, based on the raw alyx dump.
    print('Ingesting reference.ProjectLabMember...')
    # Projects whose raw record has a non-null "users" field.
    projects = alyxraw.AlyxRaw & 'model="subjects.project"'
    users = alyxraw.AlyxRaw.Field & projects & 'fname="users"' & 'fvalue!="None"'
    keys = (alyxraw.AlyxRaw & users).proj(project_uuid='uuid')

    for key in keys:
        key_p = dict()
        key_p['project_name'] = (reference.Project & key).fetch1('project_name')

        # One project may reference multiple users.
        user_uuids = grf(key, 'users', multiple_entries=True,
                         model='subjects.project')

        # No len() guard needed: iterating an empty sequence is a no-op.
        for user_uuid in user_uuids:
            if user_uuid == 'None':
                continue
            key_pl = key_p.copy()
            # Requires `import uuid` at module level (was missing from the
            # original file's imports and would raise NameError here).
            key_pl['user_name'] = \
                (reference.LabMember &
                    dict(user_uuid=uuid.UUID(user_uuid))).fetch1(
                        'user_name')

            reference.ProjectLabMember.insert1(key_pl, skip_duplicates=True)

    # subject.AlleleSequence
    print('Ingesting subject.AlleleSequence...')
# ---- Example #2 ----
# 0
def ingest_membership_table(dj_current_table,
                            alyx_parent_model,
                            alyx_field,
                            dj_parent_table,
                            dj_other_table,
                            dj_parent_fields,
                            dj_other_field,
                            dj_parent_uuid_name,
                            dj_other_uuid_name,
                            renamed_other_field_name=None,
                            new_pks=None):
    '''
    Ingest a shadow membership table.

    This function works for the pattern where an alyx parent model contains
    one or multiple entries of one field that carry the information for the
    membership table.

    Arguments:  dj_current_table : datajoint table object, current membership table to ingest
                alyx_parent_model: string, model name inside alyx that contains information of the current table.
                alyx_field       : field of alyx that contains information of current table
                dj_parent_table  : datajoint parent table, corresponding to alyx parent model
                dj_other_table   : datajoint other table to fetch the field from
                dj_parent_fields : string or list of strings, field names to be fetched from the parent table
                dj_other_field   : string, the field to be fetched from the other table
                dj_parent_uuid_name: string, uuid name of the parent table
                dj_other_uuid_name: string, uuid name of the other table
                renamed_other_field_name: string, the other field name as renamed in the real table;
                                        default None if the field is not renamed
                new_pks          : list of strings of valid uuids; the new entries to process.
                                Default None, meaning all entries are inserted.
    '''
    # Restrict to the requested primary keys, silently dropping malformed
    # uuids; an empty dict restriction matches all entries.
    if new_pks:
        restr = [{'uuid': pk} for pk in new_pks if is_valid_uuid(pk)]
    else:
        restr = {}

    alyxraw_to_insert = (alyxraw.AlyxRaw & restr & {
        'model': alyx_parent_model
    }).fetch('KEY')

    if not alyxraw_to_insert:
        return

    # Only records where the membership field is actually populated.
    alyx_field_entries = alyxraw.AlyxRaw.Field & alyxraw_to_insert & \
                         {'fname': alyx_field} & 'fvalue!="None"'

    keys = (alyxraw.AlyxRaw
            & alyx_field_entries).proj(**{dj_parent_uuid_name: 'uuid'})

    # isinstance is the idiomatic type check (also accepts str subclasses).
    if isinstance(dj_parent_fields, str):
        dj_parent_fields = [dj_parent_fields]

    for key in keys:

        if not dj_parent_table & key:
            print(
                f'The entry {key} is not parent table {dj_parent_table.__name__}'
            )
            continue

        # Base entry carries the parent-table fields shared by every
        # membership row for this key.
        entry_base = (dj_parent_table & key).fetch(*dj_parent_fields,
                                                   as_dict=True)[0]

        key['uuid'] = key[dj_parent_uuid_name]
        uuids = grf(key,
                    alyx_field,
                    multiple_entries=True,
                    model=alyx_parent_model)

        # Loop variable renamed from `uuid` so it no longer shadows the
        # stdlib uuid module; guard clauses flatten the original nesting.
        for other_uuid in uuids:
            if other_uuid == 'None':
                continue
            if not dj_other_table & {dj_other_uuid_name: other_uuid}:
                print(
                    f'The uuid {other_uuid} is not datajoint table {dj_other_table.__name__}'
                )
                continue
            entry = entry_base.copy()
            field_value = (dj_other_table & {
                dj_other_uuid_name: other_uuid
            }).fetch1(dj_other_field)
            if renamed_other_field_name:
                entry[renamed_other_field_name] = field_value
            else:
                entry[dj_other_field] = field_value

            dj_current_table.insert1(entry, skip_duplicates=True)
# ---- Example #3 ----
# 0
def main(excluded_tables=None, modified_pks=None):
    '''
    Populate the shadow tables, then ingest DataSet and FileRecord entries.

    Arguments:
        excluded_tables: list of table class names to skip (default: none).
        modified_pks   : list of session uuid strings whose Session entries
                         are re-created and replaced before populating.
    '''
    # Avoid the mutable-default-argument pitfall (a [] default is shared
    # across calls).
    if excluded_tables is None:
        excluded_tables = []

    kwargs = dict(display_progress=True, suppress_errors=True)

    for t in SHADOW_TABLES:
        if t.__name__ in excluded_tables:
            continue
        print(f'Ingesting shadow table {t.__name__}...')

        # Sessions whose raw records changed are rebuilt and replaced
        # before the normal populate pass.
        if t.__name__ == 'Session' and modified_pks:
            modified_session_keys = [{
                'session_uuid': pk
            } for pk in modified_pks]
            sessions = acquisition.Session & modified_session_keys
            if sessions:
                modified_session_entries = []
                for key in sessions.fetch('KEY'):
                    try:
                        entry = acquisition.Session.create_entry(key)
                        modified_session_entries.append(entry)
                    except Exception:
                        # Narrowed from a bare except so KeyboardInterrupt /
                        # SystemExit still propagate.
                        print("Error creating entry for key: {}".format(key))
                if modified_session_entries:
                    t.insert(modified_session_entries,
                             allow_direct_insert=True,
                             replace=True)

        t.populate(**kwargs)

    if 'DataSet' not in excluded_tables:

        print('Ingesting dataset entries...')
        # Raw dataset records not yet present in data.DataSet.
        key_source = (alyxraw.AlyxRaw & 'model="data.dataset"').proj(
            dataset_uuid="uuid") - data.DataSet

        data_set = InsertBuffer(data.DataSet)

        for key in tqdm(key_source.fetch('KEY'), position=0):
            key_ds = key.copy()
            key['uuid'] = key['dataset_uuid']

            session = grf(key, 'session')
            if not len(acquisition.Session
                       & dict(session_uuid=uuid.UUID(session))):
                print('Session {} is not in the table acquisition.Session'.
                      format(session))
                print('dataset_uuid: {}'.format(str(key['uuid'])))
                continue

            key_ds['subject_uuid'], key_ds['session_start_time'] = \
                (acquisition.Session &
                    dict(session_uuid=uuid.UUID(session))).fetch1(
                    'subject_uuid', 'session_start_time')

            key_ds['dataset_name'] = grf(key, 'name')

            dt = grf(key, 'dataset_type')
            key_ds['dataset_type_name'] = \
                (data.DataSetType & dict(dataset_type_uuid=uuid.UUID(dt))).fetch1(
                    'dataset_type_name')

            user = grf(key, 'created_by')

            if user != 'None':
                try:
                    key_ds['dataset_created_by'] = \
                        (reference.LabMember & dict(user_uuid=uuid.UUID(user))).fetch1(
                            'user_name')
                except Exception:
                    # Best-effort lookup: log the unresolved user and move on.
                    print(user)
            else:
                key_ds['dataset_created_by'] = None

            # Renamed from `format` to avoid shadowing the builtin.
            data_format = grf(key, 'data_format')
            key_ds['format_name'] = \
                (data.DataFormat & dict(format_uuid=uuid.UUID(data_format))).fetch1(
                    'format_name')

            key_ds['created_datetime'] = grf(key, 'created_datetime')

            # grf returns the string 'None' for missing fields; map those
            # to real NULLs.
            software = grf(key, 'generating_software')
            key_ds['generating_software'] = \
                software if software != 'None' else None

            directory = grf(key, 'provenance_directory')
            key_ds['provenance_directory'] = \
                directory if directory != 'None' else None

            md5 = grf(key, 'md5')
            key_ds['md5'] = md5 if md5 != 'None' else None

            file_size = grf(key, 'file_size')
            key_ds['file_size'] = file_size if file_size != 'None' else None

            data_set.insert1(key_ds)

            if data_set.flush(skip_duplicates=True,
                              allow_direct_insert=True,
                              chunksz=100):
                print('Inserted 100 dataset tuples')

        if data_set.flush(skip_duplicates=True, allow_direct_insert=True):
            print('Inserted all remaining dataset tuples')

    if 'FileRecord' not in excluded_tables:
        print('Ingesting file record entries...')
        records = alyxraw.AlyxRaw & 'model="data.filerecord"'
        # Only records stored on flatiron repositories are ingested.
        repos = (data.DataRepository
                 & 'repo_name LIKE "flatiron%"').fetch('repo_uuid')
        records_flatiron = alyxraw.AlyxRaw.Field & records & \
            'fname = "data_repository"' & [{'fvalue': str(repo)} for repo in repos]
        record_exists = alyxraw.AlyxRaw.Field & records & \
            'fname = "exists"' & 'fvalue="True"'
        key_source = (alyxraw.AlyxRaw & record_exists & records_flatiron).proj(
            record_uuid='uuid') - data.FileRecord

        file_record = InsertBuffer(data.FileRecord)

        for key in tqdm(key_source.fetch('KEY'), position=0):
            key_fr = key.copy()
            key['uuid'] = key['record_uuid']
            key_fr['exists'] = True

            dataset = grf(key, 'dataset')
            if not len(data.DataSet & dict(dataset_uuid=uuid.UUID(dataset))):
                # BUG FIX: the original print was missing .format(dataset),
                # so the literal '{}' was printed instead of the uuid.
                print('Dataset {} is not in the table data.DataSet'.format(
                    dataset))
                print('Record_uuid: {}'.format(str(key['uuid'])))
                continue

            key_fr['subject_uuid'], key_fr['session_start_time'], \
                key_fr['dataset_name'] = \
                (data.DataSet & dict(dataset_uuid=uuid.UUID(dataset))).fetch1(
                    'subject_uuid', 'session_start_time', 'dataset_name')

            repo = grf(key, 'data_repository')
            key_fr['repo_name'] = \
                (data.DataRepository & dict(repo_uuid=uuid.UUID(repo))).fetch1(
                    'repo_name')

            key_fr['relative_path'] = grf(key, 'relative_path')

            file_record.insert1(key_fr)

            if file_record.flush(skip_duplicates=True,
                                 allow_direct_insert=True,
                                 chunksz=1000):
                print('Inserted 1000 raw field tuples')

        if file_record.flush(skip_duplicates=True, allow_direct_insert=True):
            print('Inserted all remaining file record tuples')
# ---- Example #4 ----
# 0
from ibl_pipeline.ingest import alyxraw, data, reference, acquisition, QueryBuffer
from ibl_pipeline.ingest import get_raw_field as grf
import uuid
from tqdm import tqdm

# ingest dataset entries
# NOTE(review): this fragment is truncated — the assignment that followed the
# `dt` lookup is cut off mid-statement, so only the complete statements are
# documented here.
# Raw dataset records not yet present in data.DataSet.
key_source = (alyxraw.AlyxRaw & 'model="data.dataset"').proj(
    dataset_uuid="uuid") - data.DataSet

# Buffer inserts so they can be flushed in chunks.
data_set = QueryBuffer(data.DataSet)

for key in tqdm(key_source.fetch('KEY'), position=0):
    key_ds = key.copy()
    # grf expects the raw record's uuid under the key 'uuid'.
    key['uuid'] = key['dataset_uuid']

    # Skip datasets whose parent session was never ingested.
    session = grf(key, 'session')
    if not len(acquisition.Session & dict(session_uuid=uuid.UUID(session))):
        print('Session {} is not in the table acquisition.Session'.format(
            session))
        print('dataset_uuid: {}'.format(str(key['uuid'])))
        continue

    # Resolve the session's primary-key attributes for the dataset entry.
    key_ds['subject_uuid'], key_ds['session_start_time'] = \
        (acquisition.Session &
            dict(session_uuid=uuid.UUID(session))).fetch1(
            'subject_uuid', 'session_start_time')

    key_ds['dataset_name'] = grf(key, 'name')

    dt = grf(key, 'dataset_type')
    key_ds['dataset_type_name'] = \
# ---- Example #5 ----
# 0
import datajoint as dj
import json
from ibl_pipeline.ingest import alyxraw, reference, subject, action, acquisition, data
from ibl_pipeline.ingest import get_raw_field as grf

# NOTE(review): `subjects` is never used below in this fragment — possibly a
# leftover from the surrounding (truncated) example.
subjects = alyxraw.AlyxRaw.Field & (alyxraw.AlyxRaw & 'model="subjects.subject"') & 'fname="lab"' & 'fvalue!="None"'

# reference.ProjectLabMember
print('Ingesting reference.ProjectLabMember...')
keys = (alyxraw.AlyxRaw & 'model="subjects.project"').proj(project_uuid='uuid')

for key in keys:
    key_p = dict()
    key_p['project_name'] = (reference.Project & key).fetch1('project_name')

    # One project may reference multiple users.
    user_uuids = grf(key, 'users', multiple_entries=True)

    for user_uuid in user_uuids:
        key_pl = key_p.copy()
        # NOTE(review): restricts by the uuid's string form; other variants of
        # this script use dict(user_uuid=uuid.UUID(...)) — verify both match
        # the table's attribute type.
        key_pl['user_name'] = (reference.LabMember & 'user_uuid="{}"'.format(user_uuid)).fetch1('user_name')
        reference.ProjectLabMember.insert1(key_pl, skip_duplicates=True)


# subject.AlleleSequence
print('Ingesting subject.AlleleSequence...')
keys = (alyxraw.AlyxRaw & 'model="subjects.allele"').proj(allele_uuid='uuid')
for key in keys:
    key_a = dict()
    key_a['allele_name'] = (subject.Allele & key).fetch1('allele_name')
    # grf expects the raw record's uuid under the key 'uuid'.
    key['uuid'] = key['allele_uuid']
    # NOTE(review): loop body appears truncated — `sequences` is fetched but
    # never inserted anywhere in this fragment.
    sequences = grf(key, 'sequences', multiple_entries=True)
# NOTE(review): `subjects` is never used below in this fragment.
subjects = alyxraw.AlyxRaw.Field & (
    alyxraw.AlyxRaw
    & 'model="subjects.subject"') & 'fname="lab"' & 'fvalue!="None"'

# reference.ProjectLabMember
print('Ingesting reference.ProjectLabMember...')
# Projects whose raw record has a non-null "users" field.
projects = alyxraw.AlyxRaw & 'model="subjects.project"'
users = alyxraw.AlyxRaw.Field & projects & 'fname="users"' & 'fvalue!="None"'
keys = (alyxraw.AlyxRaw & users).proj(project_uuid='uuid')

for key in keys:
    key_p = dict()
    key_p['project_name'] = (reference.Project & key).fetch1('project_name')

    # NOTE(review): other variants of this script fetch the field 'users'
    # rather than 'user_uuids' — confirm which field name the raw model uses.
    user_uuids = grf(key,
                     'user_uuids',
                     multiple_entries=True,
                     model='subjects.project')

    for user_uuid in user_uuids:
        key_pl = key_p.copy()
        key_pl['user_name'] = (
            reference.LabMember
            & 'user_uuid="{}"'.format(user_uuid)).fetch1('user_name')
        reference.ProjectLabMember.insert1(key_pl, skip_duplicates=True)

# subject.AlleleSequence
# NOTE(review): this final loop appears truncated at the end of the visible
# file — key_a is built but never inserted here.
print('Ingesting subject.AlleleSequence...')
keys = (alyxraw.AlyxRaw & 'model="subjects.allele"').proj(allele_uuid='uuid')
for key in keys:
    key_a = dict()
    key_a['allele_name'] = (subject.Allele & key).fetch1('allele_name')