# Pull the subject metadata; `files` (the file metadata) is expected to have
# been retrieved earlier in this script.
subjects = rd3.get(
    entity='rd3_freeze1_subject',
    attributes='id,subjectID,patch',
    batch_size=10000
)

statusMsg('File metadata entries pulled: {}'.format(len(files)))
statusMsg('Subject metadata entries pulled: {}'.format(len(subjects)))

# extract subject ID: strip the trailing '.json' extension and, when present,
# the date-like 'NNNN-NN-NN' suffix together with the single character that
# precedes it. The extension dot is escaped so that only a literal '.json' is
# removed (an unescaped '.' would also strip e.g. 'Xjson').
# NOTE(review): the character in front of the date is still matched with '.',
# because the separator used in the file names is not visible here -- confirm.
subjectIdSuffix = re.compile(r'((.[0-9]{4}-[0-9]{2}-[0-9]{2})?(\.json))$')
for file in files:
    file['subject'] = subjectIdSuffix.sub('', file['name'])

ids = [file['subject'] for file in files]

# set lookup keeps the membership test below O(1) per subject
idsWithFiles = set(ids)

# update patch: only subjects that have a matching file receive the new patch,
# and only those records are sent back. Subjects without a file keep the raw
# reference list returned by `rd3.get`, which is not a valid value for the
# comma separated `patch` column.
data = []
for s in subjects:
    if s['subjectID'] not in idsWithFiles:
        continue

    patches = ['freeze1_patch1']
    # `patch` may be missing or empty for a subject; treat that as no patches
    for patch in s.get('patch') or []:
        patchId = patch.get('id')
        # skip references without an id: None cannot be joined into a string
        if patchId is not None:
            patches.append(patchId)

    # dict.fromkeys de-duplicates while keeping a deterministic order
    s['patch'] = ','.join(dict.fromkeys(patches))
    data.append({k: v for k, v in s.items() if k in ('id', 'patch')})

# import into RD3
rd3.updateColumn(entity='rd3_freeze1_subject', attr='patch', data=data)
rd3.updateColumn(entity='rd3_freeze1_subjectinfo', attr='patch', data=data)
# statusMsg( # 'Not all records were processed. There are', # shipment.nrows - shipmentUpdates.nrows, # 'records remaining.' # ) # else: # statusMsg('All records were processed! :-)') # import # rd3_shipment_updates = dtFrameToRecords(shipmentUpdates) rd3_shipment_updates = dtFrameToRecords(shipment[:, { 'molgenis_id': f.molgenis_id, 'processed': True }]) rd3.updateColumn(entity='rd3_portal_novelomics_shipment', attr='processed', data=rd3_shipment_updates) # ~ 6e.ii ~ # update experiment table experimentUpdates = experiment[functools.reduce(operator.or_, ( f.project_experiment_dataset_id == id for id in labinfo[:, f.experimentID].to_list()[0])), { 'molgenis_id': f.molgenis_id, 'processed': True }] # check processed rows if experimentUpdates.nrows != experiment.nrows: statusMsg('Not all records were processed. There are still',
# Register the new release in the patch table. This has to happen before any
# of the imports below (DO THIS FIRST!).
patchFields = ('id', 'type', 'date', 'description')
rd3.add(
    entity='rd3_patch',
    data={field: patchinfo.get(field) for field in patchFields}
)

# New organisations are imported next. ERNs could be imported here as well if
# ever needed, but that is highly unlikely.
rd3.importData(entity='rd3_organisation', data=dtFrameToRecords(newOrgs))

# Convert every release table to records before the first table is imported.
rd3_subjects = dtFrameToRecords(data=subjects)
rd3_subjectInfo = dtFrameToRecords(data=subjectInfo)
rd3_samples = dtFrameToRecords(data=samples)
rd3_labinfo = dtFrameToRecords(data=labinfo)

# Import the release tables in a fixed order: subject, subjectinfo, sample,
# labinfo. The entity name is built from the release name in `patchinfo`.
releaseTables = (
    ('subject', rd3_subjects),
    ('subjectinfo', rd3_subjectInfo),
    ('sample', rd3_samples),
    ('labinfo', rd3_labinfo),
)
for tableSuffix, tableRecords in releaseTables:
    rd3.importData(
        entity=f'rd3_{patchinfo["name"]}_{tableSuffix}',
        data=tableRecords
    )

# Update the portal: write the `processed` column of the release table.
rd3.updateColumn(
    entity=releaseName,
    attr='processed',
    data=dtFrameToRecords(portalUpdates)
)
# Recode the attribute: every 'UK' entry in `percentageTumorCells` becomes a
# missing value, all other entries are kept as they are.
rawTumorCells = newSamplesData['percentageTumorCells'].to_list()[0]
newSamplesData['percentageTumorCells'] = dt.Frame([
    None if value == 'UK' else value for value in rawTumorCells
])

# A cast of the column to int8 was tried here and is left disabled:
# newSamplesData[:, dt.update(
#     percentageTumorCells = as_type(f.percentageTumorCells, dt.Type.int8)
# )]

#///////////////////////////////////////

# ~ 2 ~
# Import data

# Build one record set per attribute, each paired with the row `id`.
pathologicalState = dtFrameToRecords(
    newSamplesData[:, ['id', 'pathologicalState']]
)
percentageTumorCells = dtFrameToRecords(
    newSamplesData[:, ['id', 'percentageTumorCells']]
)

# Push both columns to the sample table, pathologicalState first.
columnUpdates = (
    ('pathologicalState', pathologicalState),
    ('percentageTumorCells', percentageTumorCells),
)
for columnName, columnRecords in columnUpdates:
    rd3.updateColumn(
        entity='rd3_noveldeepwes_sample',
        attr=columnName,
        data=columnRecords
    )