class ped_writer(unittest.TestCase): def setUp(self): self.kb = KB(driver='omero')('localhost', 'root', 'romeo') def tearDown(self): pass def test_base(self): def extract_data_sample(group, mset, dsample_name): by_individual = {} for i in self.kb.get_individuals(group): gds = filter(lambda x: x.snpMarkersSet == mset, self.kb.get_data_samples(i, dsample_name)) assert (len(gds) == 1) by_individual[i.id] = gds[0] return by_individual study = self.kb.get_study('TEST01') family = self.kb.get_individuals(study) mset = self.kb.get_snp_markers_set(label='FakeTaqSet01') gds_by_individual = extract_data_sample(study, mset, 'GenotypeDataSample') pw = PedWriter(mset, base_path="./foo") pw.write_map() pw.write_family(study.id, family, gds_by_individual) pw.close()
class ped_writer(unittest.TestCase): def setUp(self): self.kb = KB(driver="omero")("localhost", "root", "romeo") def tearDown(self): pass def test_base(self): def extract_data_sample(group, mset, dsample_name): by_individual = {} for i in self.kb.get_individuals(group): gds = filter(lambda x: x.snpMarkersSet == mset, self.kb.get_data_samples(i, dsample_name)) assert len(gds) == 1 by_individual[i.id] = gds[0] return by_individual study = self.kb.get_study("TEST01") family = self.kb.get_individuals(study) mset = self.kb.get_snp_markers_set(label="FakeTaqSet01") gds_by_individual = extract_data_sample(study, mset, "GenotypeDataSample") pw = PedWriter(mset, base_path="./foo") pw.write_map() pw.write_family(study.id, family, gds_by_individual) pw.close()
class VidMapper(object):
    """Rewrite a record's ``source`` enrollment code into a PlateWell vid.

    ``map_vid`` mutates the record in place, replacing the enrollment code
    with the id of the single well of that individual that sits on the
    plate encoded in the code itself.
    """

    def __init__(self, host, user, passwd, logger, study):
        self.logger = logger
        self.study = study
        self.kb = KB(driver="omero")(host, user, passwd)
        plates = self.kb.get_objects(self.kb.TiterPlate)
        self.logger.info("fetched %d plates" % len(plates))
        # plate omero_id -> barcode, so wells can be matched to plates
        self.plate_map = {}
        self.enroll_map = {}
        for p in plates:
            self.plate_map[p.omero_id] = p.barcode
        s = self.kb.get_study(self.study)
        enrolls = self.kb.get_enrolled(s)
        self.logger.info("fetched %d enrollments" % len(enrolls))
        for e in enrolls:
            self.logger.debug('Retrieving wells for %s' % e.studyCode)
            # list() instead of a pass-through comprehension
            wells = list(self.kb.get_vessels_by_individual(e.individual,
                                                           "PlateWell"))
            self.enroll_map[e.studyCode] = wells

    def map_vid(self, r):
        """Replace ``r["source"]`` with the matching well id, in place.

        Raises ValueError when the code is not enrolled or when the number
        of wells on the target plate is not exactly one.
        """
        en_code = r["source"]
        # assumes codes look like '<code>|<plate barcode>' -- TODO confirm
        pl_barcode = en_code.split("|")[1]
        try:
            wells = self.enroll_map[en_code]
        except KeyError:
            msg = "%s is not enrolled in %s" % (en_code, self.study)
            self.logger.error(msg)
            raise ValueError(msg)
        self.logger.info("found %d wells for %s" % (len(wells), en_code))
        imm_wells = [w for w in wells
                     if self.plate_map[w.container.omero_id] == pl_barcode]
        if len(imm_wells) > 1:
            msg = ("more than 1 (%d) immuno wells for plate %s"
                   % (len(imm_wells), pl_barcode))
            self.logger.error(msg)
            raise ValueError(msg)
        elif len(imm_wells) == 0:
            msg = "no immuno well for plate %s" % pl_barcode
            # Logger.warn is a deprecated alias -- use warning()
            self.logger.warning(msg)
            raise ValueError(msg)
        else:
            r["source"] = imm_wells[0].id
class VidMapper(object):
    """Translate enrollment codes in records into PlateWell vids."""

    def __init__(self, host, user, passwd, logger, study):
        self.logger = logger
        self.study = study
        self.kb = KB(driver="omero")(host, user, passwd)
        all_plates = self.kb.get_objects(self.kb.TiterPlate)
        self.logger.info("fetched %d plates" % len(all_plates))
        # plate omero_id -> barcode
        self.plate_map = dict((tp.omero_id, tp.barcode) for tp in all_plates)
        self.enroll_map = {}
        study_obj = self.kb.get_study(self.study)
        enrollments = self.kb.get_enrolled(study_obj)
        self.logger.info("fetched %d enrollments" % len(enrollments))
        for enr in enrollments:
            self.logger.debug("Retrieving wells for %s" % enr.studyCode)
            self.enroll_map[enr.studyCode] = [
                w for w in self.kb.get_vessels_by_individual(enr.individual,
                                                             "PlateWell")]

    def map_vid(self, r):
        """Rewrite r["source"] with the id of the matching well (in place)."""
        en_code = r["source"]
        pl_barcode = en_code.split("|")[1]
        try:
            candidate_wells = self.enroll_map[en_code]
        except KeyError:
            msg = "%s is not enrolled in %s" % (en_code, self.study)
            self.logger.error(msg)
            raise ValueError(msg)
        self.logger.info("found %d wells for %s"
                         % (len(candidate_wells), en_code))
        matches = [w for w in candidate_wells
                   if self.plate_map[w.container.omero_id] == pl_barcode]
        n_matches = len(matches)
        if n_matches > 1:
            msg = ("more than 1 (%d) immuno wells for plate %s"
                   % (n_matches, pl_barcode))
            self.logger.error(msg)
            raise ValueError(msg)
        if n_matches == 0:
            msg = "no immuno well for plate %s" % pl_barcode
            self.logger.warn(msg)
            raise ValueError(msg)
        r["source"] = matches[0].id
The first element of a marker defining tuple is its label, the second is the dbSNP db label, if available, while the third is the marker mask. .. todo:: put a reference to reference documentation Now we will load the markers set definition into Omero.biobank. **Note:** We are considering an ideal case where none of the markers is already in the db. """ study = kb.get_study('TEST01') action = kb.create_an_action(study, doc='importing markers') action.reload() source, context, release = 'foobar', 'fooctx', 'foorel' ref_rs_genome, dbsnp_build = 'foo-rs-genome', 13200 lvs = kb.create_markers(source, context, release, ref_rs_genome, dbsnp_build, taq_man_markers, action) """ .. where lvs is a list of (label, vid) tuples. We can assume that the markers above have been aligned against a
host = args.host or vlu.ome_host() user = args.user or vlu.ome_user() passwd = args.passwd or vlu.ome_passwd() except ValueError, ve: logger.critical(ve) sys.exit(ve) kb = KB(driver='omero')(host, user, passwd) logger.debug('Reading codes from source list') with open(args.source_list) as f: codes = [row.strip() for row in f.readlines()] logger.debug('Found %d codes to discard' % len(codes)) logger.debug('Retrieving enrollments for study %s' % args.source_study) source_enrolls = kb.get_enrolled(kb.get_study(args.source_study)) logger.debug('Retrieved %d enrollments' % len(source_enrolls)) src_st_lookup = {} for sen in source_enrolls: src_st_lookup[sen.studyCode] = sen to_be_discarded = [] discard_st = kb.get_study(args.discard_study) if discard_st is None: logger.critical('Study with label %s not found!' % args.discard_study) sys.exit(2) for c in codes: try: src_st_lookup[c].study = discard_st to_be_discarded.append(src_st_lookup[c])
class TestKB(KBObjectCreator):
    """CRUD round-trip tests for the knowledge base object model."""

    def __init__(self, name):
        super(TestKB, self).__init__(name)
        # objects queued for deletion in tearDown, in creation order
        self.kill_list = []

    def setUp(self):
        self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASS)

    def tearDown(self):
        # delete in reverse creation order so dependents go first
        self.kill_list.reverse()
        for x in self.kill_list:
            self.kb.delete(x)
        self.kill_list = []

    def check_object(self, o, conf, otype):
        """Assert that o is an otype and carries the attributes in conf.

        BUG FIX: the original wrapped this whole body in a bare
        ``except: pass``, which silently swallowed every assertion failure
        and turned the check into a no-op. The handler has been removed so
        failing comparisons actually fail the test.
        """
        self.assertTrue(isinstance(o, otype))
        for k in conf.keys():
            v = conf[k]
            # FIXME this is omero specific...
            if hasattr(v, 'ome_obj'):
                self.assertEqual(getattr(o, k).id, v.id)
                self.assertEqual(type(getattr(o, k)), type(v))
            elif hasattr(v, '_id'):
                self.assertEqual(getattr(o, k)._id, v._id)
            else:
                self.assertEqual(getattr(o, k), v)

    def test_study(self):
        conf, s = self.create_study()
        self.kill_list.append(s.save())
        self.check_object(s, conf, self.kb.Study)

    def test_study_ops(self):
        conf, s = self.create_study()
        s.save()
        xs = self.kb.get_study(conf['label'])
        self.assertTrue(xs is not None)
        self.assertEqual(xs.id, s.id)
        self.assertEqual(xs.label, s.label)
        self.kb.delete(s)
        self.assertEqual(self.kb.get_study(conf['label']), None)

    def test_device(self):
        conf, d = self.create_device()
        self.kill_list.append(d.save())
        self.check_object(d, conf, self.kb.Device)

    def test_hardware_device(self):
        conf, d = self.create_hardware_device()
        self.kill_list.append(d.save())
        self.check_object(d, conf, self.kb.HardwareDevice)

    def test_device_ops(self):
        conf, d = self.create_device()
        d.save()
        xs = self.kb.get_device(conf['label'])
        self.assertTrue(xs is not None)
        self.check_object(xs, conf, self.kb.Device)
        self.kb.delete(d)
        self.assertEqual(self.kb.get_device(conf['label']), None)

    def test_action_setup(self):
        conf, a = self.create_action_setup()
        self.kill_list.append(a.save())
        self.check_object(a, conf, self.kb.ActionSetup)

    def test_action(self):
        conf, action = self.create_action()
        self.kill_list.append(action.save())
        self.check_object(action, conf, self.kb.Action)

    def test_action_on_vessel(self):
        conf, action = self.create_action_on_vessel()
        self.kill_list.append(action.save())
        self.check_object(action, conf, self.kb.ActionOnVessel)

    def test_action_on_data_sample(self):
        conf, action = self.create_action_on_data_sample()
        self.kill_list.append(action.save())
        self.check_object(action, conf, self.kb.ActionOnDataSample)

    def test_action_on_data_collection_item(self):
        conf, action = self.create_action_on_data_collection_item()
        self.kill_list.append(action.save())
        # NOTE(review): this checks against ActionOnDataSample although the
        # object was created as an action on a data collection item; looks
        # like a copy-paste slip -- confirm whether
        # kb.ActionOnDataCollectionItem was intended before changing it.
        self.check_object(action, conf, self.kb.ActionOnDataSample)
supported on mset0, linked to an individual contained in a given group. Just to keep things simple, we will select, for each individual, the first of the list of known GenotypeDataSample for that mset, if there is at least one, otherwise we will skip the individual. """ def extract_data_sample(group, mset, dsample_name): by_individual = {} for i in kb.get_individuals(group): gds = filter(lambda x: x.snpMarkersSet == mset, kb.get_data_samples(i, dsample_name)) assert(len(gds) == 1) by_individual[i.id] = gds[0] return by_individual group = kb.get_study(label='TEST01') gds0_by_individual = extract_data_sample(group, mset0, 'GenotypeDataSample') """ .. Note that what we have now is a dictionary that maps individual ids to GenotypeDataSample objects and the latter are only handlers to get to the actual genotyping data, not the data itself. We can, now, do a global check on data quality. """ def do_check(s): counts = algo.count_homozygotes(s) mafs = algo.maf(None, counts) hwe = algo.hwe(None, counts)
class App(object):
    """Import genotype calls from a ped/dat file pair into the KB."""

    def __init__(self, host, user, passwd, study_label, maker, model, release):
        self.kb = KB(driver='omero')(host, user, passwd)
        self.mset = self.kb.get_snp_markers_set(maker, model, release)
        self.logger = logging.getLogger()
        if not self.mset:
            raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                             % (maker, model, release))
        # a fresh, uniquely-labeled setup for this import run
        alabel = 'load_genotypes-setup-%s' % time.time()
        self.asetup = self.kb.factory.create(self.kb.ActionSetup,
                                             {'label': alabel,
                                              'conf': ''}).save()
        # reuse the loader device if a previous run already registered it
        dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
        dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
        device = self.kb.get_device(dlabel)
        if not device:
            device = self.kb.factory.create(self.kb.Device,
                                            {'label': dlabel,
                                             'maker': dmaker,
                                             'model': dmodel,
                                             'release': drelease}).save()
        self.device = device
        # FIXME this will break if study is not defined.
        self.study = self.kb.get_study(study_label)

    def check_snp_markers_set(self, marker_types, marker_names):
        """Fail fast if any 'M' marker name is unknown to the markers set."""
        self.logger.info('start checking snp_markers_set')
        mdefs, msetc = self.kb.get_snp_markers_set_content(self.mset)
        rs_labels = mdefs['rs_label']
        for t, n in it.izip(marker_types, marker_names):
            if t == 'M':
                if n not in rs_labels:  # idiomatic 'not in'
                    msg = 'marker %s is not in the specified SNPMarkersSet' % n
                    self.logger.critical(msg)
                    raise ValueError(msg)
        self.logger.info('done checking snp_markers_set')

    def create_action(self, target):
        """Save and return a MEASUREMENT ActionOnVessel against target."""
        conf = {
            'setup': self.asetup,
            'device': self.device,
            'actionCategory': self.kb.ActionCategory.MEASUREMENT,
            'operator': 'Alfred E. Neumann',
            'context': self.study,
            'target': target,
        }
        action = self.kb.factory.create(self.kb.ActionOnVessel, conf).save()
        return action

    def create_data_sample(self, action, label):
        """Save and return a USABLE GenotypeDataSample for this markers set."""
        conf = {
            'snpMarkersSet': self.mset,
            'label': label,
            'status': self.kb.DataSampleStatus.USABLE,
            'action': action,
        }
        return self.kb.factory.create(self.kb.GenotypeDataSample, conf).save()

    def load(self, pedfile, datfile, conf_value=1.0):
        """Stream records from pedfile/datfile and store them in the KB.

        Records whose sample label is unknown are logged and skipped.
        """
        pr = PedReader(pedfile, datfile, conf_value)
        self.check_snp_markers_set(pr.marker_types, pr.marker_names)
        self.logger.info('start loading from pedfile %s' % pedfile.name)
        for x in pr:
            sample = self.kb.get_vessel(x['sample_label'])
            if not sample:
                self.logger.error('No sample with label %s in VL'
                                  % x['sample_label'])
                continue
            action = self.create_action(sample)
            avid = action.id
            action.unload()
            data_sample = self.create_data_sample(action, x['label'])
            # the return value was bound to a dead local in the original;
            # call for the side effect only
            self.kb.add_gdo_data_object(avid, data_sample, x['probs'],
                                        x['confs'])
            self.logger.info('-- loaded %s' % x['label'])
        self.logger.info('done loading from pedfile %s' % pedfile.name)
class App(object):
    """Genotype loader: reads ped/dat files and stores calls in the KB."""

    def __init__(self, host, user, passwd, study_label, maker, model, release):
        self.kb = KB(driver='omero')(host, user, passwd)
        self.mset = self.kb.get_snp_markers_set(maker, model, release)
        self.logger = logging.getLogger()
        if not self.mset:
            raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                             % (maker, model, release))
        # action setup labeled with the current timestamp
        setup_label = 'load_genotypes-setup-%s' % time.time()
        self.asetup = self.kb.factory.create(
            self.kb.ActionSetup, {'label': setup_label, 'conf': ''}).save()
        # device record describing this loader program
        dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
        dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
        found = self.kb.get_device(dlabel)
        if not found:
            found = self.kb.factory.create(self.kb.Device,
                                           {'label': dlabel,
                                            'maker': dmaker,
                                            'model': dmodel,
                                            'release': drelease}).save()
        self.device = found
        # FIXME this will break if study is not defined.
        self.study = self.kb.get_study(study_label)

    def check_snp_markers_set(self, marker_types, marker_names):
        """Verify every 'M' marker name is known to the markers set."""
        self.logger.info('start checking snp_markers_set')
        mdefs, msetc = self.kb.get_snp_markers_set_content(self.mset)
        rs_labels = mdefs['rs_label']
        for mtype, mname in it.izip(marker_types, marker_names):
            if mtype == 'M' and mname not in rs_labels:
                msg = 'marker %s is not in the specified SNPMarkersSet' % mname
                self.logger.critical(msg)
                raise ValueError(msg)
        self.logger.info('done checking snp_markers_set')

    def create_action(self, target):
        """Build and save the ActionOnVessel for one measurement."""
        return self.kb.factory.create(
            self.kb.ActionOnVessel,
            {'setup': self.asetup,
             'device': self.device,
             'actionCategory': self.kb.ActionCategory.MEASUREMENT,
             'operator': 'Alfred E. Neumann',
             'context': self.study,
             'target': target}).save()

    def create_data_sample(self, action, label):
        """Build and save a GenotypeDataSample bound to action."""
        return self.kb.factory.create(
            self.kb.GenotypeDataSample,
            {'snpMarkersSet': self.mset,
             'label': label,
             'status': self.kb.DataSampleStatus.USABLE,
             'action': action}).save()

    def load(self, pedfile, datfile, conf_value=1.0):
        """Import every record yielded by a PedReader over the inputs."""
        reader = PedReader(pedfile, datfile, conf_value)
        self.check_snp_markers_set(reader.marker_types, reader.marker_names)
        self.logger.info('start loading from pedfile %s' % pedfile.name)
        for rec in reader:
            sample = self.kb.get_vessel(rec['sample_label'])
            if not sample:
                self.logger.error('No sample with label %s in VL'
                                  % rec['sample_label'])
                continue
            action = self.create_action(sample)
            avid = action.id
            action.unload()
            data_sample = self.create_data_sample(action, rec['label'])
            data_object = self.kb.add_gdo_data_object(avid, data_sample,
                                                      rec['probs'],
                                                      rec['confs'])
            self.logger.info('-- loaded %s' % rec['label'])
        self.logger.info('done loading from pedfile %s' % pedfile.name)
class Core(object):
    """Shared base for importer tools: a KB connection plus common lookups."""

    def __init__(self, host=None, user=None, passwd=None, group=None,
                 keep_tokens=1, study_label=None, logger=None):
        self.kb = KB(driver='omero')(host, user, passwd, group, keep_tokens)
        self.logger = logger if logger else logging.getLogger()
        self.record_counter = 0
        self.default_study = None
        if study_label:
            selected = self.kb.get_study(study_label)
            if not selected:
                raise ValueError('No known study with label %s' % study_label)
            self.logger.info('Selecting %s[%d,%s] as default study'
                             % (selected.label, selected.omero_id,
                                selected.id))
            self.default_study = selected

    @classmethod
    def find_action_setup_conf(cls, args):
        """Snapshot the parsed CLI namespace into a plain dict."""
        conf = {}
        for attr in dir(args):
            if attr.startswith('_') or attr.startswith('func'):
                continue
            conf[attr] = getattr(args, attr)
        # HACKS
        conf['ifile'] = conf['ifile'].name
        conf['ofile'] = conf['ofile'].name
        conf['report_file'] = conf['report_file'].name
        return conf

    @classmethod
    def get_action_setup_options(cls, record, action_setup_conf=None,
                                 object_history=None):
        """Serialize record options plus optional extras to a JSON string."""
        options = {}
        if 'options' in record and record['options']:
            for pair in record['options'].split(','):
                key, value = pair.split('=')
                options[key] = value
        if action_setup_conf:
            options['importer_setup'] = action_setup_conf
        if object_history:
            options['object_history'] = object_history
        return json.dumps(options)

    def get_device(self, label, maker, model, release):
        """Fetch a device by label, creating it on first use."""
        found = self.kb.get_device(label)
        if found:
            return found
        self.logger.debug('creating a device')
        return self.kb.create_device(label, maker, model, release)

    def get_action_setup(self, label, conf):
        """
        Return the ActionSetup corresponding to label if there is one, else
        create a new one using conf.
        """
        found = self.kb.get_action_setup(label)
        if found:
            return found
        payload = {'label': label, 'conf': json.dumps(conf)}
        return self.kb.factory.create(self.kb.ActionSetup, payload).save()

    def get_study(self, label):
        """Return the default study if set, else fetch or create by label."""
        if self.default_study:
            return self.default_study
        found = self.kb.get_study(label)
        if found:
            return found
        return self.kb.factory.create(self.kb.Study, {'label': label}).save()

    def find_study(self, records):
        """All records must share one study label; return that study."""
        label = records[0]['study']
        for rec in records:
            if rec['study'] != label:
                message = 'all records should have the same study label'
                self.logger.critical(message)
                raise ValueError(message)
        return self.get_study(label)

    def find_klass(self, col_name, records):
        """All records must agree on col_name; return the named KB class."""
        type_name = records[0][col_name]
        for rec in records:
            if rec[col_name] != type_name:
                message = 'all records should have the same %s' % col_name
                self.logger.critical(message)
                raise ValueError(message)
        return getattr(self.kb, type_name)

    def __preload_items__(self, key_field, klass, preloaded):
        # index every klass object by key_field, asserting key uniqueness
        for obj in self.kb.get_objects(klass):
            key = getattr(obj, key_field)
            assert not key in preloaded
            preloaded[key] = obj

    def preload_by_type(self, name, klass, preloaded):
        self.logger.info('start preloading %s' % name)
        self.__preload_items__('id', klass, preloaded)
        self.logger.info('done preloading %s' % name)

    def preload_studies(self, preloaded):
        self.logger.info('start preloading studies')
        self.__preload_items__('label', self.kb.Study, preloaded)
        self.logger.info('done preloading studies')

    def missing_fields(self, fields, r):
        """Return the first field absent from r, or False if all present."""
        return next((f for f in fields if f not in r), False)
def main(argv):
    """Build a per-plate CSV datasheet for every plate listed on the CLI."""
    parser = make_parser()
    args = parser.parse_args(argv)
    log_level = getattr(logging, args.loglevel)
    log_conf = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT,
                'level': log_level}
    if args.logfile:
        log_conf['filename'] = args.logfile
    logging.basicConfig(**log_conf)
    logger = logging.getLogger()
    kb = KB(driver='omero')(args.host, args.user, args.passwd)
    # Load enrollments and individual (needed to build sample label and for
    # gender field)
    enrolls = []
    for study_label in STUDY_LABELS:
        logger.debug('Loading enrollments for study %s' % study_label)
        enrolls.extend(kb.get_enrolled(kb.get_study(study_label)))
    logger.debug('Fetched %d individuals' % len(enrolls))
    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)
    logger.debug('Loading EHR records')
    ehr_records_map = {}
    for rec in kb.get_ehr_records('(valid == True)'):
        ehr_records_map.setdefault(rec['i_id'], []).append(rec)
    # Read plate barcodes
    with open(args.plates_list) as pl_list:
        barcodes = [row.strip() for row in pl_list]
    for plate_barcode in barcodes:
        logger.info('Creating datasheet for plate %s' % plate_barcode)
        pl = load_plate(plate_barcode, kb)
        if not pl:
            logger.error('No plate with barcode %s exists, skipping it.'
                         % (plate_barcode))
            continue
        # Load wells for selected plate
        pl_wells = get_wells_by_plate(plate_barcode, kb)
        out_path = os.path.join(args.out_dir,
                                '%s_datasheet.csv' % plate_barcode)
        with open(out_path, 'w') as of:
            writer = csv.DictWriter(of, CSV_FIELDS, delimiter='\t')
            writer.writeheader()
            last_slot = 0
            for slot, well in sorted(pl_wells.iteritems()):
                cl_record = ehr_records_map[
                    wells_lookup[well][0].individual.id]
                t1d, ms = get_affections(cl_record)
                # pad skipped slots with placeholder rows
                while last_slot != slot - 1:
                    last_slot += 1
                    writer.writerow({'Sample_ID': 'X',
                                     'PLATE_barcode': pl.barcode,
                                     'PLATE_name': pl.label,
                                     'WELL_label': get_well_label(last_slot),
                                     'INDIVIDUAL_gender': 'X',
                                     'INDIVIDUAL_vid': 'X',
                                     'T1D_affected': 'X',
                                     'MS_affected': 'X'})
                writer.writerow({
                    'Sample_ID': get_ichip_sample_code(wells_lookup[well],
                                                       pl.barcode),
                    'PLATE_barcode': pl.barcode,
                    'PLATE_name': pl.label,
                    'WELL_label': well.label,
                    'INDIVIDUAL_gender':
                        map_gender(wells_lookup[well][0].individual),
                    'INDIVIDUAL_vid': wells_lookup[well][0].individual.id,
                    'T1D_affected': t1d,
                    'MS_affected': ms})
                last_slot = slot
passwd = args.passwd or vlu.ome_passwd() except ValueError, ve: logger.critical(ve) sys.exit(ve) kb = KB(driver='omero')(host, user, passwd) with open(args.couples_list) as f: reader = csv.reader(f, delimiter='\t') couples = [] for row in reader: couples.append((row[0], row[1])) logger.info('%d couples are going to be swapped' % len(couples)) logger.debug('Retrieving enrollments for study %s' % args.study) enrolls = kb.get_enrolled(kb.get_study(args.study)) logger.debug('Retrieved %d enrollments' % len(enrolls)) en_lookup = {} for en in enrolls: en_lookup[en.studyCode] = en for en_code1, en_code2 in couples: logger.info('Swapping couple %s - %s' % (en_code1, en_code2)) try: en1 = en_lookup[en_code1] en2 = en_lookup[en_code2] except KeyError, ke: logger.error('Code %s not found in study %s' % (ke, args.study)) sys.exit(2)
class Core(object):
    """Common base class for KB import/export tools."""

    def __init__(self, host=None, user=None, passwd=None, group=None,
                 keep_tokens=1, study_label=None, logger=None):
        """Open a KB connection and optionally pin a default study."""
        self.kb = KB(driver='omero')(host, user, passwd, group, keep_tokens)
        self.logger = logger if logger else logging.getLogger()
        self.record_counter = 0
        self.default_study = None
        if study_label:
            s = self.kb.get_study(study_label)
            if not s:
                raise ValueError('No known study with label %s' % study_label)
            self.logger.info('Selecting %s[%d,%s] as default study'
                             % (s.label, s.omero_id, s.id))
            self.default_study = s

    @classmethod
    def find_action_setup_conf(klass, args):
        """Flatten the parsed CLI namespace into a plain dict.

        NOTE(review): assumes ``args`` always exposes ``ifile``, ``ofile``
        and ``report_file`` as open file objects -- confirm with callers.
        """
        action_setup_conf = {}
        for x in dir(args):
            # skip private attributes and argparse's 'func' dispatch hook
            if not (x.startswith('_') or x.startswith('func')):
                action_setup_conf[x] = getattr(args, x)
        # HACKS: file handles are not serializable, keep only their names
        action_setup_conf['ifile'] = action_setup_conf['ifile'].name
        action_setup_conf['ofile'] = action_setup_conf['ofile'].name
        action_setup_conf['report_file'] = \
            action_setup_conf['report_file'].name
        return action_setup_conf

    @classmethod
    def get_action_setup_options(klass, record, action_setup_conf=None,
                                 object_history=None):
        """Merge a record's 'k=v,k=v' options with extras; return JSON."""
        options = {}
        if 'options' in record and record['options']:
            kvs = record['options'].split(',')
            for kv in kvs:
                k, v = kv.split('=')
                options[k] = v
        if action_setup_conf:
            options['importer_setup'] = action_setup_conf
        if object_history:
            options['object_history'] = object_history
        return json.dumps(options)

    def get_device(self, label, maker, model, release):
        """Return the device with this label, creating it if missing."""
        device = self.kb.get_device(label)
        if not device:
            self.logger.debug('creating a device')
            device = self.kb.create_device(label, maker, model, release)
        return device

    def get_action_setup(self, label, conf):
        """
        Return the ActionSetup corresponding to label if there is one, else
        create a new one using conf.
        """
        asetup = self.kb.get_action_setup(label)
        if not asetup:
            kb_conf = {'label': label, 'conf': json.dumps(conf)}
            asetup = self.kb.factory.create(self.kb.ActionSetup,
                                            kb_conf).save()
        return asetup

    def get_study(self, label):
        """Return the default study if set, else fetch or create by label."""
        if self.default_study:
            return self.default_study
        study = self.kb.get_study(label)
        if not study:
            study = self.kb.factory.create(self.kb.Study,
                                           {'label': label}).save()
        return study

    def find_study(self, records):
        """Return the study shared by all records; raise if labels differ."""
        study_label = records[0]['study']
        for r in records:
            if r['study'] != study_label:
                m = 'all records should have the same study label'
                self.logger.critical(m)
                raise ValueError(m)
        return self.get_study(study_label)

    def find_klass(self, col_name, records):
        """Return the KB class named in col_name, shared by all records."""
        o_type = records[0][col_name]
        for r in records:
            if r[col_name] != o_type:
                m = 'all records should have the same %s' % col_name
                self.logger.critical(m)
                raise ValueError(m)
        return getattr(self.kb, o_type)

    def __preload_items__(self, key_field, klass, preloaded):
        """Index all klass objects by key_field into preloaded.

        Duplicate keys now raise ValueError: the original used a bare
        ``assert`` for this check, which is silently stripped when Python
        runs with ``-O``.
        """
        objs = self.kb.get_objects(klass)
        for o in objs:
            key = getattr(o, key_field)  # hoisted: used twice below
            if key in preloaded:
                raise ValueError('duplicate %s %r while preloading %s'
                                 % (key_field, key, klass))
            preloaded[key] = o

    def preload_by_type(self, name, klass, preloaded):
        """Preload all objects of klass, keyed by id."""
        self.logger.info('start preloading %s' % name)
        self.__preload_items__('id', klass, preloaded)
        self.logger.info('done preloading %s' % name)

    def preload_studies(self, preloaded):
        """Preload all studies, keyed by label."""
        self.logger.info('start preloading studies')
        self.__preload_items__('label', self.kb.Study, preloaded)
        self.logger.info('done preloading studies')

    def missing_fields(self, fields, r):
        """Return the first required field missing from r, or False."""
        for f in fields:
            if f not in r:
                return f
        return False
def main(argv):
    """Generate a tab-separated datasheet for every listed plate."""
    parser = make_parser()
    args = parser.parse_args(argv)
    log_kwargs = {"format": LOG_FORMAT, "datefmt": LOG_DATEFMT,
                  "level": getattr(logging, args.loglevel)}
    if args.logfile:
        log_kwargs["filename"] = args.logfile
    logging.basicConfig(**log_kwargs)
    logger = logging.getLogger()
    kb = KB(driver="omero")(args.host, args.user, args.passwd)

    def placeholder_row(plate, slot_index):
        # filler row for a slot with no sample
        return {"Sample_ID": "X",
                "PLATE_barcode": plate.barcode,
                "PLATE_name": plate.label,
                "WELL_label": get_well_label(slot_index),
                "INDIVIDUAL_gender": "X",
                "INDIVIDUAL_vid": "X",
                "T1D_affected": "X",
                "MS_affected": "X"}

    def sample_row(plate, well, enroll_info, t1d, ms):
        individual = enroll_info[0].individual
        return {"Sample_ID": get_ichip_sample_code(enroll_info,
                                                   plate.barcode),
                "PLATE_barcode": plate.barcode,
                "PLATE_name": plate.label,
                "WELL_label": well.label,
                "INDIVIDUAL_gender": map_gender(individual),
                "INDIVIDUAL_vid": individual.id,
                "T1D_affected": t1d,
                "MS_affected": ms}

    # Load enrollments and individual (needed to build sample label and
    # for gender field)
    enrolls = []
    for sl in STUDY_LABELS:
        logger.debug("Loading enrollments for study %s" % sl)
        enrolls.extend(kb.get_enrolled(kb.get_study(sl)))
    logger.debug("Fetched %d individuals" % len(enrolls))
    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)
    logger.debug("Loading EHR records")
    ehr_by_individual = {}
    for record in kb.get_ehr_records("(valid == True)"):
        ehr_by_individual.setdefault(record["i_id"], []).append(record)
    # Read plate barcodes
    with open(args.plates_list) as fin:
        barcodes = [line.strip() for line in fin]
    for barcode in barcodes:
        logger.info("Creating datasheet for plate %s" % barcode)
        plate = load_plate(barcode, kb)
        if not plate:
            logger.error("No plate with barcode %s exists, skipping it."
                         % (barcode))
            continue
        # Load wells for selected plate
        plate_wells = get_wells_by_plate(barcode, kb)
        sheet_name = os.path.join(args.out_dir, "%s_datasheet.csv" % barcode)
        with open(sheet_name, "w") as fout:
            writer = csv.DictWriter(fout, CSV_FIELDS, delimiter="\t")
            writer.writeheader()
            previous = 0
            for slot, well in sorted(plate_wells.iteritems()):
                enroll_info = wells_lookup[well]
                clinical = ehr_by_individual[enroll_info[0].individual.id]
                t1d, ms = get_affections(clinical)
                while previous != slot - 1:
                    previous += 1
                    writer.writerow(placeholder_row(plate, previous))
                writer.writerow(sample_row(plate, well, enroll_info,
                                           t1d, ms))
                previous = slot
mset, if there is at least one, otherwise we will skip the individual. """ def extract_data_sample(group, mset, dsample_name): by_individual = {} for i in kb.get_individuals(group): gds = filter(lambda x: x.snpMarkersSet == mset, kb.get_data_samples(i, dsample_name)) assert (len(gds) == 1) by_individual[i.id] = gds[0] return by_individual group = kb.get_study(label='TEST01') gds0_by_individual = extract_data_sample(group, mset0, 'GenotypeDataSample') """ .. Note that what we have now is a dictionary that maps individual ids to GenotypeDataSample objects and the latter are only handlers to get to the actual genotyping data, not the data itself. We can, now, do a global check on data quality. """ def do_check(s): counts = algo.count_homozygotes(s) mafs = algo.maf(None, counts)
The first element of a marker defining tuple is its label, the second is the dbSNP db label, if available, while the third is the marker mask. .. todo:: put a reference to reference documentation Now we will load the markers set definition into Omero.biobank. **Note:** We are considering an ideal case where none of the markers is already in the db. """ study = kb.get_study('TEST01') action = kb.create_an_action(study, doc='importing markers') action.reload() source, context, release = 'foobar', 'fooctx', 'foorel' ref_rs_genome, dbsnp_build = 'foo-rs-genome', 13200 lvs = kb.create_markers(source, context, release, ref_rs_genome, dbsnp_build, taq_man_markers, action) """ .. where lvs is a list of (label, vid) tuples. We can assume that the markers above have been aligned against a reference genome, say fake19, and save in omero.biobank the alignment