class VidMapper(object):
    """Replace the ``source`` field of a record with the id of a plate well.

    At construction time two lookup tables are built from the knowledge base:

    * ``plate_map``: ``omero_id`` of every fetched TiterPlate -> its barcode
    * ``enroll_map``: ``studyCode`` of every enrollment of ``study`` -> list
      of the "PlateWell" vessels returned for the enrolled individual
    """

    def __init__(self, host, user, passwd, logger, study):
        """Connect to the knowledge base and cache plates and wells.

        :param host: forwarded to the KB constructor
        :param user: forwarded to the KB constructor
        :param passwd: forwarded to the KB constructor
        :param logger: logger used for progress and error messages
        :param study: label of the study whose enrollments are loaded
        """
        self.logger = logger
        self.study = study
        self.kb = KB(driver="omero")(host, user, passwd)

        plates = self.kb.get_objects(self.kb.TiterPlate)
        self.logger.info("fetched %d plates" % len(plates))
        self.plate_map = {}
        self.enroll_map = {}
        for p in plates:
            self.plate_map[p.omero_id] = p.barcode

        s = self.kb.get_study(self.study)
        enrolls = self.kb.get_enrolled(s)
        self.logger.info("fetched %d enrollments" % len(enrolls))
        for e in enrolls:
            self.logger.debug('Retrieving wells for %s' % e.studyCode)
            # Materialize the result so it can be measured and re-iterated.
            self.enroll_map[e.studyCode] = list(
                self.kb.get_vessels_by_individual(e.individual, "PlateWell")
            )

    def map_vid(self, r):
        """Rewrite ``r["source"]`` in place with the id of the matching well.

        The second ``|``-separated field of ``r["source"]`` is taken as the
        plate barcode, while the whole value is used as the enrollment code.
        Exactly one well of that enrollment must sit on a plate with that
        barcode.

        :param r: mutable mapping with a ``source`` key
        :raises ValueError: if ``source`` has no ``|`` separator, if the code
            is not in ``enroll_map``, or if the number of matching wells is
            not exactly one
        """
        en_code = r["source"]
        fields = en_code.split("|")
        if len(fields) < 2:
            # Report a malformed code like every other failure of this
            # method (log + ValueError) instead of a bare IndexError.
            msg = ("cannot extract plate barcode from source %r "
                   "(no '|' separator)" % en_code)
            self.logger.error(msg)
            raise ValueError(msg)
        pl_barcode = fields[1]

        try:
            wells = self.enroll_map[en_code]
        except KeyError:
            msg = "%s is not enrolled in %s" % (en_code, self.study)
            self.logger.error(msg)
            raise ValueError(msg)
        self.logger.info("found %d wells for %s" % (len(wells), en_code))

        # NOTE: a well whose container is not in plate_map raises KeyError.
        imm_wells = [
            w for w in wells
            if self.plate_map[w.container.omero_id] == pl_barcode
        ]
        if len(imm_wells) > 1:
            msg = ("more than 1 (%d) immuno wells for plate %s" %
                   (len(imm_wells), pl_barcode))
            self.logger.error(msg)
            raise ValueError(msg)
        if not imm_wells:
            msg = "no immuno well for plate %s" % pl_barcode
            # Logger.warn is a deprecated alias (removed in Python 3.13).
            self.logger.warning(msg)
            raise ValueError(msg)
        r["source"] = imm_wells[0].id
class VidMapper(object):
    """Translate record ``source`` codes into plate well ids.

    The constructor queries the knowledge base once and keeps two caches:

    * ``plate_map`` maps the ``omero_id`` of each fetched TiterPlate to its
      barcode;
    * ``enroll_map`` maps the ``studyCode`` of each enrollment of ``study``
      to the list of "PlateWell" vessels found for the enrolled individual.
    """

    def __init__(self, host, user, passwd, logger, study):
        """Open the KB connection and fill ``plate_map`` / ``enroll_map``.

        :param host: forwarded to the KB constructor
        :param user: forwarded to the KB constructor
        :param passwd: forwarded to the KB constructor
        :param logger: logger receiving progress and error messages
        :param study: label of the study whose enrollments are cached
        """
        self.logger = logger
        self.study = study
        self.kb = KB(driver="omero")(host, user, passwd)

        plates = self.kb.get_objects(self.kb.TiterPlate)
        self.logger.info("fetched %d plates" % len(plates))
        self.plate_map = {}
        self.enroll_map = {}
        for plate in plates:
            self.plate_map[plate.omero_id] = plate.barcode

        enrolls = self.kb.get_enrolled(self.kb.get_study(self.study))
        self.logger.info("fetched %d enrollments" % len(enrolls))
        for enrollment in enrolls:
            self.logger.debug("Retrieving wells for %s" % enrollment.studyCode)
            wells = self.kb.get_vessels_by_individual(
                enrollment.individual, "PlateWell"
            )
            # Store a real list: map_vid needs len() and repeated iteration.
            self.enroll_map[enrollment.studyCode] = list(wells)

    def map_vid(self, r):
        """Replace ``r["source"]`` with the id of the single matching well.

        ``r["source"]`` is used as a whole as the enrollment code, and its
        second ``|``-separated field is used as the plate barcode. The well
        chosen is the one, among the wells cached for that enrollment, whose
        container has that barcode.

        :param r: mutable mapping holding a ``source`` entry; updated in place
        :raises ValueError: when ``source`` contains no ``|``, when the code
            is unknown to ``enroll_map``, or when zero or several wells match
        """
        en_code = r["source"]
        _, sep, tail = en_code.partition("|")
        if not sep:
            # Without a separator there is no barcode field; fail the same
            # way as the other checks (log + ValueError), not with IndexError.
            msg = "source %r has no '|' separated plate barcode" % en_code
            self.logger.error(msg)
            raise ValueError(msg)
        # Only the second field is the barcode; later fields are ignored.
        pl_barcode = tail.split("|")[0]

        if en_code not in self.enroll_map:
            msg = "%s is not enrolled in %s" % (en_code, self.study)
            self.logger.error(msg)
            raise ValueError(msg)
        wells = self.enroll_map[en_code]
        self.logger.info("found %d wells for %s" % (len(wells), en_code))

        # NOTE: KeyError propagates if a well's container is not in plate_map.
        imm_wells = [
            w for w in wells if self.plate_map[w.container.omero_id] == pl_barcode
        ]
        if not imm_wells:
            msg = "no immuno well for plate %s" % pl_barcode
            # warning() replaces the deprecated warn() alias, which no longer
            # exists in Python 3.13.
            self.logger.warning(msg)
            raise ValueError(msg)
        if len(imm_wells) > 1:
            msg = "more than 1 (%d) immuno wells for plate %s" % (len(imm_wells), pl_barcode)
            self.logger.error(msg)
            raise ValueError(msg)
        r["source"] = imm_wells[0].id
def main(argv):
    """Write one tab-separated datasheet for each plate barcode in a list.

    Barcodes are read, one per line, from ``args.plates_list``. For every
    barcode that ``load_plate`` resolves, a file named
    ``<barcode>_datasheet.csv`` is written into ``args.out_dir`` with one row
    per slot: real rows for the wells returned by ``get_wells_by_plate`` and
    "X" placeholder rows for the slots missing before each of them. Barcodes
    with no plate are logged as errors and skipped.

    :param argv: command line arguments handed to the parser built by
        ``make_parser``
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {"format": LOG_FORMAT, "datefmt": LOG_DATEFMT, "level": log_level}
    if args.logfile:
        kwargs["filename"] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()

    kb = KB(driver="omero")(args.host, args.user, args.passwd)

    # Load enrollments and individual (needed to build sample label and for
    # gender field)
    enrolls = []
    for sl in STUDY_LABELS:
        logger.debug("Loading enrollments for study %s" % sl)
        enrolls.extend(kb.get_enrolled(kb.get_study(sl)))
    logger.debug("Fetched %d individuals" % len(enrolls))
    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)

    # Group the valid EHR records by the value of their "i_id" field.
    logger.debug("Loading EHR records")
    ehr_records = kb.get_ehr_records("(valid == True)")
    ehr_records_map = {}
    for r in ehr_records:
        ehr_records_map.setdefault(r["i_id"], []).append(r)

    # Read plate barcodes
    with open(args.plates_list) as pl_list:
        barcodes = [row.strip() for row in pl_list]

    # Load plate
    for plate_barcode in barcodes:
        logger.info("Creating datasheet for plate %s" % plate_barcode)
        pl = load_plate(plate_barcode, kb)
        if not pl:
            logger.error("No plate with barcode %s exists, skipping it."
                         % (plate_barcode))
            continue

        # Load wells for selected plate
        pl_wells = get_wells_by_plate(plate_barcode, kb)
        out_path = os.path.join(args.out_dir, "%s_datasheet.csv" % plate_barcode)
        with open(out_path, "w") as of:
            writer = csv.DictWriter(of, CSV_FIELDS, delimiter="\t")
            writer.writeheader()
            last_slot = 0
            # items() instead of iteritems(): sorted() builds a list anyway
            # and items() exists on every Python version.
            for slot, well in sorted(pl_wells.items()):
                well_enrolls = wells_lookup[well]
                individual = well_enrolls[0].individual
                cl_record = ehr_records_map[individual.id]
                t1d, ms = get_affections(cl_record)
                # Fill empty slots. The guard is an ordering test rather than
                # "!=" so that an unexpected slot (one not above last_slot,
                # e.g. 0) cannot make the loop write rows forever.
                while last_slot < slot - 1:
                    last_slot += 1
                    writer.writerow(
                        {
                            "Sample_ID": "X",
                            "PLATE_barcode": pl.barcode,
                            "PLATE_name": pl.label,
                            "WELL_label": get_well_label(last_slot),
                            "INDIVIDUAL_gender": "X",
                            "INDIVIDUAL_vid": "X",
                            "T1D_affected": "X",
                            "MS_affected": "X",
                        }
                    )
                writer.writerow(
                    {
                        "Sample_ID": get_ichip_sample_code(well_enrolls, pl.barcode),
                        "PLATE_barcode": pl.barcode,
                        "PLATE_name": pl.label,
                        "WELL_label": well.label,
                        "INDIVIDUAL_gender": map_gender(individual),
                        "INDIVIDUAL_vid": individual.id,
                        "T1D_affected": t1d,
                        "MS_affected": ms,
                    }
                )
                last_slot = slot
host = args.host or vlu.ome_host() user = args.user or vlu.ome_user() passwd = args.passwd or vlu.ome_passwd() except ValueError, ve: logger.critical(ve) sys.exit(ve) kb = KB(driver='omero')(host, user, passwd) logger.debug('Reading codes from source list') with open(args.source_list) as f: codes = [row.strip() for row in f.readlines()] logger.debug('Found %d codes to discard' % len(codes)) logger.debug('Retrieving enrollments for study %s' % args.source_study) source_enrolls = kb.get_enrolled(kb.get_study(args.source_study)) logger.debug('Retrieved %d enrollments' % len(source_enrolls)) src_st_lookup = {} for sen in source_enrolls: src_st_lookup[sen.studyCode] = sen to_be_discarded = [] discard_st = kb.get_study(args.discard_study) if discard_st is None: logger.critical('Study with label %s not found!' % args.discard_study) sys.exit(2) for c in codes: try: src_st_lookup[c].study = discard_st to_be_discarded.append(src_st_lookup[c])
passwd = args.passwd or vlu.ome_passwd() except ValueError, ve: logger.critical(ve) sys.exit(ve) kb = KB(driver='omero')(host, user, passwd) with open(args.couples_list) as f: reader = csv.reader(f, delimiter='\t') couples = [] for row in reader: couples.append((row[0], row[1])) logger.info('%d couples are going to be swapped' % len(couples)) logger.debug('Retrieving enrollments for study %s' % args.study) enrolls = kb.get_enrolled(kb.get_study(args.study)) logger.debug('Retrieved %d enrollments' % len(enrolls)) en_lookup = {} for en in enrolls: en_lookup[en.studyCode] = en for en_code1, en_code2 in couples: logger.info('Swapping couple %s - %s' % (en_code1, en_code2)) try: en1 = en_lookup[en_code1] en2 = en_lookup[en_code2] except KeyError, ke: logger.error('Code %s not found in study %s' % (ke, args.study)) sys.exit(2)
def main(argv):
    """Produce a tab-separated datasheet file for every listed plate.

    Each line of ``args.plates_list`` is taken as a plate barcode. When
    ``load_plate`` finds the plate, ``<barcode>_datasheet.csv`` is created in
    ``args.out_dir``: the wells returned by ``get_wells_by_plate`` are written
    in slot order, and every slot skipped before a well gets a placeholder
    row filled with 'X'. Barcodes without a plate are logged and skipped.

    :param argv: command line arguments, parsed with the parser returned by
        ``make_parser``
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()

    kb = KB(driver='omero')(args.host, args.user, args.passwd)

    # Load enrollments and individual (needed to build sample label and for
    # gender field)
    enrolls = []
    for sl in STUDY_LABELS:
        logger.debug('Loading enrollments for study %s' % sl)
        enrolls.extend(kb.get_enrolled(kb.get_study(sl)))
    logger.debug('Fetched %d individuals' % len(enrolls))
    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)

    # Index the valid EHR records by their 'i_id' field.
    logger.debug('Loading EHR records')
    ehr_records = kb.get_ehr_records('(valid == True)')
    ehr_records_map = {}
    for r in ehr_records:
        ehr_records_map.setdefault(r['i_id'], []).append(r)

    # Read plate barcodes
    with open(args.plates_list) as pl_list:
        barcodes = [row.strip() for row in pl_list]

    # Load plate
    for plate_barcode in barcodes:
        logger.info('Creating datasheet for plate %s' % plate_barcode)
        pl = load_plate(plate_barcode, kb)
        if not pl:
            logger.error('No plate with barcode %s exists, skipping it.'
                         % (plate_barcode))
            continue

        # Load wells for selected plate
        pl_wells = get_wells_by_plate(plate_barcode, kb)
        with open(
                os.path.join(args.out_dir, '%s_datasheet.csv' % plate_barcode),
                'w') as of:
            writer = csv.DictWriter(of, CSV_FIELDS, delimiter='\t')
            writer.writeheader()
            last_slot = 0
            # sorted() materializes the pairs, so the portable items() does
            # the same job as the Python 2 only iteritems().
            for slot, well in sorted(pl_wells.items()):
                enrollments = wells_lookup[well]
                individual = enrollments[0].individual
                cl_record = ehr_records_map[individual.id]
                t1d, ms = get_affections(cl_record)
                # Fill empty slots. Comparing with "<" instead of "!=" keeps
                # the loop finite even if a slot is not greater than the
                # previous one (a slot of 0 would otherwise never be reached
                # by counting upwards).
                while last_slot < slot - 1:
                    last_slot += 1
                    writer.writerow({
                        'Sample_ID': 'X',
                        'PLATE_barcode': pl.barcode,
                        'PLATE_name': pl.label,
                        'WELL_label': get_well_label(last_slot),
                        'INDIVIDUAL_gender': 'X',
                        'INDIVIDUAL_vid': 'X',
                        'T1D_affected': 'X',
                        'MS_affected': 'X'
                    })
                writer.writerow({
                    'Sample_ID': get_ichip_sample_code(enrollments,
                                                       pl.barcode),
                    'PLATE_barcode': pl.barcode,
                    'PLATE_name': pl.label,
                    'WELL_label': well.label,
                    'INDIVIDUAL_gender': map_gender(individual),
                    'INDIVIDUAL_vid': individual.id,
                    'T1D_affected': t1d,
                    'MS_affected': ms
                })
                last_slot = slot
# Map the gender strings used in individuals_defs to the kb.Gender members.
gender_map = {"MALE": kb.Gender.MALE, "FEMALE": kb.Gender.FEMALE}

# Saved individuals, indexed by the label they have in individuals_defs.
by_label = {}
for label, gender, father, mother in individuals_defs:
    conf = {"gender": gender_map[gender], "action": action}
    # Parents are resolved through by_label, so a parent has to appear in
    # individuals_defs before any of its children.
    for role, parent_label in (("father", father), ("mother", mother)):
        if parent_label:
            conf[role] = by_label[parent_label]
    i = kb.factory.create(kb.Individual, conf).save()
    by_label[label] = i
    # Enroll the new individual in the study, using its label as study code.
    enrollment_conf = {"study": study, "individual": i, "studyCode": label}
    e = kb.factory.create(kb.Enrollment, enrollment_conf).save()

""" .. Note that studyCode is the code assigned to each individual in a specific study. As a test, we now loop on all the individuals enrolled in the study and check if they are who we think they should be. """

for e in kb.get_enrolled(study):
    assert e.individual == by_label[e.studyCode]

""" .. See CoreOmeroWrapper.__eq__ for details on what we actually checked for with the == operator. """