def __init__(self, host, user, passwd, study_label, maker, model, release):
    self.kb = KB(driver='omero')(host, user, passwd)
    self.mset = self.kb.get_snp_markers_set(maker, model, release)
    self.logger = logging.getLogger()
    if not self.mset:
        raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.'
                         % (maker, model, release))
    #--
    alabel = 'load_genotypes-setup-%s' % time.time()
    self.asetup = self.kb.factory.create(self.kb.ActionSetup,
                                         {'label': alabel,
                                          'conf': ''}).save()
    #--
    dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
    dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
    device = self.kb.get_device(dlabel)
    if not device:
        device = self.kb.factory.create(self.kb.Device,
                                        {'label': dlabel,
                                         'maker': dmaker,
                                         'model': dmodel,
                                         'release': drelease}).save()
    self.device = device
    # FIXME this will break if study is not defined.
    self.study = self.kb.get_study(study_label)
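# A minimal usage sketch for the constructor above. The enclosing class
# name is not visible in this snippet, so 'GenotypesLoader' is assumed
# here purely for illustration, as are all the argument values:
loader = GenotypesLoader('omero.example.org', 'test', 'test',
                         study_label='TEST01', maker='CRS4',
                         model='TaqMan', release='1.0')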
def setUp(self):
    self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASSWD)
    conf = {
        'label': 'TEST-%f' % time.time(),
        'description': 'unit test garbage',
    }
    self.study = self.kb.factory.create(self.kb.Study, conf).save()
    self.kill_list.append(self.study)
    self.action = self.kb.create_an_action(self.study)
    self.kill_list.append(self.action)
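# A matching tearDown sketch, assuming cleanup goes through kb.delete
# (the actual cleanup code is not part of this snippet):
def tearDown(self):
    while self.kill_list:
        self.kb.delete(self.kill_list.pop())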
def main(argv):
    global logger
    parser = make_parser()
    args = parser.parse_args(argv)
    logformat = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    loglevel = getattr(logging, args.loglevel)
    if args.logfile:
        logging.basicConfig(filename=args.logfile, format=logformat,
                            level=loglevel)
    else:
        logging.basicConfig(format=logformat, level=loglevel)
    logger = logging.getLogger()
    abi_service = ABISnpService()
    found_markers = {}
    data = {}
    min_datetime = datetime.max
    max_datetime = datetime.min
    for f in (_.strip() for _ in args.ifile if _.strip()):
        logger.info('processing %s' % f)
        sds = SDSReader(open(f), swap_sample_well_columns=True)
        min_datetime = min(min_datetime, sds.datetime)
        max_datetime = max(max_datetime, sds.datetime)
        get_markers_definition(found_markers, sds, abi_service)
        for r in sds:
            data.setdefault(r['Sample Name'], []).append(r)
    kb = KB(driver='omero')(args.host, args.user, args.passwd)
    missing_kb_markers = add_kb_marker_objects(kb, found_markers)
    if missing_kb_markers:
        logger.info('there are missing markers, cannot proceed further')
        fname = '%smarker-defs.tsv' % args.prefix
        write_import_markers_file(fname, found_markers, missing_kb_markers)
        logger.info('the list of the missing markers is in %s' % fname)
        sys.exit(0)
    fname = '%smarkers-set-def.tsv' % args.prefix
    write_markers_set_def_file(fname, found_markers)
    ssc_data_set = {}
    device = kb.get_device(args.device_label)
    for sample_id, d in data.iteritems():
        fname = '%s%s-%s.ssc' % (args.prefix, device.id, sample_id)
        write_ssc_data_set_file(fname, found_markers, device.id, sample_id,
                                min_datetime, max_datetime, d)
        ssc_data_set[sample_id] = ('taqman-%s-%s' % (args.run_id, sample_id),
                                   sample_id, device.id, fname)
    fname = '%simport.ssc' % args.prefix
    write_ssc_data_samples_import_file(fname, ssc_data_set.values())
    write_ssc_data_objects_import_file(fname, ssc_data_set.values())
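# The usual entry-point wiring for a main() that takes argv explicitly
# (assumed here, it is not part of the snippet above):
if __name__ == '__main__':
    main(sys.argv[1:])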
def __init__(self, **kwargs):
    omero_host = vlu.ome_host()
    omero_user = vlu.ome_user()
    omero_passwd = vlu.ome_passwd()
    self.kb = KB(driver='omero')(omero_host, omero_user, omero_passwd)
    super(UniverseApplication, self).__init__(**kwargs)
    self.config.omero_default_host = kwargs.get('omero_default_host')
    self.config.omero_default_user = kwargs.get('omero_default_user')
    self.config.omero_default_passwd = kwargs.get('omero_default_passwd')
    self.config.vl_loglevel = kwargs.get('vl_loglevel', 'INFO')
    self.config.vl_import_enabled_users = kwargs.get('vl_import_enabled_users')
def __init__(self, host=None, user=None, passwd=None, group=None,
             keep_tokens=1, study_label=None, logger=None):
    self.kb = KB(driver='omero')(host, user, passwd, group, keep_tokens)
    self.logger = logger if logger else logging.getLogger()
    self.record_counter = 0
    self.default_study = None
    if study_label:
        s = self.kb.get_study(study_label)
        if not s:
            raise ValueError('No known study with label %s' % study_label)
        self.logger.info('Selecting %s[%d,%s] as default study'
                         % (s.label, s.omero_id, s.id))
        self.default_study = s
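# Usage sketch: the enclosing class name is not shown in this snippet,
# so 'Core' is assumed for illustration; omitting study_label would
# simply leave default_study set to None.
core = Core(host='omero.example.org', user='test', passwd='test',
            study_label='TEST01')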
def __init__(self, host, user, passwd, logger, study):
    self.logger = logger
    self.study = study
    self.kb = KB(driver="omero")(host, user, passwd)
    plates = self.kb.get_objects(self.kb.TiterPlate)
    self.logger.info("fetched %d plates" % len(plates))
    self.plate_map = {}
    self.enroll_map = {}
    for p in plates:
        self.plate_map[p.omero_id] = p.barcode
    s = self.kb.get_study(self.study)
    enrolls = self.kb.get_enrolled(s)
    self.logger.info("fetched %d enrollments" % len(enrolls))
    for e in enrolls:
        self.logger.debug('Retrieving wells for %s' % e.studyCode)
        wells = list(self.kb.get_vessels_by_individual(e.individual,
                                                       "PlateWell"))
        self.enroll_map[e.studyCode] = wells
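# A possible use of the mapper built above; the class name 'WellMapper'
# and all argument values are assumptions, for illustration only:
mapper = WellMapper('omero.example.org', 'test', 'test',
                    logging.getLogger(), 'TEST01')
for code, wells in mapper.enroll_map.iteritems():
    print '%s -> %d wells' % (code, len(wells))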
def main():
    parser = make_parser()
    args = parser.parse_args()
    kb = KB(driver='omero')(args.host, args.user, args.passwd)
    by_label = dict((x.label, x)
                    for x in kb.get_objects(kb.GenotypeDataSample))
    msets = {}
    itsv = csv.DictReader(args.data_samples, delimiter='\t')
    # the output file attribute was garbled in the original; 'out_file'
    # is assumed here
    otsv = csv.DictWriter(open(args.out_file, mode='w'),
                          fieldnames=['path', 'data_sample_label',
                                      'mimetype', 'size', 'sha1'],
                          delimiter='\t')
    otsv.writeheader()
    for r in itsv:
        ds_label = r['label']
        logger.info('Gathering info on %s' % ds_label)
        if ds_label not in by_label:
            logger.critical('There is no GenotypeDataSample with label %s'
                            % ds_label)
            sys.exit(1)
        ds = by_label[ds_label]
        # FIXME
        if ds.snpMarkersSet.omero_id not in msets:
            ms = ds.snpMarkersSet
            ms.load_markers()
            msets[ds.snpMarkersSet.omero_id] = ms
        ms = msets[ds.snpMarkersSet.omero_id]
        fname = ds_label + '_do.ssc'
        make_data_object(ds.id, fname, ms)
        size = os.stat(fname).st_size
        sha1 = compute_sha1(fname)
        otsv.writerow({
            'path': 'file://' + os.path.realpath(fname),
            'data_sample_label': ds_label,  # original wrote ds.id under a key
                                            # that did not match fieldnames
            'mimetype': mimetypes.SSC_FILE,
            'size': size,
            'sha1': sha1,
        })
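# A minimal sketch of the compute_sha1 helper referenced above (its real
# implementation is not shown here); this is a plain hashlib version:
import hashlib

def compute_sha1(fname, bufsize=1024 * 1024):
    sha1 = hashlib.sha1()
    with open(fname, 'rb') as f:
        # read in fixed-size chunks to keep memory use bounded
        for chunk in iter(lambda: f.read(bufsize), b''):
            sha1.update(chunk)
    return sha1.hexdigest()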
def main(argv):
    parser = make_parser()
    args = parser.parse_args(argv)
    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()
    kb = KB(driver='omero')(args.host, args.user, args.passwd)
    logger.info('Loading GenotypeDataSample objects')
    dsamples = kb.get_objects(kb.GenotypeDataSample)
    logger.info('Loaded %d objects' % len(dsamples))
    logger.info('Loading SNPMarkersSet')
    query = 'SELECT snpm FROM SNPMarkersSet snpm WHERE snpm.label = :mset_label'
    results = kb.find_all_by_query(query, {'mset_label': args.marker_set})
    # indexing before checking would raise IndexError on an empty result,
    # so check the result list first
    mset = results[0] if results else None
    if not mset:
        logger.error('Unable to load SNPMarkersSet with label %s'
                     % args.marker_set)
        sys.exit(2)
    logger.info('Object loaded')
    gdo_iterator = kb.get_gdo_iterator(mset, dsamples[:args.fetch_size])
    gdos = []
    logger.info('Loading GDOs')
    for gdo in gdo_iterator:
        logger.info(gdo['vid'])
        gdos.append(gdo)
        logger.debug('%d/%d GDOs loaded' % (len(gdos), args.fetch_size))
    logger.info('Loaded %d GDOs' % len(gdos))
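# A make_parser sketch consistent with the attributes accessed above
# (host, user, passwd, marker_set, fetch_size, loglevel, logfile); the
# original parser is not shown, so the defaults here are assumptions:
import argparse

def make_parser():
    parser = argparse.ArgumentParser(description='load GDOs for a marker set')
    parser.add_argument('--host', default='localhost')
    parser.add_argument('--user', default='root')
    parser.add_argument('--passwd', required=True)
    parser.add_argument('--marker-set', dest='marker_set', required=True)
    parser.add_argument('--fetch-size', dest='fetch_size', type=int,
                        default=10)
    parser.add_argument('--loglevel', default='INFO',
                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])
    parser.add_argument('--logfile')
    return parser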
def setUp(self):
    self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASS)
* Explanations concerning how to open a connection to a KnowledgeBase,
  and what it is, are left to other, antecedent parts of the
  documentation.
"""

import os
import itertools as it

from bl.vl.kb import KnowledgeBase as KB
import bl.vl.genotype.algo as algo

OME_HOST = os.getenv('OME_HOST', 'localhost')
OME_USER = os.getenv('OME_USER', 'test')
OME_PASSWD = os.getenv('OME_PASSWD', 'test')

kb = KB(driver="omero")(OME_HOST, OME_USER, OME_PASSWD)

""" ..

The first thing we will do is to select a markers set; see FIXME:XXX
for its definition. We first obtain a handle to it, then invoke
'.load_markers()', which loads the actual definition data into memory.
"""

mset_name = 'FakeTaqSet01'
mset0 = kb.get_snp_markers_set(label=mset_name)
mset0.load_markers()

""" ..

For the time being, we can think of the SNPMarkersSet mset0 as
analogous to an array
def main(argv):
    parser = make_parser()
    args = parser.parse_args(argv)
    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()
    kb = KB(driver='omero')(args.host, args.user, args.passwd)
    # Load enrollments and individuals (needed to build the sample label
    # and for the gender field)
    enrolls = []
    for sl in STUDY_LABELS:
        logger.debug('Loading enrollments for study %s' % sl)
        enrolls.extend(kb.get_enrolled(kb.get_study(sl)))
    logger.debug('Fetched %d individuals' % len(enrolls))
    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)
    logger.debug('Loading EHR records')
    ehr_records = kb.get_ehr_records('(valid == True)')
    ehr_records_map = {}
    for r in ehr_records:
        ehr_records_map.setdefault(r['i_id'], []).append(r)
    # Read plate barcodes
    with open(args.plates_list) as pl_list:
        barcodes = [row.strip() for row in pl_list]
    for plate_barcode in barcodes:
        logger.info('Creating datasheet for plate %s' % plate_barcode)
        pl = load_plate(plate_barcode, kb)
        if not pl:
            logger.error('No plate with barcode %s exists, skipping it'
                         % plate_barcode)
            continue
        # Load wells for the selected plate
        pl_wells = get_wells_by_plate(plate_barcode, kb)
        fname = os.path.join(args.out_dir, '%s_datasheet.csv' % plate_barcode)
        with open(fname, 'w') as of:
            writer = csv.DictWriter(of, CSV_FIELDS, delimiter='\t')
            writer.writeheader()
            last_slot = 0
            for slot, well in sorted(pl_wells.iteritems()):
                cl_record = ehr_records_map[wells_lookup[well][0].individual.id]
                t1d, ms = get_affections(cl_record)
                # Fill empty slots
                while last_slot != slot - 1:
                    last_slot += 1
                    writer.writerow({
                        'Sample_ID': 'X',
                        'PLATE_barcode': pl.barcode,
                        'PLATE_name': pl.label,
                        'WELL_label': get_well_label(last_slot),
                        'INDIVIDUAL_gender': 'X',
                        'INDIVIDUAL_vid': 'X',
                        'T1D_affected': 'X',
                        'MS_affected': 'X',
                    })
                writer.writerow({
                    'Sample_ID': get_ichip_sample_code(wells_lookup[well],
                                                       pl.barcode),
                    'PLATE_barcode': pl.barcode,
                    'PLATE_name': pl.label,
                    'WELL_label': well.label,
                    'INDIVIDUAL_gender': map_gender(wells_lookup[well][0].individual),
                    'INDIVIDUAL_vid': wells_lookup[well][0].individual.id,
                    'T1D_affected': t1d,
                    'MS_affected': ms,
                })
                last_slot = slot
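# A sketch of the get_well_label helper used above (its implementation
# is not shown in this snippet); it assumes 1-based, row-wise slot
# numbering on a 96-well (8x12) plate, e.g. 1 -> 'A01', 13 -> 'B01':
def get_well_label(slot, columns=12):
    row, col = divmod(slot - 1, columns)
    return '%s%02d' % (chr(ord('A') + row), col + 1)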
def setUp(self):
    logger.info('start setup')
    self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASS)
    logger.info('done with setup')
log_level = getattr(logging, args.loglevel)
kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
if args.logfile:
    kwargs['filename'] = args.logfile
logging.basicConfig(**kwargs)
logger = logging.getLogger()
try:
    host = args.host or vlu.ome_host()
    user = args.user or vlu.ome_user()
    passwd = args.passwd or vlu.ome_passwd()
except ValueError as ve:
    logger.critical(ve)
    sys.exit(ve)
kb = KB(driver="omero")(host, user, passwd)
logger.info("getting data samples")
ms = kb.get_snp_markers_set(label=args.marker_set)
if ms is None:
    msg = "marker set %s not present in kb, bailing out" % args.marker_set
    logger.critical(msg)
    sys.exit(msg)
query = "from GenotypeDataSample g where g.snpMarkersSet.id = :id"
params = {"id": ms.omero_id}
gds = kb.find_all_by_query(query, params)
logger.info("found %d data samples for marker set %s"
            % (len(gds), args.marker_set))
logger.info("updating dep tree")
kb.update_dependency_tree()
individuals = [get_individual(kb, ds) for ds in gds]
ds_by_ind_id = dict((i.id, ds) for i, ds in zip(individuals, gds))
def __init__(self, host, user, passwd):
    self.kb = KB(driver='omero')(host, user, passwd)
    self.logger = logging.getLogger()
def __init__(self, host, user, passwd):
    self.kb = KB(driver='omero')(host, user, passwd)
    # FIXME we need to do this to sync with the DB idea of the enums
    self.kb.Gender.map_enums_values(self.kb)
    self.logger = logging.getLogger()
log_level = getattr(logging, args.loglevel)
kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
if args.logfile:
    kwargs['filename'] = args.logfile
logging.basicConfig(**kwargs)
logger = logging.getLogger()
try:
    host = args.host or vlu.ome_host()
    user = args.user or vlu.ome_user()
    passwd = args.passwd or vlu.ome_passwd()
except ValueError as ve:
    logger.critical(ve)
    sys.exit(ve)
kb = KB(driver='omero')(host, user, passwd)
logger.info('Retrieving diagnosis records')
ehr_records = kb.get_ehr_records('(archetype == "%s") & (valid == True)'
                                 % DIAGNOSIS_ARCHETYPE)
ehr_records.extend(kb.get_ehr_records('(archetype == "%s") & (valid == True)'
                                      % EXCL_DIAG_ARCHETYPE))
logger.info('%d records retrieved' % len(ehr_records))
logger.info('Loading actions')
actions = kb.get_objects(kb.Action)
act_map = {}
for act in actions:
    act_map[act.id] = act.context.label
logger.info('%d actions loaded' % len(act_map))
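# The archetype constants used above are openEHR archetype identifiers.
# Plausible values (assumptions, they are not defined in this snippet):
DIAGNOSIS_ARCHETYPE = 'openEHR-EHR-EVALUATION.problem-diagnosis.v1'
EXCL_DIAG_ARCHETYPE = 'openEHR-EHR-EVALUATION.exclusion-problem_diagnosis.v1'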
open a tsv file with at least the 'allele_a', 'allele_b' and 'marker_vid'
columns, fetch all markers corresponding to the VIDs, get their masks,
then split each mask and compare the alleles
"""

import sys, os, csv
import itertools as it

from bl.vl.kb import KnowledgeBase as KB
from bl.core.seq.utils import reverse_complement as rc
import bl.vl.utils.snp as snp

OME_HOST = os.getenv('OME_HOST', 'localhost')
OME_USER = os.getenv('OME_USER', 'test')
OME_PASSWD = os.getenv('OME_PASSWD', 'test')

kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASSWD)

fn = sys.argv[1]     # e.g., "affy_na32_reannot_vids.tsv"
outfn = sys.argv[2]  # e.g., "affy_na32_markers_set_def.tsv"

with open(fn) as f:
    reader = csv.DictReader(f, delimiter="\t")
    records = [r for r in reader]
vids = [r['source'] for r in records]
markers = kb.get_snp_markers(vids=vids, col_names=['vid', 'mask'])

with open(outfn, 'w') as outf:
    fieldnames = ['marker_vid', 'marker_indx', 'allele_flip']
    # the original call was truncated at this point; completing it with
    # the fieldnames defined just above
    writer = csv.DictWriter(outf, delimiter="\t", fieldnames=fieldnames)
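# A sketch of the comparison step described in the header: split each
# mask around its variant and check the TSV alleles against it, both
# directly and as complements. It assumes snp.split_mask returns
# (left_flank, (allele_a, allele_b), right_flank); check the
# bl.vl.utils.snp API before relying on this.
def alleles_agree(mask, allele_a, allele_b):
    _, alleles, _ = snp.split_mask(mask)
    tsv_alleles = set([allele_a, allele_b])
    return (tsv_alleles == set(alleles) or
            tsv_alleles == set(rc(a) for a in alleles))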
def setUp(self):
    self.kb = KB(driver='omero')('localhost', 'root', 'romeo')