Ejemplo n.º 1
0
 def __init__(self, host, user, passwd, study_label, maker, model, release):
     """Connect to the KB and resolve the objects needed to load genotypes.

     Opens an omero-backed KnowledgeBase connection, looks up the SNP
     markers set identified by (maker, model, release) and the study
     labelled ``study_label``, then creates a fresh ActionSetup and
     fetches (or creates) the 'CRS4-load_genotypes-0.1' device.

     Raises:
         ValueError: if the markers set or the study is not known to
             the KB.
     """
     self.kb = KB(driver='omero')(host, user, passwd)
     self.mset = self.kb.get_snp_markers_set(maker, model, release)
     self.logger = logging.getLogger()
     if not self.mset:
         raise ValueError('SNPMarkersSet[%s,%s,%s] has not been defined.' %
                          (maker, model, release))
     # Resolve the study before anything is saved to the KB, so that an
     # unknown label fails fast instead of leaving a stray ActionSetup or
     # Device behind and breaking later on a missing study.
     study = self.kb.get_study(study_label)
     if not study:
         raise ValueError('No known study with label %s' % study_label)
     #--
     # The timestamp keeps the setup label distinct across runs.
     alabel = 'load_genotypes-setup-%s' % time.time()
     self.asetup = self.kb.factory.create(self.kb.ActionSetup, {
         'label': alabel,
         'conf': ''
     }).save()
     #--
     dmaker, dmodel, drelease = 'CRS4', 'load_genotypes', '0.1'
     dlabel = '%s-%s-%s' % (dmaker, dmodel, drelease)
     device = self.kb.get_device(dlabel)
     if not device:
         # First run against this KB: register the loader as a device.
         device = self.kb.factory.create(
             self.kb.Device, {
                 'label': dlabel,
                 'maker': dmaker,
                 'model': dmodel,
                 'release': drelease
             }).save()
     self.device = device
     #--
     self.study = study
Ejemplo n.º 2
0
 def setUp(self):
   """Create a scratch study and an action on it in the KB.

   Both objects are stored on the instance and appended to
   ``self.kill_list`` (presumably consumed by the test teardown, which
   is not visible here).
   """
   self.kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASSWD)
   # The timestamp in the label keeps repeated runs from colliding.
   study_conf = dict(
     label='TEST-%f' % time.time(),
     description='unit test garbage',
   )
   new_study = self.kb.factory.create(self.kb.Study, study_conf)
   self.study = new_study.save()
   self.kill_list.append(self.study)
   self.action = self.kb.create_an_action(self.study)
   self.kill_list.append(self.action)
Ejemplo n.º 3
0
def main(argv):
  """Convert the SDS files listed in ``args.ifile`` into KB import files.

  Every non-blank line of ``args.ifile`` is taken as the path of an SDS
  file. Marker definitions and per-sample records are collected from all
  of them; then, provided every marker is already known to the KB, a
  markers-set definition file, one .ssc file per sample and the import
  file are written using ``args.prefix`` as the filename prefix.

  If some markers are missing from the KB, their definitions are written
  to '<prefix>marker-defs.tsv' and the process exits.
  """
  global logger
  parser = make_parser()
  args = parser.parse_args(argv)
  logformat = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  loglevel  = getattr(logging, args.loglevel)
  log_kwargs = {'format': logformat, 'level': loglevel}
  if args.logfile:
    log_kwargs['filename'] = args.logfile
  logging.basicConfig(**log_kwargs)
  logger = logging.getLogger()

  abi_service = ABISnpService()
  found_markers = {}
  data = {}
  # Start from the extremes so that the first file sets both bounds.
  min_datetime = datetime.max
  max_datetime = datetime.min
  for f in (_.strip() for _ in args.ifile if _.strip()):
    logger.info('processing %s' % f)

    # Everything that touches the reader stays inside the 'with', so the
    # file is closed as soon as this SDS file has been consumed.
    with open(f) as sds_file:
      sds = SDSReader(sds_file, swap_sample_well_columns=True)
      min_datetime = min(min_datetime, sds.datetime)
      max_datetime = max(max_datetime, sds.datetime)

      get_markers_definition(found_markers, sds, abi_service)

      for r in sds:
        data.setdefault(r['Sample Name'], []).append(r)

  kb = KB(driver='omero')(args.host, args.user, args.passwd)
  logger.info('qui - main')
  missing_kb_markers = add_kb_marker_objects(kb, found_markers)

  if missing_kb_markers:
    logger.info('there are missing markers. Cannot proceed further.')
    fname = '%smarker-defs.tsv' % args.prefix
    write_import_markers_file(fname, found_markers, missing_kb_markers)
    logger.info('the list of the missing marker is in %s.' % fname)
    # NOTE(review): exits with status 0 although processing could not be
    # completed; left unchanged because callers may rely on it.
    sys.exit(0)

  fname = '%smarkers-set-def.tsv' % args.prefix
  write_markers_set_def_file(fname, found_markers)

  ssc_data_set = {}
  device = kb.get_device(args.device_label)
  for sample_id, d in data.iteritems():
    fname = '%s%s-%s.ssc' % (args.prefix, device.id, sample_id)
    write_ssc_data_set_file(fname, found_markers,
                            device.id, sample_id,
                            min_datetime, max_datetime, d)
    ssc_data_set[sample_id] = ('taqman-%s-%s' % (args.run_id, sample_id),
                               sample_id, device.id, fname)
  # NOTE(review): both writers below receive the same filename; unless the
  # second one appends, it overwrites the output of the first -- confirm.
  fname = '%simport.ssc' % args.prefix
  write_ssc_data_samples_import_file(fname, ssc_data_set.values())
  write_ssc_data_objects_import_file(fname, ssc_data_set.values())
Ejemplo n.º 4
0
 def __init__(self, **kwargs):
   """Open the KB connection, initialise the base app, copy VL options.

   The KB credentials come from the ``vlu`` helpers; the remaining
   options are read from ``kwargs`` and stored on ``self.config``
   (``vl_loglevel`` falls back to 'INFO', the others to None).
   """
   credentials = (vlu.ome_host(), vlu.ome_user(), vlu.ome_passwd())
   self.kb = KB(driver='omero')(*credentials)
   super(UniverseApplication, self).__init__(**kwargs)
   # Options copied verbatim from kwargs, None when absent.
   for option in ('omero_default_host',
                  'omero_default_user',
                  'omero_default_passwd'):
     setattr(self.config, option, kwargs.get(option))
   self.config.vl_loglevel = kwargs.get('vl_loglevel', 'INFO')
   setattr(self.config, 'vl_import_enabled_users',
           kwargs.get('vl_import_enabled_users'))
Ejemplo n.º 5
0
 def __init__(self,
              host=None,
              user=None,
              passwd=None,
              group=None,
              keep_tokens=1,
              study_label=None,
              logger=None):
     """Connect to the KB and optionally select a default study.

     When ``study_label`` is given, the matching study becomes
     ``self.default_study``; otherwise it stays None. ``logger``
     defaults to the root logger.

     Raises:
         ValueError: if ``study_label`` is given but no such study is
             found.
     """
     self.kb = KB(driver='omero')(host, user, passwd, group, keep_tokens)
     self.logger = logger or logging.getLogger()
     self.record_counter = 0
     self.default_study = None
     if not study_label:
         return
     study = self.kb.get_study(study_label)
     if not study:
         raise ValueError('No known study with label %s' % study_label)
     selection_msg = 'Selecting %s[%d,%s] as default study' % (
         study.label, study.omero_id, study.id)
     self.logger.info(selection_msg)
     self.default_study = study
Ejemplo n.º 6
0
 def __init__(self, host, user, passwd, logger, study):
     """Connect to the KB and pre-load plate and enrollment lookups.

     Builds ``self.plate_map`` (plate omero_id -> barcode) from every
     TiterPlate, and ``self.enroll_map`` (study code -> list of
     "PlateWell" vessels of the enrolled individual) for the study
     labelled ``study``.
     """
     self.logger = logger
     self.study = study
     self.kb = KB(driver="omero")(host, user, passwd)
     all_plates = self.kb.get_objects(self.kb.TiterPlate)
     self.logger.info("fetched %d plates" % len(all_plates))
     self.plate_map = dict(
         (plate.omero_id, plate.barcode) for plate in all_plates)
     self.enroll_map = {}
     kb_study = self.kb.get_study(self.study)
     enrollments = self.kb.get_enrolled(kb_study)
     self.logger.info("fetched %d enrollments" % len(enrollments))
     for enrollment in enrollments:
         self.logger.debug('Retrieving wells for %s' % enrollment.studyCode)
         # Materialise the result so the map holds a plain list.
         self.enroll_map[enrollment.studyCode] = list(
             self.kb.get_vessels_by_individual(enrollment.individual,
                                               "PlateWell"))
Ejemplo n.º 7
0
def main():
  """Build data-object files and their import TSV for the listed samples.

  Reads a tab-separated list of GenotypeDataSample labels (column
  'label') from ``args.data_samples``. For each one it writes a
  '<label>_do.ssc' file through make_data_object() and appends a row
  with the file's URL, size and SHA1 to the output TSV.

  Exits with status 1 if a label does not match any GenotypeDataSample
  in the KB.
  """
  parser = make_parser()
  args = parser.parse_args()
  kb = KB(driver='omero')(args.host, args.user, args.passwd)

  by_label = dict(((x.label, x) for x in kb.get_objects(kb.GenotypeDataSample)))
  # Loaded markers sets, keyed by omero_id, so each is loaded only once.
  msets = {}
  itsv = csv.DictReader(args.data_samples, delimiter='\t')
  # NOTE(review): the original read 'open(args., ...)' with the attribute
  # name missing; 'ofile' is a guess -- confirm against make_parser().
  with open(args.ofile, mode='w') as ofile:
    otsv = csv.DictWriter(ofile,
                          fieldnames=['path', 'data_sample_label', 'mimetype',
                                      'size', 'sha1'],
                          delimiter='\t')
    otsv.writeheader()

    for r in itsv:
      ds_label = r['label']
      logger.info('Gathering info on %s' % ds_label)
      if ds_label not in by_label:
        logger.critical('There is no GenotypeDataSample with label %s' %
                        ds_label)
        sys.exit(1)
      ds = by_label[ds_label]
      mset_id = ds.snpMarkersSet.omero_id
      if mset_id not in msets:
        ms = ds.snpMarkersSet
        ms.load_markers()
        msets[mset_id] = ms
      ms = msets[mset_id]
      fname = ds_label + '_do.ssc'
      make_data_object(ds.id, fname, ms)
      size = os.stat(fname).st_size
      sha1 = compute_sha1(fname)
      # The key must match the declared fieldnames, otherwise DictWriter
      # raises ValueError on the unexpected 'data_sample' key.
      # NOTE(review): the column is named '..._label' but carries ds.id,
      # as in the original -- confirm which one the importer expects.
      otsv.writerow({
        'path' : 'file://' + os.path.realpath(fname),
        'data_sample_label' : ds.id,
        'mimetype' : mimetypes.SSC_FILE,
        'size' : size,
        'sha1' : sha1,
        })
Ejemplo n.º 8
0
def main(argv):
    """Load up to ``args.fetch_size`` GDOs for a markers set, logging progress.

    Fetches every GenotypeDataSample from the KB, looks up the
    SNPMarkersSet labelled ``args.marker_set`` and iterates over the GDOs
    of the first ``args.fetch_size`` data samples.

    Exits with status 2 if the markers set cannot be found.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()

    kb = KB(driver='omero')(args.host, args.user, args.passwd)

    logger.info('Loading GenotypeDataSample objects')
    dsamples = kb.get_objects(kb.GenotypeDataSample)
    logger.info('Loaded %d objects' % len(dsamples))

    logger.info('Loading SNPMarkersSet')
    query = 'SELECT snpm FROM SNPMarkersSet snpm WHERE snpm.label = :mset_label'
    matches = kb.find_all_by_query(query, {'mset_label': args.marker_set})
    # Indexing an empty result would raise IndexError before the error
    # below could be reported, so test for emptiness first.
    mset = matches[0] if matches else None
    if not mset:
        logger.error('Unable to load SNPMarkersSet with label %s' %
                     args.marker_set)
        sys.exit(2)
    logger.info('Object loaded')

    gdo_iterator = kb.get_gdo_iterator(mset, dsamples[:args.fetch_size])
    gdos = []

    logger.info('Loading GDOs')
    for gdo in gdo_iterator:
        logger.info(gdo['vid'])
        gdos.append(gdo)
        logger.debug('%d/%d GDOs loaded' % (len(gdos), args.fetch_size))
    logger.info('Loaded %d GDOs' % len(gdos))
Ejemplo n.º 9
0
 def setUp(self):
     """Open an omero-backed KnowledgeBase connection and keep it on self.kb."""
     connect = KB(driver='omero')
     self.kb = connect(OME_HOST, OME_USER, OME_PASS)
Ejemplo n.º 10
0
 * Explanations concerning how to open a connection to a KnowledgeBase
   and what it is are left to other, antecedent, parts of the
   documentation.

"""

import os
import itertools as it
from bl.vl.kb import KnowledgeBase as KB
import bl.vl.genotype.algo as algo

# Connection settings are taken from the environment, falling back to
# 'localhost' / 'test' / 'test' when a variable is not set.
OME_HOST = os.getenv('OME_HOST', 'localhost')
OME_USER = os.getenv('OME_USER', 'test')
OME_PASSWD = os.getenv('OME_PASSWD', 'test')

# Module-level KnowledgeBase connection (omero driver) used by the rest
# of this script.
kb = KB(driver="omero")(OME_HOST, OME_USER, OME_PASSWD)
""" ..

The first thing we will do is select a markers set. See FIXME:XXX
for its definition. We will first obtain a handle to it, and then
invoke its '.load_markers()' method, which will bring the actual
definition data into memory.

"""

# Label of the markers set to look up in the KB.
mset_name = 'FakeTaqSet01'
# Fetch the markers set by label, then load its markers.
mset0 = kb.get_snp_markers_set(label=mset_name)
mset0.load_markers()
""" ..

For the time being, we can think of the SNPMarkerSet mset0 as analogous to an array
Ejemplo n.º 11
0
def main(argv):
    """Write one tab-separated datasheet per plate listed in args.plates_list.

    Loads the enrollments of every study in STUDY_LABELS and the valid EHR
    records, then, for each plate barcode read from ``args.plates_list``,
    writes '<barcode>_datasheet.csv' into ``args.out_dir`` with one row
    per well slot. Slots that have no well are filled with 'X'
    placeholder rows. Barcodes that match no plate are logged and skipped.
    """
    parser = make_parser()
    args = parser.parse_args(argv)

    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()

    kb = KB(driver='omero')(args.host, args.user, args.passwd)

    # Load enrollments and individual (needed to build sample label and for gender field)
    enrolls = []
    for sl in STUDY_LABELS:
        logger.debug('Loading enrollments for study %s' % sl)
        enrolls.extend(kb.get_enrolled(kb.get_study(sl)))
        logger.debug('Fetched %d individuals' % len(enrolls))

    wells_lookup = get_wells_enrolls_lookup(enrolls, kb)

    logger.debug('Loading EHR records')
    ehr_records = kb.get_ehr_records('(valid == True)')
    # Group the EHR records by the 'i_id' field of each record.
    ehr_records_map = {}
    for r in ehr_records:
        ehr_records_map.setdefault(r['i_id'], []).append(r)

    # Read plate barcodes, ignoring blank lines (e.g. a trailing newline)
    # that would otherwise be looked up as an empty barcode.
    with open(args.plates_list) as pl_list:
        barcodes = [row.strip() for row in pl_list if row.strip()]

    # Load plate
    for plate_barcode in barcodes:
        logger.info('Creating datasheet for plate %s' % plate_barcode)
        pl = load_plate(plate_barcode, kb)
        if not pl:
            logger.error('No plate with barcode %s exists, skipping it.' %
                         (plate_barcode))
            continue

        # Load wells for selected plate
        pl_wells = get_wells_by_plate(plate_barcode, kb)

        datasheet_path = os.path.join(args.out_dir,
                                      '%s_datasheet.csv' % plate_barcode)
        with open(datasheet_path, 'w') as of:
            writer = csv.DictWriter(of, CSV_FIELDS, delimiter='\t')
            writer.writeheader()
            last_slot = 0
            for slot, well in sorted(pl_wells.iteritems()):
                well_enrolls = wells_lookup[well]
                individual = well_enrolls[0].individual
                # NOTE(review): raises KeyError when the individual has no
                # valid EHR record, as in the original -- confirm intended.
                cl_record = ehr_records_map[individual.id]
                t1d, ms = get_affections(cl_record)
                # Fill empty slots. '<' rather than '!=' so that a slot not
                # greater than last_slot cannot cause an endless loop.
                while last_slot < slot - 1:
                    last_slot += 1
                    writer.writerow({
                        'Sample_ID': 'X',
                        'PLATE_barcode': pl.barcode,
                        'PLATE_name': pl.label,
                        'WELL_label': get_well_label(last_slot),
                        'INDIVIDUAL_gender': 'X',
                        'INDIVIDUAL_vid': 'X',
                        'T1D_affected': 'X',
                        'MS_affected': 'X'
                    })

                writer.writerow({
                    'Sample_ID': get_ichip_sample_code(well_enrolls,
                                                       pl.barcode),
                    'PLATE_barcode': pl.barcode,
                    'PLATE_name': pl.label,
                    'WELL_label': well.label,
                    'INDIVIDUAL_gender': map_gender(individual),
                    'INDIVIDUAL_vid': individual.id,
                    'T1D_affected': t1d,
                    'MS_affected': ms
                })
                last_slot = slot
Ejemplo n.º 12
0
 def setUp(self):
     """Connect to the omero-backed KB, logging before and after."""
     logger.info('start setup')
     kb_factory = KB(driver='omero')
     self.kb = kb_factory(OME_HOST, OME_USER, OME_PASS)
     logger.info('done with setup')
Ejemplo n.º 13
0
  log_level = getattr(logging, args.loglevel)
  kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
  if args.logfile:
    kwargs['filename'] = args.logfile
  logging.basicConfig(**kwargs)
  logger = logging.getLogger()

  try:
    host = args.host or vlu.ome_host()
    user = args.user or vlu.ome_user()
    passwd = args.passwd or vlu.ome_passwd()
  except ValueError, ve:
    logger.critical(ve)
    sys.exit(ve)

  kb = KB(driver="omero")(host, user, passwd)
  logger.info("getting data samples")
  ms = kb.get_snp_markers_set(label=args.marker_set)
  if ms is None:
    msg = "marker set %s not present in kb, bailing out" % args.marker_set
    logger.critical(msg)
    sys.exit(msg)
  query = "from GenotypeDataSample g where g.snpMarkersSet.id = :id"
  params = {"id": ms.omero_id}
  gds = kb.find_all_by_query(query, params)
  logger.info("found %d data samples for marker set %s" %
              (len(gds), args.marker_set))
  logger.info("updating dep tree")
  kb.update_dependency_tree()
  individuals = [get_individual(kb, ds) for ds in gds]
  ds_by_ind_id = dict((i.id, ds) for i, ds in zip(individuals, gds))
Ejemplo n.º 14
0
 def __init__(self, host, user, passwd):
     """Open an omero-backed KB connection and attach the root logger."""
     credentials = (host, user, passwd)
     self.kb = KB(driver='omero')(*credentials)
     self.logger = logging.getLogger()
Ejemplo n.º 15
0
 def __init__(self, host, user, passwd):
   """Open the KB connection, map the Gender enum values, set the logger."""
   knowledge_base = KB(driver='omero')(host, user, passwd)
   self.kb = knowledge_base
   #FIXME we need to do this to sync with the DB idea of the enums.
   knowledge_base.Gender.map_enums_values(knowledge_base)
   self.logger = logging.getLogger()
Ejemplo n.º 16
0
    log_level = getattr(logging, args.loglevel)
    kwargs = {'format': LOG_FORMAT, 'datefmt': LOG_DATEFMT, 'level': log_level}
    if args.logfile:
        kwargs['filename'] = args.logfile
    logging.basicConfig(**kwargs)
    logger = logging.getLogger()

    try:
        host = args.host or vlu.ome_host()
        user = args.user or vlu.ome_user()
        passwd = args.passwd or vlu.ome_passwd()
    except ValueError, ve:
        logger.critical(ve)
        sys.exit(ve)

    kb = KB(driver='omero')(host, user, passwd)

    logger.info('Retrieving diagnosis records')
    ehr_records = kb.get_ehr_records('(archetype == "%s") & (valid == True)' %
                                     DIAGNOSIS_ARCHETYPE)
    ehr_records.extend(
        kb.get_ehr_records('(archetype == "%s") & (valid == True)' %
                           EXCL_DIAG_ARCHETYPE))
    logger.info('%d records retrieved' % len(ehr_records))

    logger.info('Loading actions')
    actions = kb.get_objects(kb.Action)
    act_map = {}
    for act in actions:
        act_map[act.id] = act.context.label
    logger.info('%d actions loaded' % len(act_map))
Ejemplo n.º 17
0
  open a tsv with at least the 'allele_a', 'allele_b' and marker_vid columns
  fetch all markers corresponding to the VIDs and get their masks
  split mask and compare alleles
"""
import sys, os, csv
import itertools as it

from bl.vl.kb import KnowledgeBase as KB
from bl.core.seq.utils import reverse_complement as rc
import bl.vl.utils.snp as snp

OME_HOST = os.getenv('OME_HOST', 'localhost')
OME_USER = os.getenv('OME_USER', 'test')
OME_PASSWD = os.getenv('OME_PASSWD', 'test')

kb = KB(driver='omero')(OME_HOST, OME_USER, OME_PASSWD)

fn = sys.argv[1]  # "affy_na32_reannot_vids.tsv"
outfn = sys.argv[2]  # "affy_na32_markers_set_def.tsv"

with open(fn) as f:
    reader = csv.DictReader(f, delimiter="\t")
    records = [r for r in reader]

vids = [r['source'] for r in records]
markers = kb.get_snp_markers(vids=vids, col_names=['vid', 'mask'])

with open(outfn, 'w') as outf:
    fieldnames = ['marker_vid', 'marker_indx', 'allele_flip']
    writer = csv.DictWriter(outf,
                            delimiter="\t",
Ejemplo n.º 18
0
 def setUp(self):
     """Connect to the KB on localhost with the hard-coded credentials."""
     host, user, passwd = 'localhost', 'root', 'romeo'
     self.kb = KB(driver='omero')(host, user, passwd)