def next(self):
   """
   Read the next row and split its upper-cased 'mask' field.

   The row is fetched through ``csv.DictReader.next``.  If ``split_mask``
   raises ValueError for that row, an error line is printed and the
   following row is tried instead (via a recursive call to ``self.next()``).
   """
   r = csv.DictReader.next(self)
   try:
     mask = split_mask(r['mask'].upper())
   except ValueError as e:
     # NOTE(review): `label` is not assigned anywhere in this method; it must
     # be resolvable from an enclosing/global scope or this line raises
     # NameError -- confirm against the full source.
     # The parenthesised single-argument form prints the same text whether
     # `print` is a statement (Python 2) or a function (Python 3).
     print("ERROR: %r: %s, skipping" % (label, e))
     return self.next()
예제 #2
0
 def next(self):
     """
     Read the next row and split its upper-cased 'mask' field.

     The row is fetched through ``csv.DictReader.next``.  If ``split_mask``
     raises ValueError for that row, an error line is printed and the
     following row is tried instead (via a recursive call to
     ``self.next()``).
     """
     r = csv.DictReader.next(self)
     try:
         mask = split_mask(r['mask'].upper())
     except ValueError as e:
         # NOTE(review): `label` is not assigned anywhere in this method; it
         # must be resolvable from an enclosing/global scope or this line
         # raises NameError -- confirm against the full source.
         # The parenthesised single-argument form prints the same text
         # whether `print` is a statement (Python 2) or a function (Python 3).
         print("ERROR: %r: %s, skipping" % (label, e))
         return self.next()
예제 #3
0
def check_mask(mask):
  """
  Return a short description of what is wrong with ``mask``, or None.

  * "bad mask format" when ``split_mask`` (or unpacking its result into
    three parts) raises ValueError;
  * "bad alleles: [...]" when the number of alleles is not between 2 and 4
    inclusive, or some allele is not in POSSIBLE_ALLELES;
  * None when neither check fails.
  """
  try:
    _, alleles, _ = split_mask(mask)
  except ValueError:
    return "bad mask format"
  n_alleles = len(alleles)
  # The set comparison is only reached when the allele count is acceptable.
  if n_alleles < 2 or n_alleles > 4 or not set(alleles) <= POSSIBLE_ALLELES:
    return "bad alleles: %r" % list(alleles)
  return None
예제 #4
0
def build_fastq_records(label, mask, name_serializer, logger=None):
  """
  Build one four-field record per allele of ``mask``.

  Each record is a tuple ``('@' + id, sequence, '+' + id, quality)`` where
  the sequence is left flank + allele + right flank, the id comes from
  ``name_serializer.serialize(label, code, snp_offset, alleles)`` and the
  quality is a run of '~' as long as the sequence.  Alleles are paired with
  ALLELE_CODES in order; the SNP offset is the length of the left flank.

  If the mask cannot be split, a warning is sent to ``logger`` (a
  NullLogger when none is given) and an empty list is returned.
  """
  if not logger:
    logger = NullLogger()
  try:
    lflank, alleles, rflank = split_mask(mask)
  except ValueError:
    # The literal string "None" is reported as a missing mask rather than
    # a malformed one.
    if mask == "None":
      status = "no mask"
    else:
      status = "bad mask format"
    logger.warn("%r: %s, skipping" % (label, status))
    return []
  snp_offset = len(lflank)
  records = []
  for allele, code in izip(alleles, ALLELE_CODES):
    seq = "%s%s%s" % (lflank, allele, rflank)
    seq_id = name_serializer.serialize(label, code, snp_offset, alleles)
    quality = '~'*len(seq)
    records.append(('@%s' % seq_id, seq, '+%s' % seq_id, quality))
  return records
예제 #5
0
def build_fastq_records(label, mask, name_serializer, logger=None):
    """
    Return a list of four-field records, one per allele of ``mask``.

    A record is ``('@' + id, sequence, '+' + id, quality)``: the sequence is
    the left flank, the allele and the right flank joined together; the id
    is produced by ``name_serializer.serialize`` from the label, the allele
    code (taken in order from ALLELE_CODES), the left flank length and the
    alleles; the quality string is '~' repeated to the sequence length.

    When the mask cannot be split, a warning is logged (to a NullLogger if
    no logger was supplied) and the returned list is empty.
    """
    if not logger:
        logger = NullLogger()
    try:
        lflank, alleles, rflank = split_mask(mask)
    except ValueError:
        # "None" (the string) means the mask is absent, not malformed.
        status = "bad mask format"
        if mask == "None":
            status = "no mask"
        logger.warn("%r: %s, skipping" % (label, status))
        return []
    offset = len(lflank)
    # Lazily pair each sequence with its serialized id; the sequence is
    # formatted before the serializer is called for every allele.
    seqs_and_ids = (
        ("%s%s%s" % (lflank, allele, rflank),
         name_serializer.serialize(label, code, offset, alleles))
        for allele, code in izip(alleles, ALLELE_CODES)
    )
    return [
        ('@%s' % seq_id, seq, '+%s' % seq_id, '~' * len(seq))
        for seq, seq_id in seqs_and_ids
    ]
예제 #6
0
def canonize_call(mask, abi_call):
  """
  Translate an ABI call into a SnpCall code relative to the top mask.

  'BOTH' and 'UNDETERMINED' (matched case-insensitively) map to
  SnpCall.AB and SnpCall.NOCALL.  Any other call is split on '-' and its
  second part -- the base called by TaqMan -- is compared with the mask's
  alleles: a match with the first allele or its rc() gives SnpCall.AA, a
  match with the second allele or its rc() gives SnpCall.BB.

  Raises ValueError when the base matches neither allele, and also when
  the call does not split into exactly two parts on '-'.
  """
  call_kind = abi_call.upper()
  if call_kind == 'BOTH':
    return SnpCall.AB
  if call_kind == 'UNDETERMINED':
    return SnpCall.NOCALL
  _, call_base = abi_call.split('-')

  _, alleles, _ = split_mask(mask)
  first = alleles[0]
  if call_base in (first, rc(first)):
    return SnpCall.AA
  # The second allele is only looked up once the first one has not matched.
  second = alleles[1]
  if call_base in (second, rc(second)):
    return SnpCall.BB
  raise ValueError('Cannot map %s (alleles: %s)' % (abi_call, alleles))
예제 #7
0
    records = [r for r in reader]

# Collect the 'source' value of every input record and ask `kb` for the
# corresponding markers (requesting the 'vid' and 'mask' columns).
vids = [r['source'] for r in records]
markers = kb.get_snp_markers(vids=vids, col_names=['vid', 'mask'])

# Write a tab-separated table with a header row to `outfn`.
with open(outfn, 'w') as outf:
    fieldnames = ['marker_vid', 'marker_indx', 'allele_flip']
    writer = csv.DictWriter(outf,
                            delimiter="\t",
                            lineterminator=os.linesep,
                            fieldnames=fieldnames)
    writer.writeheader()
    # Markers and records are walked in lockstep; the assert checks that
    # the i-th marker really belongs to the i-th record.
    for i, (m, r) in enumerate(it.izip(markers, records)):
        assert m.id == r['source']
        try:
            _, stored_alleles, _ = snp.split_mask(m.mask)
        except ValueError:
            # An unsplittable mask is reported on stdout and treated as
            # "no flip" instead of aborting the run.
            sys.stdout.write("WARNING: could not split mask for %r\n" %
                             r['source'])
            flip = False
        else:
            # Compare the record's allele pair, in the given and in the
            # swapped order, with the alleles stored in the mask -- either
            # directly or after applying rc().
            # NOTE(review): both pairs are tuples, so the equality tests
            # assume split_mask() yields a comparable tuple and that rc()
            # accepts one -- confirm.
            alleles = r['allele_a'], r['allele_b']
            fl_alleles = r['allele_b'], r['allele_a']
            if alleles == stored_alleles or rc(alleles) == stored_alleles:
                flip = False
            elif fl_alleles == stored_alleles or rc(
                    fl_alleles) == stored_alleles:
                flip = True
            else:
                # Neither ordering matches the stored alleles.
                raise ValueError("%s: got inconsistent mask from db: %r" %
                                 (m.id, m.mask))
예제 #8
0
 def test_good(self):
   """Every mask in SPLIT_MASK_PAIRS must split into its expected value."""
   for mask, expected in SPLIT_MASK_PAIRS:
     self.assertEqual(usnp.split_mask(mask), expected)
예제 #9
0
# Load every row of the tab-separated input file into memory.
with open(fn) as f:
  reader = csv.DictReader(f, delimiter="\t")
  records = list(reader)

# Fetch the markers named by the 'source' column of the input rows,
# requesting the 'vid' and 'mask' columns.
vids = [r['source'] for r in records]
markers = kb.get_snp_markers(
  vids=vids,
  col_names=['vid', 'mask'],
)

with open(outfn, 'w') as outf:
  fieldnames = ['marker_vid', 'marker_indx', 'allele_flip']
  writer = csv.DictWriter(outf, delimiter="\t", lineterminator=os.linesep,
                          fieldnames=fieldnames)
  writer.writeheader()
  for i, (m, r) in enumerate(it.izip(markers, records)):
    assert m.id == r['source']
    try:
      _, stored_alleles, _ = snp.split_mask(m.mask)
    except ValueError:
      sys.stdout.write("WARNING: could not split mask for %r\n" % r['source'])
      flip = False
    else:
      alleles = r['allele_a'], r['allele_b']
      fl_alleles = r['allele_b'], r['allele_a']
      if alleles == stored_alleles or rc(alleles) == stored_alleles:
        flip = False
      elif fl_alleles == stored_alleles or rc(fl_alleles) == stored_alleles:
        flip = True
      else:
        raise ValueError("%s: got inconsistent mask from db: %r" %
                         (m.id, m.mask))
    index = r.get("marker_indx", i)
    writer.writerow({"marker_vid": m.id, "marker_indx": index,