Esempio n. 1
0
def canonize_call(mask, abi_call):
  """
  Map a TaqMan (ABI) genotype call onto a SnpCall code for ``mask``.

  'BOTH' and 'UNDETERMINED' (matched case-insensitively) yield
  SnpCall.AB and SnpCall.NOCALL respectively. Any other call must look
  like '<prefix>-<base>': the base is compared with each allele
  returned by split_mask(mask), and with rc() of that allele, giving
  SnpCall.AA for the first allele and SnpCall.BB for the second.

  Raises ValueError if the base matches neither allele, or if abi_call
  does not split into exactly two '-'-separated parts.
  """
  normalized_call = abi_call.upper()
  if normalized_call == 'BOTH':
    return SnpCall.AB
  if normalized_call == 'UNDETERMINED':
    return SnpCall.NOCALL

  # Exactly one '-' is expected; anything else fails the unpacking.
  _prefix, called_base = abi_call.split('-')
  _left, allele_pair, _right = split_mask(mask)

  first_allele = allele_pair[0]
  if called_base in (first_allele, rc(first_allele)):
    return SnpCall.AA

  second_allele = allele_pair[1]
  if called_base in (second_allele, rc(second_allele)):
    return SnpCall.BB

  raise ValueError('Cannot map %s (alleles: %s)' % (abi_call, allele_pair))
Esempio n. 2
0
    # Emit one tab-separated row per (marker, record) pair, preceded by a
    # header line built from ``fieldnames``.
    writer = csv.DictWriter(outf,
                            delimiter="\t",
                            lineterminator=os.linesep,
                            fieldnames=fieldnames)
    writer.writeheader()
    # markers and records are paired positionally; the assert checks that
    # each marker really corresponds to the record it is paired with.
    for i, (m, r) in enumerate(it.izip(markers, records)):
        assert m.id == r['source']
        try:
            _, stored_alleles, _ = snp.split_mask(m.mask)
        except ValueError:
            # The stored mask could not be split: warn and fall back to
            # "no flip" instead of aborting the whole run.
            sys.stdout.write("WARNING: could not split mask for %r\n" %
                             r['source'])
            flip = False
        else:
            # Compare the record's allele pair, in both orders, with the
            # pair stored in the mask.
            # NOTE(review): rc() is applied to the whole pair here;
            # presumably it reverse-complements it -- confirm against its
            # definition.
            alleles = r['allele_a'], r['allele_b']
            fl_alleles = r['allele_b'], r['allele_a']
            if alleles == stored_alleles or rc(alleles) == stored_alleles:
                flip = False
            elif fl_alleles == stored_alleles or rc(
                    fl_alleles) == stored_alleles:
                flip = True
            else:
                # Neither ordering matches the stored mask.
                raise ValueError("%s: got inconsistent mask from db: %r" %
                                 (m.id, m.mask))
        # Prefer the record's own index; otherwise use the loop position.
        index = r.get("marker_indx", i)
        writer.writerow({
            "marker_vid": m.id,
            "marker_indx": index,
            "allele_flip": flip
        })
  records = [r for r in reader]

# Fetch the stored mask of every marker named in the input records.
vids = [r['source'] for r in records]
markers = kb.get_snp_markers(vids=vids, col_names=['vid', 'mask'])

# Write one tab-separated row per marker, stating whether the record's
# alleles are swapped with respect to the alleles stored in the mask.
with open(outfn, 'w') as outf:
  fieldnames = ['marker_vid', 'marker_indx', 'allele_flip']
  # outf is opened in text mode, which already translates "\n" into the
  # platform line ending. Passing os.linesep as the terminator would be
  # translated a second time and yield "\r\r\n" on Windows.
  writer = csv.DictWriter(outf, delimiter="\t", lineterminator="\n",
                          fieldnames=fieldnames)
  writer.writeheader()
  # markers and records are paired positionally; the assert checks that
  # each marker really corresponds to the record it is paired with.
  for i, (m, r) in enumerate(it.izip(markers, records)):
    assert m.id == r['source']
    try:
      _, stored_alleles, _ = snp.split_mask(m.mask)
    except ValueError:
      # The stored mask could not be split: warn and fall back to
      # "no flip" instead of aborting the whole run.
      sys.stdout.write("WARNING: could not split mask for %r\n" % r['source'])
      flip = False
    else:
      # Compare the record's allele pair, in both orders, with the pair
      # stored in the mask.
      # NOTE(review): rc() is applied to the whole pair here; presumably
      # it reverse-complements it -- confirm against its definition.
      alleles = r['allele_a'], r['allele_b']
      fl_alleles = r['allele_b'], r['allele_a']
      if alleles == stored_alleles or rc(alleles) == stored_alleles:
        flip = False
      elif fl_alleles == stored_alleles or rc(fl_alleles) == stored_alleles:
        flip = True
      else:
        # Neither ordering matches the stored mask.
        raise ValueError("%s: got inconsistent mask from db: %r" %
                         (m.id, m.mask))
    # Prefer the record's own index; otherwise use the loop position.
    index = r.get("marker_indx", i)
    writer.writerow({"marker_vid": m.id, "marker_indx": index,
                     "allele_flip": flip})
Esempio n. 4
0
def build_index_key(seq):
  """
  Return whichever of ``seq`` and ``rc(seq)`` compares smaller.

  When the two compare equal, ``seq`` itself is returned.
  """
  counterpart = rc(seq)
  return min(seq, counterpart)