Beispiel #1
0
  def process_variant_detail_vcf(self, record, assaytype):
    """Build a variant-detail document from a VCF record and buffer it.

    The record is parsed with VCFrecord, a JSON-style dict is assembled
    and the dict is appended to self.variantbuff.

    Keys written: "assaytype", "rsid", "chromosome" (zero-padded to two
    digits), "alleleA", "alleleB", "position", "info" and, when it can be
    read as a float, "ref_maf".

    Args:
      record: raw record passed straight to VCFrecord (presumably one VCF
        data line -- confirm against the caller).
      assaytype: value stored unchanged under the "assaytype" key.

    Raises:
      Whatever int() raises when the chromosome is not integer-like
      (e.g. ValueError for "X"); that conversion is not guarded.
    """
    doc = {}
    doc["assaytype"] = assaytype
    vcfr = VCFrecord(record)
    # NOTE(review): prfx/sfx are never used below. The call is kept only in
    # case get_prfx_sfx() validates the record -- confirm and remove.
    prfx, sfx = vcfr.get_prfx_sfx()
    doc["rsid"] = vcfr.get_varid()
    # always store chromosome as a 2-digit string
    doc["chromosome"] = "%.2d" % (int(vcfr.get_chr()))
    alleleA, alleleB = vcfr.get_alleles()
    doc["alleleA"] = alleleA
    doc["alleleB"] = alleleB
    doc["position"] = vcfr.get_posn_as_int()

    # "RefPanelAF" is optional: leave "ref_maf" out when it is absent or not
    # numeric. Catch Exception (not a bare except) so that KeyboardInterrupt
    # and SystemExit are no longer swallowed.
    try:
      doc["ref_maf"] = float(vcfr.get_info_value("RefPanelAF"))
    except Exception:
      pass

    # "INFO" falls back to 1.0 when it is absent or not numeric.
    try:
      doc["info"] = float(vcfr.get_info_value("INFO"))
    except Exception:
      doc["info"] = 1.0

    self.variantbuff.append(doc)
Beispiel #2
0
def main(options):
  """Rewrite the chromosome field of a VCF stream read from stdin.

  Loads a chromosome map from the file named by options.chrommap (via
  load_chrom_map), then copies stdin to stdout: lines starting with '#'
  are echoed as-is, every other line is parsed with VCFrecord, its
  chromosome is replaced by chrom_map[<original chromosome>] and the
  resulting record is printed.

  Args:
    options: object with a `chrommap` attribute holding the map file path.

  Returns:
    The number of input lines read (header lines included).

  Raises:
    SystemExit: with status 1 when the map cannot be loaded or when a
      record's chromosome has no entry in the map.
  """
  try:
    # `with` guarantees the map file is closed once it has been loaded.
    with open(options.chrommap) as fh:
      chrom_map = load_chrom_map(fh)
  except Exception as err:
    # stdout carries the VCF output, so diagnostics must go to stderr.
    sys.stderr.write("Unable to open %s: %s\n" % (options.chrommap, err))
    sys.exit(1)

  count = 0
  for line in sys.stdin:
    count += 1
    line = line.strip()
    if line.startswith('#'):
      print(line)
      continue

    vcfr = VCFrecord(line)
    strchrom = vcfr.get_chr()
    # Only the lookup is guarded: a failure inside set_chr() is a different
    # problem and should not be reported as a missing map entry.
    try:
      mapped_chrom = chrom_map[strchrom]
    except LookupError:
      # Fatal, so log at ERROR (INFO is hidden by the default logging
      # configuration) and exit with a non-zero status.
      logging.error("Chromosome not found in map %s, %s", options.chrommap, strchrom)
      sys.exit(1)
    vcfr.set_chr(mapped_chrom)
    print(vcfr.get_record())

  return count
Beispiel #3
0
def main():
  """Print per-variant summary statistics (CSV) for a VCF stream on stdin.

  Writes a CSV header, then one row per non-header input line with the
  variant id, chromosome, position, alleles, minor allele, MAF, call rate
  and HWE p-value. Lines starting with '#' are counted and skipped.

  When the HWE or MAF computation raises ZeroDivisionError the event is
  logged and the affected columns are written as "NA". When a record has
  no calls and no virtual no-calls, the call rate is written as NaN.

  Returns:
    Tuple (in_count, hdr_count, homr_total, het_total, homa_total,
    virt_nc_total, miss_total): lines read, header lines seen, and the
    genotype counts summed over all records.
  """
  mafh = Mafhelper()
  hweh = Hwehelper()
  in_count = 0
  hdr_count = 0
  homr_total = 0
  het_total = 0
  homa_total = 0
  virt_nc_total = 0
  miss_total = 0

  print("SNPId,AssayType,chr,pos,REF,ALT,Minor,MAF,CallRate,HWE_pval")

  for line in sys.stdin:
    line = line.strip()
    in_count += 1
    if line.startswith("#"):
      hdr_count += 1
      continue

    vcfr = VCFrecord(line)
    varid = vcfr.get_varid_ukb()
    chromosome = vcfr.get_chr()
    posn = vcfr.get_posn_as_int()
    ref, alt = vcfr.get_alleles()
    homref_count, het_count, homalt_count, virt_nc_count, miss_count = vcfr.get_allele_counts()
    call_count = homref_count + het_count + homalt_count
    # Only virtual no-calls count against the call rate; miss_count is not
    # part of the denominator (a variant that added it was disabled).
    nocall_count = virt_nc_count
    denominator = call_count + nocall_count
    # Guard the division: a record with no calls at all would otherwise
    # abort the whole run with ZeroDivisionError.
    call_rate = float(call_count) / float(denominator) if denominator else float("nan")

    homr_total += homref_count
    het_total += het_count
    homa_total += homalt_count
    virt_nc_total += virt_nc_count
    miss_total += miss_count

    # Reset per record so that a failed computation can never print the
    # previous record's values (or hit an unbound name on the first one).
    hwe = maf = ma = "NA"
    try:
      hwe = hweh.HWE_exact(het_count, homref_count, homalt_count, call_count)
      maf, ma = mafh.maf(het_count, homref_count, ref, homalt_count, alt, virt_nc_count)
    except ZeroDivisionError:
      logging.info("DIV 0 error at %d (%d), where hom_r=%d, het=%d, home_a=%d, cc=%d", in_count, posn, homref_count, het_count, homalt_count, call_count)

    print("%s,combo,%s,%d,%s,%s,%s,%s,%.3f,%s" % (varid, chromosome, posn, ref, alt, ma, maf, call_rate, hwe))

  return in_count, hdr_count, homr_total, het_total, homa_total, virt_nc_total, miss_total