Exemple #1
0
def main(args):
    """Write the ranges shared by a depth BED file and a feature BED file.

    Both inputs are wrapped in ``BedStream`` and walked together through
    ``MultiLocusStream``. For every item whose payload holds entries from
    both streams, each depth entry is passed with the feature entries to
    ``union_range_array``; every returned range is written as its
    tab-separated BED fields followed by the depth entry's payload.

    Args:
        args: Parsed options providing ``depth_bed`` and ``feature_bed``
            (input paths; names ending in ``.gz`` are opened with ``gzip``)
            and ``output`` (output path, or a false value for STDOUT).
    """
    gz_suffix = r'\.gz$'
    # Every handle opened here is recorded so it is released on any exit
    # path, including an exception part-way through the stream.
    opened = []
    try:
        if re.search(gz_suffix, args.depth_bed):
            inf1 = gzip.open(args.depth_bed)
        else:
            inf1 = open(args.depth_bed)
        opened.append(inf1)

        if re.search(gz_suffix, args.feature_bed):
            inf2 = gzip.open(args.feature_bed)
        else:
            inf2 = open(args.feature_bed)
        opened.append(inf2)

        of = sys.stdout
        if args.output:
            # NOTE(review): mode 'w' is binary for gzip.open on Python 3,
            # where writing str would raise TypeError -- confirm the target
            # interpreter before changing it.
            if re.search(gz_suffix, args.output):
                of = gzip.open(args.output, 'w')
            else:
                of = open(args.output, 'w')
            opened.append(of)

        bs1 = BedStream(inf1)
        bs2 = BedStream(inf2)
        mls = MultiLocusStream([bs1, bs2])
        for overlapped in mls:
            b1s, b2s = overlapped.get_payload()
            # Nothing to report unless both files contribute entries here.
            if not b1s or not b2s:
                continue
            for b1 in b1s:
                m = union_range_array(b1, b2s, is_sorted=True)
                for rng in m:
                    fields = [str(x) for x in rng.get_bed_array()]
                    of.write("\t".join(fields) +
                             "\t" + b1.get_payload() + "\n")
        # STDOUT is not ours to close; just make sure everything is emitted.
        of.flush()
    finally:
        for handle in reversed(opened):
            handle.close()
def main():
    """Report the average exon coverage of each GPD entry within its locus.

    Reads GPD entries from a file (or STDIN when the input is ``-``),
    groups them with ``LocusStream``, builds coverage from all exon ranges
    of each group with ``ranges_to_coverage`` and, for every entry, sums
    ``payload * length`` over the ranges returned by ``union_range_array``
    for each exon.

    Each output line holds, tab-separated: gene name, exon count, length,
    and the summed coverage divided by the length. Progress (the current
    range string) is written to STDERR.
    """
    parser = argparse.ArgumentParser(description="", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use - for STDIN")
    parser.add_argument('-o', '--output', help="output file or use STDOUT if not set")
    args = parser.parse_args()

    # Only handles opened here are recorded; STDIN/STDOUT are left open.
    opened = []
    try:
        if args.input == '-':
            inf = sys.stdin
        else:
            inf = open(args.input)
            opened.append(inf)
        gs = GPDStream(inf)
        ls = LocusStream(gs)

        of = sys.stdout
        if args.output:
            # NOTE(review): mode 'w' is binary for gzip.open on Python 3,
            # where writing str would raise TypeError -- confirm the target
            # interpreter before changing it.
            if re.search(r'\.gz$', args.output):
                of = gzip.open(args.output, 'w')
            else:
                of = open(args.output, 'w')
            opened.append(of)

        for rng in ls:
            sys.stderr.write(rng.get_range_string() + "    \r")
            gpds = rng.get_payload()
            # Pool the exon ranges of every entry in this group.
            exs = [y.get_range() for x in gpds for y in x.exons]
            cov = ranges_to_coverage(exs)
            # use our coverage data on each gpd entry now
            for gpd in gpds:
                totcov = 0
                for ex in gpd.exons:
                    gcovs = union_range_array(ex.get_range(), cov, payload=2)
                    totcov += sum(x.get_payload() * x.length() for x in gcovs)
                length = gpd.get_length()
                of.write(gpd.get_gene_name() + "\t" +
                         str(gpd.get_exon_count()) + "\t" +
                         str(length) + "\t" +
                         str(float(totcov) / float(length)) + "\n")
        sys.stderr.write("\n")
        # STDOUT is not ours to close; just make sure everything is emitted.
        of.flush()
    finally:
        for handle in reversed(opened):
            handle.close()
Exemple #3
0
def main():
    """Report the average exon coverage of each GPD entry within its locus.

    Reads GPD entries from a file (or STDIN when the input is ``-``),
    groups them with ``LocusStream``, builds coverage from all exon ranges
    of each group with ``ranges_to_coverage`` and, for every entry, sums
    ``payload * length`` over the ranges returned by ``union_range_array``
    for each exon.

    Each output line holds, tab-separated: gene name, exon count, length,
    and the summed coverage divided by the length. Progress (the current
    range string) is written to STDERR.
    """
    parser = argparse.ArgumentParser(
        description="", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('input', help="Use - for STDIN")
    parser.add_argument('-o',
                        '--output',
                        help="output file or use STDOUT if not set")
    args = parser.parse_args()

    # Only handles opened here are recorded; STDIN/STDOUT are left open.
    opened = []
    try:
        if args.input == '-':
            inf = sys.stdin
        else:
            inf = open(args.input)
            opened.append(inf)
        gs = GPDStream(inf)
        ls = LocusStream(gs)

        of = sys.stdout
        if args.output:
            # NOTE(review): mode 'w' is binary for gzip.open on Python 3,
            # where writing str would raise TypeError -- confirm the target
            # interpreter before changing it.
            if re.search(r'\.gz$', args.output):
                of = gzip.open(args.output, 'w')
            else:
                of = open(args.output, 'w')
            opened.append(of)

        for rng in ls:
            sys.stderr.write(rng.get_range_string() + "    \r")
            gpds = rng.get_payload()
            # Pool the exon ranges of every entry in this group.
            exs = [y.get_range() for x in gpds for y in x.exons]
            cov = ranges_to_coverage(exs)
            # use our coverage data on each gpd entry now
            for gpd in gpds:
                totcov = 0
                for ex in gpd.exons:
                    gcovs = union_range_array(ex.get_range(), cov, payload=2)
                    totcov += sum(x.get_payload() * x.length() for x in gcovs)
                length = gpd.get_length()
                of.write(gpd.get_gene_name() + "\t" +
                         str(gpd.get_exon_count()) + "\t" +
                         str(length) + "\t" +
                         str(float(totcov) / float(length)) + "\n")
        sys.stderr.write("\n")
        # STDOUT is not ours to close; just make sure everything is emitted.
        of.flush()
    finally:
        for handle in reversed(opened):
            handle.close()
def main():
    """Summarise BED depth over the exons of every GPD entry.

    The GPD and BED depth inputs (both expected sorted, per the command
    description) are walked together through ``MultiLocusStream``. Items
    without GPD entries are skipped. For each GPD entry one tab-separated
    line is written:

        gene name, transcript name, exon count, length,
        covered length, covered length / length, depth total / length

    where "covered length" sums the lengths of the ranges returned by
    ``union_range_array`` for each exon whose integer payload is above
    zero, and "depth total" sums ``length * payload`` over those ranges.
    Entries in items with no BED records get ``0`` for the last three
    columns. Progress (the current range string) is written to STDERR.
    """
    parser = argparse.ArgumentParser(description="For every gpd entry (sorted) intersect it with bed depth (sorted)", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('gpd_input', help="GPD file")
    # Help text previously repeated "GPD file" for this BED argument.
    parser.add_argument('bed_depth_input', help="BED depth file")
    parser.add_argument('-o', '--output', help="output file")
    args = parser.parse_args()

    gz_suffix = r'\.gz$'
    # Every handle opened here is recorded so it is released on any exit
    # path, including an exception part-way through the stream.
    opened = []
    try:
        if re.search(gz_suffix, args.gpd_input):
            inf1 = gzip.open(args.gpd_input)
        else:
            inf1 = open(args.gpd_input)
        opened.append(inf1)

        if re.search(gz_suffix, args.bed_depth_input):
            inf2 = gzip.open(args.bed_depth_input)
        else:
            inf2 = open(args.bed_depth_input)
        opened.append(inf2)

        gs = GPDStream(inf1)
        bs = BedStream(inf2)

        of = sys.stdout
        if args.output:
            # NOTE(review): mode 'w' is binary for gzip.open on Python 3,
            # where writing str would raise TypeError -- confirm the target
            # interpreter before changing it.
            if re.search(gz_suffix, args.output):
                of = gzip.open(args.output, 'w')
            else:
                of = open(args.output, 'w')
            opened.append(of)

        mls = MultiLocusStream([gs, bs])
        for ml in mls:
            sys.stderr.write(ml.get_range_string() + "       \r")
            gpds, beds = ml.get_payload()
            if not gpds:
                continue
            if not beds:
                # No depth records here: report zero coverage for each entry.
                for gpd in gpds:
                    of.write("\t".join([gpd.get_gene_name(),
                                        gpd.get_transcript_name(),
                                        str(gpd.get_exon_count()),
                                        str(gpd.get_length()),
                                        "0", "0", "0"]) + "\n")
                continue
            for gpd in gpds:
                covs = []
                for ex in gpd.exons:
                    covs.extend(union_range_array(ex.get_range(), beds, payload=2))
                clen = sum(x.length() for x in covs if int(x.get_payload()) > 0)
                tot = sum(x.length() * int(x.get_payload()) for x in covs)
                length = gpd.get_length()
                of.write("\t".join([gpd.get_gene_name(),
                                    gpd.get_transcript_name(),
                                    str(gpd.get_exon_count()),
                                    str(length),
                                    str(clen),
                                    str(float(clen) / float(length)),
                                    str(float(tot) / float(length))]) + "\n")
        sys.stderr.write("\n")
        # STDOUT is not ours to close; just make sure everything is emitted.
        of.flush()
    finally:
        for handle in reversed(opened):
            handle.close()