Beispiel #1
0
def main():
  """Merge per-sample data files into one table emitted through out().

  Command line: <pattern> <re_id> <f_dir>
    pattern -- handed to drdcommon.files_in_dir() together with f_dir
    re_id   -- regular expression; group 1 of its match against a file
               name is used as the sample id
    f_dir   -- directory handed to drdcommon.files_in_dir()

  The header line is "chrm start " followed by the sample ids; every
  following line holds one (chrm, start) key and one value per sample id,
  in header order.

  Raises:
    Exception: when re_id does not match a file name or has no group 1.
  """
  if len(sys.argv) != 4:
    drdcommon.error("Wrong # of args", usage)
  pattern = sys.argv[1]
  re_id   = sys.argv[2] # Regular expression to extract id
  f_dir   = sys.argv[3]

  def tree():
    # Arbitrarily deep auto-creating dictionary.
    return defaultdict(tree)

  l_ids = []
  h     = tree() # hold all data in mem

  files_to_iterate = drdcommon.files_in_dir(f_dir, pattern)
  sys.stderr.write("# of files to process: " + str(len(files_to_iterate)) + "\n")
  for fn in files_to_iterate:
    match = re.search(re_id, fn)
    try:
      sid = match.group(1)
    except (AttributeError, IndexError):
      # AttributeError: no match (match is None); IndexError: no group 1.
      raise Exception('Problems extracting id using regular expression.')
    l_ids.append(sid)
    load_data(sid, fn, h)

  # print header
  out("chrm start ")
  for _id in l_ids:
    out("%s " % _id)
  out("\n")

  for chrm, one in h.items():
    for start, two in one.items():
      out("%s %s " % (str(chrm), str(start)))
      # Walk the ids in header order so each value lands under its own
      # column. The membership test avoids auto-creating entries in the
      # defaultdict for samples that have no data at this position.
      for sid in l_ids:
        if sid in two:
          out(str(two[sid][1]) + " ")
        else:
          # NOTE(review): placeholder for a sample without data here;
          # confirm the value downstream consumers expect.
          out("NA ")
      out("\n")
Beispiel #2
0
def main():
    """Print the csv rendering of a Saturation object built from a stream.

    The stream comes from drdcommon.xopen("-"); args.at_least_seen (from
    parse_args()) is forwarded to Saturation, and "\t" is passed to its
    csv() method. A missing stdin stream is reported through
    drdcommon.error().
    """
    args = parse_args()
    stream = drdcommon.xopen("-")
    try:
        if not drdcommon.data_in_stdin():
            drdcommon.error(usage)
        # Parenthesised single-argument print behaves the same as the
        # print statement on Python 2 and is valid Python 3.
        print(Saturation(stream, args.at_least_seen).csv("\t"))
    finally:
        # Close the stream even when building or rendering fails.
        stream.close()
Beispiel #3
0
 def __init__(self, h):
     """Populate the instance from `h` and validate its id and path.

     `h` is forwarded to self.set_attr(). An empty-string id or a false
     result from self.valid_path() is reported through common.error().
     """
     self.set_attr(h)

     id_is_blank = self.id == ''
     if id_is_blank:
         common.error('I cannot create a bam without id ')

     if self.valid_path():
         return
     details = (self.path, self.id)
     common.error('Invalid path [%s] for bam with id = [%s]' % details)
Beispiel #4
0
def main():
  """Run do_work() on the stream returned by drdcommon.xopen("-").

  The script accepts no command line parameters; anything else is
  reported through drdcommon.error() together with the usage text.
  """
  if len(sys.argv) != 1:
    drdcommon.error("Incorrect # of params.", usage)
    return

  fd_reads = drdcommon.xopen("-")
  try:
    do_work(fd_reads)
  finally:
    # Close the stream even when do_work() raises.
    fd_reads.close()
Beispiel #5
0
def main():
    """Write a per-coordinate table with one column per sample id.

    Command line: <pattern> <re_id>
      pattern -- handed to drdcommon.files_in_dir() for directory "."
      re_id   -- regular expression; group 1 of its match against a file
                 name is used as the sample id

    Each file is loaded with load_data() into a nested dictionary keyed
    as type -> chrm -> coor -> sample id. The header and one line per
    (type, chrm, coor) go to stdout; samples without an entry get "0".

    Raises:
      Exception: when re_id does not match a file name or has no group 1.
    """
    if len(sys.argv) != 3:
        drdcommon.error("Wrong # of args", usage)
    pattern = sys.argv[1]
    re_id = sys.argv[2]  # Regular expression to extract id

    def tree():
        # Arbitrarily deep auto-creating dictionary.
        return defaultdict(tree)

    h = tree()
    l_ids = []

    for fn in drdcommon.files_in_dir(".", pattern):
        match = re.search(re_id, fn)
        try:
            sid = match.group(1)
        except (AttributeError, IndexError):
            # AttributeError: no match (match is None);
            # IndexError: the pattern has no group 1.
            raise Exception(
                'Problems extracting id using regular expression.')
        l_ids.append(sid)
        load_data(sid, fn, h)

    # print header
    sys.stdout.write("chrm coor type num_over_zero ")
    for _id in l_ids:
        sys.stdout.write("%s " % _id)
    sys.stdout.write("\n")

    for t, one in h.items():
        for chrm, two in one.items():
            for coor, three in two.items():
                sys.stdout.write("%s %s %s %s " %
                                 (str(chrm), str(coor), t, len(three)))
                for sid in l_ids:
                    # Membership test first: indexing a missing id would
                    # create an entry in the defaultdict.
                    if sid in three:
                        sys.stdout.write(str(three[sid]) + " ")
                    else:
                        sys.stdout.write("0 ")
                sys.stdout.write("\n")
Beispiel #6
0
def main():
    """Run do_work() on the stream returned by drdcommon.xopen("-").

    The script accepts no command line parameters; anything else is
    reported through drdcommon.error() together with the usage text.
    """
    if len(sys.argv) != 1:
        drdcommon.error("Incorrect # of params.", usage)
        return

    fd_reads = drdcommon.xopen("-")
    try:
        do_work(fd_reads)
    finally:
        # Close the stream even when do_work() raises.
        fd_reads.close()
Beispiel #7
0
def main():
    """Build a ValidateChip from the command line and call do_work().

    Command line: <fn_sam> <fn_hits> <min_mapq> <pm_hits>; the last two
    are converted with int(). Any other argument count is reported
    through drdcommon.error().
    """
    if len(sys.argv) != 5:
        drdcommon.error("Incorrect # of params.", usage)
        return

    fn_sam = sys.argv[1]
    fn_hits = sys.argv[2]
    min_mapq = int(sys.argv[3])
    pm_hits = int(sys.argv[4])
    validator = ValidateChip(fn_sam, fn_hits, min_mapq, pm_hits)
    validator.do_work()
Beispiel #8
0
def main():
  """Build a ValidateChip from the command line and call do_work().

  Command line: <fn_sam> <fn_hits> <min_mapq> <pm_hits>; the last two
  are converted with int(). Any other argument count is reported
  through drdcommon.error().
  """
  if len(sys.argv) != 5:
    drdcommon.error("Incorrect # of params.", usage)
    return

  fn_sam = sys.argv[1]
  fn_hits = sys.argv[2]
  min_mapq = int(sys.argv[3])
  pm_hits = int(sys.argv[4])
  validator = ValidateChip(fn_sam, fn_hits, min_mapq, pm_hits)
  validator.do_work()
Beispiel #9
0
def main():
  """Write a per-coordinate table with one column per sample id.

  Command line: <pattern> <re_id>
    pattern -- handed to drdcommon.files_in_dir() for directory "."
    re_id   -- regular expression; group 1 of its match against a file
               name is used as the sample id

  Each file is loaded with load_data() into a nested dictionary keyed
  as type -> chrm -> coor -> sample id. The header and one line per
  (type, chrm, coor) go to stdout; samples without an entry get "0".

  Raises:
    Exception: when re_id does not match a file name or has no group 1.
  """
  if len(sys.argv) != 3:
    drdcommon.error("Wrong # of args", usage)
  pattern = sys.argv[1]
  re_id   = sys.argv[2] # Regular expression to extract id

  def tree():
    # Arbitrarily deep auto-creating dictionary.
    return defaultdict(tree)

  h     = tree()
  l_ids = []

  for fn in drdcommon.files_in_dir(".", pattern):
    match = re.search(re_id, fn)
    try:
      sid = match.group(1)
    except (AttributeError, IndexError):
      # AttributeError: no match (match is None);
      # IndexError: the pattern has no group 1.
      raise Exception('Problems extracting id using regular expression.')
    l_ids.append(sid)
    load_data(sid, fn, h)

  # print header
  sys.stdout.write("chrm coor type num_over_zero ")
  for _id in l_ids:
    sys.stdout.write("%s " % _id)
  sys.stdout.write("\n")

  for t, one in h.items():
    for chrm, two in one.items():
      for coor, three in two.items():
        sys.stdout.write("%s %s %s %s " % (str(chrm), str(coor), t, len(three)))
        for sid in l_ids:
          # Membership test first: indexing a missing id would create
          # an entry in the defaultdict.
          if sid in three:
            sys.stdout.write(str(three[sid]) + " ")
          else:
            sys.stdout.write("0 ")
        sys.stdout.write("\n")
Beispiel #10
0
def main():
    """Run do_work() on the vcf stream returned by drdcommon.xopen("-").

    No command line parameters are accepted and drdcommon.data_in_stdin()
    must be true; violations are reported through drdcommon.error().
    """
    if len(sys.argv) != 1:
        drdcommon.error("Wrong # of args", usage)
    if not drdcommon.data_in_stdin():
        drdcommon.error("No data in stdin.", usage)

    fd_vcf = drdcommon.xopen("-")
    try:
        do_work(fd_vcf)
    finally:
        # Close the stream even when do_work() raises.
        fd_vcf.close()
Beispiel #11
0
def main():
    """Run a CnvStateMachine over the stream from drdcommon.xopen("-").

    Command line: <threshold>, converted with float(). A wrong argument
    count or a false drdcommon.data_in_stdin() is reported through
    drdcommon.error().
    """
    argc = len(sys.argv)
    if argc != 2:
        drdcommon.error("Wrong # of args", usage)

    has_input = drdcommon.data_in_stdin()
    if not has_input:
        drdcommon.error("No data in stdin.", usage)

    ratios_stream = drdcommon.xopen("-")
    threshold = float(sys.argv[1])
    machine = CnvStateMachine(ratios_stream, threshold)
    machine.run()
Beispiel #12
0
def check_input():
    """Validate the command line and return (bam_fn, probes_fn).

    Expects exactly two parameters. Both names are logged at info level,
    and the bam path must be an existing file. Violations are reported
    through drdcommon.error().
    """
    argc = len(sys.argv)
    if argc != 3:
        drdcommon.error("Wrong # of args", usage)

    bam_fn = sys.argv[1]
    probes_fn = sys.argv[2]
    logging.info("bam: %s probes: %s" % (bam_fn, probes_fn))

    bam_found = os.path.isfile(bam_fn)
    if not bam_found:
        drdcommon.error("Invalid bam file.", usage)
    return bam_fn, probes_fn
Beispiel #13
0
    def __load_vcf(self):
        """Wrap self.fd_vcf in a Vcf, load its meta header and validate it.

        When self.drop is set and no coordinates file is in use, a vcf
        with fewer than two samples is reported through drdcommon.error().
        """
        vcf = Vcf(self.fd_vcf)
        self.vcf = vcf
        self.vcf.load_meta_header()

        # Guard clauses in the same evaluation order as the combined test.
        if not self.drop:
            return
        if self.coordinates_in_file:
            return
        if self.vcf.num_of_samples < 2:
            drdcommon.error(
                "I need a population level vcf in order to drop species snps.")
Beispiel #14
0
def main():
    """Load data for a pattern and draw it with boxplot().

    Command line: <pattern>. The pattern is passed to load_data() and
    reused as the plot title; y_limit is fixed at 50. Any other argument
    count is reported through drdcommon.error().
    """
    if len(sys.argv) != 2:
        drdcommon.error("Incorrect # of params.", usage)
        return

    pattern = sys.argv[1]
    loaded = load_data(pattern)
    boxplot(loaded, title=pattern, y_limit=50)
Beispiel #15
0
def main():
  """Draw the "MAF CRV" bar plot from a table read with pandas.

  Command line: <input tsv> <output png>. The table's `counts` attribute
  is plotted against ten fixed range labels with drdplots.barplot(). Any
  other argument count is reported through drdcommon.error().
  """
  if len(sys.argv) != 3:
    drdcommon.error("Wrong number of args. Need input tsv file and output png.")
    return

  table = pd.read_table(sys.argv[1])
  plot_title = "MAF CRV"
  bin_labels = [
    "0-0.05", "0.05-0.1", "0.1-0.15", "0.15-0.2", "0.2-0.25",
    "0.25-0.3", "0.3-0.35", "0.35-0.4", "0.4-0.45", "0.45-0.5",
  ]
  drdplots.barplot(table.counts, bin_labels, plot_title, ofn=sys.argv[2])
Beispiel #16
0
def main():
    """Plot the data read from the stream returned by drdcommon.xopen("-").

    Command line: <title> <xlabel> <ylabel>. A false
    drdcommon.data_in_stdin() or a wrong argument count is reported
    through drdcommon.error().
    """
    has_input = drdcommon.data_in_stdin()
    if not has_input:
        drdcommon.error("I need a data stream in stdin.", usage=_usage)
    if len(sys.argv) != 4:
        drdcommon.error("Wrong number of parameters", usage=_usage)

    title, x_label, y_label = sys.argv[1:]
    stream = drdcommon.xopen("-")
    xs, ys = process_data(stream)
    plot(xs, ys, title, xlabel=x_label, ylabel=y_label)
Beispiel #17
0
def main():
    """Plot the data read from the stream returned by drdcommon.xopen("-").

    Command line: <title>. The axis labels are fixed. A false
    drdcommon.data_in_stdin() or a wrong argument count is reported
    through drdcommon.error().
    """
    has_input = drdcommon.data_in_stdin()
    if not has_input:
        drdcommon.error("I need a data stream in stdin.", usage="-")
    if len(sys.argv) != 2:
        drdcommon.error("Wrong number of parameters", usage="-")

    title = sys.argv[1]
    stream = drdcommon.xopen("-")
    xs, ys = process_data(stream)
    plot(xs, ys, title,
         xlabel="genomic window",
         ylabel="Average Read Depth")
Beispiel #18
0
def main():
    """Plot the data read from the stream returned by drdcommon.xopen("-").

    Command line: <title> <xlabel> <ylabel>. A false
    drdcommon.data_in_stdin() or a wrong argument count is reported
    through drdcommon.error().
    """
    has_input = drdcommon.data_in_stdin()
    if not has_input:
        drdcommon.error("I need a data stream in stdin.", usage=_usage)
    if len(sys.argv) != 4:
        drdcommon.error("Wrong number of parameters", usage=_usage)

    title, x_label, y_label = sys.argv[1:]
    stream = drdcommon.xopen("-")
    xs, ys = process_data(stream)
    plot(xs, ys, title, xlabel=x_label, ylabel=y_label)
Beispiel #19
0
def main():
    """Run do_work() on a vcf stream and a csv stream.

    The vcf stream comes from drdcommon.xopen("-") and the csv stream
    from drdcommon.xopen(sys.argv[1]). Any argument count other than
    three parameters is reported through drdcommon.error().
    """
    # NOTE(review): three parameters are required but only sys.argv[1] is
    # read here; confirm whether do_work() consumes the remaining ones.
    if len(sys.argv) != 4:
        drdcommon.error("Incorrect # of params.", usage)
        return

    fd_vcf = drdcommon.xopen("-")
    try:
        fd_csv = drdcommon.xopen(sys.argv[1])
        try:
            do_work(fd_vcf, fd_csv)
        finally:
            fd_csv.close()
    finally:
        # Also reached when opening the csv fails, so the vcf stream
        # never leaks.
        fd_vcf.close()
Beispiel #20
0
def main():
  """Run do_work() on the vcf stream returned by drdcommon.xopen("-").

  No command line parameter is read, so any extra argument is reported
  through drdcommon.error(), as is a false drdcommon.data_in_stdin().
  """
  # The previous test (== 2) rejected exactly one extra argument but
  # silently accepted two or more.
  if len(sys.argv) != 1:
    drdcommon.error("Wrong # of args", usage)
  if not drdcommon.data_in_stdin():
    drdcommon.error("Need data in stdin.", usage)

  fd_vcf = drdcommon.xopen("-")
  try:
    do_work(fd_vcf)
  finally:
    # Close the stream even when do_work() raises.
    fd_vcf.close()
Beispiel #21
0
def main():
    """Run do_work() on the vcf stream with a window size.

    Command line: <w_size>, converted with int(). The stream comes from
    drdcommon.xopen("-"). A wrong argument count or a false
    drdcommon.data_in_stdin() is reported through drdcommon.error().
    """
    if len(sys.argv) != 2:
        drdcommon.error("Wrong # of args", usage)
    if not drdcommon.data_in_stdin():
        drdcommon.error("No data in stdin.", usage)

    fd_vcf = drdcommon.xopen("-")
    try:
        # Parsed inside the try so a malformed number still closes the
        # stream.
        w_size = int(sys.argv[1])
        do_work(fd_vcf, w_size)
    finally:
        fd_vcf.close()
Beispiel #22
0
def main():
    """Run do_work() on the vcf stream with a minimum number of samples.

    Command line: <min_num_samples>, converted with int(). The stream
    comes from drdcommon.xopen("-"). A wrong argument count or a false
    drdcommon.data_in_stdin() is reported through drdcommon.error().
    """
    if len(sys.argv) != 2:
        drdcommon.error("Wrong # of args", usage)
    if not drdcommon.data_in_stdin():
        drdcommon.error("Need data in stdin.", usage)

    min_num_samples = int(sys.argv[1])
    fd_vcf = drdcommon.xopen("-")
    try:
        do_work(fd_vcf, min_num_samples)
    finally:
        # Close the stream even when do_work() raises.
        fd_vcf.close()
Beispiel #23
0
def main():
  """Plot the values from process_data() against their 1-based bin number.

  Command line: <title> <output.filename>. The input stream comes from
  drdcommon.xopen("-"). Any other argument count is reported through
  drdcommon.error().
  """
  if len(sys.argv) != 3:
    drdcommon.error("Wrong number of args. <title> <output.filename>")
    return

  stream = drdcommon.xopen("-")
  logratios = process_data(stream)
  bin_nums = range(1, len(logratios) + 1)
  title = sys.argv[1]
  output_fn = sys.argv[2]
  plot(output_fn, bin_nums, logratios, title,
       xlabel="bin #",
       ylabel="log2ratios (sample/control)")
Beispiel #24
0
def main():
  """Run do_work() on a vcf stream, a pheno tsv and a haplo tsv.

  Command line: <pheno tsv> <haplo tsv>. The vcf stream comes from
  drdcommon.xopen("-"). Any other argument count is reported through
  drdcommon.error().
  """
  if len(sys.argv) != 3:
    drdcommon.error("Incorrect # of params.", usage)
    return

  # Streams are tracked as they open so that a failure part-way through
  # (or inside do_work) still closes everything opened so far.
  opened = []
  try:
    for name in ("-", sys.argv[1], sys.argv[2]):
      opened.append(drdcommon.xopen(name))
    fd_vcf, fd_pheno_tsv, fd_haplo_tsv = opened
    do_work(fd_vcf, fd_pheno_tsv, fd_haplo_tsv)
  finally:
    for fd in opened:
      fd.close()
Beispiel #25
0
def main():
  """Scatter-plot the std values against log_it(counts, 10).

  Takes no command line parameters. Data is read from the stream
  returned by drdcommon.xopen("-") and the plot is written to
  "std.dist.png". Any extra argument is reported through
  drdcommon.error().
  """
  if len(sys.argv) != 1:
    drdcommon.error("Wrong number of args. Just need std values in stdin.")
    return

  fd = drdcommon.xopen("-")
  try:
    std, counts = process_data(fd)
    title = "std dev freq of var allele ratios"

    drdplots.scatter_plot("std.dist.png",
                          std, log_it(counts, 10),
                          title=title, xlabel="std deviation",
                          ylabel="log10(counts)", dot_size=10)
  finally:
    # Close the stream even when processing or plotting raises.
    fd.close()
Beispiel #26
0
def main():
    """Plot the values from process_data() against their 1-based bin number.

    Command line: <title> <output.filename>. The input stream comes from
    drdcommon.xopen("-"). Any other argument count is reported through
    drdcommon.error().
    """
    if len(sys.argv) != 3:
        drdcommon.error("Wrong number of args. <title> <output.filename>")
        return

    stream = drdcommon.xopen("-")
    logratios = process_data(stream)
    bin_nums = range(1, len(logratios) + 1)
    title = sys.argv[1]
    output_fn = sys.argv[2]
    plot(output_fn, bin_nums, logratios, title,
         xlabel="bin #",
         ylabel="log2ratios (sample/control)")
Beispiel #27
0
def main():
    """Scatter-plot the std values against log_it(counts, 10).

    Takes no command line parameters. Data is read from the stream
    returned by drdcommon.xopen("-") and the plot is written to
    "std.dist.png". Any extra argument is reported through
    drdcommon.error().
    """
    if len(sys.argv) != 1:
        drdcommon.error("Wrong number of args. Just need std values in stdin.")
        return

    fd = drdcommon.xopen("-")
    try:
        std, counts = process_data(fd)
        title = "std dev freq of var allele ratios"

        drdplots.scatter_plot("std.dist.png",
                              std,
                              log_it(counts, 10),
                              title=title,
                              xlabel="std deviation",
                              ylabel="log10(counts)",
                              dot_size=10)
    finally:
        # Close the stream even when processing or plotting raises.
        fd.close()
Beispiel #28
0
def main():
    """Call compute_ratios() with the windows stream and a bam file name.

    Command line: <bam_name>. The windows stream comes from
    drdcommon.xopen("-"). A wrong argument count, a false
    drdcommon.data_in_stdin() or a bam path that is not a file is
    reported through drdcommon.error().
    """
    argc = len(sys.argv)
    if argc != 2:
        drdcommon.error("Wrong # of args", usage)

    has_input = drdcommon.data_in_stdin()
    if not has_input:
        drdcommon.error("No data in stdin.", usage)

    windows = drdcommon.xopen("-")
    bam_name = sys.argv[1]
    bam_found = os.path.isfile(bam_name)
    if not bam_found:
        drdcommon.error("Invalid bam file.", usage)
    compute_ratios(windows, bam_name)
Beispiel #29
0
def main():
    """Open a bam file and hand it to print_bins() for one chromosome.

    Command line: <n_reads> <bam_name> <target_chrm>; n_reads is
    converted with int(). A wrong argument count, a bam path that is not
    a file, or a chromosome missing from the mapping returned by
    gen_chrm_lenghts() is reported through drdcommon.error().
    """
    if len(sys.argv) != 4:
        drdcommon.error("Wrong # of args", usage)
    n_reads, bam_name, target_chrm = int(sys.argv[1]), sys.argv[2], sys.argv[3]
    if not os.path.isfile(bam_name):
        drdcommon.error("Invalid bam file.", usage)

    samfile = pysam.Samfile(bam_name, "rb")
    try:
        # Only the name -> length mapping is used here.
        _, chrm_name_to_length = gen_chrm_lenghts(samfile)
        if target_chrm not in chrm_name_to_length:
            drdcommon.error("Chrm not present in bam header.", usage)
        print_bins(samfile, n_reads, bam_name, target_chrm, chrm_name_to_length)
    finally:
        # Single close point, reached on success, error and exception.
        samfile.close()
Beispiel #30
0
def main():
  """Open a bam file and hand it to print_bins() for one chromosome.

  Command line: <n_reads> <bam_name> <target_chrm>; n_reads is
  converted with int(). A wrong argument count, a bam path that is not
  a file, or a chromosome missing from the mapping returned by
  gen_chrm_lenghts() is reported through drdcommon.error().
  """
  if len(sys.argv) != 4:
    drdcommon.error("Wrong # of args", usage)
  n_reads, bam_name, target_chrm = int(sys.argv[1]), sys.argv[2], sys.argv[3]
  if not os.path.isfile(bam_name):
    drdcommon.error("Invalid bam file.", usage)

  samfile = pysam.Samfile(bam_name, "rb")
  try:
    # Only the name -> length mapping is used here.
    _, chrm_name_to_length = gen_chrm_lenghts(samfile)
    if target_chrm not in chrm_name_to_length:
      drdcommon.error("Chrm not present in bam header.", usage)
    print_bins(samfile, n_reads, bam_name, target_chrm, chrm_name_to_length)
  finally:
    # Single close point, reached on success, error and exception.
    samfile.close()
Beispiel #31
0
def main():
  """Build a JoinData from the two command line parameters and run it.

  Command line: <sam_fn> <pattern_fn_hits>. Any other argument count is
  reported through drdcommon.error().
  """
  if len(sys.argv) != 3:
    drdcommon.error("Incorrect # of params.", usage)
    return

  sam_fn = sys.argv[1]
  pattern_fn_hits = sys.argv[2]
  joiner = JoinData(sam_fn, pattern_fn_hits)
  joiner.do_work()
Beispiel #32
0
 def __init__(self, h):
   """Populate the instance from `h` and validate its id and path.

   `h` is forwarded to self.set_attr(). An empty-string id or a false
   result from self.valid_path() is reported through common.error().
   """
   self.set_attr(h)

   id_is_blank = self.id == ''
   if id_is_blank:
     common.error('I cannot create a bam without id ')

   if self.valid_path():
     return
   details = (self.path, self.id)
   common.error('Invalid path [%s] for bam with id = [%s]' % details)
Beispiel #33
0
 def __init__(self, fn):
   """Read the table in `fn` with pandas and check its mandatory columns.

   The table is stored in self.df. Each name in self.MANDATORY_COLS that
   is absent from its columns is reported through common.error().
   """
   self.df = pd.read_table(fn)
   for col_name in self.MANDATORY_COLS:
     if col_name in self.df.columns:
       continue
     common.error("I couldn't find column [%s] in project tsv" % col_name)
Beispiel #34
0
def main():
  """Call do_work() with the single command line parameter.

  Command line: <pattern>. Any other argument count is reported through
  drdcommon.error().
  """
  if len(sys.argv) != 2:
    drdcommon.error("Incorrect # of params.", usage)
    return

  do_work(sys.argv[1])
Beispiel #35
0
def main():
    """Call do_work() with the single command line parameter.

    Command line: <pattern>. Any other argument count is reported through
    drdcommon.error().
    """
    if len(sys.argv) != 2:
        drdcommon.error("Incorrect # of params.", usage)
        return

    do_work(sys.argv[1])
Beispiel #36
0
  def __load_vcf(self):
    """Wrap self.fd_vcf in a Vcf, load its meta header and validate it.

    When self.drop is set and no coordinates file is in use, a vcf with
    fewer than two samples is reported through drdcommon.error().
    """
    vcf = Vcf(self.fd_vcf)
    self.vcf = vcf
    self.vcf.load_meta_header()

    # Guard clauses in the same evaluation order as the combined test.
    if not self.drop:
      return
    if self.coordinates_in_file:
      return
    if self.vcf.num_of_samples < 2:
      drdcommon.error("I need a population level vcf in order to drop species snps.")
Beispiel #37
0
  def check_logic(self):
    """Validate the parsed options/arguments and derive dependent settings.

    Reads self.options and self.args. Sets options.coordinates_in_file
    (and options.drop when a coordinates file is given) and, when no
    positional arguments are present, self.exp_type to 'wes', 'wgs' or
    'null'. Every violation is reported through drdcommon.error().
    """
    options, args = self.options, self.args

    if not options.vcf_fn:
      drdcommon.error("Need vcf file.")

    vcf_from_stdin = options.vcf_fn == '-'
    if vcf_from_stdin and not drdcommon.data_in_stdin():
      drdcommon.error("No data in stdin.")

    if not vcf_from_stdin and not os.path.isfile(options.vcf_fn):
      drdcommon.error("Vcf file does not exists.")

    if options.coor_fn:
      coor_from_stdin = options.coor_fn == '-'
      if coor_from_stdin and vcf_from_stdin:
        drdcommon.error("I cannot read two streams from stdin.")
      # '-' is handled like the vcf option: it is not a path on disk, so
      # only real file names go through the existence check. Previously
      # '-' always failed here, although the check above anticipates it.
      # NOTE(review): confirm the consumer of coor_fn accepts '-'.
      if not coor_from_stdin and not os.path.isfile(options.coor_fn):
        drdcommon.error("coor file does not exists.")
      options.drop = True
      options.coordinates_in_file = True
    else:
      options.coordinates_in_file = False

    if len(args) == 0:
      if options.wes:
        self.exp_type = 'wes'
      elif options.wgs:
        self.exp_type = 'wgs'
      else:
        self.exp_type = 'null'
        # An unset experiment type is tolerated only with list_s_snps.
        if not self.options.list_s_snps:
          drdcommon.error("Experiment type not set.")
    else:
      drdcommon.error("Incorrect # of params.")
Beispiel #38
0
 def __init__(self, fn):
     """Read the table in `fn` with pandas and check its mandatory columns.

     The table is stored in self.df. Each name in self.MANDATORY_COLS
     that is absent from its columns is reported through common.error().
     """
     self.df = pd.read_table(fn)
     for col_name in self.MANDATORY_COLS:
         if col_name in self.df.columns:
             continue
         common.error("I couldn't find column [%s] in project tsv" % col_name)
Beispiel #39
0
    def check_logic(self):
        """Validate the parsed options/arguments and derive dependent settings.

        Reads self.options and self.args. Sets options.coordinates_in_file
        (and options.drop when a coordinates file is given) and, when no
        positional arguments are present, self.exp_type to 'wes', 'wgs'
        or 'null'. Every violation is reported through drdcommon.error().
        """
        options, args = self.options, self.args

        if not options.vcf_fn:
            drdcommon.error("Need vcf file.")

        vcf_from_stdin = options.vcf_fn == '-'
        if vcf_from_stdin and not drdcommon.data_in_stdin():
            drdcommon.error("No data in stdin.")

        if not vcf_from_stdin and not os.path.isfile(options.vcf_fn):
            drdcommon.error("Vcf file does not exists.")

        if options.coor_fn:
            coor_from_stdin = options.coor_fn == '-'
            if coor_from_stdin and vcf_from_stdin:
                drdcommon.error("I cannot read two streams from stdin.")
            # '-' is handled like the vcf option: it is not a path on
            # disk, so only real file names go through the existence
            # check. Previously '-' always failed here, although the
            # check above anticipates it.
            # NOTE(review): confirm the consumer of coor_fn accepts '-'.
            if not coor_from_stdin and not os.path.isfile(options.coor_fn):
                drdcommon.error("coor file does not exists.")
            options.drop = True
            options.coordinates_in_file = True
        else:
            options.coordinates_in_file = False

        if len(args) == 0:
            if options.wes:
                self.exp_type = 'wes'
            elif options.wgs:
                self.exp_type = 'wgs'
            else:
                self.exp_type = 'null'
                # An unset experiment type is tolerated only with
                # list_s_snps.
                if not self.options.list_s_snps:
                    drdcommon.error("Experiment type not set.")
        else:
            drdcommon.error("Incorrect # of params.")
Beispiel #40
0
def main():
    """Build a JoinData from the two command line parameters and run it.

    Command line: <sam_fn> <pattern_fn_hits>. Any other argument count is
    reported through drdcommon.error().
    """
    if len(sys.argv) != 3:
        drdcommon.error("Incorrect # of params.", usage)
        return

    sam_fn = sys.argv[1]
    pattern_fn_hits = sys.argv[2]
    joiner = JoinData(sam_fn, pattern_fn_hits)
    joiner.do_work()