Example #1
def extract_unaligned_lomem(b6_fname, in_fname, out_fname):
    # extracts unaligned reads using a small memory footprint--probably the
    # smallest possible that doesn't require reads to be sorted

    b6_cache = set([])
    in_cache = {}

    with open(b6_fname, "r") as b6_fp, \
         open(in_fname, "r") as in_fp, \
         open(out_fname, "w") as out_fp:
        for b6_l, in_r in izip_longest(b6_fp,
                                       fast_fastq(in_fp),
                                       fillvalue=False):
            try:
                b6_l = b6_l.split("\t", 1)
                del in_cache[b6_l[0]]
            except KeyError:
                b6_cache.add(b6_l[0])
            except AttributeError:
                pass

            try:
                b6_cache.remove(in_r.id)
            except KeyError:
                in_cache[in_r.id] = in_r.raw()
            except AttributeError:
                pass

        # dump all the reads that remain
        out_fp.write("".join(in_cache.values()))
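All of these examples lean on a fast_fastq parser and read objects that are not shown on this page, plus the original scripts' module-level imports (sys, os, gzip, zipfile, tempfile, and the Python 2 itertools.izip / izip_longest). As a rough sketch only, the interface they assume might look like the following; the class name, the quality attribute, and the exact parsing are inferred from how the reads are used in the examples, not taken from the project:

class FastqRead(object):
    # minimal read object: id/sequence/quality fields plus raw() re-serialization
    def __init__(self, id, sequence, quality):
        self.id = id              # read name without the leading "@"
        self.sequence = sequence
        self.quality = quality

    def raw(self):
        # re-emit the read as a four-line FASTQ record
        return "@%s\n%s\n+\n%s\n" % (self.id, self.sequence, self.quality)

def fast_fastq(fp):
    # yield FastqRead objects from any iterable of FASTQ lines (an open file
    # handle, a list from readlines(), etc.); assumes well-formed 4-line records
    record = []

    for line in fp:
        record.append(line.rstrip("\n"))

        if len(record) == 4:
            yield FastqRead(record[0][1:], record[1], record[3])
            record = []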
Example #2
def split_file(input_fp, fps, dests, formats, read_index, stats):
  for read in fast_fastq(input_fp):
    sample = read.id.split("_")[0]

    try:
      read_dest = dests[sample]
    except KeyError:
      #sys.stderr.write("read: %s notfound\n" % read.id)
      stats["notfound"]["matched"] += 1
      fps["notfound"][read_index].write(read.raw())
      continue

    # fastq_or_fasta min_read_length max_read_length regexp
    #sys.stderr.write("read: %s -> %s\n" % (read.id, read_dest))
    read_format = formats[read_dest]

    # read too short
    if read_format[1] != -1 and len(read.sequence) < read_format[1]:
      stats[read_dest]["short"] += 1
      #sys.stderr.write("  len(read) = %s < %s\n" % (len(read.sequence), read_format[1]))
      continue

    # read too long
    if read_format[2] != -1 and len(read.sequence) > read_format[2]:
      stats[read_dest]["long"] += 1
      #sys.stderr.write("  len(read) = %s > %s\n" % (len(read.sequence), read_format[2]))
      continue

    # read didn't match regular expression
    if read_format[3]:
      if not read_format[3].match(read.sequence):
        stats[read_dest]["regexp"] += 1
        #sys.stderr.write("  regexp match failed\n")
        continue

    # convert to fasta or don't
    if read_format[0] == "fasta":
      #sys.stderr.write("  convert to fasta\n")
      raw_read = ">%s\n%s\n" % (read.id, read.sequence)
    else:
      raw_read = read.raw()

    # write it out
    stats[read_dest]["matched"] += 1
    #sys.stderr.write("  write to %s\n\n" % fps[read_dest][read_index])
    fps[read_dest][read_index].write(raw_read) 
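split_file() only makes sense alongside the lookup tables it indexes. The layout below is inferred from how the function uses its arguments (the comment in the code gives the tuple order for formats); the sample names, destination names, and file names are hypothetical:

import re

# sample prefix (the part of the read id before "_") -> destination name
dests = {"s1": "plateA", "s2": "plateB"}

# destination -> (fastq_or_fasta, min_read_length, max_read_length, compiled regexp or None);
# -1 disables a length check, a falsy regexp disables the pattern check
formats = {"plateA": ("fastq", 100, 600, re.compile("^[ACGT]+$")),
           "plateB": ("fasta", -1, -1, None)}

# destination -> one open handle per read index; "notfound" catches unmapped samples
fps = {"plateA":   [open("plateA.r1.fastq", "w")],
       "plateB":   [open("plateB.r1.fasta", "w")],
       "notfound": [open("notfound.r1.fastq", "w")]}

# per-destination counters that split_file() increments
stats = dict((dest, {"matched": 0, "short": 0, "long": 0, "regexp": 0})
             for dest in ("plateA", "plateB", "notfound"))

with open("pooled.r1.fastq", "r") as input_fp:
    split_file(input_fp, fps, dests, formats, 0, stats)

for handles in fps.values():
    for fp in handles:
        fp.close()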
Example #3
def pool_unzip(fname_list, sample_list, status_msg):
  # accepts a list of gzip'd filenames and combines them all into
  # a single unzipped FASTQ, renaming reads to samples

  pooled_file = tempfile.NamedTemporaryFile(delete=False)

  for fname, sample in zip(fname_list, sample_list):
    sys.stderr.write("\rPooling %s -> %s: Sample %s" % (status_msg, pooled_file.name, sample))

    read_num = 1

    for read in fast_fastq(gzip.GzipFile(fname).readlines()):
      read.id = "%s_%s" % (sample, read_num)
      pooled_file.write(read.raw())
      read_num += 1

  sys.stderr.write("\n")

  return pooled_file
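pool_unzip() returns the NamedTemporaryFile itself, created with delete=False, so the caller reads it back by name and removes it when finished (the final main() below does this when reads are pooled from a directory). A sketch of that round trip, with hypothetical input names and an explicit flush added here because the pooled handle is still open for writing:

import os

fwd = pool_unzip(["plate1_R1.fastq.gz", "plate2_R1.fastq.gz"],  # hypothetical inputs
                 ["plate1", "plate2"],
                 "Read 1")
fwd.flush()  # push buffered writes to disk before reading the file back by name

for read in fast_fastq(open(fwd.name, "r")):
    pass  # downstream filtering/demultiplexing goes here

os.unlink(fwd.name)  # delete=False means cleanup is the caller's job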
Example #4
def main():
  parse_options(sys.argv[1:])

  # load barcodes
  fwd_bcs, rev_bcs = load_barcodes(args[0])

  if options.use_plate:
    plate = load_plate(args[1])
    barcode_to_sample = map_bc_to_sample(plate, fwd_bcs, rev_bcs)

  if options.zip_fname:
    # open zipfile
    miseq_zip = zipfile.ZipFile(options.zip_fname)

    # find pairs
    pairs = dict([("_".join(x.split("/")[-1].split(".")[0].split("_")[:-2]), (x, y)) for x, y in find_pairs(miseq_zip)])

    if options.sample_name not in pairs:
      raise ValueError("Could not find %s in %s!" % (options.sample_name, options.zip_fname))

    # extract read files
    sys.stderr.write("Extracting reads from zipfile...\n")

    fwd = fetch_unzip(miseq_zip, pairs[options.sample_name][0])
    rev = fetch_unzip(miseq_zip, pairs[options.sample_name][1])
  elif options.fwd_fname and options.rev_fname:
    fwd = open(options.fwd_fname, "r")
    rev = open(options.rev_fname, "r")

  barcode_to_count = {}

  if options.merge or options.merged_fname:
    with open(os.path.join(options.output_dir, "merged_reads.assigned.fastq"), "w") as assigned, \
         open(os.path.join(options.output_dir, "merged_reads.unassigned.fastq"), "w") as unassigned:
      read_length_bins = {}

      total_reads = 0.0
      quality_reads = 0.0

      if options.merge:
        # run pear to merge reads
        sys.stderr.write("Merging reads...\n")
        merged, stats, raw_pear_log = pear(fwd.name, rev.name, options.mem_size, options.num_threads)

        open(os.path.join(options.output_dir, "pear.log"), "w").write(raw_pear_log)

        if stats["assembled_reads"] < options.min_merged_perc:
          sys.stderr.write("  Warning: only %.02f%% of reads assembled\n" % stats["assembled_reads"])

        merged_fastq = fast_fastq(open("%s.assembled.fastq" % merged.name, "r"))
      elif options.merged_fname:
        merged_fastq = fast_fastq(open(options.merged_fname, "r"))

      # read through fastq
      sys.stderr.write("Filtering and demultiplexing reads...\n")

      for merged_read in merged_fastq:
        total_reads += 1

        # check that read passes quality filter
        if not qual_filter(merged_read, options.min_qual, options.phred_offset, options.max_errors):
          continue

        # keep track of stats
        quality_reads += 1

        try:
          read_length_bins[len(merged_read.sequence)] += 1
        except KeyError:
          read_length_bins[len(merged_read.sequence)] = 1

        # demultiplex
        dm_out = demultiplex(merged_read, fwd_bcs.values(), None, rev_bcs.values(), options.max_mismatch)

        if dm_out == False:
          # strip pair info from read and write to unassigned file
          merged_read.id = merged_read.id.split(" ")[0]
          unassigned.write(merged_read.raw())
        else:
          # rename read to barcodes used and write to assigned file
          trimmed_read, _, f_bc, r_bc = dm_out

          bc_name = "%s_%s" % (f_bc, r_bc)

          try:
            barcode_to_count[bc_name] += 1
          except KeyError:
            barcode_to_count[bc_name] = 1

          # we want to rename to sample names, and a barcode was found to match
          # something in barcodes.txt, but that particular barcode is not in use
          # in our plate layout. in this case, ignore this read
          if options.use_plate and bc_name not in barcode_to_sample:
            # strip pair info from read and write to unassigned file
            merged_read.id = merged_read.id.split(" ")[0]
            unassigned.write(merged_read.raw())

            continue

          if options.use_plate:
            trimmed_read.id = "%s_%s" % (barcode_to_sample[bc_name], barcode_to_count[bc_name])
          else:
            trimmed_read.id = "%s_%s" % (bc_name, barcode_to_count[bc_name])

          assigned.write(trimmed_read.raw())

      if total_reads > 0:
        if quality_reads / total_reads < options.min_qual_perc:
          sys.stderr.write("  Warning: only %.02f%% of reads passed quality filter\n" % (quality_reads * 100 / total_reads))

      sys.stderr.write("\nSummary")
      sys.stderr.write("\n  Total reads:       %d" % total_reads)
      sys.stderr.write("\n  Quality reads:     %d" % quality_reads)
      sys.stderr.write("\n  Min read length:   %d" % min(read_length_bins.keys()))
      sys.stderr.write("\n  Mean read length:  %d" % mean(read_length_bins.keys()))
      sys.stderr.write("\n  Max read length:   %d\n" % max(read_length_bins.keys()))
      sys.stderr.write("\n  Assigned reads:    %d" % sum(barcode_to_count.values()))
      sys.stderr.write("\n  Unassigned reads:  %d" % (quality_reads - sum(barcode_to_count.values())))
      sys.stderr.write("\n  Avg reads/barcode: %d\n" % mean(barcode_to_count.values()))

      with open(os.path.join(options.output_dir, "read_lengths.log"), "w") as fp:
        for read_length, count in sorted(read_length_bins.items(), key=lambda x: x[0], reverse=True):
          fp.write("%s\t%s\n" % (read_length, count))

      if options.merge:
        # remove temporary files
        os.unlink(merged.name)
        os.unlink("%s.assembled.fastq" % merged.name)
        os.unlink("%s.discarded.fastq" % merged.name)
        os.unlink("%s.unassembled.forward.fastq" % merged.name)
        os.unlink("%s.unassembled.reverse.fastq" % merged.name)
  else:
    with open(os.path.join(options.output_dir, "fwd_reads.assigned.fastq"), "w") as f_assigned, \
         open(os.path.join(options.output_dir, "rev_reads.assigned.fastq"), "w") as r_assigned, \
         open(os.path.join(options.output_dir, "fwd_reads.unassigned.fastq"), "w") as f_unassigned, \
         open(os.path.join(options.output_dir, "rev_reads.unassigned.fastq"), "w") as r_unassigned:
      total_reads = 0.0
      quality_reads = 0.0

      # read through forward and reverse fastq simultaneously
      sys.stderr.write("Filtering and demultiplexing reads...\n")

      for f_read, r_read in izip(fast_fastq(open(fwd.name, "r")), fast_fastq(open(rev.name, "r"))):
        total_reads += 1

        # check that read passes quality filter
        if qual_filter(f_read, options.min_qual, options.phred_offset, options.max_errors) == False or \
           qual_filter(r_read, options.min_qual, options.phred_offset, options.max_errors) == False:
          continue

        quality_reads += 1

        # demultiplex
        dm_out = demultiplex(f_read, fwd_bcs.values(), r_read, rev_bcs.values(), options.max_mismatch)

        if dm_out == False:
          # strip pair info from read and write to unassigned file
          f_read.id = f_read.id.split(" ")[0]
          f_unassigned.write(f_read.raw())

          r_read.id = r_read.id.split(" ")[0]
          r_unassigned.write(r_read.raw())
        else:
          # rename reads to barcodes used and write to assigned file
          trimmed_f_read, trimmed_r_read, f_bc, r_bc = dm_out

          bc_name = "%s_%s" % (f_bc, r_bc)

          try:
            barcode_to_count[bc_name] += 1
          except KeyError:
            barcode_to_count[bc_name] = 1

          # we want to rename to sample names, and a barcode was found to match
          # something in barcodes.txt, but that particular barcode is not in use
          # in our plate layout. in this case, ignore this read
          if options.use_plate and bc_name not in barcode_to_sample:
            # strip pair info from read and write to unassigned file
            f_read.id = f_read.id.split(" ")[0]
            f_unassigned.write(f_read.raw())

            r_read.id = r_read.id.split(" ")[0]
            r_unassigned.write(r_read.raw())

            continue

          if options.use_plate:
            trimmed_f_read.id = "%s_%s" % (barcode_to_sample[bc_name], barcode_to_count[bc_name])
          else:
            trimmed_f_read.id = "%s_%s" % (bc_name, barcode_to_count[bc_name])

          f_assigned.write(trimmed_f_read.raw())

          if options.use_plate:
            trimmed_r_read.id = "%s_%s" % (barcode_to_sample[bc_name], barcode_to_count[bc_name])
          else:
            trimmed_r_read.id = "%s_%s" % (bc_name, barcode_to_count[bc_name])

          r_assigned.write(trimmed_r_read.raw())

      if total_reads > 0:
        if quality_reads / total_reads < options.min_qual_perc:
          sys.stderr.write("  Warning: only %.02f%% of reads passed quality filter\n" % (quality_reads * 100 / total_reads))

      sys.stderr.write("\nSummary")
      sys.stderr.write("\n  Total pairs:       %d" % total_reads)
      sys.stderr.write("\n  Quality pairs:     %d" % quality_reads)
      sys.stderr.write("\n  Assigned pairs:    %d" % sum(barcode_to_count.values()))
      sys.stderr.write("\n  Unassigned pairs:  %d" % (quality_reads - sum(barcode_to_count.values())))
      sys.stderr.write("\n  Avg pairs/barcode: %d\n" % int(mean(barcode_to_count.values())))

  sorted_barcode_to_count = sorted(barcode_to_count.items(), key=lambda x: x[1], reverse=True)

  with open(os.path.join(options.output_dir, "barcode_to_count.log"), "w") as fp:
    if options.use_plate:
      for barcode, count in sorted_barcode_to_count:
        try:
          sample_name = barcode_to_sample[barcode]
        except KeyError:
          sample_name = "BARCODE_NOT_IN_PLATE_LAYOUT"

        fp.write("%s\t%s\t%s\n" % (barcode, sample_name, count))
    else:
      for barcode, count in sorted_barcode_to_count:
        fp.write("%s\t%s\n" % (barcode, count))

  # remove temporary files
  if options.zip_fname:
    os.unlink(fwd.name)
    os.unlink(rev.name)
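Both of the main() examples (the one above and the longer one below) start with parse_options(sys.argv[1:]) and then read module-level options and args, so the option parser is expected to populate those globals. That helper is not shown here; a minimal sketch of the pattern, using optparse and a subset of the attributes the code reads (the flag strings and defaults are guesses, not the project's real interface):

from optparse import OptionParser

options = None
args = None

def parse_options(arguments):
    # fill in the module-level options/args that main() reads
    global options, args

    parser = OptionParser(usage="%prog [options] <barcodes.txt> [plate.txt]")

    parser.add_option("--zip", dest="zip_fname", default=False)
    parser.add_option("--sample", dest="sample_name", default=False)
    parser.add_option("--fwd", dest="fwd_fname", default=False)
    parser.add_option("--rev", dest="rev_fname", default=False)
    parser.add_option("--output-dir", dest="output_dir", default=".")
    parser.add_option("--use-plate", dest="use_plate", action="store_true", default=False)
    parser.add_option("--merge", dest="merge", action="store_true", default=False)
    parser.add_option("--merged", dest="merged_fname", default=False)
    parser.add_option("--min-qual", dest="min_qual", type="int", default=20)

    options, args = parser.parse_args(arguments)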
Example #5
def main():
    parse_options(sys.argv[1:])

    # load barcodes
    if options.dir_name == False:
        fwd_bcs, rev_bcs = load_barcodes(args[0])

    if options.dir_name == False and options.use_plate:
        plate = load_plate(args[1])
        barcode_to_sample = map_bc_to_sample(plate, fwd_bcs, rev_bcs)

    if options.zip_fname and options.sample_name:
        # open zipfile
        miseq_zip = zipfile.ZipFile(options.zip_fname)

        # find pairs
        pairs = dict([
            ("_".join(x.split("/")[-1].split(".")[0].split("_")[:-2]), (x, y))
            for x, y in find_pairs(miseq_zip)
        ])

        if options.sample_name not in pairs:
            raise ValueError("Could not find %s in %s!" %
                             (options.sample_name, options.zip_fname))

        # extract read files
        sys.stderr.write("Extracting reads from zipfile...\n")

        fwd = fetch_unzip(miseq_zip, pairs[options.sample_name][0])
        rev = fetch_unzip(miseq_zip, pairs[options.sample_name][1])
    elif options.fwd_fname and options.rev_fname:
        fwd = open(options.fwd_fname, "r")
        rev = open(options.rev_fname, "r")
    elif options.dir_name and options.multi_plates:
        # find pairs in directory
        fwd_files, rev_files, sample_list = find_subdirs(
            options.dir_name, options.multi_plates)

        # combine all desired files into new fwd and rev files
        fwd = pool_unzip(fwd_files, sample_list, "Read 1")
        rev = pool_unzip(rev_files, sample_list, "Read 2")

    barcode_to_count = {}

    if options.merge or options.merged_fname:
        with open(os.path.join(options.output_dir, "merged_reads.assigned.fastq"), "w") as assigned, \
             open(os.path.join(options.output_dir, "merged_reads.unassigned.fastq"), "w") as unassigned:
            read_length_bins = {}

            total_reads = 0.0
            quality_reads = 0.0
            nbj_trim_fail = 0.0

            if options.merge:
                # run pear to merge reads
                sys.stderr.write("Merging reads...\n")
                merged, stats, raw_pear_log = pear(fwd.name, rev.name,
                                                   options.mem_size,
                                                   options.num_threads)

                open(os.path.join(options.output_dir, "pear.log"),
                     "w").write(raw_pear_log)

                if stats["assembled_reads"] < options.min_merged_perc:
                    sys.stderr.write(
                        "  Warning: only %.02f%% of reads assembled\n" %
                        stats["assembled_reads"])

                merged_fastq = fast_fastq(
                    open("%s.assembled.fastq" % merged.name, "r"))
            elif options.merged_fname:
                merged_fastq = fast_fastq(open(options.merged_fname, "r"))

            # read through fastq
            sys.stderr.write("Filtering and demultiplexing reads...\n")

            for merged_read in merged_fastq:
                total_reads += 1

                # check that read passes quality filter
                if not qual_filter(merged_read, options.min_qual,
                                   options.phred_offset, options.max_errors):
                    continue

                # keep track of stats
                quality_reads += 1

                try:
                    read_length_bins[len(merged_read.sequence)] += 1
                except KeyError:
                    read_length_bins[len(merged_read.sequence)] = 1

                if options.dir_name:
                    sample = merged_read.id.split("_")[0]

                    # renumber reads so that they refer to new merged reads
                    try:
                        barcode_to_count[sample] += 1
                    except KeyError:
                        barcode_to_count[sample] = 1

                    merged_read.id = "%s_%s" % (sample,
                                                barcode_to_count[sample])

                    if options.trim_nbj:
                        nbj_read = trim_primers(merged_read,
                                                NBJ_V3V4_FWD_PRIMER,
                                                NBJ_V3V4_REV_PRIMER,
                                                options.max_mismatch)

                        if nbj_read:
                            assigned.write(nbj_read.raw())
                        else:
                            nbj_trim_fail += 1
                            merged_read.id = "%s-NBJTRIMFAIL" % merged_read.id
                            unassigned.write(merged_read.raw())
                    else:
                        assigned.write(merged_read.raw())
                else:
                    # demultiplex
                    dm_out = demultiplex(merged_read, fwd_bcs.values(), None,
                                         rev_bcs.values(),
                                         options.max_mismatch)

                    if dm_out == False:
                        # strip pair info from read and write to unassigned file
                        merged_read.id = merged_read.id.split(" ")[0]
                        unassigned.write(merged_read.raw())
                    else:
                        # rename read to barcodes used and write to assigned file
                        trimmed_read, _, f_bc, r_bc = dm_out

                        bc_name = "%s_%s" % (f_bc, r_bc)

                        try:
                            barcode_to_count[bc_name] += 1
                        except KeyError:
                            barcode_to_count[bc_name] = 1

                        # we want to rename to sample names, and a barcode was found to match
                        # something in barcodes.txt, but that particular barcode is not in use
                        # in our plate layout. in this case, ignore this read
                        if options.use_plate and bc_name not in barcode_to_sample:
                            # strip pair info from read and write to unassigned file
                            merged_read.id = merged_read.id.split(" ")[0]
                            unassigned.write(merged_read.raw())

                            continue

                        if options.use_plate:
                            trimmed_read.id = "%s_%s" % (
                                barcode_to_sample[bc_name],
                                barcode_to_count[bc_name])
                        else:
                            trimmed_read.id = "%s_%s" % (
                                bc_name, barcode_to_count[bc_name])

                        if options.trim_nbj:
                            nbj_read = trim_primers(trimmed_read,
                                                    NBJ_V3V4_FWD_PRIMER,
                                                    NBJ_V3V4_REV_PRIMER,
                                                    options.max_mismatch)

                            if nbj_read:
                                assigned.write(nbj_read.raw())
                            else:
                                nbj_trim_fail += 1
                                trimmed_read.id = "%s-NBJTRIMFAIL" % trimmed_read.id
                                unassigned.write(trimmed_read.raw())
                        else:
                            assigned.write(trimmed_read.raw())

            if total_reads > 0:
                if quality_reads / total_reads < options.min_qual_perc:
                    sys.stderr.write(
                        "  Warning: only %.02f%% of reads passed quality filter\n"
                        % (quality_reads * 100 / total_reads))

            sys.stderr.write("\nSummary")
            sys.stderr.write("\n  Total reads:       %d" % total_reads)
            sys.stderr.write("\n  Quality reads:     %d" % quality_reads)

            if options.trim_nbj:
                sys.stderr.write("\n  Failed NBJ Trim:   %d" % nbj_trim_fail)

            sys.stderr.write("\n  Min read length:   %d" %
                             min(read_length_bins.keys()))
            sys.stderr.write("\n  Mean read length:  %d" %
                             mean(read_length_bins.keys()))
            sys.stderr.write("\n  Max read length:   %d\n" %
                             max(read_length_bins.keys()))
            sys.stderr.write("\n  Assigned reads:    %d" %
                             sum(barcode_to_count.values()))
            sys.stderr.write("\n  Unassigned reads:  %d" %
                             (quality_reads - sum(barcode_to_count.values())))
            sys.stderr.write("\n  Avg reads/barcode: %d\n" %
                             mean(barcode_to_count.values()))

            with open(os.path.join(options.output_dir, "read_lengths.log"),
                      "w") as fp:
                for read_length, count in sorted(read_length_bins.items(),
                                                 key=lambda x: x[0],
                                                 reverse=True):
                    fp.write("%s\t%s\n" % (read_length, count))

            if options.merge:
                # remove temporary files
                os.unlink(merged.name)
                os.unlink("%s.assembled.fastq" % merged.name)
                os.unlink("%s.discarded.fastq" % merged.name)
                os.unlink("%s.unassembled.forward.fastq" % merged.name)
                os.unlink("%s.unassembled.reverse.fastq" % merged.name)
    else:
        with open(os.path.join(options.output_dir, "fwd_reads.assigned.fastq"), "w") as f_assigned, \
             open(os.path.join(options.output_dir, "rev_reads.assigned.fastq"), "w") as r_assigned, \
             open(os.path.join(options.output_dir, "fwd_reads.unassigned.fastq"), "w") as f_unassigned, \
             open(os.path.join(options.output_dir, "rev_reads.unassigned.fastq"), "w") as r_unassigned:
            total_reads = 0.0
            quality_reads = 0.0

            # read through forward and reverse fastq simultaneously
            sys.stderr.write("Filtering and demultiplexing reads...\n")

            for f_read, r_read in izip(fast_fastq(open(fwd.name, "r")),
                                       fast_fastq(open(rev.name, "r"))):
                total_reads += 1

                # check that read passes quality filter
                if qual_filter(f_read, options.min_qual, options.phred_offset, options.max_errors) == False or \
                   qual_filter(r_read, options.min_qual, options.phred_offset, options.max_errors) == False:
                    continue

                quality_reads += 1

                if options.dir_name:
                    # count reads
                    sample1 = f_read.id.split("_")[0]
                    sample2 = r_read.id.split("_")[0]

                    assert sample1 == sample2

                    try:
                        barcode_to_count[sample1] += 1
                    except KeyError:
                        barcode_to_count[sample1] = 1

                    # just write the filtered reads to their final destination
                    f_assigned.write(f_read.raw())
                    r_assigned.write(r_read.raw())
                else:
                    # demultiplex
                    dm_out = demultiplex(f_read, fwd_bcs.values(), r_read,
                                         rev_bcs.values(),
                                         options.max_mismatch)

                    if dm_out == False:
                        # strip pair info from read and write to unassigned file
                        f_read.id = f_read.id.split(" ")[0]
                        f_unassigned.write(f_read.raw())

                        r_read.id = r_read.id.split(" ")[0]
                        r_unassigned.write(r_read.raw())
                    else:
                        # rename reads to barcodes used and write to assigned file
                        trimmed_f_read, trimmed_r_read, f_bc, r_bc = dm_out

                        bc_name = "%s_%s" % (f_bc, r_bc)

                        try:
                            barcode_to_count[bc_name] += 1
                        except KeyError:
                            barcode_to_count[bc_name] = 1

                        # we want to rename to sample names, and a barcode was found to match
                        # something in barcodes.txt, but that particular barcode is not in use
                        # in our plate layout. in this case, ignore this read
                        if options.use_plate and bc_name not in barcode_to_sample:
                            # strip pair info from read and write to unassigned file
                            f_read.id = f_read.id.split(" ")[0]
                            f_unassigned.write(f_read.raw())

                            r_read.id = r_read.id.split(" ")[0]
                            r_unassigned.write(r_read.raw())

                            continue

                        if options.use_plate:
                            trimmed_f_read.id = "%s_%s" % (
                                barcode_to_sample[bc_name],
                                barcode_to_count[bc_name])
                        else:
                            trimmed_f_read.id = "%s_%s" % (
                                bc_name, barcode_to_count[bc_name])

                        f_assigned.write(trimmed_f_read.raw())

                        if options.use_plate:
                            trimmed_r_read.id = "%s_%s" % (
                                barcode_to_sample[bc_name],
                                barcode_to_count[bc_name])
                        else:
                            trimmed_r_read.id = "%s_%s" % (
                                bc_name, barcode_to_count[bc_name])

                        r_assigned.write(trimmed_r_read.raw())

            if total_reads > 0:
                if quality_reads / total_reads < options.min_qual_perc:
                    sys.stderr.write(
                        "  Warning: only %.02f%% of reads passed quality filter\n"
                        % (quality_reads * 100 / total_reads))

            sys.stderr.write("\nSummary")
            sys.stderr.write("\n  Total pairs:       %d" % total_reads)
            sys.stderr.write("\n  Quality pairs:     %d" % quality_reads)
            sys.stderr.write("\n  Assigned pairs:    %d" %
                             sum(barcode_to_count.values()))
            sys.stderr.write("\n  Unassigned pairs:  %d" %
                             (quality_reads - sum(barcode_to_count.values())))
            sys.stderr.write("\n  Avg pairs/barcode: %d\n" %
                             int(mean(barcode_to_count.values())))

    sorted_barcode_to_count = sorted(barcode_to_count.items(),
                                     key=lambda x: x[1],
                                     reverse=True)

    with open(os.path.join(options.output_dir, "barcode_to_count.log"),
              "w") as fp:
        if options.dir_name == False and options.use_plate:
            for barcode, count in sorted_barcode_to_count:
                try:
                    sample_name = barcode_to_sample[barcode]
                except KeyError:
                    sample_name = "BARCODE_NOT_IN_PLATE_LAYOUT"

                fp.write("%s\t%s\t%s\n" % (barcode, sample_name, count))
        else:
            for barcode, count in sorted_barcode_to_count:
                fp.write("%s\t%s\n" % (barcode, count))

    # remove temporary files
    if options.zip_fname or options.dir_name:
        os.unlink(fwd.name)
        os.unlink(rev.name)
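One last note on a pattern repeated throughout both main() functions: per-barcode read counts are built up with a try/increment/except KeyError block. An equivalent way to write the same bookkeeping, shown purely as an alternative and not as what the script does, is collections.defaultdict:

from collections import defaultdict

# same effect as the try/except KeyError counters above
barcode_to_count = defaultdict(int)

for bc_name in ("ACGT_TGCA", "ACGT_TGCA", "GGCC_AATT"):  # hypothetical barcode pairs
    barcode_to_count[bc_name] += 1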