def _collapse(in_file):
    """Collapse the reads of ``in_file`` and return the output path.

    The output name is ``append_stem(in_file, ".trimming")`` with every
    ``".gz"`` substring removed.  When that file already exists nothing is
    recomputed and the existing path is returned.
    """
    stemmed_name = append_stem(in_file, ".trimming")
    collapsed_fn = stemmed_name.replace(".gz", "")
    # Only do the work when a previous run has not produced the file yet.
    if not file_exists(collapsed_fn):
        unique_seqs = collapse(in_file)
        write_output(collapsed_fn, unique_seqs)
    return collapsed_fn
def collapse_fastq(args):
    """Collapse FASTQ files after adapter trimming.

    Reads ``args.fastq``, ``args.out`` and ``args.minimum``.  When
    ``_is_umi(args.fastq)`` is true the input is first collapsed into an
    intermediate ``<stem>_umi_trimmed.fastq`` inside ``args.out`` and that
    file is then collapsed again; otherwise the input is collapsed directly.
    The final result is written to ``<stem>_trimmed.fastq`` in ``args.out``.

    Returns:
        Path of the written ``_trimmed.fastq`` file.

    Raises:
        IOError: if reading or writing fails while collapsing; the original
            errno and message are logged before the error is raised.
    """
    try:
        # Base name of the input without its extension(s), shared by both
        # output file names.
        stem = splitext_plus(os.path.basename(args.fastq))[0]
        umi_fn = args.fastq
        if _is_umi(args.fastq):
            umis = collapse(args.fastq)
            umi_fn = os.path.join(args.out, stem + "_umi_trimmed.fastq")
            write_output(umi_fn, umis, args.minimum)
        seqs = collapse(umi_fn)
        out_file = stem + "_trimmed.fastq"
    except IOError as e:
        logger.error("I/O error({0}): {1}".format(e.errno, e.strerror))
        # Raising a plain string is itself a TypeError; raise a real
        # exception so callers see an I/O failure.
        raise IOError("Can not read file")
    out_file = os.path.join(args.out, out_file)
    write_output(out_file, seqs, args.minimum)
    return out_file
def _collapse(in_file):
    """
    Collapse reads into unique sequences with seqcluster.

    The result is written to ``append_stem(in_file, ".trimming")`` with
    every ``".gz"`` substring removed; an already existing output file is
    reused as is.  Returns the output path.
    """
    stemmed_name = append_stem(in_file, ".trimming")
    collapsed_fn = stemmed_name.replace(".gz", "")
    # Skip the collapse step when the output is already on disk.
    if not file_exists(collapsed_fn):
        unique_seqs = collapse(in_file)
        write_output(collapsed_fn, unique_seqs, minimum=1, size=16)
    return collapsed_fn
def test_umis(self):
    """Collapse the example UMI FASTQ and write the result to disk.

    Fails with ``ValueError`` unless the collapsed result has exactly two
    keys.  The output directory is wiped and recreated on every run.
    """
    from seqcluster.libs.fastq import collapse, write_output

    sample_path = os.path.abspath("data/examples/umis/sample.fastq")
    umis = collapse(sample_path)
    n_unique = len(umis.keys())
    if n_unique != 2:
        raise ValueError("umis didn't detect two unique sequences")

    # Start from an empty output directory.
    out_dir = "test/test_automated_output"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    target = os.path.join(out_dir, "umis.fastq")
    write_output(target, umis)
def collapse_fastq(args):
    """Collapse FASTQ files after adapter trimming.

    Reads ``args.fastq``, ``args.out`` and ``args.minimum``.  When
    ``_is_umi(args.fastq)`` is true the input is first collapsed into an
    intermediate ``<stem>_umi_trimmed.fastq`` inside ``args.out`` and that
    file is then collapsed again; otherwise the input is collapsed directly.
    The final result is written to ``<stem>_trimmed.fastq`` in ``args.out``.

    Returns:
        Path of the written ``_trimmed.fastq`` file.

    Raises:
        IOError: if reading or writing fails while collapsing; the original
            errno and message are logged before the error is raised.
    """
    try:
        # Base name of the input without its extension(s), shared by both
        # output file names.
        stem = splitext_plus(os.path.basename(args.fastq))[0]
        umi_fn = args.fastq
        if _is_umi(args.fastq):
            umis = collapse(args.fastq)
            umi_fn = os.path.join(args.out, stem + "_umi_trimmed.fastq")
            write_output(umi_fn, umis, args.minimum)
        seqs = collapse(umi_fn)
        out_file = stem + "_trimmed.fastq"
    except IOError as e:
        logger.error("I/O error({0}): {1}".format(e.errno, e.strerror))
        # Raising a plain string is itself a TypeError; raise a real
        # exception so callers see an I/O failure.
        raise IOError("Can not read file")
    out_file = os.path.join(args.out, out_file)
    write_output(out_file, seqs, args.minimum)
    return out_file
def _collapse(in_file):
    """Collapse the reads of ``in_file`` and return the output path.

    The output name is ``append_stem(in_file, ".trimming")`` with every
    ``".gz"`` substring removed.  The file is always (re)written.
    """
    unique_seqs = collapse(in_file)
    stemmed_name = append_stem(in_file, ".trimming")
    collapsed_fn = stemmed_name.replace(".gz", "")
    write_output(collapsed_fn, unique_seqs)
    return collapsed_fn
from seqcluster.libs.fastq import collapse, splitext_plus, write_output
from bcbio.distributed.transaction import file_transaction, tx_tmpdir
from bcbio.utils import (file_exists, append_stem, replace_directory,
                         symlink_plus, local_path_export)
from collections import Counter

if __name__ == "__main__":
    # Script entry point: collapse the FASTQ given as the first command-line
    # argument, then walk the collapsed file record by record.
    # `sys` is imported here because `sys.argv` is used below.
    import sys

    in_file = sys.argv[1]

    # Collapse reads into unique sequences with seqcluster.
    out_file = append_stem(in_file, ".trimming").replace(".gz", "")
    seqs = collapse(in_file)
    write_output(out_file, seqs, 1)

    # Calculate size distribution after adapter removal.
    # NOTE(review): `data` and `out_stat_file` are prepared but never updated
    # or written in this code; the accumulation step looks missing -- confirm.
    data = Counter()
    out_stat_file = out_file + "_size_stats"
    with open(out_file) as in_handle:
        for line in in_handle:
            # The header line carries the read count after "_x".
            counts = int(line.strip().split("_x")[1])
            # Builtin next() instead of the Python 2-only file.next() method.
            line = next(in_handle)
            l = len(line.strip())
            # Skip the remaining two lines of the four-line record.
            next(in_handle)
            next(in_handle)