def main():
    """Compare simulated-read alignments (.aln) against trimmed reads and
    report per-read trimming accuracy, a human-readable summary, and an
    optional appendable summary table.
    """
    parser = ArgumentParser()
    parser.add_argument(
        '-a1', '--aln1', help=".aln file associated with simulated read1")
    parser.add_argument(
        '-a2', '--aln2', help=".aln file associated with simulated read2")
    parser.add_argument('-r1', '--reads1', help="trimmed fastq file read1")
    # Fixed copy-paste error in the help text: this option is for read2.
    parser.add_argument('-r2', '--reads2', help="trimmed fastq file read2")
    parser.add_argument('-l', '--read-length', type=int, default=125)
    parser.add_argument('-o', '--output', default='-')
    parser.add_argument('-s', '--summary', default='-')
    parser.add_argument('-t', '--table', default=None)
    parser.add_argument("--name", default=None)
    parser.add_argument("--adapters", nargs=2, default=DEFAULT_ADAPTERS)
    parser.add_argument("--no-progress", action="store_true", default=False)
    args = parser.parse_args()
    with open(args.aln1, 'rt') as a1, open(args.aln2, 'rt') as a2:
        aln_pair_iterator = zip(aln_iterator(a1), aln_iterator(a2))
        with xopen.xopen(args.reads1, 'rt') as r1, \
                xopen.xopen(args.reads2, 'rt') as r2:
            read_pair_iterator = zip(fq_iterator(r1, 1), fq_iterator(r2, 2))
            if not args.no_progress:
                try:
                    import tqdm
                    aln_pair_iterator = iter(tqdm.tqdm(aln_pair_iterator))
                except ImportError:
                    # Was a bare `except:`; only a missing tqdm should be
                    # tolerated here -- any other error must propagate.
                    print("tqdm library is required for a progress bar")
            with fileoutput(args.output) as o:
                w = csv.writer(o, delimiter="\t")
                w.writerow((
                    'read_id', 'mate', 'expected_len', 'actual_len', 'status',
                    'has_adapter', 'adapter_len', 'adapter_edit_dist',
                    'adapter_ins', 'adapter_del', 'polyA'))
                summary = summarize_accuracy(
                    aln_pair_iterator, read_pair_iterator, w,
                    args.read_length, args.adapters)
            summary_fields = (
                "retained reads", "mismatch reads", "discarded reads",
                "total reads", "reads with adapters",
                "retained reads with adapters", "non-adapter reads trimmed",
                "adapter reads untrimmed", "adapter reads undertrimmed",
                "adapter reads overtrimmed", "total ref bases",
                "total ref edit distance", "total adapter bases",
                "total retained adapter bases", "total adapter edit dist",
                "overtrimmed bases", "undertrimmed bases")
            with fileoutput(args.summary) as s:
                # Each line is "<value> <field name>".
                templates = ("{} " + field for field in summary_fields)
                for template, value in zip(templates, summary):
                    print(template.format(value), file=s)
            if args.table:
                # Write the header row only when the table file is new.
                header = not os.path.exists(args.table)
                with fileoutput(args.table, "at") as t:
                    w = csv.writer(t, delimiter="\t")
                    if header:
                        w.writerow(("name",) + summary_fields)
                    w.writerow((args.name,) + summary)
def test_append():
    # BZ2 does NOT support append, so only plain and gzip are exercised.
    for ext in ["", ".gz"]:
        payload = "AB"
        expected = payload + payload
        filename = 'truncated.fastq' + ext
        if ext == "":
            mode = 'a'
        else:
            mode = 'ab'
            payload = payload.encode()
            expected = payload + payload
            # On Py3, the compressor needs BYTES, not unicode.
            payload = get_compressor(filename).compress(payload)
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            try:
                os.unlink(path)
            except OSError:
                pass
            with open_output(path, mode) as out:
                out.write(payload)
            print(path)
            with open_output(path, mode) as out:
                out.write(payload)
            # Reading back must yield the doubled content as one line.
            with xopen(path, 'r') as handle:
                try:
                    expected = expected.decode("utf-8")
                except AttributeError:
                    pass
                for line in handle:
                    assert line == expected
def test_truncated_gz_iter():
    # Line-iterating a truncated gzip file must raise EOFError.
    with raises(EOFError), temporary_path('truncated.gz') as path:
        create_truncated_file(path)
        handle = xopen(path, 'r', use_system=False)  # work around bug in py3.4
        for _line in handle:
            pass
        handle.close()
def test_truncated_gz_iter():
    # Iterate every line of a truncated gzip file.
    # NOTE(review): unlike the `raises(EOFError)` variant of this test,
    # this one asserts nothing about errors -- confirm which is intended.
    with temporary_path('truncated.gz') as path:
        create_truncated_file(path)
        handle = xopen(path, 'r', use_system=False)  # work around bug in py3.4
        for _line in handle:
            pass
        handle.close()
def test_xopen_binary():
    # Every test file must yield the same 12 known lines in binary mode.
    for name in files:
        handle = xopen(name, 'rb')
        content = list(handle)
        assert len(content) == 12
        assert content[5] == b'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
        handle.close()
def create_truncated_file(path):
    # Write 200 random uppercase letters through xopen (so the output may be
    # compressed based on the path's extension), then chop the final 10 bytes
    # off the on-disk file to simulate truncation.
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    content = ''.join(random.choice(alphabet) for _ in range(200))
    out = xopen(path, 'w')
    out.write(content)
    out.close()
    raw = open(path, 'a')
    raw.truncate(os.stat(path).st_size - 10)
    raw.close()
def main():
    """Tabulate per-position base counts for a pair of FASTQ files as TSV."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-1", "--fastq1")
    parser.add_argument("-2", "--fastq2")
    parser.add_argument("-o", "--output", default="-")
    args = parser.parse_args()
    with xopen(args.fastq1) as fq1, xopen(args.fastq2) as fq2:
        hists = make_hists(fq1, fq2)
        with open_output(args.output) as o:
            w = csv.writer(o, delimiter="\t")
            w.writerow(('read', 'side', 'pos', 'base', 'count'))
            # hists is indexed by read (reported 1-based), then side (0/1),
            # then base, then per-position counts (reported 1-based).
            for read_num, hist in enumerate(hists, 1):
                for side in range(2):
                    for base in nuc:
                        for pos, count in enumerate(hist[side][base], 1):
                            w.writerow((read_num, side, pos, base, count))
def __init__(self, file, mode='r'):
    """
    file is a path or a file-like object. In both cases, the file may
    be compressed (.gz, .bz2, .xz).
    """
    # Fix: initialize to False so that a caller-provided file object is
    # not closed by us; only files we open ourselves are closed on exit.
    # (Previously this attribute was left unset on the file-object path.)
    self._close_on_exit = False
    if isinstance(file, str):
        file = xopen(file, mode)
        self._close_on_exit = True
    self._file = file
def test_context_manager():
    for name in files:
        if sys.version_info[0:2] == (2, 6):
            # Py26 compression libraries do not support context manager protocol.
            continue
        with xopen(name, 'rt') as handle:
            content = list(handle)
            assert len(content) == 12
            assert content[5] == 'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
def __init__(self, file, sequence_class=Sequence):
    """
    file is a filename or a file-like object.
    If file is a filename, then .gz files are supported.
    """
    # Only close on exit a file that we opened ourselves.
    if isinstance(file, str):
        self._file = xopen(file, 'rb')
        self._close_on_exit = True
    else:
        self._file = file
        self._close_on_exit = False
    self.sequence_class = sequence_class
    self.delivers_qualities = True
def main():
    """Estimate adapter/error metrics for a FASTQ pair and print a report."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--adapter1", default=ADAPTER1)
    parser.add_argument("-A", "--adapter2", default=ADAPTER2)
    parser.add_argument("-1", "--fastq1")
    parser.add_argument("-2", "--fastq2")
    parser.add_argument("-o", "--output", default="-")
    args = parser.parse_args()
    with xopen(args.fastq1) as fq1, xopen(args.fastq2) as fq2:
        metrics = estimate_metrics(fq1, fq2, args.adapter1, args.adapter2)
        with open_output(args.output) as o:
            # One report line per metric, in the order estimate_metrics
            # returns them.
            templates = (
                "Avg error prob: {}",
                "Read 1 with full-length adapters: {}",
                "Read 1 full-length adapter bases: {}",
                "Read 2 with full-length adapters: {}",
                "Read 2 full-length adapter bases: {}",
            )
            for template, value in zip(templates, metrics):
                print(template.format(value), file=o)
def write(self, file_desc, data, compressed=False):
    """
    Write data to file, opening (and caching) the writer on first use.

    file_desc is a bare path when compressed is False, otherwise a
    (path, mode) pair. Open writers are cached in self.writers by path.
    """
    if compressed:
        path, mode = file_desc
    else:
        path = file_desc
    if path not in self.writers:
        real_path = add_suffix_to_path(path, self.suffix) if self.suffix else path
        # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
        if compressed:
            writer = open_output(real_path, mode)
        else:
            writer = xopen(real_path, "w")
        self.writers[path] = writer
    self.writers[path].write(data)
def test_truncated_gz():
    # Read a truncated gzip file in a single call.
    # NOTE(review): unlike the `raises(EOFError)` variant of this test,
    # this one asserts nothing about errors -- confirm which is intended.
    with temporary_path('truncated.gz') as path:
        create_truncated_file(path)
        handle = xopen(path, 'r')
        handle.read()
        handle.close()
def test_truncated_gz():
    # Reading a truncated gzip file to the end must raise EOFError.
    with raises(EOFError), temporary_path('truncated.gz') as path:
        create_truncated_file(path)
        handle = xopen(path, 'r')
        handle.read()
        handle.close()