def main():
    """Command-line entry point: score trimmed reads against simulated alignments.

    Parses the command line, pairs the two ``.aln`` files and the two trimmed
    FASTQ files, and hands both pair iterators plus a tab-separated writer to
    ``summarize_accuracy``. The returned summary values are then written one
    per line to ``--summary`` and, when ``--table`` is given, appended as a
    single row to that table (a header row is written only if the table file
    does not exist yet).
    """
    parser = ArgumentParser()
    parser.add_argument('-a1', '--aln1', help=".aln file associated with simulated read1")
    parser.add_argument('-a2', '--aln2', help=".aln file associated with simulated read2")
    parser.add_argument('-r1', '--reads1', help="trimmed fastq file read1")
    parser.add_argument('-r2', '--reads2', help="trimmed fastq file read2")
    parser.add_argument('-l', '--read-length', type=int, default=125)
    parser.add_argument('-o', '--output', default='-')
    parser.add_argument('-s', '--summary', default='-')
    parser.add_argument('-t', '--table', default=None)
    parser.add_argument("--name", default=None)
    parser.add_argument("--adapters", nargs=2, default=DEFAULT_ADAPTERS)
    parser.add_argument("--no-progress", action="store_true", default=False)
    args = parser.parse_args()

    with open(args.aln1, 'rt') as a1, open(args.aln2, 'rt') as a2:
        aln_pair_iterator = zip(aln_iterator(a1), aln_iterator(a2))

        with xopen.xopen(args.reads1, 'rt') as r1, xopen.xopen(args.reads2, 'rt') as r2:
            read_pair_iterator = zip(fq_iterator(r1, 1), fq_iterator(r2, 2))

            if not args.no_progress:
                # The progress bar is optional: only a missing tqdm is
                # tolerated, any other failure should surface.
                try:
                    import tqdm
                except ImportError:
                    print("tqdm library is required for a progress bar")
                else:
                    aln_pair_iterator = iter(tqdm.tqdm(aln_pair_iterator))

            with fileoutput(args.output) as o:
                w = csv.writer(o, delimiter="\t")
                w.writerow((
                    'read_id','mate','expected_len','actual_len','status','has_adapter',
                    'adapter_len','adapter_edit_dist','adapter_ins','adapter_del','polyA'))
                summary = summarize_accuracy(aln_pair_iterator, read_pair_iterator, w, args.read_length, args.adapters)

            # Labels for the values returned by summarize_accuracy, in order.
            summary_fields = (
                "retained reads", "mismatch reads", "discarded reads", "total reads", "reads with adapters",
                "retained reads with adapters", "non-adapter reads trimmed", "adapter reads untrimmed",
                "adapter reads undertrimmed", "adapter reads overtrimmed", "total ref bases",
                "total ref edit distance", "total adapter bases", "total retained adapter bases",
                "total adapter edit dist", "overtrimmed bases", "undertrimmed bases"
            )

            with fileoutput(args.summary) as s:
                # One "<value> <label>" line per summary entry.
                for field, value in zip(summary_fields, summary):
                    print("{} {}".format(value, field), file=s)

            if args.table:
                # Only a brand-new table gets a header row; existing tables are appended to.
                header = not os.path.exists(args.table)
                with fileoutput(args.table, "at") as t:
                    w = csv.writer(t, delimiter="\t")
                    if header:
                        w.writerow(("name",) + summary_fields)
                    w.writerow((args.name,) + summary)
예제 #2
0
def test_append():
    """Write the same payload twice in append mode and check it reads back doubled.

    Runs once for a plain file and once for a ``.gz`` file; for the latter the
    payload is compressed up front and written in binary append mode.
    """
    for ext in ["", ".gz"]:  # BZ2 does NOT support append
        filename = 'truncated.fastq' + ext
        if ext == "":
            mode = 'a'
            text = "AB"
            reference = text + text
        else:
            mode = 'ab'
            raw = "AB".encode()
            reference = raw + raw
            # On Py3, need to send BYTES, not unicode
            text = get_compressor(filename).compress(raw)
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            # Start from a clean slate; a missing file is fine.
            try:
                os.unlink(path)
            except OSError:
                pass
            with open_output(path, mode) as first:
                first.write(text)
            print(path)
            with open_output(path, mode) as second:
                second.write(text)
            with xopen(path, 'r') as reader:
                # The expected value is bytes in the compressed case; text
                # objects without .decode are left untouched.
                try:
                    reference = reference.decode("utf-8")
                except AttributeError:
                    pass
                for appended in reader:
                    assert appended == reference
예제 #3
0
 def test_truncated_gz_iter():
     """Iterating over a truncated .gz file is expected to raise EOFError."""
     with raises(EOFError), temporary_path('truncated.gz') as path:
         create_truncated_file(path)
         f = xopen(path, 'r', use_system=False) # work around bug in py3.4
         try:
             for line in f:
                 pass
         finally:
             # The loop is expected to raise, so closing must not depend on
             # it finishing normally; otherwise the handle leaks.
             f.close()
예제 #4
0
	def test_truncated_gz_iter():
		"""Iterate over every line of a truncated .gz file opened through xopen."""
		with temporary_path('truncated.gz') as path:
			create_truncated_file(path)
			f = xopen(path, 'r', use_system=False) # work around bug in py3.4
			try:
				for line in f:
					pass
			finally:
				# Close the handle even if reading the damaged file fails.
				f.close()
예제 #5
0
def test_xopen_binary():
    """Each file in ``files`` opened in binary mode yields 12 lines of bytes."""
    for name in files:
        f = xopen(name, 'rb')
        try:
            lines = list(f)
        finally:
            # Close before asserting so a failed assertion cannot leak the handle.
            f.close()
        assert len(lines) == 12
        assert lines[5] == b'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
예제 #6
0
def create_truncated_file(path):
    """Write 200 random uppercase letters to ``path`` via xopen, then chop 10 bytes off the file.

    The truncation is applied to the bytes on disk, after the xopen handle
    has been closed.
    """
    # Random text
    text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(200))
    f = xopen(path, 'w')
    try:
        f.write(text)
    finally:
        # Always release the writer, even if the write fails.
        f.close()
    # Reopen in append mode so the existing content is kept, then shorten it.
    with open(path, 'a') as f:
        f.truncate(os.stat(path).st_size - 10)
예제 #7
0
def main():
    """Command-line entry point: dump the histograms from ``make_hists`` as a TSV table.

    Reads the two FASTQ inputs, then writes one row per
    (read, side, position, base) with its count.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-1", "--fastq1")
    cli.add_argument("-2", "--fastq2")
    cli.add_argument("-o", "--output", default="-")
    opts = cli.parse_args()

    with xopen(opts.fastq1) as first, xopen(opts.fastq2) as second:
        hists = make_hists(first, second)

    with open_output(opts.output) as out:
        table = csv.writer(out, delimiter="\t")
        table.writerow(('read', 'side', 'pos', 'base', 'count'))
        # Reads and positions are numbered from 1; the side index stays 0/1.
        for read_no, hist in enumerate(hists, 1):
            for side in range(2):
                for base in nuc:
                    table.writerows(
                        (read_no, side, pos, base, count)
                        for pos, count in enumerate(hist[side][base], 1)
                    )
예제 #8
0
def create_truncated_file(path):
	"""Write 200 random uppercase letters to ``path`` via xopen, then drop the file's last 10 bytes."""
	alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	letters = [random.choice(alphabet) for _ in range(200)]
	writer = xopen(path, 'w')
	writer.write(''.join(letters))
	writer.close()
	# Append mode keeps the existing bytes so they can be shortened in place.
	tail = open(path, 'a')
	tail.truncate(os.path.getsize(path) - 10)
	tail.close()
예제 #9
0
 def __init__(self, file, mode='r'):
     """
     file is a path or a file-like object. In both cases, the file may
     be compressed (.gz, .bz2, .xz).

     mode is passed to xopen when file is a path; it is not used for
     file-like objects.
     """
     # Define the flag for every input kind so the attribute always exists;
     # only files opened here are marked for closing.
     self._close_on_exit = False
     if isinstance(file, str):
         file = xopen(file, mode)
         self._close_on_exit = True
     self._file = file
예제 #10
0
def main():
    """Parse the command line, build histograms from two FASTQ files, and write them as TSV.

    The output has one row per (read, side, pos, base) combination with the
    corresponding count taken from the result of ``make_hists``.
    """
    arg_parser = argparse.ArgumentParser()
    for short_flag, long_flag in (("-1", "--fastq1"), ("-2", "--fastq2")):
        arg_parser.add_argument(short_flag, long_flag)
    arg_parser.add_argument("-o", "--output", default="-")
    options = arg_parser.parse_args()

    with xopen(options.fastq1) as reads1, xopen(options.fastq2) as reads2:
        hists = make_hists(reads1, reads2)

    column_names = ('read', 'side', 'pos', 'base', 'count')
    with open_output(options.output) as sink:
        tsv = csv.writer(sink, delimiter="\t")
        tsv.writerow(column_names)
        for read_index, per_read in enumerate(hists, 1):
            for side_index in range(2):
                for base in nuc:
                    position = 0
                    for count in per_read[side_index][base]:
                        position += 1  # positions are reported 1-based
                        tsv.writerow((read_index, side_index, position, base, count))
예제 #11
0
파일: seqio.py 프로젝트: llllaaaa/atropos
 def __init__(self, file, mode='r'):
     """
     file is a path or a file-like object. In both cases, the file may
     be compressed (.gz, .bz2, .xz).

     When file is a path it is opened with xopen using the given mode and
     flagged to be closed later; file-like objects are stored as they are.
     """
     # Initialise the flag up front so it is defined for file-like input as well.
     self._close_on_exit = False
     if isinstance(file, str):
         file = xopen(file, mode)
         self._close_on_exit = True
     self._file = file
예제 #12
0
def test_context_manager():
    """xopen handles work as context managers and yield the expected text lines."""
    py_version = tuple(sys.version_info[0:2])
    for name in files:
        # Py26 compression libraries do not support context manager protocol.
        if py_version == (2, 6):
            continue
        with xopen(name, 'rt') as handle:
            content = list(handle)
            assert len(content) == 12
            assert content[5] == 'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
예제 #13
0
파일: seqio.py 프로젝트: llllaaaa/atropos
 def __init__(self, file, sequence_class=Sequence):
     """
     file is a filename or a file-like object.
     If file is a filename, then .gz files are supported.

     sequence_class is stored on the instance unchanged.
     """
     self._close_on_exit = False
     if isinstance(file, str):
         # Opened here, so remember that it has to be closed here too.
         self._file = xopen(file, 'rb')
         self._close_on_exit = True
     else:
         self._file = file
     self.sequence_class = sequence_class
     self.delivers_qualities = True
예제 #14
0
 def __init__(self, file, sequence_class=Sequence):
     """
     Set up the reader from a filename or an already-open file-like object.

     A filename is opened in binary mode through xopen (so .gz files are
     supported) and marked to be closed on exit; a file-like object is
     used as given.
     """
     self._close_on_exit = False
     if not isinstance(file, str):
         self._file = file
     else:
         self._file = xopen(file, 'rb')
         self._close_on_exit = True
     self.sequence_class = sequence_class
     self.delivers_qualities = True
예제 #15
0
def main():
    """Command-line entry point: print the metrics from ``estimate_metrics``.

    Reads two FASTQ files and two adapter sequences from the command line and
    writes five labelled metric lines to the chosen output.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-a", "--adapter1", default=ADAPTER1)
    cli.add_argument("-A", "--adapter2", default=ADAPTER2)
    cli.add_argument("-1", "--fastq1")
    cli.add_argument("-2", "--fastq2")
    cli.add_argument("-o", "--output", default="-")
    opts = cli.parse_args()

    with xopen(opts.fastq1) as reads1, xopen(opts.fastq2) as reads2:
        metrics = estimate_metrics(reads1, reads2, opts.adapter1, opts.adapter2)

    # Templates are listed in the same order as the entries of ``metrics``.
    templates = (
        "Avg error prob: {}",
        "Read 1 with full-length adapters: {}",
        "Read 1 full-length adapter bases: {}",
        "Read 2 with full-length adapters: {}",
        "Read 2 full-length adapter bases: {}",
    )
    with open_output(opts.output) as out:
        for index, template in enumerate(templates):
            print(template.format(metrics[index]), file=out)
예제 #16
0
파일: seqio.py 프로젝트: llllaaaa/atropos
 def write(self, file_desc, data, compressed=False):
     """
     Write data to the writer registered for a path, opening it on first use.

     With compressed=True, file_desc is a (path, mode) pair and a new
     writer is opened with open_output; otherwise file_desc is the path
     itself and a new writer is opened with xopen in "w" mode.
     """
     path, mode = file_desc if compressed else (file_desc, None)
     if path not in self.writers:
         # Writers are cached under the path without suffix; the suffix only
         # affects the name of the file that is opened.
         real_path = add_suffix_to_path(path, self.suffix) if self.suffix else path
         # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
         if compressed:
             handle = open_output(real_path, mode)
         else:
             handle = xopen(real_path, "w")
         self.writers[path] = handle
     self.writers[path].write(data)
예제 #17
0
 def write(self, file_desc, data, compressed=False):
     """
     Send data to the writer for file_desc, creating that writer if needed.

     file_desc is a plain path, or a (path, mode) pair when compressed is
     true. New writers are stored in self.writers under the path.
     """
     if not compressed:
         path = file_desc
     else:
         path, mode = file_desc

     if path in self.writers:
         # Already open: nothing to set up.
         self.writers[path].write(data)
         return

     real_path = path
     if self.suffix:
         real_path = add_suffix_to_path(path, self.suffix)
     # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
     if not compressed:
         self.writers[path] = xopen(real_path, "w")
     else:
         self.writers[path] = open_output(real_path, mode)
     self.writers[path].write(data)
예제 #18
0
	def test_truncated_gz():
		"""Read a truncated .gz file in one go through xopen."""
		with temporary_path('truncated.gz') as path:
			create_truncated_file(path)
			f = xopen(path, 'r')
			try:
				f.read()
			finally:
				# Close the handle even if reading the damaged file fails.
				f.close()
예제 #19
0
 def test_truncated_gz():
     """Reading a truncated .gz file in one go is expected to raise EOFError."""
     with raises(EOFError), temporary_path('truncated.gz') as path:
         create_truncated_file(path)
         f = xopen(path, 'r')
         try:
             f.read()
         finally:
             # read() is expected to raise, so the close has to happen in a
             # finally clause or the handle is never released.
             f.close()