コード例 #1
0
def main():
    """Command-line entry point: compare simulated alignments with trimmed reads.

    The two ``.aln`` files and the two trimmed FASTQ files are iterated in
    lockstep and handed to ``summarize_accuracy`` together with a
    tab-delimited writer for the per-read rows. Afterwards one line per
    summary field is printed to the ``--summary`` destination. When
    ``--table`` is given, a row of summary values is written to that file
    (opened in ``"at"`` mode), preceded by a header row if the file did not
    exist beforehand.
    """
    parser = ArgumentParser()
    parser.add_argument('-a1', '--aln1', help=".aln file associated with simulated read1")
    parser.add_argument('-a2', '--aln2', help=".aln file associated with simulated read2")
    parser.add_argument('-r1', '--reads1', help="trimmed fastq file read1")
    parser.add_argument('-r2', '--reads2', help="trimmed fastq file read2")
    parser.add_argument('-l', '--read-length', type=int, default=125)
    parser.add_argument('-o', '--output', default='-')
    parser.add_argument('-s', '--summary', default='-')
    parser.add_argument('-t', '--table', default=None)
    parser.add_argument("--name", default=None)
    parser.add_argument("--adapters", nargs=2, default=DEFAULT_ADAPTERS)
    parser.add_argument("--no-progress", action="store_true", default=False)
    args = parser.parse_args()
    
    with open(args.aln1, 'rt') as a1, open(args.aln2, 'rt') as a2:
        aln_pair_iterator = zip(aln_iterator(a1), aln_iterator(a2))
        
        with xopen.xopen(args.reads1, 'rt') as r1, xopen.xopen(args.reads2, 'rt') as r2:
            read_pair_iterator = zip(fq_iterator(r1, 1), fq_iterator(r2, 2))
            
            if not args.no_progress:
                # tqdm is optional. Only a failed import is tolerated, so that
                # unrelated errors (and Ctrl-C) are no longer swallowed.
                try:
                    import tqdm
                except ImportError:
                    print("tqdm library is required for a progress bar")
                else:
                    aln_pair_iterator = iter(tqdm.tqdm(aln_pair_iterator))
            
            with fileoutput(args.output) as o:
                w = csv.writer(o, delimiter="\t")
                w.writerow((
                    'read_id','mate','expected_len','actual_len','status','has_adapter',
                    'adapter_len','adapter_edit_dist','adapter_ins','adapter_del','polyA'))
                summary = summarize_accuracy(aln_pair_iterator, read_pair_iterator, w, args.read_length, args.adapters)

            # Labels for the values in ``summary``, paired up positionally below.
            summary_fields = (
                "retained reads", "mismatch reads", "discarded reads", "total reads", "reads with adapters",
                "retained reads with adapters", "non-adapter reads trimmed", "adapter reads untrimmed",
                "adapter reads undertrimmed", "adapter reads overtrimmed", "total ref bases",
                "total ref edit distance", "total adapter bases", "total retained adapter bases",
                "total adapter edit dist", "overtrimmed bases", "undertrimmed bases"
            )
                
            with fileoutput(args.summary) as s:
                for field, value in zip(summary_fields, summary):
                    print("{} {}".format(value, field), file=s)
            
            if args.table:
                # Decide about the header before the file gets created by opening it.
                header = not os.path.exists(args.table)
                with fileoutput(args.table, "at") as t:
                    w = csv.writer(t, delimiter="\t")
                    if header:
                        w.writerow(("name",) + summary_fields)
                    w.writerow((args.name,) + summary)
コード例 #2
0
ファイル: test_xopen.py プロジェクト: llllaaaa/atropos
def test_append():
    """Write the same payload twice in append mode and read the result back.

    Runs once for a plain file and once for a ``.gz`` file; every line read
    back must equal the payload repeated twice.
    """
    for ext in ["", ".gz"]:  # BZ2 does NOT support append
        filename = 'truncated.fastq' + ext
        if ext == "":
            mode = 'a'
            text = "AB"
            reference = text + text
        else:
            mode = 'ab'
            raw = "AB".encode()
            reference = raw + raw
            # On Py3, need to send BYTES, not unicode
            text = get_compressor(filename).compress(raw)
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            # Start from a clean slate; a missing file is fine.
            try:
                os.unlink(path)
            except OSError:
                pass

            def append_payload():
                with open_output(path, mode) as sink:
                    sink.write(text)

            append_payload()
            print(path)
            append_payload()
            with xopen(path, 'r') as source:
                # Only byte-like references need decoding before the comparison.
                if hasattr(reference, "decode"):
                    reference = reference.decode("utf-8")
                for line in source:
                    assert line == reference
コード例 #3
0
 def test_truncated_gz_iter():
     """Iterating over a truncated .gz file is expected to raise EOFError."""
     with raises(EOFError), temporary_path('truncated.gz') as path:
         create_truncated_file(path)
         f = xopen(path, 'r', use_system=False) # work around bug in py3.4
         try:
             for line in f:
                 pass
         finally:
             # The expected EOFError aborts the loop, so the handle has to be
             # closed here or it would never be closed at all.
             f.close()
コード例 #4
0
	def test_truncated_gz_iter():
		"""Iterate line by line over a truncated .gz file created in a temporary path."""
		with temporary_path('truncated.gz') as path:
			create_truncated_file(path)
			f = xopen(path, 'r', use_system=False) # work around bug in py3.4
			try:
				for line in f:
					pass
			finally:
				# Close the handle even when iteration raises.
				f.close()
コード例 #5
0
ファイル: test_xopen.py プロジェクト: llllaaaa/atropos
def test_xopen_binary():
    """Read each entry of ``files`` in binary mode and check its line count and sixth line."""
    for name in files:
        f = xopen(name, 'rb')
        try:
            lines = list(f)
        finally:
            # Close before asserting so a failing check cannot leak the handle.
            f.close()
        assert len(lines) == 12
        assert lines[5] == b'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
コード例 #6
0
def create_truncated_file(path):
    """Write 200 random uppercase letters to *path* via ``xopen`` and cut off the last 10 bytes.

    The truncation is applied to the file as stored on disk, using the
    built-in ``open`` rather than ``xopen``.
    """
    # Random text
    text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(200))
    f = xopen(path, 'w')
    try:
        f.write(text)
    finally:
        # Closing also flushes the data, so the size read below is final.
        f.close()
    with open(path, 'a') as f:
        f.truncate(os.stat(path).st_size - 10)
コード例 #7
0
def main():
    """Build per-position base histograms for a FASTQ pair and write them as TSV.

    Each output row holds the read index (from 1), the side (0 or 1), the
    position (from 1), the base and its count.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-1", "--fastq1")
    cli.add_argument("-2", "--fastq2")
    cli.add_argument("-o", "--output", default="-")
    opts = cli.parse_args()

    with xopen(opts.fastq1) as first, xopen(opts.fastq2) as second:
        hists = make_hists(first, second)

    with open_output(opts.output) as out:
        writer = csv.writer(out, delimiter="\t")
        writer.writerow(('read', 'side', 'pos', 'base', 'count'))
        # Same nesting order as four explicit loops: read, side, base, position.
        writer.writerows(
            (read_idx, side, pos, base, count)
            for read_idx, hist in enumerate(hists, 1)
            for side in range(2)
            for base in nuc
            for pos, count in enumerate(hist[side][base], 1)
        )
コード例 #8
0
def create_truncated_file(path):
	"""Store 200 random uppercase letters at *path* through ``xopen``, then drop the final 10 bytes of the file."""
	alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	letters = [random.choice(alphabet) for _ in range(200)]
	writer = xopen(path, 'w')
	writer.write(''.join(letters))
	writer.close()
	# Reopen with the built-in open so the truncation hits the on-disk bytes.
	chopper = open(path, 'a')
	chopper.truncate(os.path.getsize(path) - 10)
	chopper.close()
コード例 #9
0
 def __init__(self, file, mode='r'):
     """
     file is a path or a file-like object. In both cases, the file may
     be compressed (.gz, .bz2, .xz).

     A path is opened with xopen using the given mode and flagged via
     ``_close_on_exit``; a file-like object is stored unchanged.
     """
     # Always define the flag so it exists for file-like inputs as well.
     self._close_on_exit = False
     if isinstance(file, str):
         file = xopen(file, mode)
         self._close_on_exit = True
     self._file = file
コード例 #10
0
ファイル: fastq_base_hist.py プロジェクト: llllaaaa/atropos
def main():
    """Command-line entry point: tabulate base counts per position for paired FASTQ files.

    The histograms returned by ``make_hists`` are flattened into
    tab-separated rows of read index, side, position, base and count.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-1", "--fastq1")
    parser.add_argument("-2", "--fastq2")
    parser.add_argument("-o", "--output", default="-")
    options = parser.parse_args()

    with xopen(options.fastq1) as mate1, xopen(options.fastq2) as mate2:
        histograms = make_hists(mate1, mate2)

    column_names = ('read', 'side', 'pos', 'base', 'count')
    with open_output(options.output) as sink:
        table = csv.writer(sink, delimiter="\t")
        table.writerow(column_names)
        # Read indices and positions are reported starting at 1, sides at 0.
        for read_number, histogram in enumerate(histograms, 1):
            for side in range(2):
                for base in nuc:
                    for position, count in enumerate(histogram[side][base], 1):
                        row = (read_number, side, position, base, count)
                        table.writerow(row)
コード例 #11
0
ファイル: seqio.py プロジェクト: llllaaaa/atropos
 def __init__(self, file, mode='r'):
     """
     file is a path or a file-like object. In both cases, the file may
     be compressed (.gz, .bz2, .xz).

     When a path is given it is opened with xopen in the requested mode
     and ``_close_on_exit`` is set to True; otherwise the object is kept
     as passed and ``_close_on_exit`` stays False.
     """
     # Initialise the flag up front so it is defined for every kind of input.
     self._close_on_exit = False
     if isinstance(file, str):
         file = xopen(file, mode)
         self._close_on_exit = True
     self._file = file
コード例 #12
0
ファイル: test_xopen.py プロジェクト: llllaaaa/atropos
def test_context_manager():
    """Open every entry of ``files`` in a ``with`` block and check its contents."""
    # Py26 compression libraries do not support context manager protocol.
    running_py26 = tuple(sys.version_info[0:2]) == (2, 6)
    for name in files:
        if running_py26:
            continue
        with xopen(name, 'rt') as handle:
            content = list(handle)
            assert len(content) == 12
            assert content[5] == 'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
コード例 #13
0
ファイル: seqio.py プロジェクト: llllaaaa/atropos
 def __init__(self, file, sequence_class=Sequence):
     """
     Accept either a filename or an already opened file-like object.
     Filenames are opened in binary mode through xopen, so .gz files
     are supported.
     """
     self._close_on_exit = False
     given_path = isinstance(file, str)
     if given_path:
         file = xopen(file, 'rb')
     # Only handles opened here are flagged for closing.
     self._close_on_exit = given_path
     self._file = file
     self.sequence_class = sequence_class
     self.delivers_qualities = True
コード例 #14
0
 def __init__(self, file, sequence_class=Sequence):
     """
     ``file`` may be a filename or a file-like object. A filename is
     opened via xopen in 'rb' mode (.gz files are supported) and marked
     to be closed on exit; a file-like object is used as passed.
     """
     self._close_on_exit = False
     handle = file
     if isinstance(handle, str):
         # A path was given: open it here and remember that we own it.
         handle = xopen(handle, 'rb')
         self._close_on_exit = True
     self._file = handle
     self.sequence_class = sequence_class
     self.delivers_qualities = True
コード例 #15
0
def main():
    """Compute adapter metrics for a FASTQ pair and print the first five values, one labelled line each."""
    cli = argparse.ArgumentParser()
    cli.add_argument("-a", "--adapter1", default=ADAPTER1)
    cli.add_argument("-A", "--adapter2", default=ADAPTER2)
    cli.add_argument("-1", "--fastq1")
    cli.add_argument("-2", "--fastq2")
    cli.add_argument("-o", "--output", default="-")
    opts = cli.parse_args()

    with xopen(opts.fastq1) as fq1, xopen(opts.fastq2) as fq2:
        metrics = estimate_metrics(fq1, fq2, opts.adapter1, opts.adapter2)

    # One template per metric, in the same order as the values in ``metrics``.
    templates = (
        "Avg error prob: {}",
        "Read 1 with full-length adapters: {}",
        "Read 1 full-length adapter bases: {}",
        "Read 2 with full-length adapters: {}",
        "Read 2 full-length adapter bases: {}",
    )
    with open_output(opts.output) as out:
        for position, template in enumerate(templates):
            print(template.format(metrics[position]), file=out)
コード例 #16
0
ファイル: seqio.py プロジェクト: llllaaaa/atropos
 def write(self, file_desc, data, compressed=False):
     """
     Write data to the writer registered for a path, creating it on first use.

     With ``compressed`` set, ``file_desc`` is a ``(path, mode)`` pair and
     new writers come from ``open_output``; otherwise ``file_desc`` is the
     path itself and new writers come from ``xopen`` in "w" mode.
     """
     path, mode = file_desc if compressed else (file_desc, "w")
     if path not in self.writers:
         real_path = add_suffix_to_path(path, self.suffix) if self.suffix else path
         # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
         opener = open_output if compressed else xopen
         self.writers[path] = opener(real_path, mode)
     self.writers[path].write(data)
コード例 #17
0
 def write(self, file_desc, data, compressed=False):
     """
     Send data to the writer cached under the target path.

     ``file_desc`` is the path itself, or a ``(path, mode)`` pair when
     ``compressed`` is true. A missing writer is opened first (applying
     ``self.suffix`` to the path if one is set) and cached under the
     original path.
     """
     path = file_desc
     if compressed:
         path, mode = file_desc
     if path in self.writers:
         # Fast path: the writer already exists.
         self.writers[path].write(data)
         return
     real_path = path
     if self.suffix:
         real_path = add_suffix_to_path(path, self.suffix)
     # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
     if compressed:
         handle = open_output(real_path, mode)
     else:
         handle = xopen(real_path, "w")
     self.writers[path] = handle
     handle.write(data)
コード例 #18
0
	def test_truncated_gz():
		"""Read a truncated .gz file, created in a temporary path, in one call."""
		with temporary_path('truncated.gz') as path:
			create_truncated_file(path)
			f = xopen(path, 'r')
			try:
				f.read()
			finally:
				# Close the handle even when reading raises.
				f.close()
コード例 #19
0
 def test_truncated_gz():
     """Reading a truncated .gz file in one call is expected to raise EOFError."""
     with raises(EOFError), temporary_path('truncated.gz') as path:
         create_truncated_file(path)
         f = xopen(path, 'r')
         try:
             f.read()
         finally:
             # The expected EOFError skips everything after read(), so the
             # handle has to be closed here.
             f.close()