def test_append():
    """Check that two append-mode writes end up concatenated in the output.

    For a plain target and a '.gz' target, the same payload is written twice
    to a fresh temporary file, using a separate ``open_output`` call in
    append mode each time. The file is then read back through ``xopen`` and
    must consist of exactly one line: the text repeated twice.
    """
    for ext in ["", ".gz"]:  # BZ2 does NOT support append
        text = "AB"
        # What reading the file back is expected to yield, always as str.
        reference = text + text
        filename = 'truncated.fastq' + ext
        mode = 'a'
        if ext != "":
            mode = 'ab'
            # On Py3, need to send BYTES, not unicode
            text = get_compressor(filename).compress(text.encode())
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            # Best effort: start from a file that does not exist yet.
            try:
                os.unlink(path)
            except OSError:
                pass
            with open_output(path, mode) as f:
                f.write(text)
            print(path)
            with open_output(path, mode) as f:
                f.write(text)
            with xopen(path, 'r') as f:
                appended = list(f)
            # Compare the full list of lines: asserting inside a ``for`` loop
            # would silently pass if nothing at all was read back.
            assert appended == [reference]
def test_append():
    """Verify that append mode adds to an existing output instead of replacing it.

    The payload is written twice, each time through its own ``open_output``
    call in append mode, to a plain file and to a '.gz' file. Reading the
    result back with ``xopen`` must give a single line holding the text
    twice.
    """
    for ext in ["", ".gz"]:  # BZ2 does NOT support append
        text = "AB"
        # Expected content when read back in text mode.
        reference = text + text
        filename = 'truncated.fastq' + ext
        if ext == "":
            mode = 'a'
            payload = text
        else:
            mode = 'ab'
            # On Py3, need to send BYTES, not unicode
            payload = get_compressor(filename).compress(text.encode())
        print("Trying ext=%s" % ext)
        with temporary_path(filename) as path:
            # Best effort: remove any leftover file before appending.
            try:
                os.unlink(path)
            except OSError:
                pass
            with open_output(path, mode) as f:
                f.write(payload)
            print(path)
            with open_output(path, mode) as f:
                f.write(payload)
            with xopen(path, 'r') as f:
                lines = list(f)
            # An assertion inside ``for line in f`` never runs for an empty
            # file, so check the collected lines as a whole instead.
            assert lines == [reference]
def generate_fasta(outfile, summary, union=False, perinput=False):
    """Write the matches recorded in a summary as FASTA records.

    Args:
        outfile: Path of the union FASTA file. With ``perinput`` it also
            supplies the prefix of the per-input files (a trailing '.fasta'
            or '.fa' is removed first).
        summary: Summary dict. The entries read here are
            ``summary['input']['input_names']``,
            ``summary['record_counts'][0]`` and
            ``summary['detect']['matches']``.
        union: Whether to write the records of all inputs to ``outfile``.
        perinput: Whether to write one '<prefix>.<index>.fasta' file per
            input.
    """
    input_names = summary['input']['input_names'] or repeat(None)
    total_reads = summary['record_counts'][0]
    formatter = FastaFormat()

    if union:
        combined = []

    if perinput:
        prefix = outfile
        for extension in ('.fasta', '.fa'):
            if outfile.endswith(extension):
                prefix = outfile[:-len(extension)]
                break

    def match_records(idx, match):
        """Return the list of FASTA entries describing one match."""
        attrs = [
            "kmer_freq={}".format(match['kmer_freq']),
            "kmer_freq_type={}".format(match["kmer_freq_type"]),
        ]
        abundance = match['abundance']
        if abundance:
            attrs.append("abundance={}".format(abundance))
            attrs.append("abundance_frac={}".format(abundance / total_reads))
        known_frac = match['contaminant_to_known_match_frac']
        if known_frac:
            attrs.append(
                "contaminant_to_known_match_frac={}".format(known_frac))

        if not match['is_known']:
            # Unknown matches are named by their index within the input.
            header = "{} {}".format(idx, ";".join(attrs))
            return [formatter.format_entry(header, match['longest_kmer'])]

        known_names = match['known_names']
        primary_name = known_names[0]
        if len(known_names) > 1:
            attrs = attrs + [
                "other_names={}".format('|'.join(known_names[1:]))]
        description = ";".join(attrs)

        known_seqs = match['known_seqs']
        if len(known_seqs) > 1:
            # NOTE(review): every sequence of the match receives the same
            # '<name>.<idx>' header -- confirm that repeated record names
            # are intended.
            header = "{}.{} {}".format(primary_name, idx, description)
            return [formatter.format_entry(header, seq) for seq in known_seqs]

        header = "{} {}".format(primary_name, description)
        return [formatter.format_entry(header, known_seqs[0])]

    per_input = zip(input_names, summary['detect']['matches'])
    for i, (_input_name, matches) in enumerate(per_input):
        records = []
        for idx, match in enumerate(matches, 1):
            records.extend(match_records(idx, match))
        if union:
            combined.extend(records)
        if perinput:
            with open_output("{}.{}.fasta".format(prefix, i), 'wt') as out:
                out.write("".join(records))

    if union:
        with open_output(outfile, 'wt') as union_out:
            union_out.write("".join(combined))
def test_write_sequence_object(self):
    """Sequence objects formatted by FastaFormat are written as FASTA text."""
    fasta = FastaFormat()
    with open_output(self.path, "w") as writer:
        for name, bases in (("name", "CCATA"), ("name2", "HELLO")):
            writer.write(fasta.format(Sequence(name, bases)))
    with open(self.path) as reader:
        written = reader.read()
    assert written == '>name\nCCATA\n>name2\nHELLO\n'
def test_autodetect_fastq_format(self):
    """Records written with the format chosen for 'tmp.fastq' read back equal."""
    target = os.path.join(self._tmpdir, 'tmp.fastq')
    formatter = get_format(target)
    with open_output(target, "w") as handle:
        for record in simple_fastq:
            handle.write(formatter.format(record))
    # Read only after the writer has been closed.
    parsed = list(openseq(target))
    assert parsed == simple_fastq
def test_twoheaders(self):
    """A Sequence with ``name2`` set gets its name repeated on the '+' line."""
    fastq = FastqFormat()
    first = ("name", "CCATA", "!#!#!")
    second = ("name2", "HELLO", "&&&!&")
    with open_output(self.path, "w") as writer:
        for name, bases, quals in (first, second):
            writer.write(fastq.format(Sequence(name, bases, quals, name2=name)))
    with open(self.path) as reader:
        written = reader.read()
    assert written == '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n'
def test(self):
    """Entries built with FastqFormat.format_entry are written as FASTQ text."""
    fastq = FastqFormat()
    with open_output(self.path, "w") as writer:
        writer.write(fastq.format_entry("name", "CCATA", "!#!#!"))
        # NOTE(review): this quality string has 6 characters for a 5-base
        # read; the test only checks the text as written -- confirm that
        # the mismatch is intentional.
        writer.write(fastq.format_entry("name2", "HELLO", "&&&!&&"))
    with open(self.path) as reader:
        content = reader.read()
    assert content == '@name\nCCATA\n+\n!#!#!\n@name2\nHELLO\n+\n&&&!&&\n'
def get_writer(self, file_desc, compressed=False):
    """Return the writer for a file descriptor, creating it on first use.

    Writers are cached in ``self.writers`` under the path as given (before
    any suffix is added), so later calls for the same path get the same
    writer back.

    Args:
        file_desc: File descriptor. If `compressed==True`, this is a tuple
            (path, mode), otherwise it's only a path.
        compressed: Whether data has already been compressed.

    Returns:
        The writer.
    """
    path, mode = file_desc if compressed else (file_desc, None)

    if path in self.writers:
        return self.writers[path]

    real_path = add_suffix_to_path(path, self.suffix) if self.suffix else path
    # TODO: test whether O_NONBLOCK allows non-blocking write to NFS
    if compressed:
        writer = open_output(real_path, mode)
    else:
        writer = xopen(real_path, "w")
    self.writers[path] = writer
    return writer
def test_write_qualities_to_fasta(self):
    """A '.fasta' path yields a FastaFormat even when qualities are requested."""
    target = os.path.join(self._tmpdir, 'tmp.fasta')
    formatter = get_format(target, qualities=True)
    assert isinstance(formatter, FastaFormat)
    with open_output(target, "w") as handle:
        for record in simple_fastq:
            handle.write(formatter.format(record))
    # Read only after the writer has been closed.
    parsed = list(openseq(target))
    assert parsed == simple_fasta
def generate_text_report(self, fmt, summary, outfile, **kwargs):
    """Write a report for ``summary`` to ``outfile`` in the requested format.

    Args:
        fmt: Report format. 'txt' is written with ``generate_reports``,
            'fasta' with ``generate_fasta``; anything else is passed to the
            parent class's ``generate_from_template``.
        summary: The summary to report on.
        outfile: The output file.
        kwargs: Additional arguments forwarded to the selected generator.
    """
    if fmt == 'txt':
        with open_output(outfile, context_wrapper=True) as out:
            generate_reports(out, summary, **kwargs)
        return

    if fmt == 'fasta':
        generate_fasta(outfile, summary, **kwargs)
        return

    super().generate_from_template(fmt, summary, outfile, **kwargs)
def test(self):
    """Two FASTQ entries written through open_output appear in order in the file."""
    expected = '@name\nCCATA\n+\n!#!#!\n@name2\nHELLO\n+\n&&&!&&\n'
    formatter = FastqFormat()
    # NOTE(review): the second entry pairs a 5-base read with a 6-character
    # quality string -- confirm that this is intentional.
    entries = (
        ("name", "CCATA", "!#!#!"),
        ("name2", "HELLO", "&&&!&&"),
    )
    with open_output(self.path, "w") as out:
        for name, bases, quals in entries:
            out.write(formatter.format_entry(name, bases, quals))
    with open(self.path) as result:
        assert result.read() == expected
def close(self):
    """Close all outputs.

    Every path in ``self.force_create`` that has no writer and is not
    ``STDOUT`` is opened for writing and closed again right away. After
    that, every writer except ``sys.stdout`` and ``sys.stderr`` is closed.
    """
    for path in self.force_create:
        if path in self.writers or path == STDOUT:
            continue
        # Open and close immediately; presumably this leaves an empty
        # file behind for outputs that never received any data.
        with open_output(path, "w"):
            pass

    standard_streams = (sys.stdout, sys.stderr)
    for writer in self.writers.values():
        if writer in standard_streams:
            continue
        writer.close()
def test_linelength(self):
    """With ``line_length=3``, sequences are wrapped every three characters."""
    fasta = FastaFormat(line_length=3)
    entries = (("r1", "ACG"), ("r2", "CCAT"), ("r3", "TACCAG"))
    with open_output(self.path, "w") as writer:
        for name, bases in entries:
            writer.write(fasta.format_entry(name, bases))
    with open(self.path) as reader:
        content = reader.read()
        print(content)
    assert content == '>r1\nACG\n>r2\nCCA\nT\n>r3\nTAC\nCAG\n'
def test_linelength(self):
    """FastaFormat(line_length=3) breaks sequence lines after three characters."""
    expected = '>r1\nACG\n>r2\nCCA\nT\n>r3\nTAC\nCAG\n'
    formatter = FastaFormat(line_length=3)
    with open_output(self.path, "w") as out:
        out.write(formatter.format_entry("r1", "ACG"))
        out.write(formatter.format_entry("r2", "CCAT"))
        out.write(formatter.format_entry("r3", "TACCAG"))
    with open(self.path) as result:
        written = result.read()
    # Echo the file content so it is visible when the test fails.
    print(written)
    assert written == expected
def test_twoheaders(self):
    """FastqFormat repeats the record name after '+' when ``name2`` is set."""
    expected = '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n'
    formatter = FastqFormat()
    with open_output(self.path, "w") as out:
        record = Sequence("name", "CCATA", "!#!#!", name2="name")
        out.write(formatter.format(record))
        record = Sequence("name2", "HELLO", "&&&!&", name2="name2")
        out.write(formatter.format(record))
    with open(self.path) as result:
        assert result.read() == expected
def serialize(self, obj, fmt, mode, outfile, **kwargs):
    """Dump a summary dict to a file using the module named by ``fmt``.

    Args:
        obj: The summary dict.
        fmt: The serialization format (e.g. json, yaml); imported as a
            module whose `dump` function does the writing.
        mode: The file mode (b=binary, t=text), appended to 'w'.
        outfile: The output file.
        kwargs: Additional arguments to pass to the `dump` method.
    """
    serializer = importlib.import_module(fmt)
    file_mode = 'w' + mode
    with open_output(outfile, file_mode) as handle:
        serializer.dump(obj, handle, **kwargs)