def write_contig_summary_b(contig_ace_dir, singleton_seq_dir, summary_file):
    """Write a tab-separated summary of the multi-read contigs in ACE files.

    Every ``*.ace`` file in ``contig_ace_dir`` is read in sorted file-name
    order.  For each file whose record reports more than one contig, one
    line is emitted per contig: the contig name followed by the names of
    its reads, separated by tabs.  Contigs holding exactly one read
    (singletons) are left out.

    The output file starts with three ``#`` header lines giving the number
    of contigs written and the column layout.

    Args:
        contig_ace_dir: Directory searched for ``*.ace`` files.  The process
            working directory is changed to it and is NOT restored.
        singleton_seq_dir: Not used by this function; kept so existing
            callers keep working.
        summary_file: Path of the summary to (over)write.  It is opened
            before the directory change, so a relative path is resolved
            against the caller's working directory.
    """
    # The summary is opened first on purpose (see ``summary_file`` above);
    # the ``with`` block guarantees it is closed even if parsing fails.
    with open(summary_file, 'w') as fd_summary:
        os.chdir(contig_ace_dir)

        contig_lines = []
        for ace_file in sorted(glob.glob("*.ace")):
            with open(ace_file) as ace_handle:
                ace_record = Ace.read(ace_handle)

            # NOTE(review): files reporting a single contig are skipped
            # entirely by this check -- confirm that this is intended.
            if ace_record.ncontigs > 1:
                for contig in ace_record.contigs:
                    # do not write singletons if found in ace file
                    if contig.nreads == 1:
                        continue
                    fields = [contig.name]
                    fields.extend(read.rd.name for read in contig.reads)
                    contig_lines.append("\t".join(fields) + "\n")

        header = "".join([
            "# nr Contigs: " + str(len(contig_lines)) + "\n",
            "# Column 1: Contig_id" + "\n",
            "# Columns 2 to n: Member Sequences" + "\n",
        ])
        fd_summary.write(header + "".join(contig_lines))
def __init__(self, ace_file):
    """Load an ACE file that must contain exactly one contig.

    Args:
        ace_file: Path of the ACE file to read.

    Raises:
        AssertionError: If the file does not hold exactly one contig.
            (Being an ``assert``, the check is skipped when Python runs
            with ``-O``.)

    Attributes set:
        ace_file: The path that was given.
        records: The record returned by ``Ace.read``.
        contig: The single contig of ``records``.
        consensus / consensus_name: ``sequence`` and ``name`` of the contig.
        number_sequences: Number of entries in ``contig.reads``.
        reference / reference_name: Initialised to empty strings.
    """
    self.ace_file = ace_file
    # Ace.read loads the whole file, so the handle can be closed right away.
    with open(ace_file, 'r') as ace_handle:
        self.records = Ace.read(ace_handle)
    assert len(self.records.contigs) == 1, \
        "expected exactly one contig in %s" % (ace_file,)
    self.contig = self.records.contigs[0]
    self.consensus = self.contig.sequence
    self.consensus_name = self.contig.name
    self.number_sequences = len(self.contig.reads)
    self.reference = ""
    self.reference_name = ""
def report(args):
    """
    %prog report [--options] ace_file > report

    Prepare a report of read location, consensus location or quality segment per contig
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so the remaining documentation is kept in comments.
    #
    # `args` is the list of command-line arguments: exactly one ACE file
    # path plus an optional `--type` (read | consensus | quality).  With any
    # other number of positional arguments the help is printed and the
    # process exits.  For every contig the contig name is printed, followed
    # by a table keyed by (read name, column label).
    from jcvi.utils.table import tabulate

    p = OptionParser(report.__doc__)

    # Report type -> column labels, in the order the values are built below.
    types = {"read": ["padded_start", "padded_end", "orient"],
             "consensus": ["padded_consensus_start", "padded_consensus_end"],
             "quality": ["qual_clipping_start", "qual_clipping_end",
                         "align_clipping_start", "align_clipping_end"]}
    valid_types = tuple(types.keys())
    p.add_option("--type", default="read", choices=valid_types,
                 help="choose report type [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))

    logging.debug('Loaded ace file {0}'.format(acefile))

    labels = types[opts.type]
    for c in ace.contigs:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print(c.name)
        table = dict()
        if opts.type == "read":
            starts = [read.padded_start for read in c.af]
            # Value reported under "padded_end": the distance from a read's
            # padded start to the next read's padded start; the last read
            # gets the contig's nbases instead.
            # NOTE(review): reads sharing a padded_start share one entry in
            # this lookup (the later one wins) -- confirm this is intended.
            spans = [nxt - cur for cur, nxt in zip(starts, starts[1:])]
            spans.append(c.nbases)
            span_by_start = dict(zip(starts, spans))
            for read in c.af:
                values = [str(x) for x in (read.padded_start,
                                           span_by_start[read.padded_start],
                                           read.coru)]
                for label, value in zip(labels, values):
                    table[(str(read.name), label)] = value
        elif opts.type == "consensus":
            for read in c.bs:
                values = [str(x) for x in (read.padded_start, read.padded_end)]
                for label, value in zip(labels, values):
                    table[(str(read.name), label)] = value
        elif opts.type == "quality":
            for read in c.reads:
                (r1, r2) = (read.rd, read.qa)
                values = [str(x) for x in (r2.qual_clipping_start,
                                           r2.qual_clipping_end,
                                           r2.align_clipping_start,
                                           r2.align_clipping_end)]
                for label, value in zip(labels, values):
                    table[(str(r1.name), label)] = value
        # Emits the table, a space, a newline and the usual trailing
        # newline -- the output of the former `print tabulate(table), "\n"`.
        print("{0} \n".format(tabulate(table)))
def report(args):
    """
    %prog report [--options] ace_file > report

    Prepare a report of read location, consensus location or quality segment per contig
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so the remaining documentation is kept in comments.
    #
    # `args` is the list of command-line arguments: exactly one ACE file
    # path plus an optional `--type` (read | consensus | quality).  With any
    # other number of positional arguments the help is printed and the
    # process exits.  For every contig the contig name is printed, followed
    # by a table keyed by (read name, column label).
    from jcvi.utils.table import tabulate

    p = OptionParser(report.__doc__)

    # Report type -> column labels, in the order the values are built below.
    types = {"read": ["padded_start", "padded_end", "orient"],
             "consensus": ["padded_consensus_start", "padded_consensus_end"],
             "quality": ["qual_clipping_start", "qual_clipping_end",
                         "align_clipping_start", "align_clipping_end"]}
    valid_types = tuple(types.keys())
    p.add_option("--type", default="read", choices=valid_types,
                 help="choose report type [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))

    logging.debug('Loaded ace file {0}'.format(acefile))

    labels = types[opts.type]
    for c in ace.contigs:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print(c.name)
        table = dict()
        if opts.type == "read":
            starts = [read.padded_start for read in c.af]
            # Value reported under "padded_end": the distance from a read's
            # padded start to the next read's padded start; the last read
            # gets the contig's nbases instead.
            # NOTE(review): reads sharing a padded_start share one entry in
            # this lookup (the later one wins) -- confirm this is intended.
            spans = [nxt - cur for cur, nxt in zip(starts, starts[1:])]
            spans.append(c.nbases)
            span_by_start = dict(zip(starts, spans))
            for read in c.af:
                values = [str(x) for x in (read.padded_start,
                                           span_by_start[read.padded_start],
                                           read.coru)]
                for label, value in zip(labels, values):
                    table[(str(read.name), label)] = value
        elif opts.type == "consensus":
            for read in c.bs:
                values = [str(x) for x in (read.padded_start, read.padded_end)]
                for label, value in zip(labels, values):
                    table[(str(read.name), label)] = value
        elif opts.type == "quality":
            for read in c.reads:
                (r1, r2) = (read.rd, read.qa)
                values = [str(x) for x in (r2.qual_clipping_start,
                                           r2.qual_clipping_end,
                                           r2.align_clipping_start,
                                           r2.align_clipping_end)]
                for label, value in zip(labels, values):
                    table[(str(r1.name), label)] = value
        # Emits the table, a space, a newline and the usual trailing
        # newline -- the output of the former `print tabulate(table), "\n"`.
        print("{0} \n".format(tabulate(table)))
def main():
    """Read a hard-coded ACE file, pass it to ``write`` and stop in pdb.

    Development helper: both the input and the output path are fixed
    local paths, and the function ends by dropping into the debugger.
    """
    # Ace.read loads the whole file, so the handle can be closed right away.
    with open('/Users/bcf/Tmp/tmp2.fa.cap.ace') as ace_handle:
        c = Ace.read(ace_handle)

    write(c, '/Users/bcf/Tmp/tmp2_rewrite.fa.cap.ace')

    # NOTE(review): intentional debugger stop left from development; the
    # record stays available as `c` for inspection.  Remove before this is
    # run unattended.
    pdb.set_trace()
def assemble(self):
    """Assemble sequences.

    Runs ``cap3`` on ``self.input_path`` and returns the contigs parsed
    from ``multialign.fa.cap.ace`` inside ``self.input_dir``.

    Returns:
        The ``contigs`` of the parsed ACE file on success.  The
        ``contigs`` of a freshly constructed ``Ace.ACEFileRecord`` when
        there is nothing to assemble, when the number of sequences is not
        below ``self.seq_limit``, or when cap3 exits with a non-zero
        status (which is logged as an error).
    """
    # Nothing to do for zero sequences, and too many sequences are not
    # assembled at all: hand back the contigs of an empty record.
    if not 0 < len(self.sequences) < self.seq_limit:
        return Ace.ACEFileRecord().contigs

    args = ['cap3', self.input_path, '-p', '75', '-s', '500', '-z', '2']
    with open(os.devnull, 'w') as DEVNULL:
        try:
            # Use check call to ignore stdout of cap3
            subprocess.check_call(args, stdout=DEVNULL, close_fds=True)
        except subprocess.CalledProcessError as e:
            logger.error("An error occured while attempting to assemble reads: "
                         "%s\n The problematic sequences are: %s", e, self.sequences)
            return Ace.ACEFileRecord().contigs

    ace_path = os.path.join(self.input_dir, 'multialign.fa.cap.ace')
    # Ace.read loads the whole file, so the handle can be closed right away.
    with open(ace_path) as ace_handle:
        return Ace.read(ace_handle).contigs
def extract(args):
    """
    %prog extract [--options] ace_file

    Extract contigs from ace file and if necessary reformat header with
    a pipe(|) separated list of constituent reads.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so the remaining documentation is kept in comments.
    #
    # `args` is the list of command-line arguments: exactly one ACE file
    # path plus the optional flags defined below.  With any other number of
    # positional arguments the help is printed and the process exits.  The
    # contigs are written to "<ace file without its last extension>.fasta".
    p = OptionParser(extract.__doc__)
    p.add_option("--format", default=False, action="store_true",
                 help="enable flag to reformat header into a symbol separated list of constituent reads "
                      "[default: %default]")
    p.add_option("--sep", default="|",
                 help="choose a separator used to list the reads in the FASTA header [default: '%default']")
    p.add_option("--singlets", default=False, action="store_true",
                 help="ask the program to look in the singlets file (should be in the same folder) for "
                      "unused reads and put them in the resultant fasta file [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))

    logging.debug('Loaded ace file {0}'.format(acefile))

    prefix = acefile.rsplit(".", 1)[0]
    fastafile = prefix + ".fasta"
    # The with-block closes the FASTA file even if a write or parse fails.
    with open(fastafile, "w") as fw:
        for c in ace.contigs:
            seqid = c.name
            if opts.format:
                seqid = opts.sep.join([read.name for read in c.af])

            seqrec = SeqRecord(Seq(c.sequence), id=seqid, description="")
            SeqIO.write([seqrec], fw, "fasta")

        if opts.singlets:
            singletsfile = prefix + ".singlets"
            # getsize raises if the singlets file is missing; an empty
            # singlets file is silently skipped.
            if os.path.getsize(singletsfile) > 0:
                fp = SeqIO.parse(must_open(singletsfile), "fasta")
                for rec in fp:
                    SeqIO.write(rec, fw, "fasta")

    logging.debug('Wrote contigs to fasta file {0}'.format(fastafile))
def extract(args):
    """
    %prog extract [--options] ace_file

    Extract contigs from ace file and if necessary reformat header with
    a pipe(|) separated list of constituent reads.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so the remaining documentation is kept in comments.
    #
    # `args` is the list of command-line arguments: exactly one ACE file
    # path plus the optional flags defined below.  With any other number of
    # positional arguments the help is printed and the process exits.  The
    # contigs are written to "<ace file without its last extension>.fasta".
    p = OptionParser(extract.__doc__)
    p.add_option("--format", default=False, action="store_true",
                 help="enable flag to reformat header into a symbol separated list of constituent reads "
                      "[default: %default]")
    p.add_option("--sep", default="|",
                 help="choose a separator used to list the reads in the FASTA header [default: '%default']")
    p.add_option("--singlets", default=False, action="store_true",
                 help="ask the program to look in the singlets file (should be in the same folder) for "
                      "unused reads and put them in the resultant fasta file [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))

    logging.debug('Loaded ace file {0}'.format(acefile))

    prefix = acefile.rsplit(".", 1)[0]
    fastafile = prefix + ".fasta"
    # The with-block closes the FASTA file even if a write or parse fails.
    with open(fastafile, "w") as fw:
        for c in ace.contigs:
            seqid = c.name
            if opts.format:
                seqid = opts.sep.join([read.name for read in c.af])

            seqrec = SeqRecord(Seq(c.sequence), id=seqid, description="")
            SeqIO.write([seqrec], fw, "fasta")

        if opts.singlets:
            singletsfile = prefix + ".singlets"
            # getsize raises if the singlets file is missing; an empty
            # singlets file is silently skipped.
            if os.path.getsize(singletsfile) > 0:
                fp = SeqIO.parse(must_open(singletsfile), "fasta")
                for rec in fp:
                    SeqIO.write(rec, fw, "fasta")

    logging.debug('Wrote contigs to fasta file {0}'.format(fastafile))
def parse_singletons_fasta_in_ace(contig_ace_dir, singleton_seq_dir):
    """Write every single-read contig found in ACE files to its own FASTA file.

    Every ``*.ace`` file in ``contig_ace_dir`` is read in sorted file-name
    order.  For each contig holding exactly one read, that read's sequence
    is written to ``<singleton_seq_dir>/<read name>.fsa`` with the read
    name as the FASTA header.

    Args:
        contig_ace_dir: Directory searched for ``*.ace`` files.  The process
            working directory is changed to it and is NOT restored.
        singleton_seq_dir: Directory receiving the ``.fsa`` files.  Because
            of the directory change, a relative path is resolved against
            ``contig_ace_dir``.
    """
    # get contig info
    os.chdir(contig_ace_dir)
    for ace_file in sorted(glob.glob("*.ace")):
        with open(ace_file) as ace_handle:
            ace_record = Ace.read(ace_handle)

        for contig in ace_record.contigs:
            if contig.nreads != 1:
                continue

            read = contig.reads[0].rd
            singleton_name = read.name
            singleton_record = SeqRecord(seq=Seq(read.sequence), id="",
                                         name="", description=singleton_name)
            singleton_file = os.path.join(singleton_seq_dir,
                                          singleton_name + ".fsa")
            with open(singleton_file, "w") as singleton_fd:
                SeqIO.write([singleton_record], singleton_fd, "fasta")

            # With an empty id the header comes out as "> <name>".  Drop the
            # space after ">" in Python rather than through a shell `sed -i`
            # command: no shell quoting problems with unusual read names and
            # no dependency on GNU sed.
            with open(singleton_file) as singleton_fd:
                fasta_text = singleton_fd.read()
            with open(singleton_file, "w") as singleton_fd:
                singleton_fd.write(fasta_text.replace("> ", ">"))
from Bio.Sequencing import Ace

# Example script: print the contents of an ACE file, contig by contig and
# read by read.
fn = '../../samples/contig1.ace'
# Ace.read loads the whole file, so the handle can be closed right away.
with open(fn) as ace_handle:
    acefilerecord = Ace.read(ace_handle)

separator = '=========================================='

# For each contig:
for ctg in acefilerecord.contigs:
    print(separator)
    print('Contig name: %s' % ctg.name)
    print('Bases: %s' % ctg.nbases)
    print('Reads: %s' % ctg.nreads)
    print('Segments: %s' % ctg.nsegments)
    print('Sequence: %s' % ctg.sequence)
    print('Quality: %s' % ctg.quality)
    # For each read in contig:
    # NOTE(review): the closing separator is taken to belong to the read
    # loop (one separator after every read) -- confirm against the
    # intended output layout.
    for read in ctg.reads:
        print('Read name: %s' % read.rd.name)
        print('Align start: %s' % read.qa.align_clipping_start)
        print('Align end: %s' % read.qa.align_clipping_end)
        print('Qual start: %s' % read.qa.qual_clipping_start)
        print('Qual end: %s' % read.qa.qual_clipping_end)
        print('Read sequence: %s' % read.rd.sequence)
        print(separator)
from Bio.Sequencing import Ace

# Example script: print the contents of an ACE file, contig by contig and
# read by read.
fn = '../../samples/contig1.ace'
# Ace.read loads the whole file, so the handle can be closed right away.
with open(fn) as ace_handle:
    acefilerecord = Ace.read(ace_handle)

separator = '=========================================='

# For each contig:
for ctg in acefilerecord.contigs:
    print(separator)
    print('Contig name: %s' % ctg.name)
    print('Bases: %s' % ctg.nbases)
    print('Reads: %s' % ctg.nreads)
    print('Segments: %s' % ctg.nsegments)
    print('Sequence: %s' % ctg.sequence)
    print('Quality: %s' % ctg.quality)
    # For each read in contig:
    # NOTE(review): the closing separator is taken to belong to the read
    # loop (one separator after every read) -- confirm against the
    # intended output layout.
    for read in ctg.reads:
        print('Read name: %s' % read.rd.name)
        print('Align start: %s' % read.qa.align_clipping_start)
        print('Align end: %s' % read.qa.align_clipping_end)
        print('Qual start: %s' % read.qa.qual_clipping_start)
        print('Qual end: %s' % read.qa.qual_clipping_end)
        print('Read sequence: %s' % read.rd.sequence)
        print(separator)