Ejemplo n.º 1
0
def write_contig_summary_b(contig_ace_dir, singleton_seq_dir, summary_file):
    """Write a tab-separated contig/member summary for a directory of ACE files.

    Every ``*.ace`` file in ``contig_ace_dir`` is parsed (in sorted name
    order). Files whose record reports ``ncontigs <= 1`` are skipped, as are
    contigs with exactly one read. Each remaining contig yields one line:
    the contig name followed by the names of its reads, tab-separated.

    The output starts with three ``#`` header lines, the first of which
    carries the number of contigs written.

    Args:
        contig_ace_dir: Directory scanned for ``*.ace`` files. The process
            working directory is changed to it and is NOT restored.
        singleton_seq_dir: Unused; kept for interface compatibility.
        summary_file: Path of the summary file to (over)write.
    """
    rows = []
    # The summary file is opened before the chdir below so that a relative
    # ``summary_file`` keeps resolving against the caller's working directory.
    with open(summary_file, 'w') as fd_summary:
        os.chdir(contig_ace_dir)
        for ace_file in sorted(glob.glob("*.ace")):
            # Ace.read() consumes the whole handle, so it can be closed
            # as soon as parsing is done.
            with open(ace_file) as ace_handle:
                ace_record = Ace.read(ace_handle)
            if ace_record.ncontigs <= 1:
                continue
            for contig in ace_record.contigs:
                # do not write singletons if found in ace file
                if contig.nreads == 1:
                    continue
                fields = [contig.name]
                fields.extend(read.rd.name for read in contig.reads)
                rows.append("\t".join(fields) + "\n")

        header = (
            "# nr Contigs: " + str(len(rows)) + "\n"
            "# Column 1: Contig_id" + "\n"
            "# Columns 2 to n: Member Sequences" + "\n"
        )
        fd_summary.write(header)
        fd_summary.writelines(rows)
Ejemplo n.º 2
0
 def __init__(self,ace_file):
     """Load a single-contig ACE file and cache its consensus information.

     Args:
         ace_file: Path to an ACE file that must contain exactly one contig.

     Raises:
         AssertionError: If the parsed file does not hold exactly one contig.
     """
     self.ace_file = ace_file
     # Ace.read() consumes the whole handle, so close it right after parsing
     # instead of leaving it to the garbage collector.
     with open(ace_file, 'r') as ace_handle:
         self.records = Ace.read(ace_handle)
     assert len(self.records.contigs)==1, (
         "expected exactly one contig in %s, found %d"
         % (ace_file, len(self.records.contigs)))
     self.contig = self.records.contigs[0]
     self.consensus = self.contig.sequence
     self.consensus_name = self.contig.name
     self.number_sequences = len(self.contig.reads)
     # Reference sequence and name start out empty.
     self.reference = ""
     self.reference_name = ""
Ejemplo n.º 3
0
Archivo: ace.py Proyecto: rrane/jcvi
def report(args):
    """
    %prog report [--options] ace_file > report

    Prepare a report of read location, consensus location or quality segment per contig
    """
    from jcvi.utils.table import tabulate

    p = OptionParser(report.__doc__)

    types = {"read":      ["padded_start", "padded_end", "orient"],
             "consensus": ["padded_consensus_start", "padded_consensus_end"],
             "quality"  : ["qual_clipping_start", "qual_clipping_end", "align_clipping_start", "align_clipping_end"]
            }
    valid_types = tuple(types.keys())
    p.add_option("--type", default="read", choices=valid_types,
            help="choose report type [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))
    logging.debug('Loaded ace file {0}'.format(acefile))

    for c in ace.contigs:
        print c.name
        table = dict()
        if opts.type == "read":
            ps, pe = [], []
            ps = [read.padded_start for read in c.af]
            for i in xrange(1, len(ps)):
                pe.append(ps[i] - ps[i-1])
            pe.append(c.nbases)
            map = dict(zip(ps, pe))
            for i, read in enumerate(c.af):
                values = [str(x) for x in (read.padded_start, map[read.padded_start], read.coru)]
                for i, label in enumerate(types[opts.type]):
                    table[(str(read.name), label)] = values[i]
        elif opts.type == "consensus":
            for read in c.bs:
                values = [str(x) for x in (read.padded_start, read.padded_end)]
                for i, label in enumerate(types[opts.type]):
                    table[(str(read.name), label)] = values[i]
        elif opts.type == "quality":
            for read in c.reads:
                (r1, r2) = (read.rd, read.qa)
                values = [str(x) for x in (r2.qual_clipping_start, r2.qual_clipping_end, r2.align_clipping_start, r2.align_clipping_end)]
                for i, label in enumerate(types[opts.type]):
                    table[(str(r1.name), label)] = values[i]
        print tabulate(table), "\n"
Ejemplo n.º 4
0
def report(args):
    """
    %prog report [--options] ace_file > report

    Prepare a report of read location, consensus location or quality segment per contig
    """
    from jcvi.utils.table import tabulate

    p = OptionParser(report.__doc__)

    types = {"read":      ["padded_start", "padded_end", "orient"],
             "consensus": ["padded_consensus_start", "padded_consensus_end"],
             "quality"  : ["qual_clipping_start", "qual_clipping_end", "align_clipping_start", "align_clipping_end"]
            }
    valid_types = tuple(types.keys())
    p.add_option("--type", default="read", choices=valid_types,
            help="choose report type [default: %default]")

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))
    logging.debug('Loaded ace file {0}'.format(acefile))

    for c in ace.contigs:
        print c.name
        table = dict()
        if opts.type == "read":
            ps, pe = [], []
            ps = [read.padded_start for read in c.af]
            for i in xrange(1, len(ps)):
                pe.append(ps[i] - ps[i-1])
            pe.append(c.nbases)
            map = dict(zip(ps, pe))
            for i, read in enumerate(c.af):
                values = [str(x) for x in (read.padded_start, map[read.padded_start], read.coru)]
                for i, label in enumerate(types[opts.type]):
                    table[(str(read.name), label)] = values[i]
        elif opts.type == "consensus":
            for read in c.bs:
                values = [str(x) for x in (read.padded_start, read.padded_end)]
                for i, label in enumerate(types[opts.type]):
                    table[(str(read.name), label)] = values[i]
        elif opts.type == "quality":
            for read in c.reads:
                (r1, r2) = (read.rd, read.qa)
                values = [str(x) for x in (r2.qual_clipping_start, r2.qual_clipping_end, r2.align_clipping_start, r2.align_clipping_end)]
                for i, label in enumerate(types[opts.type]):
                    table[(str(r1.name), label)] = values[i]
        print tabulate(table), "\n"
Ejemplo n.º 5
0
def main():
    """Debug harness: parse a fixed ACE file, rewrite it, then drop into pdb.

    Reads the hard-coded input path with ``Ace.read``, hands the parsed
    record to ``write`` together with the hard-coded output path, and
    finally opens an interactive debugger so the locals can be inspected.
    """
    # Not used by the code below; kept as a local so it is available inside
    # the debugger session. An earlier (disabled) variant iterated over
    # Ace.parse() looking for the contig holding a read with this name.
    base_name = 'FX5ZTWB02D1DFX'
    # Ace.read() consumes the whole handle, so it can be closed immediately.
    with open('/Users/bcf/Tmp/tmp2.fa.cap.ace') as ace_handle:
        c = Ace.read(ace_handle)
    write(c, '/Users/bcf/Tmp/tmp2_rewrite.fa.cap.ace')
    pdb.set_trace()
Ejemplo n.º 6
0
 def assemble(self):
     """Assemble sequences with cap3 and return the resulting contigs.

     Runs ``cap3`` on ``self.input_path`` and parses
     ``multialign.fa.cap.ace`` from ``self.input_dir``.

     Returns:
         The ``contigs`` of the parsed ACE record, or the ``contigs`` of a
         fresh ``Ace.ACEFileRecord()`` when there are no sequences, when the
         number of sequences reaches ``self.seq_limit``, or when cap3 exits
         with a non-zero status.
     """
     if not (0 < len(self.sequences) < self.seq_limit):
         # Nothing to assemble, or too many sequences to assemble.
         return Ace.ACEFileRecord().contigs

     args = ['cap3', self.input_path, '-p', '75', '-s', '500', '-z', '2']
     with open(os.devnull, 'w') as DEVNULL:
         try:
             # Use check call to ignore stdout of cap3
             subprocess.check_call(args, stdout=DEVNULL, close_fds=True)
         except subprocess.CalledProcessError as e:
             logger.error("An error occured while attempting to assemble reads: "
                          "%s\n The problematic sequences are: %s", e, self.sequences)
             return Ace.ACEFileRecord().contigs

     ace_path = os.path.join(self.input_dir, 'multialign.fa.cap.ace')
     # Ace.read() consumes the whole handle, so close it once parsing is done
     # rather than leaking one descriptor per assembly.
     with open(ace_path) as ace_handle:
         return Ace.read(ace_handle).contigs
Ejemplo n.º 7
0
Archivo: ace.py Proyecto: rrane/jcvi
def extract(args):
    """
    %prog extract [--options] ace_file

    Extract contigs from ace file and if necessary reformat header with
    a pipe(|) separated list of constituent reads.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is runtime output and must not be reworded casually.
    p = OptionParser(extract.__doc__)
    p.add_option("--format", default=False, action="store_true",
            help="enable flag to reformat header into a symbol separated list of constituent reads "
            "[default: %default]")
    p.add_option("--sep", default="|",
            help="choose a separator used to list the reads in the FASTA header [default: '%default']")
    p.add_option("--singlets", default=False, action="store_true",
            help="ask the program to look in the singlets file (should be in the same folder) for "
            "unused reads and put them in the resultant fasta file [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))
    logging.debug('Loaded ace file {0}'.format(acefile))

    # Output and singlets files sit next to the input, sharing its name
    # up to the last dot.
    prefix = acefile.rsplit(".", 1)[0]
    fastafile = prefix + ".fasta"

    # The context manager guarantees the FASTA file is flushed and closed
    # even if writing a record fails part-way through.
    with open(fastafile, "w") as fw:
        for c in ace.contigs:
            if opts.format:
                seq_id = opts.sep.join([read.name for read in c.af])
            else:
                seq_id = c.name

            seqrec = SeqRecord(Seq(c.sequence), id=seq_id, description="")
            SeqIO.write([seqrec], fw, "fasta")

        if opts.singlets:
            singletsfile = prefix + ".singlets"
            # An empty singlets file is skipped rather than parsed.
            if os.path.getsize(singletsfile) > 0:
                fp = SeqIO.parse(must_open(singletsfile), "fasta")
                for rec in fp:
                    SeqIO.write(rec, fw, "fasta")

    logging.debug('Wrote contigs to fasta file {0}'.format(fastafile))
0
def extract(args):
    """
    %prog extract [--options] ace_file

    Extract contigs from ace file and if necessary reformat header with
    a pipe(|) separated list of constituent reads.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is runtime output and must not be reworded casually.
    p = OptionParser(extract.__doc__)
    p.add_option("--format", default=False, action="store_true",
            help="enable flag to reformat header into a symbol separated list of constituent reads "
            "[default: %default]")
    p.add_option("--sep", default="|",
            help="choose a separator used to list the reads in the FASTA header [default: '%default']")
    p.add_option("--singlets", default=False, action="store_true",
            help="ask the program to look in the singlets file (should be in the same folder) for "
            "unused reads and put them in the resultant fasta file [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    acefile, = args
    ace = Ace.read(must_open(acefile))
    logging.debug('Loaded ace file {0}'.format(acefile))

    # Output and singlets files sit next to the input, sharing its name
    # up to the last dot.
    prefix = acefile.rsplit(".", 1)[0]
    fastafile = prefix + ".fasta"

    # The context manager guarantees the FASTA file is flushed and closed
    # even if writing a record fails part-way through.
    with open(fastafile, "w") as fw:
        for c in ace.contigs:
            if opts.format:
                seq_id = opts.sep.join([read.name for read in c.af])
            else:
                seq_id = c.name

            seqrec = SeqRecord(Seq(c.sequence), id=seq_id, description="")
            SeqIO.write([seqrec], fw, "fasta")

        if opts.singlets:
            singletsfile = prefix + ".singlets"
            # An empty singlets file is skipped rather than parsed.
            if os.path.getsize(singletsfile) > 0:
                fp = SeqIO.parse(must_open(singletsfile), "fasta")
                for rec in fp:
                    SeqIO.write(rec, fw, "fasta")

    logging.debug('Wrote contigs to fasta file {0}'.format(fastafile))
Ejemplo n.º 9
0
def parse_singletons_fasta_in_ace(contig_ace_dir, singleton_seq_dir):
    """Write every single-read contig found in a directory of ACE files as FASTA.

    Every ``*.ace`` file in ``contig_ace_dir`` is parsed (in sorted name
    order). For each contig with exactly one read, that read's sequence is
    written to ``<singleton_seq_dir>/<read name>.fsa`` with the read name as
    the FASTA header.

    Args:
        contig_ace_dir: Directory scanned for ``*.ace`` files. The process
            working directory is changed to it and is NOT restored, so a
            relative ``singleton_seq_dir`` resolves against this directory.
        singleton_seq_dir: Directory receiving one ``.fsa`` file per singleton.
    """
    # get contig info
    os.chdir(contig_ace_dir)
    for ace_file in sorted(glob.glob("*.ace")):
        # Ace.read() consumes the whole handle, so it can be closed at once.
        with open(ace_file) as ace_handle:
            ace_record = Ace.read(ace_handle)
        for contig in ace_record.contigs:
            if contig.nreads != 1:
                continue
            singleton = contig.reads[0].rd
            # Putting the read name in ``id`` (with an empty description)
            # makes the FASTA writer emit ">name" directly. The previous
            # version wrote "> name" and then shelled out to ``sed -i`` to
            # strip the space, which interpolated the read name into a
            # shell command line.
            singleton_record = SeqRecord(seq=Seq(singleton.sequence),
                                         id=singleton.name,
                                         name="",
                                         description="")
            singleton_file = singleton_seq_dir + "/" + singleton.name + ".fsa"
            with open(singleton_file, "w") as singleton_fd:
                SeqIO.write([singleton_record], singleton_fd, "fasta")
Ejemplo n.º 10
0
from Bio.Sequencing import Ace
# Dump the contents of an ACE assembly file: one section per contig,
# followed by one section per read in that contig.
fn = '../../samples/contig1.ace'
# Ace.read() parses the whole file up front, so the handle can be closed
# before the record is used.
with open(fn) as ace_handle:
    acefilerecord = Ace.read(ace_handle)
# For each contig:
for ctg in acefilerecord.contigs:
    print('==========================================')
    print('Contig name: %s'%ctg.name)
    print('Bases: %s'%ctg.nbases)
    print('Reads: %s'%ctg.nreads)
    print('Segments: %s'%ctg.nsegments)
    print('Sequence: %s'%ctg.sequence)
    print('Quality: %s'%ctg.quality)
    # For each read in contig:
    for read in ctg.reads:
        print('Read name: %s'%read.rd.name)
        print('Align start: %s'%read.qa.align_clipping_start)
        print('Align end: %s'%read.qa.align_clipping_end)
        print('Qual start: %s'%read.qa.qual_clipping_start)
        print('Qual end: %s'%read.qa.qual_clipping_end)
        print('Read sequence: %s'%read.rd.sequence)
        print('==========================================')
Ejemplo n.º 11
0
from Bio.Sequencing import Ace
# Dump the contents of an ACE assembly file: one section per contig,
# followed by one section per read in that contig.
fn = '../../samples/contig1.ace'
# Ace.read() parses the whole file up front, so the handle can be closed
# before the record is used.
with open(fn) as ace_handle:
    acefilerecord = Ace.read(ace_handle)
# For each contig:
for ctg in acefilerecord.contigs:
    print('==========================================')
    print('Contig name: %s' % ctg.name)
    print('Bases: %s' % ctg.nbases)
    print('Reads: %s' % ctg.nreads)
    print('Segments: %s' % ctg.nsegments)
    print('Sequence: %s' % ctg.sequence)
    print('Quality: %s' % ctg.quality)
    # For each read in contig:
    for read in ctg.reads:
        print('Read name: %s' % read.rd.name)
        print('Align start: %s' % read.qa.align_clipping_start)
        print('Align end: %s' % read.qa.align_clipping_end)
        print('Qual start: %s' % read.qa.qual_clipping_start)
        print('Qual end: %s' % read.qa.qual_clipping_end)
        print('Read sequence: %s' % read.rd.sequence)
        print('==========================================')