コード例 #1
0
ファイル: align_clusters.py プロジェクト: crosenth/bioy
def action(args):
    """Write one FASTA file per cluster named in the read map.

    Attributes read from ``args``:

    * ``raw_reads`` -- iterable of sequence objects exposing ``id``, ``seq``
      and ``description``.
    * ``rlefile`` -- falsy, or a mapping from read id to an encoded string
      that is passed through ``from_ascii`` and ``homodecode``
      (presumably run-length data for homopolymers -- confirm against
      those helpers).
    * ``readmap`` -- iterable of CSV lines with exactly two columns; the
      first is a read id, the second is the cluster name used as the
      grouping key.
    * ``pattern`` -- optional regular expression; clusters whose name does
      not match (``re.search``) are skipped.
    * ``sample`` -- maximum number of reads kept per cluster; larger
      clusters are randomly down-sampled.
    * ``outdir`` / ``name_suffix`` -- output files are written to
      ``<outdir>/<cluster>.<name_suffix>.fasta``.
    * ``align`` -- when true, reads are aligned with the external
      ``muscle`` program; otherwise they are written unaligned.

    Raises ``KeyError`` if the read map names a read that is missing from
    ``raw_reads`` and ``subprocess.CalledProcessError`` if muscle fails.
    """
    seqdict = {s.id: s for s in args.raw_reads}

    if args.rlefile:
        def rlemap(seq):
            # Rebuild the sequence from its per-read encoded companion.
            decoded = homodecode(seq.seq, from_ascii(args.rlefile[seq.id]))
            return SeqLite(seq.id, seq.description, decoded)

    # The pattern does not change between clusters, so compile it once.
    pattern = re.compile(args.pattern) if args.pattern else None

    # NOTE: groupby only merges *adjacent* rows, so the read map is
    # expected to be ordered by cluster name already.
    groups = groupby(csv.reader(args.readmap), itemgetter(1))
    for cons, group in groups:
        if pattern is not None and not pattern.search(cons):
            continue

        log.info(cons)

        # Each row is (read name, cluster name); keep only the read names.
        reads, _ = zip(*group)
        seqs = [seqdict[name] for name in reads]

        if len(seqs) > args.sample:
            seqs = random.sample(seqs, args.sample)

        if args.rlefile:
            seqs = (rlemap(s) for s in seqs)

        outfile = path.join(
            args.outdir,
            '{}.{}.fasta'.format(cons, args.name_suffix))

        if args.align:
            with fasta_tempfile(seqs) as f:
                command = ['muscle', '-quiet', '-seqtype', 'dna',
                           '-in', f, '-out', outfile]
                log.debug(' '.join(command))
                subprocess.check_call(command)
        else:
            with open(outfile, 'w') as f:
                f.write('\n'.join(
                    '>{}\n{}'.format(s.id, s.seq) for s in seqs))
コード例 #2
0
ファイル: align_clusters.py プロジェクト: nhoffman/bioy
def action(args):
    """Group reads by cluster and emit a FASTA file for every cluster.

    ``args`` is a namespace from which the following attributes are read:

    raw_reads
        Iterable of sequence objects with ``id``, ``seq`` and
        ``description`` attributes; indexed here by ``id``.
    rlefile
        Falsy, or a mapping keyed by read id whose values are fed through
        ``from_ascii`` and then ``homodecode`` together with the read's
        sequence (presumably homopolymer run lengths -- verify against
        those helpers).
    readmap
        Iterable of two-column CSV lines: read id, then cluster name.
    pattern
        Optional regular expression; a cluster is processed only if
        ``re.search`` finds it in the cluster name.
    sample
        Upper bound on reads per cluster; bigger clusters are randomly
        sub-sampled to this size.
    outdir, name_suffix
        Output goes to ``<outdir>/<cluster>.<name_suffix>.fasta``.
    align
        If true the reads are aligned by running ``muscle``; otherwise
        they are written out as-is.

    A read id present in the read map but absent from ``raw_reads`` raises
    ``KeyError``; a failing muscle run raises
    ``subprocess.CalledProcessError``.
    """
    seqdict = {s.id: s for s in args.raw_reads}

    if args.rlefile:

        def rlemap(seq):
            # Decode one read using the entry stored under its id.
            decoded = homodecode(seq.seq, from_ascii(args.rlefile[seq.id]))
            return SeqLite(seq.id, seq.description, decoded)

    # Loop-invariant: build the matcher a single time.
    matcher = re.compile(args.pattern) if args.pattern else None

    # NOTE: itertools.groupby groups consecutive rows only, so rows of the
    # same cluster must already be adjacent in the read map.
    groups = groupby(csv.reader(args.readmap), itemgetter(1))
    for cons, group in groups:
        if matcher is not None and not matcher.search(cons):
            continue

        log.info(cons)

        # Rows are (read name, cluster name) pairs; drop the cluster column.
        reads, _ = zip(*group)
        seqs = [seqdict[name] for name in reads]

        if len(seqs) > args.sample:
            seqs = random.sample(seqs, args.sample)

        if args.rlefile:
            seqs = (rlemap(s) for s in seqs)

        outfile = path.join(
            args.outdir,
            '{}.{}.fasta'.format(cons, args.name_suffix))

        if args.align:
            with fasta_tempfile(seqs) as f:
                command = [
                    'muscle', '-quiet', '-seqtype', 'dna', '-in', f,
                    '-out', outfile
                ]
                log.debug(' '.join(command))
                subprocess.check_call(command)
        else:
            with open(outfile, 'w') as f:
                f.write('\n'.join('>{}\n{}'.format(s.id, s.seq)
                                  for s in seqs))
コード例 #3
0
ファイル: test_sequtils.py プロジェクト: nhoffman/bioy
 def test02(self):
     """The temp FASTA exists inside the ``with`` block and not after it.

     Same check as for the default location, but with ``dir=self.outdir``
     passed through to ``fasta_tempfile``.
     """
     tmp_fasta = sequtils.fasta_tempfile(self.seqs, dir=self.outdir)
     with tmp_fasta as fasta_path:
         present_inside = path.exists(fasta_path)
         self.assertTrue(present_inside)
     # Leaving the context must have removed the file.
     present_after = path.exists(fasta_path)
     self.assertFalse(present_after)
コード例 #4
0
ファイル: test_sequtils.py プロジェクト: nhoffman/bioy
 def test01(self):
     """The temp FASTA exists inside the ``with`` block and not after it."""
     tmp_fasta = sequtils.fasta_tempfile(self.seqs)
     with tmp_fasta as fasta_path:
         present_inside = path.exists(fasta_path)
         self.assertTrue(present_inside)
     # Leaving the context must have removed the file.
     present_after = path.exists(fasta_path)
     self.assertFalse(present_after)