Beispiel #1
0
    def render(cls, args, outfile):
        """Filter the records of every input file and write them to *outfile*.

        Filters are applied per file, in this order, each only when its
        option is set: ``args.velvet``, ``args.insert_filename``,
        ``args.regex``.

        :param args: parsed options; reads ``regex``, ``filenames``,
            ``concise``, ``velvet``, ``insert_filename`` and ``width``.
        :param outfile: destination handed to ``fasta.write``.
        :raises SystemExit: if ``args.regex`` has an unsupported leading
            character, or ``Regexon.perl`` rejects it with ``ValueError``.
        """
        # NOTE(review): the help text below advertises the 'm' form, but this
        # guard accepts 'e', 'x' and 's' -- confirm which set is intended.
        if args.regex and args.regex[0] not in 'exs':
            msg = """
            possible regex forms:
            1. 'm/pattern/modifiers'    - select matching ones
            2. 'x/pattern/modifiers'    - select non-matching ones
            3. 's/pattern/repl/modifiers'   - apply replacement on cmt
            """
            print(msg, file=sys.stderr)
            sys.exit('error: invalid regex')

        # Compile the expression once, before any file is touched: it does
        # not depend on the file, and a bad expression should abort the run
        # before any output has been produced.
        regexon = None
        if args.regex:
            try:
                regexon = Regexon.perl(args.regex)
            except ValueError as e:
                sys.exit(e)

        for fn in args.filenames:
            recs = fasta.read(fn, args.concise)
            if args.velvet:
                recs = cls.filter_velvet_concise(recs)
            if args.insert_filename:
                ifn = args.insert_filename
                recs = cls.filter_insert_filename(recs, fn, ifn)
            if regexon is not None:
                recs = cls.filter_regex(recs, regexon)

            fasta.write(outfile, recs, linewidth=args.width)
Beispiel #2
0
    def render(args, outfile):
        """Write the records whose title key appears in ``args.listfile``.

        A key is the first run of characters that are neither ``>`` nor
        whitespace, taken from a list-file line or from ``rec.cmt``.

        Without ``args.order`` records are streamed out in input order.
        With ``args.order`` they are written in list-file order, which
        requires holding every selected record in memory.  In both modes a
        listed key that matches no record is silently ignored.

        :param args: parsed options; reads ``filenames``, ``listfile``,
            ``order`` and ``concise``.
        :param outfile: destination handed to ``fasta.write``.
        """
        key_pattern = re.compile(r'[^>\s\n]+')

        def parse_key(line):
            found = key_pattern.search(line)
            return found.group() if found else None

        def first_word(title):
            # Titles may carry a description after the key; blank titles
            # yield None so they can never match a (non-empty) key.
            words = title.split()
            return words[0] if words else None

        recgens = [fasta.read(fn, False) for fn in args.filenames]
        records = itertools.chain(*recgens)

        # Read the list file up front inside a context manager so the handle
        # is closed deterministically; blank lines produce no key.
        with open(args.listfile) as listfile:
            keys = [key for key in map(parse_key, listfile) if key]

        if not args.order:
            wanted = set(keys)
            for rec in records:
                cmt = first_word(rec.cmt)
                if cmt in wanted:
                    outrec = dict(cmt=cmt, seq=rec.seq) if args.concise else rec
                    fasta.write(outfile, outrec)
            return  # simple and fast

        # Preserve the order of keys in the list file.  This keeps every
        # selected record in memory until all inputs have been scanned.
        selection = collections.OrderedDict((key, None) for key in keys)
        for rec in records:
            cmt = first_word(rec.cmt)
            if cmt in selection:
                selection[cmt] = rec

        for cmt, rec in selection.items():
            if rec is None:
                # Listed key not present in any input file: skip it, as the
                # unordered branch does, instead of failing on None.
                continue
            outrec = dict(cmt=cmt, seq=rec.seq) if args.concise else rec
            fasta.write(outfile, outrec)
Beispiel #3
0
 def render(cls, args, outfile):
     """Write each input record with its sequence complemented and reversed.

     Every record read from ``args.filenames`` is modified in place: ".RC"
     is appended to its ``cmt`` and its ``seq`` is passed through the mapper
     from ``CharsMapper.create_mapper_compl_dna()`` and then reversed.
     """
     mapper = CharsMapper.create_mapper_compl_dna()
     for path in args.filenames:
         for record in fasta.read(path):
             record.cmt += ".RC"
             complemented = mapper.transcode(record.seq)
             record.seq = complemented[::-1]
             fasta.write(outfile, record)
Beispiel #4
0
 def render(cls, args, outfile):
     """Write the ``primer.render_primer3_input`` text of every input record.

     Records from all ``args.filenames`` are processed in order; each piece
     of text written to *outfile* is guaranteed to end with a newline.
     """
     readers = [fasta.read(path) for path in args.filenames]
     for record in itertools.chain.from_iterable(readers):
         block = primer.render_primer3_input(rec=record)
         # Terminate the block ourselves when the renderer did not.
         outfile.write(block if block.endswith('\n') else block + '\n')
Beispiel #5
0
 def process(cls, args, outfile, filename):
     """Write one ``<gc>TAB<cmt>`` line per record read from *filename*.

     The value comes from ``statis.calc_gc_content`` and is multiplied by
     100 when ``args.percent`` is set; it is printed as a fixed-point number
     with ``args.precision`` decimals.
     """
     template = '{0:.{1}f}\t{2}\n'
     for record in fasta.read(filename):
         content = statis.calc_gc_content(record['seq'])
         shown = content * 100 if args.percent else content
         outfile.write(template.format(shown, args.precision, record['cmt']))
Beispiel #6
0
 def render(cls, args, outfile):
     """Translate every input record with the table chosen by ``args.table``.

     A sequence whose length is not a multiple of 3 triggers a warning on
     stderr (unless ``args.quiet``); it is then either dropped
     (``args.skip``) or truncated to the largest multiple of 3 before being
     translated.  Records are modified in place and written to *outfile*.
     """
     codon_table = CodonTable(translate.get_transl_table(args.table))
     for path in args.filenames:
         for record in fasta.read(path, args.concise):
             length = len(record.seq)
             overhang = length % 3
             if overhang:
                 if not args.quiet:
                     warning = (
                         'warning: lenth of seq not a multiple of 3\n'
                         + '^debug:\t{}\t{}\t{}'.format(length, path, record.cmt)
                     )
                     print(warning, file=sys.stderr)
                 if args.skip:
                     continue
                 # Drop the trailing partial codon.
                 record.seq = record.seq[:length - overhang]
             record.seq = codon_table.translate(record.seq)
             fasta.write(outfile, record)
Beispiel #7
0
def test_align():
    """Time repeated alignment of the two test sequences and print the result.

    Builds an aligner from the 'pam200' substitution matrix, aligns the two
    sequences of ``data/rmlA.2x.fa`` ``repeat`` times inside a ``Timer``,
    then prints the alignment to stderr in 60-column chunks.
    """
    istring, jstring, submatr = submat.read('pam200')
    engine = aligner.Aligner.from_submatr(istring, jstring, submatr)

    fasta_path = locate_tests('data/rmlA.2x.fa')
    iseq, jseq = fasta.read(fasta_path, castfunc=lambda x: [rec.seq for rec in x])

    with Timer('align'):
        for _ in range(repeat):
            # istring/jstring are deliberately rebound to the aligned strings.
            _matrix, istring, jstring = engine.align(iseq, jseq, backtrack=True)

    match_line = aligner.gen_match_string(istring, jstring)

    newline = six.b('\n')
    chunks = molbiox.frame.streaming.chunkwise(60, istring, match_line, jstring)
    for rows in chunks:
        text = newline.join(rows).decode('ascii')
        print(text, end='\n\n', file=sys.stderr)
Beispiel #8
0
 def process(cls, args, outfile, filename):
     """Write one tab-separated ``length, cmt`` line per record in *filename*.

     The length is ``len(rec.seq)``; *args* is not used.
     """
     template = '{}\t{}\n'
     for record in fasta.read(filename):
         seq_length = len(record.seq)
         outfile.write(template.format(seq_length, record.cmt))