def render(cls, args, outfile):
    """Filter the FASTA records of every input file and write them out.

    For each name in ``args.filenames`` the records are read with
    ``fasta.read(fn, args.concise)`` and then passed, in this order,
    through the optional filters selected on ``args``:

    * ``args.velvet``          -> ``cls.filter_velvet_concise``
    * ``args.insert_filename`` -> ``cls.filter_insert_filename``
    * ``args.regex``           -> ``cls.filter_regex``

    The result is written to *outfile* with ``fasta.write`` using
    ``args.width`` as the line width.

    Exits through ``sys.exit`` when ``args.regex`` does not start with one
    of the operators ``m``, ``x`` or ``s``, or when ``Regexon.perl``
    rejects the expression with ``ValueError``.
    """
    # The usage text below advertises the 'm/', 'x/' and 's/' forms, so
    # these are the operator letters that have to be accepted here.
    if args.regex and args.regex[0] not in 'mxs':
        msg = """
        possible regex forms:
            1. 'm/pattern/modifiers'      - select matching ones
            2. 'x/pattern/modifiers'      - select non-matching ones
            3. 's/pattern/repl/modifiers' - apply replacement on cmt
        """
        print(msg, file=sys.stderr)
        sys.exit('error: invalid regex')

    for fn in args.filenames:
        recs = fasta.read(fn, args.concise)

        if args.velvet:
            recs = cls.filter_velvet_concise(recs)

        if args.insert_filename:
            ifn = args.insert_filename
            recs = cls.filter_insert_filename(recs, fn, ifn)

        if args.regex:
            try:
                regexon = Regexon.perl(args.regex)
            except ValueError as e:
                # the exception is handed to sys.exit as the exit message
                sys.exit(e)
            recs = cls.filter_regex(recs, regexon)

        fasta.write(outfile, recs, linewidth=args.width)
def render(args, outfile):
    """Write the FASTA records whose titles are listed in ``args.listfile``.

    Records are read from every file in ``args.filenames`` and matched by
    the first whitespace-separated word of their title (``rec.cmt``).
    Each line of the list file contributes one key: its first run of
    characters that are neither ``>`` nor whitespace.

    * ``args.order`` false: matching records are written as they are
      encountered in the input files (streaming).
    * ``args.order`` true: matching records are written in the order the
      keys appear in the list file.  Matching records are kept in memory
      until all input has been read.  If several records share a key the
      last one read is written.

    Keys that match no record are ignored in both modes.  With
    ``args.concise`` the written record is reduced to
    ``dict(cmt=<key>, seq=<sequence>)``.
    """
    # compiled once instead of once per list-file line
    key_pattern = re.compile(r'[^>\s\n]+')

    def parse_key(l):
        """Return the key found in list-file line *l*, or None."""
        mat = key_pattern.search(l)
        return mat.group() if mat else None

    def first_word(title):
        """Return the first word of *title*, or None for a blank title."""
        # split()[0] is used so titles carrying a description still match
        fields = title.split()
        return fields[0] if fields else None

    recgens = [fasta.read(fn, False) for fn in args.filenames]
    records = itertools.chain(*recgens)

    if not args.order:
        # simple and fast: stream records, test membership in a set
        with open(args.listfile) as listfile:
            keys = {parse_key(l) for l in listfile}
        keys.discard(None)
        for rec in records:
            cmt = first_word(rec.cmt)
            if cmt in keys:
                outrec = dict(cmt=cmt, seq=rec.seq) if args.concise else rec
                fasta.write(outfile, outrec)
        return

    # preserve the order of the keys in the list file
    selection = collections.OrderedDict()
    with open(args.listfile) as listfile:
        for l in listfile:
            key = parse_key(l)
            if key and key not in selection:
                selection[key] = None

    for rec in records:
        cmt = first_word(rec.cmt)
        # this consumes a lot of memory: every selected record is retained
        if cmt in selection:
            selection[cmt] = rec

    for cmt, rec in selection.items():
        if rec is None:
            # key listed but absent from the input files: nothing to write
            continue
        outrec = dict(cmt=cmt, seq=rec.seq) if args.concise else rec
        fasta.write(outfile, outrec)
def render(cls, args, outfile):
    """Write a reversed, transcoded copy of every record in the input files.

    Each record read from ``args.filenames`` gets ``".RC"`` appended to its
    title, has its sequence passed through the mapper returned by
    ``CharsMapper.create_mapper_compl_dna()`` and reversed, and is then
    written to *outfile* with ``fasta.write``.
    """
    mapper = CharsMapper.create_mapper_compl_dna()
    # lazily walk all files one after another, record by record
    entries = (entry
               for path in args.filenames
               for entry in fasta.read(path))
    for entry in entries:
        entry.cmt = entry.cmt + ".RC"
        transcoded = mapper.transcode(entry.seq)
        entry.seq = transcoded[::-1]
        fasta.write(outfile, entry)
def render(cls, args, outfile):
    """Write the ``primer.render_primer3_input`` text of each record.

    Readers for all files in ``args.filenames`` are created up front and
    then consumed in order.  The text produced for a record is written to
    *outfile*, with a newline appended when it does not already end in one.
    """
    sources = [fasta.read(path) for path in args.filenames]
    for record in itertools.chain.from_iterable(sources):
        block = primer.render_primer3_input(rec=record)
        # make sure consecutive records never share a line
        outfile.write(block if block.endswith('\n') else block + '\n')
def process(cls, args, outfile, filename):
    """Write one tab-separated "GC value, title" line per record.

    The value is whatever ``statis.calc_gc_content`` returns for the
    record's ``'seq'`` entry, multiplied by 100 when ``args.percent`` is
    set, and formatted as a fixed-point number with ``args.precision``
    decimals.
    """
    template = '{0:.{1}f}\t{2}\n'
    for entry in fasta.read(filename):
        fraction = statis.calc_gc_content(entry['seq'])
        value = fraction * 100 if args.percent else fraction
        outfile.write(template.format(value, args.precision, entry['cmt']))
def render(cls, args, outfile):
    """Translate every record of the input files and write the result.

    A ``CodonTable`` is built from ``translate.get_transl_table(args.table)``.
    Each record read with ``fasta.read(fn, args.concise)`` has its sequence
    replaced by ``tab.translate(seq)`` and is written to *outfile*.

    Sequences whose length is not a multiple of 3:

    * a warning is printed to stderr unless ``args.quiet`` is set;
    * with ``args.skip`` the record is dropped;
    * otherwise the trailing partial codon is cut off before translating.
    """
    dic = translate.get_transl_table(args.table)
    tab = CodonTable(dic)
    for fn in args.filenames:
        for rec in fasta.read(fn, args.concise):
            length = len(rec.seq)
            remainder = length % 3
            if remainder:
                if not args.quiet:
                    msg = 'warning: length of seq not a multiple of 3\n'
                    msg += '^debug:\t{}\t{}\t{}'.format(length, fn, rec.cmt)
                    print(msg, file=sys.stderr)
                if args.skip:
                    continue
                # drop the incomplete codon at the end
                rec.seq = rec.seq[:length - remainder]
            rec.seq = tab.translate(rec.seq)
            fasta.write(outfile, rec)
def test_align():
    """Time repeated alignments of the two test sequences and print the result.

    Builds an aligner from the ``'pam200'`` data returned by ``submat.read``,
    reads exactly two sequences from ``data/rmlA.2x.fa``, aligns them
    ``repeat`` times (``repeat`` is defined outside this function) inside a
    ``Timer('align')`` block, and prints the aligned strings together with
    their match string to stderr in chunks.
    """
    def only_seqs(records):
        # keep just the sequence of each record
        return [record.seq for record in records]

    upper, lower, scores = submat.read('pam200')
    engine = aligner.Aligner.from_submatr(upper, lower, scores)

    fasta_path = locate_tests('data/rmlA.2x.fa')
    first, second = fasta.read(fasta_path, castfunc=only_seqs)

    with Timer('align'):
        for _round in range(repeat):
            # the aligned strings deliberately rebind the names read above
            _matrix, upper, lower = engine.align(first, second, backtrack=True)

    matches = aligner.gen_match_string(upper, lower)
    newline = six.b('\n')
    chunks = molbiox.frame.streaming.chunkwise(60, upper, matches, lower)
    for rows in chunks:
        # rows are joined as bytes, then decoded for printing
        text = newline.join(rows).decode('ascii')
        print(text, end='\n\n', file=sys.stderr)
def process(cls, args, outfile, filename):
    """Write one tab-separated "sequence length, title" line per record.

    Records are read from *filename* with ``fasta.read`` and the lines are
    written to *outfile* in reading order.
    """
    template = '{}\t{}\n'
    emit = outfile.write
    for entry in fasta.read(filename):
        emit(template.format(len(entry.seq), entry.cmt))