def test008(self):
    """Zero-length nucleotides.

    An empty input sequence is expected to produce no codons, and both
    ``translate.translate`` and ``translate.translateRaw`` are expected to
    return the empty string for it.
    """
    s = ""
    c = list(translate.codons(s))
    # assertEqual (rather than assertTrue(a == b)) reports both operands
    # when the check fails.
    self.assertEqual(len(c), 0)
    self.assertEqual(translate.translate(s), "")
    self.assertEqual(translate.translateRaw(s), "")
def test004(self):
    """Translation with problems.

    The input mixes upper/lower case and contains an ``NNN`` codon and a
    ``TAA`` codon before the final codon. ``translate.translate`` is
    expected to give up and return ``None``, whereas
    ``translate.translateRaw`` is expected to translate codon by codon,
    emitting ``bad_aa`` where a codon cannot be translated.
    """
    s = "ATGCatTCTNNNTAAAGA"
    self.assertIsNone(translate.translate(s))
    # Expected per-codon result: ATG->M, Cat->H, TCT->S, NNN->bad_aa,
    # TAA->*, AGA->R.
    self.assertEqual(translate.translateRaw(s, bad_aa="@"), "MHS@*R")
def test008(self):
    """Zero-length nucleotides.

    An empty input sequence is expected to produce no codons, and both
    ``translate.translate`` and ``translate.translateRaw`` are expected to
    return the empty string for it.
    """
    s = ''
    c = list(translate.codons(s))
    # assertEqual (rather than assertTrue(a == b)) reports both operands
    # when the check fails.
    self.assertEqual(len(c), 0)
    self.assertEqual(translate.translate(s), '')
    self.assertEqual(translate.translateRaw(s), '')
def test004(self):
    """Translation with problems.

    The input mixes upper/lower case and contains an ``NNN`` codon and a
    ``TAA`` codon before the final codon. ``translate.translate`` is
    expected to give up and return ``None``, whereas
    ``translate.translateRaw`` is expected to translate codon by codon,
    emitting ``bad_aa`` where a codon cannot be translated.
    """
    s = 'ATGCatTCTNNNTAAAGA'
    self.assertIsNone(translate.translate(s))
    # Expected per-codon result: ATG->M, Cat->H, TCT->S, NNN->bad_aa,
    # TAA->*, AGA->R.
    self.assertEqual(translate.translateRaw(s, bad_aa='@'), 'MHS@*R')
def test009(self):
    """Odd-length coding sequence.

    The 17-nucleotide input is not a multiple of three. ``translate.codons``
    is expected to yield only the five complete codons (the last being
    ``GCA``), ``translate.translate`` is expected to return ``None``, and
    ``translate.translateRaw`` is expected to translate the complete codons.
    """
    s = "TCTCGTAAGTACGCAGC"
    c = list(translate.codons(s))
    # The trailing two nucleotides ("GC") must not appear as a codon.
    self.assertEqual(len(c), 5)
    self.assertEqual(c[-1], "GCA")
    self.assertIsNone(translate.translate(s))
    self.assertEqual(translate.translateRaw(s), "SRKYA")
def test009(self):
    """Odd-length coding sequence.

    The 17-nucleotide input is not a multiple of three. ``translate.codons``
    is expected to yield only the five complete codons (the last being
    ``GCA``), ``translate.translate`` is expected to return ``None``, and
    ``translate.translateRaw`` is expected to translate the complete codons.
    """
    s = 'TCTCGTAAGTACGCAGC'
    c = list(translate.codons(s))
    # The trailing two nucleotides ('GC') must not appear as a codon.
    self.assertEqual(len(c), 5)
    self.assertEqual(c[-1], 'GCA')
    self.assertIsNone(translate.translate(s))
    self.assertEqual(translate.translateRaw(s), 'SRKYA')
def test007(self):
    """Zero-length protein.

    A two-nucleotide input holds no complete codon.
    ``translate.translate`` is expected to return ``None`` for it, while
    ``translate.translateRaw`` is expected to return the empty string.
    """
    s = "AT"
    # assertIsNone / assertEqual report the actual value on failure,
    # unlike assertTrue(<comparison>).
    self.assertIsNone(translate.translate(s))
    self.assertEqual(translate.translateRaw(s), "")
for aa in aas])) # fractions data_outs.write("\t" + "\t".join(["n.{}".format(aa) for aa in aas])) # numbers data_outs.write("\n") if options.merge: data_outs.write("# Merging {:d} sequences into one\n".format( len(seqs))) seqs = [''.join(seqs)] headers = ["merged"] gap = '-' for (h, seq) in zip(headers, seqs): if options.query: if not options.query in h: continue if options.translate: seq = translate.translateRaw(seq) if options.degap: seq = seq.replace(gap, '') if not options.exclude: if not options.end_aa is None and options.end_aa <= len(seq): seq = seq[0:(options.end_aa)] seq = seq[(options.begin_aa - 1):] else: # Exclude the sequence assert options.end_aa < len(seq) assert options.begin_aa < options.end_aa seq = seq[0:(options.begin_aa - 1)] + seq[(options.end_aa):] degapped_seq = seq.replace(gap, "") line = "#{}\n{}\t{:d}\t{:1.4f}\t{:1.4f}\t{:1.4f}".format( h, biofile.firstField(h), pp.getLength(degapped_seq), pp.getCharge(degapped_seq, options.pH), pp.getIsoelectricPoint(degapped_seq),
type=float, default=7.2, help="pH for charge determination") options = parser.parse_args() outs = util.OutStreams() if not options.out_fname is None: outf = file(os.path.expanduser(options.out_fname), 'w') outs.addStream(outf) else: outs.addStream(sys.stdout) pp = protprop.ProteinProperties() if not options.sequence is None: if options.translate: seq = translate.translateRaw(options.sequence) else: seq = options.sequence seq_dict = {"input": seq} else: # Load from FASTA seq_dict = biofile.readFASTADict(options.in_fname) if options.translate: for k in seq_dict.keys(): seq_dict[k] = translate.translate(seq_dict[k]) outs.write("# {}\n".format(options)) outs.write("pos\taa\tcharge\n") n_seqs = len(seq_dict.keys()) for (seqid, seq) in seq_dict.items(): if n_seqs > 1:
"-t", "--translate", dest="translate", action="store_true", default=False, help="translate the input sequences?" ) parser.add_argument("--pH", dest="pH", type=float, default=7.2, help="pH for charge determination") options = parser.parse_args() outs = util.OutStreams() if not options.out_fname is None: outf = file(os.path.expanduser(options.out_fname), "w") outs.addStream(outf) else: outs.addStream(sys.stdout) pp = protprop.ProteinProperties() if not options.sequence is None: if options.translate: seq = translate.translateRaw(options.sequence) else: seq = options.sequence seq_dict = {"input": seq} else: # Load from FASTA seq_dict = biofile.readFASTADict(options.in_fname) if options.translate: for k in seq_dict.keys(): seq_dict[k] = translate.translate(seq_dict[k]) outs.write("# {}\n".format(options)) outs.write("pos\taa\tcharge\n") n_seqs = len(seq_dict.keys()) for (seqid, seq) in seq_dict.items(): if n_seqs > 1:
dnaseq = translate.randomReverseTranslate(seq) #dnaseq = translate.reverseTranslate(seq) assert(translate.translate(dnaseq)==seq) fullseq = options.prefix + dnaseq + options.suffix mutant_seqs[name] = (dnaseq, fullseq) #name = biofile.firstField(hdr) line = "{name:s}\t{dna:s}\tL={length:d}bp, {desc:s}\n".format(name=name, dna=fullseq, length=len(fullseq), desc=hdr) data_outs.write(line) n_written += 1 data_outs.write("\n\n# Confirmation details:\n") for (hdr,seq) in zip(headers,seqs): (name, props) = parseHeader(hdr) (mutant_seq, fullseq) = mutant_seqs[name] prot = translate.translate(mutant_seq) fullprots = [translate.translateRaw(fullseq[i:]) for i in range(3)] allnames = ['core','frame 1','frame 2','frame 3'] data_outs.write("# {name:s} core and 3-frame full translation\n".format(name=name)) for (i,p) in enumerate([prot] + fullprots): line = "# {name:s} {prot:s}\n".format(prot=p, name=allnames[i]) data_outs.write(line) data_outs.write("#\n") # Write out stopping time data_outs.write("# Run finished {}\n".format(util.timestamp())) # Shut down output if not options.out_fname is None: info_outs.write("# Wrote {} lines to {}\n".format(n_written, options.out_fname)) outf.close()
data_outs.write("orf\tlength\tcharge\tpI\thydrophobicity") if not aas is None: data_outs.write("\t"+"\t".join(["f.{}".format(aa) for aa in aas])) # fractions data_outs.write("\t"+"\t".join(["n.{}".format(aa) for aa in aas])) # numbers data_outs.write("\n") if options.merge: data_outs.write("# Merging {:d} sequences into one\n".format(len(seqs))) seqs = [''.join(seqs)] headers = ["merged"] gap = '-' for (h,seq) in zip(headers,seqs): if options.query: if not options.query in h: continue if options.translate: seq = translate.translateRaw(seq) if options.degap: seq = seq.replace(gap,'') if not options.exclude: if not options.end_aa is None and options.end_aa <= len(seq): seq = seq[0:(options.end_aa)] seq = seq[(options.begin_aa-1):] else: # Exclude the sequence assert options.end_aa < len(seq) assert options.begin_aa < options.end_aa seq = seq[0:(options.begin_aa-1)] + seq[(options.end_aa):] degapped_seq = seq.replace(gap,"") line = "#{}\n{}\t{:d}\t{:1.4f}\t{:1.4f}\t{:1.4f}".format(h, biofile.firstField(h), pp.getLength(degapped_seq), pp.getCharge(degapped_seq, options.pH), pp.getIsoelectricPoint(degapped_seq), pp.getHydrophobicity(degapped_seq)) if not aas is None: counts = Composition() counts.initFromSequence(degapped_seq)
def test007(self):
    """Zero-length protein.

    A two-nucleotide input holds no complete codon.
    ``translate.translate`` is expected to return ``None`` for it, while
    ``translate.translateRaw`` is expected to return the empty string.
    """
    s = 'AT'
    # assertIsNone / assertEqual report the actual value on failure,
    # unlike assertTrue(<comparison>).
    self.assertIsNone(translate.translate(s))
    self.assertEqual(translate.translateRaw(s), '')