# Extend every existing sequence with the matching entry from each file named
# in options.append_fasta, then write the combined records as FASTA.
new_headers = headers
# new_seqs is the same list object as seqs, so seqs is extended in place too.
new_seqs = seqs
for append_fname in options.append_fasta:
    file_exists = os.path.isfile(append_fname)
    if not file_exists:
        raise IOError("# Error: file {} does not exist".format(append_fname))
    # Only the read needs the open handle.
    with open(append_fname, 'r') as inf:
        (app_headers, app_seqs) = biofile.readFASTA(inf)
    info_outs.write("# Read {:d} sequences\n".format(len(app_seqs)))
    # Entries are paired by position, so the counts must agree.
    assert len(app_seqs) == len(new_seqs)
    if options.check_headers:
        # Compare against the original headers, position by position.
        for (base_hdr, extra_hdr) in zip(headers, app_headers):
            assert base_hdr == extra_hdr, "# Error: headers do not match:\n\t{}\n\t{}".format(base_hdr, extra_hdr)
    for (pos, _unused) in enumerate(new_seqs):
        new_seqs[pos] += app_seqs[pos]

# Write output
biofile.writeFASTA(new_seqs, fasta_outs, headers=new_headers)
n_written = len(new_seqs)

# Write out stopping time
data_outs.write("# Run finished {}\n".format(util.timestamp()))

# Shut down output; the summary and close only apply when a file was requested.
if options.fasta_out_fname is not None:
    info_outs.write("# Wrote {} entries to {}\n".format(n_written, options.fasta_out_fname))
    outf.close()
# Fetch a FASTA file for an OrthoDB group (when options.orthodb_id is given),
# read it back from local_fname, and report what was done.
if local_fname is None:
    # Integer bound: random.randint rejects float arguments such as 1e20 on
    # recent Python versions; 10**20 is the same value as an int.
    local_fname = "tmp{:d}".format(random.randint(0, 10**20))

# Fetch file from OrthoDB
if options.orthodb_id is not None:
    # urlretrieve lives in urllib on Python 2 and urllib.request on Python 3.
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve
    remote_fname = "http://cegg.unige.ch/orthodb7/fasta.fasta?ogs={:s}".format(
        options.orthodb_id)
    urlretrieve(remote_fname, local_fname)
    # Function-call form prints the same text on Python 2 and Python 3.
    print("# Downloaded {} to {}".format(remote_fname, local_fname))
    info_outs.write("# Downloaded {} to {}\n".format(
        remote_fname, local_fname))

# Read input
if not os.path.isfile(local_fname):
    raise IOError("# Error: file {} does not exist".format(local_fname))
with open(local_fname, 'r') as inf:
    (headers, seqs) = biofile.readFASTA(inf)

# The sequences are re-emitted through fasta_outs only when no output file
# name was given.
# NOTE(review): presumably local_fname already is the output file otherwise -- confirm.
if options.fasta_out_fname is None:
    # Write data
    biofile.writeFASTA(seqs, fasta_outs, headers=headers)

# Write out stopping time
info_outs.write("# Run finished {}\n".format(util.timestamp()))

# Shut down output
if options.fasta_out_fname is not None:
    info_outs.write("# Fetched {} sequences to {}\n".format(
        len(headers), options.fasta_out_fname))
    outf.close()
# Concatenate, position by position, the sequences of every file listed in
# options.append_fasta onto the sequences already loaded, then write them out.
new_headers = headers
new_seqs = seqs  # alias, not a copy: appending below also changes seqs
for append_fname in options.append_fasta:
    if not os.path.isfile(append_fname):
        missing_msg = "# Error: file {} does not exist".format(append_fname)
        raise IOError(missing_msg)
    with open(append_fname, 'r') as inf:
        (app_headers, app_seqs) = biofile.readFASTA(inf)
        n_read = len(app_seqs)
        info_outs.write("# Read {:d} sequences\n".format(n_read))
        # Positional pairing requires one appended entry per existing entry.
        assert n_read == len(new_seqs)
        if options.check_headers:
            for (expected, found) in zip(headers, app_headers):
                assert expected == found, "# Error: headers do not match:\n\t{}\n\t{}".format(
                    expected, found)
        for (idx, _current) in enumerate(new_seqs):
            new_seqs[idx] += app_seqs[idx]

# Write output
biofile.writeFASTA(new_seqs, fasta_outs, headers=new_headers)
n_written = len(new_seqs)

# Write out stopping time
finish_line = "# Run finished {}\n".format(util.timestamp())
data_outs.write(finish_line)

# Shut down output
if options.fasta_out_fname is not None:
    info_outs.write("# Wrote {} entries to {}\n".format(
        n_written, options.fasta_out_fname))
    outf.close()
# Fragment: builds a coding sequence for prot_seq and writes results as FASTA.
# For each amino acid one codon is drawn at random from codons[aa]; when
# options.avoid_sequence is set and more than one candidate exists, the codon
# at the same position in orig_codons is removed from the candidates first.
# The result is asserted to translate back to prot_seq, then stored with a
# header line reporting Fop, CAI and GC for the new sequence.
# NOTE(review): codons_to_choose_from is the same object as codons[aa], so
# .remove() edits the shared candidate list; a codon removed at one position
# stays unavailable at every later position that uses codons[aa]. Copy the
# list before removing if only the current position should avoid it -- confirm.
# NOTE(review): the closing message formats len(opt_seqs) even on the
# options.reverse_translate path, where nothing visible here fills opt_seqs -- confirm.
# NOTE(review): this line opens mid-block (the `elif` has no visible `if`), so
# its indentation cannot be reconstructed from this fragment alone.
opt_seq = '' for (aai, aa) in enumerate(prot_seq): #opt_seq += opt_codon_dict[aa] #random.choice(codons[aa]) codons_to_choose_from = codons[aa] # If avoiding codons and we have a choice, eliminate the avoided codon. if options.avoid_sequence and len( codons_to_choose_from) > 1: try: codons_to_choose_from.remove(orig_codons[aai]) except ValueError: # codon to be avoided not among codon choices anyway pass opt_seq += random.choice(codons_to_choose_from) assert translate.translate(opt_seq) == prot_seq header_line = "{0} Fop = {1:.4f}, CAI = {2:.4f}, GC = {3:.2f}".format( id, cai.getFop(opt_seq, opt_codons), cai_fxn(opt_seq), cai.getGC(opt_seq)) info_outs.write("# Optimized {}\n".format(header_line)) opt_headers.append(header_line) opt_seqs.append(opt_seq) out_seqs = opt_seqs out_headers = opt_headers biofile.writeFASTA(out_seqs, data_outs, headers=out_headers) elif options.reverse_translate: # Write out sequences but don't optimize (out_headers, out_seqs) = zip(*seqs) biofile.writeFASTA(out_seqs, data_outs, headers=out_headers) if not options.out_fname is None: info_outs.write("# Wrote {0} optimized sequences to {1}\n".format( len(opt_seqs), options.out_fname)) outf.close()
# Iterate over tree and write out FASTA in tree-sorted order: walk the tree's
# terminal nodes, look each name up in header_dict / seq_dict, and collect the
# matching records in that order.
n_written = 0
sorted_headers = []
sorted_seqs = []
for indiv in tree.get_terminals():
    spec = indiv.name.strip()
    # str.replace returns a new string; rebind so the backslashes are
    # actually dropped before the lookup (the result used to be discarded).
    spec = spec.replace('\\', '')
    if spec in header_dict:
        hdr = header_dict[spec]
        if options.generate_short_ids:
            # Prefix the header with the short species name.
            hdr = "{} {}".format(short_species_names[spec], hdr)
        seq = seq_dict[spec]
        sorted_headers.append(hdr)
        sorted_seqs.append(seq)
    else:
        # Terminal with no matching record: report it and carry on.
        info_outs.write("# Can't find {}\n".format(spec))

if options.fasta_out_fname is not None:
    # NOTE(review): the file opened here is not closed within this fragment --
    # confirm it is closed (or flushed at exit) further down.
    fasta_outs = util.OutStreams(
        open(os.path.expanduser(options.fasta_out_fname), 'w'))
    biofile.writeFASTA(sorted_seqs, fasta_outs, headers=sorted_headers)
    info_outs.write("# Wrote {} entries to {}\n".format(
        len(sorted_seqs), options.fasta_out_fname))

# Write out stopping time
data_outs.write("# Run finished {}\n".format(util.timestamp()))
# NOTE(review): this message presumably belongs to a branch whose condition
# sits before this fragment -- confirm against the full script.
info_outs.write("# No starting position or sequence given; nothing to do. Exiting\n")

# Cut every sequence down to [start_index:end_index], or, when
# options.exclude is set, to everything outside that window.
new_headers = []
new_seqs = []
for (hdr, full_seq) in zip(headers, seqs):
    if options.exclude:
        # Exclude the sequence: keep what lies before and after the window.
        piece = full_seq[0:start_index] + full_seq[end_index:]
    else:
        piece = full_seq[start_index:end_index]
    new_seqs.append(piece)
    new_headers.append(hdr)
seqs = new_seqs
headers = new_headers

# Write output
biofile.writeFASTA(seqs, fasta_outs, headers=headers)
n_written = len(seqs)

# Write out stopping time
data_outs.write("# Run finished {}\n".format(util.timestamp()))

# Shut down output
if options.fasta_out_fname is not None:
    info_outs.write("# Wrote {} entries to {}\n".format(n_written, options.fasta_out_fname))
    outf.close()
# Fragment: fills codons[aa] with the codons for aa whose relad_dict value is at
# least options.min_rel_adapt, then builds a coding sequence for prot_seq by
# drawing one codon at random per amino acid from codons[aa]. When
# options.avoid_sequence is set and more than one candidate exists, the codon
# at the same position in orig_codons is removed from the candidates first.
# The result is asserted to translate back to prot_seq, then stored with a
# header line reporting Fop, CAI and GC, and the records are written as FASTA.
# NOTE(review): codons_to_choose_from is the same list object as codons[aa],
# so .remove() shrinks the shared candidate list; a codon removed at one
# position stays unavailable at every later position that uses codons[aa].
# Copy the list before removing if only the current position should avoid it -- confirm.
# NOTE(review): the closing message formats len(opt_seqs) even on the
# options.reverse_translate path, where nothing visible here fills opt_seqs -- confirm.
# NOTE(review): this line opens mid-block (the `elif` has no visible `if`), so
# its indentation cannot be reconstructed from this fragment alone.
codons[aa] = [c for c in translate.getCodonsForAA(aa, rna=False) if relad_dict[c] >= options.min_rel_adapt] opt_seq = '' for (aai, aa) in enumerate(prot_seq): #opt_seq += opt_codon_dict[aa] #random.choice(codons[aa]) codons_to_choose_from = codons[aa] # If avoiding codons and we have a choice, eliminate the avoided codon. if options.avoid_sequence and len(codons_to_choose_from)>1: try: codons_to_choose_from.remove(orig_codons[aai]) except ValueError: # codon to be avoided not among codon choices anyway pass opt_seq += random.choice(codons_to_choose_from) assert translate.translate(opt_seq) == prot_seq header_line = "{0} Fop = {1:.4f}, CAI = {2:.4f}, GC = {3:.2f}".format(id, cai.getFop(opt_seq, opt_codons), cai_fxn(opt_seq), cai.getGC(opt_seq)) info_outs.write("# Optimized {}\n".format(header_line)) opt_headers.append(header_line) opt_seqs.append(opt_seq) out_seqs = opt_seqs out_headers = opt_headers biofile.writeFASTA(out_seqs, data_outs, headers=out_headers) elif options.reverse_translate: # Write out sequences but don't optimize (out_headers,out_seqs) = zip(*seqs) biofile.writeFASTA(out_seqs, data_outs, headers=out_headers) if not options.out_fname is None: info_outs.write("# Wrote {0} optimized sequences to {1}\n".format(len(opt_seqs), options.out_fname)) outf.close()