def add_seq_to_reads(read_file, out_file):
    """Copy a FASTA file, appending each sequence to its own record name.

    Every record ``(name, seq)`` read from ``read_file`` is written to
    ``out_file`` as ``(name + ';' + seq, seq)``.

    read_file -- path of the FASTA file to read.
    out_file  -- path of the FASTA file to create (overwritten if present).
    """
    with open(read_file) as src, open(out_file, 'w') as dst:
        # The complete list is built before writing, as the writer is
        # handed all records in one call.
        renamed = [(label + ';' + bases, bases)
                   for label, bases in GeneralSeqTools.fasta_reader(src)]
        GeneralSeqTools.fasta_writer(dst, renamed)
def test_fasta_writer():
    """fasta_writer emits a '>name' line followed by a sequence line per record."""
    records = [('test1', 'ATCTGCTAGTCGAATCGAGTAGT'),
               ('test2', 'ATCGATGC')]
    # Expected text: header line, sequence line, each newline-terminated.
    expected = ''.join('>%s\n%s\n' % record for record in records)

    buf = StringIO()
    GeneralSeqTools.fasta_writer(buf, records)

    # Rewind so the comparison sees everything that was written.
    buf.seek(0)
    written = buf.read()
    eq_(expected, written)
def run_mafft(inseqs):
    """Align sequences with the external ``mafft`` program.

    inseqs -- iterable of ``(name, sequence)`` pairs. It is materialised
              once, so a generator is accepted as well as a list.

    Returns a list of ``(name, aligned_sequence)`` pairs in the same order
    as the input names.

    Raises ``subprocess.CalledProcessError`` if ``mafft`` exits with a
    non-zero status, and ``KeyError`` if a name from the input is missing
    from the MAFFT output.
    """
    # The records are walked twice (names, then the writer); a one-shot
    # iterator would otherwise be exhausted before anything is written.
    inseqs = list(inseqs)
    orig_order = [name for name, _ in inseqs]

    with NTF(suffix='.fasta') as handle:
        GeneralSeqTools.fasta_writer(handle, inseqs)
        # Make sure the data is really on disk before mafft opens the file
        # by name.
        handle.flush()
        os.fsync(handle.fileno())

        # Pass an explicit argv list rather than formatting a command string
        # and re-splitting it, so a temp path containing whitespace or quote
        # characters cannot be broken into several arguments.
        # --op / --ep are MAFFT's gap-penalty settings.
        cmd = ['mafft', '--quiet', '--op', '10', '--ep', '0.123', handle.name]
        # Must run inside the ``with``: the temp file is removed on close.
        out = check_output(cmd)

    out_dict = dict(GeneralSeqTools.fasta_reader(StringIO(out)))
    # Return the records in the caller's order, looked up by name.
    return [(name, out_dict[name]) for name in orig_order]
1, counts = mot.counts) make_logo(None, pwm_name_r, fix_name, 1, counts = mot.reverse_complement().counts) if mask.mean() > 0.5: r5mask, x4mask = (mask, ~mask) else: r5mask, x4mask = (~mask, mask) x4_name = '/home/will/SubCData/TFfasta/X4-%s-%s' % (fix_name, sub) r5_name = '/home/will/SubCData/TFfasta/R5-%s-%s' % (fix_name, sub) with open(x4_name+'.fasta', 'w') as handle: x4_seqs = pred_counts['Seqs'][col][x4mask].dropna().to_dict().items() x4_scores = pred_counts['Scores'][col][x4mask].astype(float).dropna() GeneralSeqTools.fasta_writer(handle, x4_seqs) if len(x4_seqs) == 0: os.remove(x4_name+'.fasta') continue make_logo(x4_name+'.fasta', x4_name+'.png', 'X4-%s-%s' % (sub, col), start_pos) with open(r5_name+'.fasta', 'w') as handle: r5_seqs = pred_counts['Seqs'][col][r5mask].dropna().to_dict().items() r5_scores = pred_counts['Scores'][col][r5mask].astype(float).dropna() GeneralSeqTools.fasta_writer(handle, r5_seqs) make_logo(r5_name+'.fasta', r5_name+'.png', 'R5-%s-%s' % (sub, col),
# <codecell>

# Pivot the records in ``seqs`` into a table indexed by 'GI' with one column
# per 'Prot' value; aggfunc='first' picks a single 'Seq' for each cell.
seq_df = pd.pivot_table(
    pd.DataFrame(seqs),
    rows='GI',
    cols='Prot',
    values='Seq',
    aggfunc='first'
)

# <codecell>

from Bio import Seq
from Bio.Alphabet import generic_dna

# Quick check of the translation call on a single codon.
res = Seq.Seq('ATG', alphabet=generic_dna).translate()
res.tostring()

# <codecell>

def translate(inseq):
    """Translate the DNA string ``inseq`` and return the result as a plain string."""
    dna = Seq.Seq(inseq, alphabet=generic_dna)
    protein = dna.translate()
    return protein.tostring()


# Keep only rows that have a value in all five listed columns, then translate
# the 'Tat_2' column of those rows.
benj_seqs = (
    seq_df[['LTR', 'Tat_1', 'Tat_2', 'Vpr', 'V3']]
    .dropna()['Tat_2']
    .map(translate)
)

# <codecell>

# Write the translated sequences out as FASTA records keyed by the table index.
with open('/home/will/Downloads/tat2_for_benj.fasta', 'w') as handle:
    GeneralSeqTools.fasta_writer(handle, benj_seqs.to_dict().items())

# <codecell>
ablocks.append(run_mafft(block)) inseqs = join_blocks(ablocks) return inseqs # <codecell> aligned_seqs = run_mafft(raw_seqs) # <codecell> refined = refine_alignment(aligned_seqs) # <codecell> with open('/home/will/SubCData/refined.fasta', 'w') as handle: GeneralSeqTools.fasta_writer(handle, refined) # <codecell> refined = join_blocks(aligned_blocks) # <codecell> aligned_seqs[0] # <codecell>