Beispiel #1
0
def add_seq_to_reads(read_file, out_file):
    """Copy a FASTA file, appending each record's sequence to its name.

    Every record read from ``read_file`` is written to ``out_file`` with its
    name rewritten as ``<name>;<seq>``; the sequence itself is unchanged.
    """
    with open(read_file) as src, open(out_file, 'w') as dst:
        # All renamed records are collected before the writer is invoked.
        renamed = [(label + ';' + bases, bases)
                   for label, bases in GeneralSeqTools.fasta_reader(src)]
        GeneralSeqTools.fasta_writer(dst, renamed)
def test_fasta_writer():
    """fasta_writer emits '>name' / sequence lines, each newline-terminated."""
    records = [('test1', 'ATCTGCTAGTCGAATCGAGTAGT'),
               ('test2', 'ATCGATGC')]
    expected = ('>test1\n'
                'ATCTGCTAGTCGAATCGAGTAGT\n'
                '>test2\n'
                'ATCGATGC\n')

    buf = StringIO()
    GeneralSeqTools.fasta_writer(buf, records)

    # getvalue() returns the whole buffer regardless of stream position.
    eq_(expected, buf.getvalue())
def run_mafft(inseqs):
    """Align sequences with the external ``mafft`` program.

    The sequences are written to a temporary FASTA file, ``mafft`` is run on
    that file, and its standard output is parsed back into records.

    Args:
        inseqs: sequence of ``(name, sequence)`` pairs. It is traversed once
            here to record the names and passed again to the FASTA writer,
            so it should be re-iterable (e.g. a list), not a one-shot
            iterator.

    Returns:
        A list of ``(name, aligned_sequence)`` pairs in the same order as
        ``inseqs``.

    Raises:
        KeyError: if a name from ``inseqs`` is absent from the parsed output.
        Whatever ``check_output`` raises when the command fails (presumably
        ``subprocess.CalledProcessError`` -- confirm against the file's
        imports).
    """
    orig_order = [name for name, _ in inseqs]
    with NTF(suffix='.fasta') as handle:
        GeneralSeqTools.fasta_writer(handle, inseqs)
        # Flush Python's buffer and the OS cache so the external process
        # reads the complete file.
        handle.flush()
        os.fsync(handle.fileno())

        # Build argv directly instead of splitting a formatted string, so a
        # temp path containing spaces or quotes stays a single argument.
        cmd = ['mafft', '--quiet', '--op', '10', '--ep', '0.123', handle.name]
        out = check_output(cmd)

    # Re-key the output by name so the result follows the input order.
    out_dict = dict(GeneralSeqTools.fasta_reader(StringIO(out)))

    return [(name, out_dict[name]) for name in orig_order]
           1, counts = mot.counts)
 # Logo built from the reverse-complemented motif's counts; no input file is
 # passed (first argument is None).
 make_logo(None,
           pwm_name_r,
           fix_name,
           1, counts = mot.reverse_complement().counts)
 
 # When more than half of `mask` is set, `mask` itself selects R5 and its
 # complement selects X4; otherwise the roles are swapped.
 # (presumably `mask` is a boolean array/Series -- TODO confirm)
 if mask.mean() > 0.5:
     r5mask, x4mask = (mask, ~mask)
 else:
     r5mask, x4mask = (~mask, mask)
 # Base paths (no extension) for the per-group FASTA and PNG outputs.
 x4_name = '/home/will/SubCData/TFfasta/X4-%s-%s' % (fix_name, sub)
 r5_name = '/home/will/SubCData/TFfasta/R5-%s-%s' % (fix_name, sub)
 with open(x4_name+'.fasta', 'w') as handle:
     x4_seqs = pred_counts['Seqs'][col][x4mask].dropna().to_dict().items()
     # NOTE(review): x4_scores is not used in the visible part of this fragment.
     x4_scores = pred_counts['Scores'][col][x4mask].astype(float).dropna()
     GeneralSeqTools.fasta_writer(handle, x4_seqs)
 # No X4 sequences: delete the just-created FASTA file and skip the rest of
 # this iteration (the enclosing loop header is outside this view), so no R5
 # output is produced either.
 if len(x4_seqs) == 0:
     os.remove(x4_name+'.fasta')
     continue
 make_logo(x4_name+'.fasta',
           x4_name+'.png',
           'X4-%s-%s' % (sub, col),
           start_pos)
 
 with open(r5_name+'.fasta', 'w') as handle:
     r5_seqs = pred_counts['Seqs'][col][r5mask].dropna().to_dict().items()
     # NOTE(review): r5_scores is not used in the visible part of this fragment.
     r5_scores = pred_counts['Scores'][col][r5mask].astype(float).dropna()
     GeneralSeqTools.fasta_writer(handle, r5_seqs)
 make_logo(r5_name+'.fasta',
           r5_name+'.png',
           'R5-%s-%s' % (sub, col),
# <codecell>

# One row per 'GI', one column per 'Prot'; each cell holds the first 'Seq'
# value found for that (GI, Prot) pair.
seq_df = pd.DataFrame(seqs).pivot_table(values='Seq',
                                        rows='GI',
                                        cols='Prot',
                                        aggfunc='first')

# <codecell>

from Bio import Seq
from Bio.Alphabet import generic_dna
# Quick check: translate the single codon 'ATG' and evaluate its string form
# (as the last bare expression, this is what the notebook cell would display).
res = Seq.Seq('ATG', alphabet=generic_dna).translate()
res.tostring()

# <codecell>

def translate(inseq):
    """Translate a DNA string and return the translated sequence as a string."""
    dna = Seq.Seq(inseq, alphabet=generic_dna)
    protein = dna.translate()
    return protein.tostring()
# Keep only rows where all five listed columns are present, then translate
# the 'Tat_2' column of those rows.
benj_seqs = (seq_df
             .dropna(subset=['LTR', 'Tat_1', 'Tat_2', 'Vpr', 'V3'])['Tat_2']
             .map(translate))

# <codecell>

# Dump benj_seqs to a FASTA file, passing the (key, value) pairs of its dict
# form to the writer as the records.
with open('/home/will/Downloads/tat2_for_benj.fasta', 'w') as handle:
    GeneralSeqTools.fasta_writer(handle, benj_seqs.to_dict().items())

# <codecell>


            ablocks.append(run_mafft(block))
        inseqs = join_blocks(ablocks)
    return inseqs

# <codecell>

# Align the raw sequences via run_mafft.
aligned_seqs = run_mafft(raw_seqs)

# <codecell>

# Post-process the alignment with refine_alignment.
refined = refine_alignment(aligned_seqs)

# <codecell>

# Save the refined alignment as FASTA.
with open('/home/will/SubCData/refined.fasta', 'w') as handle:
    GeneralSeqTools.fasta_writer(handle, refined)
    
    
    
    

# <codecell>

# NOTE: rebinds `refined`; if the refine_alignment cell was run earlier, its
# result is replaced by the joined blocks here.
refined = join_blocks(aligned_blocks)

# <codecell>

# Peek at the first entry of the alignment.
aligned_seqs[0]

# <codecell>