Ejemplo n.º 1
0
def analyse_coh(args):
    """Build a filtered alignment, sequence logo and PSSM for one cohesin query.

    The query sequence is the ``<name>.A`` entry of ``all_cohs.txt``. Every
    sequence in ``Coh_All_Setup-Oct2014-d-for-lizi.txt`` is pairwise-aligned
    against it and kept when ``sm.aln_identity`` is at least 0.4 and
    ``gap_in_essential`` reports no gap for either aligned sequence. The
    query plus the retained hits are written to
    ``<path><name>_passed_thresholds.fasta``, aligned with ``muscle``, drawn
    with ``weblogo`` and finally handed to ``sm.run_psi_blast_for_pssm``.

    Args:
        args: mapping with the keys ``'name'`` (query identifier) and
            ``'path'`` (output location). ``'path'`` is used as a plain
            string prefix, so it must already end with a path separator.

    Raises:
        KeyError: if ``args`` lacks a key or the query is not found in the
            FASTA collections.
        OSError: if the ``muscle`` or ``weblogo`` executable cannot be
            started.
    """
    import subprocess
    import seq_manager as sm

    name = args['name']
    prefix = args['path'] + name
    passed_fasta = prefix + '_passed_thresholds.fasta'
    passed_aln = prefix + '_passed_thresholds.aln'
    passed_eps = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(name[:4], name,
                       sm.read_multi_fastas('all_cohs.txt')[name + '.A']['seq'])
    print(args)
    print(query)
    strands = get_strands_seq(query.name, query.fasta)
    all_coh_fastas = sm.read_multi_fastas('Coh_All_Setup-Oct2014-d-for-lizi.txt')

    # The query itself always heads the list of sequences to be aligned.
    pass_threshold = [{'name': query.name, 'seq': query.fasta}]
    for hit_val in all_coh_fastas.values():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta, hit_val['seq'].upper())
        # The trailing 4 is forwarded to gap_in_essential unchanged; its
        # meaning is defined by that helper (not visible here).
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= 0.4
                and not gap_in_essential(strands, pw_aln[1], 4)
                and not gap_in_essential(strands, pw_aln[0], 4)):
            pass_threshold.append(hit_val)
    print('found %i seqs that passed the thresholds' % len(pass_threshold))

    # NOTE(review): a list is passed here; confirm that
    # sm.write_multi_seqs_to_file accepts a list as well as a name-keyed dict.
    sm.write_multi_seqs_to_file(pass_threshold, passed_fasta)

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    subprocess.call(['muscle', '-in', passed_fasta, '-out', passed_aln])
    aln_fastas = sm.read_multi_fastas(passed_aln)

    # Annotate each logo column with the query's aligned character.
    # NOTE(review): the query was stored under query.name but is looked up
    # under args['name']; this assumes both are equal - TODO confirm.
    annotation = ','.join(aln_fastas[name]['seq'])
    subprocess.call(['weblogo', '-f', passed_aln, '-o', passed_eps,
                     '--annotate', annotation])

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    # Presumably write_seq_from_seq produced <path><name>.fasta, which is the
    # query file given to PSI-BLAST below - verify against seq_manager.
    sm.run_psi_blast_for_pssm(prefix + '.fasta', passed_aln, prefix + '.pssm')
Ejemplo n.º 2
0
def analyse_doc(args):
    """Build a filtered alignment, sequence logo and PSSM for one dockerin query.

    The query sequence is the ``<name>.B`` entry of ``all_docs.fasta``. Every
    sequence in ``all_dockerins_May2015.txt`` is pairwise-aligned against it
    and kept when ``sm.aln_identity`` is at least 0.4 and
    ``gap_in_essential`` reports no gap for either aligned sequence. The
    query plus the retained hits are written to
    ``<path><name>_passed_thresholds.fasta``, aligned with ``muscle``, drawn
    with ``weblogo`` and finally handed to ``sm.run_psi_blast_for_pssm``.

    Args:
        args: mapping with the keys ``'name'`` (query identifier) and
            ``'path'`` (output location). ``'path'`` is used as a plain
            string prefix, so it must already end with a path separator.

    Raises:
        KeyError: if ``args`` lacks a key or the query is not found in the
            FASTA collections.
        OSError: if the ``muscle`` or ``weblogo`` executable cannot be
            started.
    """
    import subprocess
    import seq_manager as sm

    name = args['name']
    prefix = args['path'] + name
    passed_fasta = prefix + '_passed_thresholds.fasta'
    passed_aln = prefix + '_passed_thresholds.aln'
    passed_eps = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(name[:4], name,
                       sm.read_multi_fastas('all_docs.fasta')[name + '.B']['seq'])
    helices = get_helices_seq(name[:4], query.fasta)
    all_doc_fastas = sm.read_multi_fastas('all_dockerins_May2015.txt')

    # The query itself always heads the list of sequences to be aligned.
    pass_thresh = [{'name': name, 'seq': query.fasta}]
    for hit_val in all_doc_fastas.values():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta, hit_val['seq'].upper())
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= 0.4
                and not gap_in_essential(helices, pw_aln[1])
                and not gap_in_essential(helices, pw_aln[0])):
            pass_thresh.append(hit_val)

    # Keyed by name, so a later entry with a repeated name replaces an
    # earlier one.
    sm.write_multi_seqs_to_file({a['name']: a for a in pass_thresh}, passed_fasta)

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    subprocess.call(['muscle', '-in', passed_fasta, '-out', passed_aln])
    aln_fastas = sm.read_multi_fastas(passed_aln)

    # Annotate each logo column with the query's aligned character.
    annotation = ','.join(aln_fastas[name]['seq'])
    subprocess.call(['weblogo', '-f', passed_aln, '-o', passed_eps,
                     '--annotate', annotation])
    print('aln_fastas %s' % (aln_fastas,))

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    # Re-write the alignment in place, additionally passing the query name
    # (presumably so the query is emitted first - TODO confirm in seq_manager).
    sm.write_multi_seqs_to_file(aln_fastas, passed_aln, name)
    # Presumably write_seq_from_seq produced <path><name>.fasta, which is the
    # query file given to PSI-BLAST below - verify against seq_manager.
    sm.run_psi_blast_for_pssm(prefix + '.fasta', passed_aln, prefix + '.pssm')
Ejemplo n.º 3
0
def analyse_coh(args):
    """Build a filtered alignment, sequence logo and PSSM for one cohesin query.

    The query sequence is the ``<name>.A`` entry of ``all_cohs.txt``. Every
    sequence in ``Coh_All_Setup-Oct2014-d-for-lizi.txt`` is pairwise-aligned
    against it and kept when ``sm.aln_identity`` is at least 0.4 and
    ``gap_in_essential`` reports no gap for either aligned sequence. The
    query plus the retained hits are written to
    ``<path><name>_passed_thresholds.fasta``, aligned with ``muscle``, drawn
    with ``weblogo`` and finally handed to ``sm.run_psi_blast_for_pssm``.

    Args:
        args: mapping with the keys ``'name'`` (query identifier) and
            ``'path'`` (output location). ``'path'`` is used as a plain
            string prefix, so it must already end with a path separator.

    Raises:
        KeyError: if ``args`` lacks a key or the query is not found in the
            FASTA collections.
        OSError: if the ``muscle`` or ``weblogo`` executable cannot be
            started.
    """
    import subprocess
    import seq_manager as sm

    name = args['name']
    prefix = args['path'] + name
    passed_fasta = prefix + '_passed_thresholds.fasta'
    passed_aln = prefix + '_passed_thresholds.aln'
    passed_eps = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(
        name[:4], name,
        sm.read_multi_fastas('all_cohs.txt')[name + '.A']['seq'])
    print(args)
    print(query)
    strands = get_strands_seq(query.name, query.fasta)
    all_coh_fastas = sm.read_multi_fastas(
        'Coh_All_Setup-Oct2014-d-for-lizi.txt')

    # The query itself always heads the list of sequences to be aligned.
    pass_threshold = [{'name': query.name, 'seq': query.fasta}]
    for hit_val in all_coh_fastas.values():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta,
                                            hit_val['seq'].upper())
        # The trailing 4 is forwarded to gap_in_essential unchanged; its
        # meaning is defined by that helper (not visible here).
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= 0.4
                and not gap_in_essential(strands, pw_aln[1], 4)
                and not gap_in_essential(strands, pw_aln[0], 4)):
            pass_threshold.append(hit_val)
    print('found %i seqs that passed the thresholds' % len(pass_threshold))

    # NOTE(review): a list is passed here; confirm that
    # sm.write_multi_seqs_to_file accepts a list as well as a name-keyed dict.
    sm.write_multi_seqs_to_file(pass_threshold, passed_fasta)

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    subprocess.call(['muscle', '-in', passed_fasta, '-out', passed_aln])
    aln_fastas = sm.read_multi_fastas(passed_aln)

    # Annotate each logo column with the query's aligned character.
    # NOTE(review): the query was stored under query.name but is looked up
    # under args['name']; this assumes both are equal - TODO confirm.
    annotation = ','.join(aln_fastas[name]['seq'])
    subprocess.call(['weblogo', '-f', passed_aln, '-o', passed_eps,
                     '--annotate', annotation])

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    # Presumably write_seq_from_seq produced <path><name>.fasta, which is the
    # query file given to PSI-BLAST below - verify against seq_manager.
    sm.run_psi_blast_for_pssm(
        prefix + '.fasta',
        passed_aln,
        prefix + '.pssm')
Ejemplo n.º 4
0
def analyse_doc(args):
    """Build a filtered alignment, sequence logo and PSSM for one dockerin query.

    The query sequence is the ``<name>.B`` entry of ``all_docs.fasta``. Every
    sequence in ``all_dockerins_May2015.txt`` is pairwise-aligned against it
    and kept when ``sm.aln_identity`` is at least 0.4 and
    ``gap_in_essential`` reports no gap for either aligned sequence. The
    query plus the retained hits are written to
    ``<path><name>_passed_thresholds.fasta``, aligned with ``muscle``, drawn
    with ``weblogo`` and finally handed to ``sm.run_psi_blast_for_pssm``.

    Args:
        args: mapping with the keys ``'name'`` (query identifier) and
            ``'path'`` (output location). ``'path'`` is used as a plain
            string prefix, so it must already end with a path separator.

    Raises:
        KeyError: if ``args`` lacks a key or the query is not found in the
            FASTA collections.
        OSError: if the ``muscle`` or ``weblogo`` executable cannot be
            started.
    """
    import subprocess
    import seq_manager as sm

    name = args['name']
    prefix = args['path'] + name
    passed_fasta = prefix + '_passed_thresholds.fasta'
    passed_aln = prefix + '_passed_thresholds.aln'
    passed_eps = prefix + '_passed_thresholds.eps'

    query = sm.WorkSeq(
        name[:4], name,
        sm.read_multi_fastas('all_docs.fasta')[name + '.B']['seq'])
    helices = get_helices_seq(name[:4], query.fasta)
    all_doc_fastas = sm.read_multi_fastas('all_dockerins_May2015.txt')

    # The query itself always heads the list of sequences to be aligned.
    pass_thresh = [{'name': name, 'seq': query.fasta}]
    for hit_val in all_doc_fastas.values():
        pw_aln = sm.pair_wise_aln_from_seqs(query.fasta,
                                            hit_val['seq'].upper())
        if (sm.aln_identity(pw_aln[0], pw_aln[1]) >= 0.4
                and not gap_in_essential(helices, pw_aln[1])
                and not gap_in_essential(helices, pw_aln[0])):
            pass_thresh.append(hit_val)

    # Keyed by name, so a later entry with a repeated name replaces an
    # earlier one.
    sm.write_multi_seqs_to_file({a['name']: a for a in pass_thresh},
                                passed_fasta)

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    subprocess.call(['muscle', '-in', passed_fasta, '-out', passed_aln])
    aln_fastas = sm.read_multi_fastas(passed_aln)

    # Annotate each logo column with the query's aligned character.
    annotation = ','.join(aln_fastas[name]['seq'])
    subprocess.call(['weblogo', '-f', passed_aln, '-o', passed_eps,
                     '--annotate', annotation])
    print('aln_fastas %s' % (aln_fastas,))

    sm.write_seq_from_seq(name, args['path'], query.fasta)
    # Re-write the alignment in place, additionally passing the query name
    # (presumably so the query is emitted first - TODO confirm in seq_manager).
    sm.write_multi_seqs_to_file(aln_fastas, passed_aln, name)
    # Presumably write_seq_from_seq produced <path><name>.fasta, which is the
    # query file given to PSI-BLAST below - verify against seq_manager.
    sm.run_psi_blast_for_pssm(
        prefix + '.fasta',
        passed_aln,
        prefix + '.pssm')