#!/usr/bin/env python
import argparse
import os
import re
from rosetta_score_files import score2dict
def find_default_score_file():
    """Return the default score file name from the current directory, or None.

    A candidate is any entry of ``./`` whose name matches the pattern below
    (it does not start with ``purple`` and contains ``.score``). Candidates
    are sorted so the choice is deterministic; ``os.listdir`` order is not.
    """
    candidates = sorted(x for x in os.listdir('./')
                        if re.match(r'^(?!purple).*\.score', x))
    return candidates[0] if candidates else None


def top_purples(scores, n):
    """Return the names of up to *n* purple entries, lowest ``ddg`` first.

    :param scores: mapping of name -> score row; each row is read for its
        ``'ddg'`` and ``'purple'`` keys.
    :param n: maximum number of names to return; values below 1 give ``[]``.
    :return: list of names ordered by ascending ddg (ties broken by name).
    """
    # Sorting (ddg, name) pairs keeps entries that share a ddg value; a dict
    # keyed by ddg would silently drop all but one of them.
    ranked = sorted((row['ddg'], name)
                    for name, row in scores.items() if row['purple'])
    # Slicing returns whatever is available when fewer than n entries passed.
    return [name for _, name in ranked[:max(n, 0)]]


def extract_purples():
    """Print the best purple names and write them to ``extract_purples``.

    Command line: ``-score_file`` (defaults to the file chosen by
    ``find_default_score_file``) and ``-n`` (how many names to keep,
    default 5).
    """
    parser = argparse.ArgumentParser()
    # The default is resolved after parsing so that a directory without any
    # score file does not crash the script when -score_file is given.
    parser.add_argument('-score_file', default=None)
    parser.add_argument('-n', default=5, type=int)
    args = vars(parser.parse_args())
    if args['score_file'] is None:
        args['score_file'] = find_default_score_file()
        if args['score_file'] is None:
            parser.error('no -score_file given and no .score file found in ./')

    scores = score2dict(args['score_file'])
    best = top_purples(scores, args['n'])
    listing = '\n'.join(best)
    # 'w' always (re)creates the output file, even when nothing passed.
    with open('extract_purples', 'w') as fout:
        print(listing if best else 'found nothin')
        fout.write(listing)


if __name__ == '__main__':
    extract_purples()
# Example #2
# 0
def main():
    """
    Assemble bins of passing sequences from a fasta file and a score file.

    A bin is a stack of sequences that have similar AAs at the same positions
    (set by type_dict and positions_dict respectively). Each sequence is read
    and its score entry is checked for the 'purple' flag. A passing sequence
    is assigned a bin string with one character per position: 'n' for D/E,
    'p' for K/R and 'c' for anything else. Only bins with exactly 7 charged
    positions, with at least 2 of each sign, are kept.

    The set of bin strings is handed to bin_subsetter (defined elsewhere;
    presumably it returns the longest subsets of bins that differ from all
    other bins in their subset in at least 1 position N <> P -- verify
    against its definition). Among the returned subsets, the first one with
    the fewest single-structure bins is chosen, and for each of its bins the
    bin string (upper-cased) and its structures' descriptions are printed,
    sorted by ascending ddg.

    INPUT: 1st cmd argument fasta file
           2nd cmd argument score file
    :return: None
    """
    import sys
    from rosetta_score_files import score2dict
    import operator
    from collections import Counter
    ### this positions dict is for the 1st, 8 parts switches design
    # positions_dict = {'1anu': [36, 38, 114, 115, 117, 120, 124, 126], '1ohz': [37, 39, 115, 116, 118, 121, 125, 127],
    #                   '2ccl': [37, 39, 115, 116, 118, 121, 125, 127]}
    ### this positions dict is for the second, 10 parts switches design from 1-2.3.2015
    positions_dict = {'1anu': [32, 36, 62, 65, 69, 82, 115, 126],
                      '1aoh': [33, 37, 63, 66, 70, 83, 119, 130],
                      '1ohz': [33, 37, 63, 66, 70, 83, 116, 127],
                      '2ccl': [33, 37, 63, 66, 70, 83, 116, 127]}
    type_dict = {'D': 'n', 'E': 'n', 'K': 'p', 'R': 'p'}
    scores = score2dict(sys.argv[2])

    # Read the fasta records; the context manager closes the file promptly.
    with open(sys.argv[1], 'r') as fasta:
        records = fasta.read().split('>')
    seq_dict = {}
    for record in records:
        if not record.strip():
            # Nothing but whitespace before the first '>' or between records.
            continue
        lines = record.split('\n')
        # Join every line after the header so wrapped (multi-line) sequences
        # are read whole instead of being cut at the first line break.
        seq_dict[lines[0]] = ''.join(lines[1:])

    bins = {}
    for name, seq in seq_dict.items():
        if not scores[name]['purple']:
            continue
        # The part of the name before the first '_' selects the position list.
        coh_name = name.split('_')[0]
        # Positions are 1-based, hence the -1 when indexing the sequence.
        switches = ''.join(type_dict.get(seq[pos - 1], 'c')
                           for pos in positions_dict[coh_name])
        ### keep only bins with exactly 7 charges, distributed 5/2 or 4/3:
        counter = Counter(switches)
        if counter['n'] + counter['p'] != 7 or counter['n'] < 2 or counter['p'] < 2:
            continue
        ###
        bins.setdefault(switches, []).append(name)

    bin_subsets = bin_subsetter(list(bins))
    if not bin_subsets:
        # Without any subset there is nothing to rank or print.
        print('no bin subsets')
        return

    def singleton_count(subset):
        """Count the bins in *subset* that hold exactly one structure."""
        return sum(1 for bin_key in subset if len(bins[bin_key]) == 1)

    # min() keeps the first subset among ties.
    chosen_subset = min(bin_subsets, key=singleton_count)

    best_bins_structs = []
    for best_bin in chosen_subset:
        ranked = sorted((scores[name] for name in bins[best_bin]),
                        key=operator.itemgetter('ddg'))
        best_bins_structs.append((best_bin, [row['description'] for row in ranked]))
    for best_bin, descriptions in best_bins_structs:
        print(best_bin.upper())
        for struct in descriptions:
            print(struct)
        print('\n')
#!/usr/bin/env python
import argparse
import os
import re
from rosetta_score_files import score2dict
def find_default_score_file():
    """Return the default score file name from the current directory, or None.

    A candidate is any entry of ``./`` whose name matches the pattern below
    (it does not start with ``purple`` and contains ``.score``). Candidates
    are sorted so the choice is deterministic; ``os.listdir`` order is not.
    """
    candidates = sorted(x for x in os.listdir('./')
                        if re.match(r'^(?!purple).*\.score', x))
    return candidates[0] if candidates else None


def top_purples(scores, n):
    """Return the names of up to *n* purple entries, lowest ``ddg`` first.

    :param scores: mapping of name -> score row; each row is read for its
        ``'ddg'`` and ``'purple'`` keys.
    :param n: maximum number of names to return; values below 1 give ``[]``.
    :return: list of names ordered by ascending ddg (ties broken by name).
    """
    # Sorting (ddg, name) pairs keeps entries that share a ddg value; a dict
    # keyed by ddg would silently drop all but one of them.
    ranked = sorted((row['ddg'], name)
                    for name, row in scores.items() if row['purple'])
    # Slicing returns whatever is available when fewer than n entries passed.
    return [name for _, name in ranked[:max(n, 0)]]


def extract_purples():
    """Print the best purple names and write them to ``extract_purples``.

    Command line: ``-score_file`` (defaults to the file chosen by
    ``find_default_score_file``) and ``-n`` (how many names to keep,
    default 5).
    """
    parser = argparse.ArgumentParser()
    # The default is resolved after parsing so that a directory without any
    # score file does not crash the script when -score_file is given.
    parser.add_argument('-score_file', default=None)
    parser.add_argument('-n', default=5, type=int)
    args = vars(parser.parse_args())
    if args['score_file'] is None:
        args['score_file'] = find_default_score_file()
        if args['score_file'] is None:
            parser.error('no -score_file given and no .score file found in ./')

    scores = score2dict(args['score_file'])
    best = top_purples(scores, args['n'])
    listing = '\n'.join(best)
    # 'w' always (re)creates the output file, even when nothing passed.
    with open('extract_purples', 'w') as fout:
        print(listing if best else 'found nothin')
        fout.write(listing)


if __name__ == '__main__':
    extract_purples()