#!/usr/bin/env python
"""Write the names of the best-scoring "purple" entries to ``extract_purples``.

Reads a score file with ``score2dict``, keeps the entries whose ``purple``
field is truthy, orders them by ascending ``ddg`` and prints/writes the first
``-n`` names (one per line, no trailing newline).

Command-line options:
    -score_file  score file to read; defaults to the first file in the current
                 directory whose name contains ".score" and does not start
                 with "purple".
    -n           maximum number of names to report (default 5).
"""
import argparse
import os
import re

from rosetta_score_files import score2dict

# Candidate default score files. Resolved lazily below so that an explicit
# -score_file still works when the current directory holds no score file.
score_file_candidates = [x for x in os.listdir('./')
                         if re.match(r'^(?!purple).*\.score', x)]

parser = argparse.ArgumentParser()
parser.add_argument(
    '-score_file',
    default=score_file_candidates[0] if score_file_candidates else None)
parser.add_argument('-n', default=5, type=int)
args = vars(parser.parse_args())
if args['score_file'] is None:
    parser.error('no score file found in the current directory; '
                 'pass one with -score_file')

scores = score2dict(args['score_file'])

# Rank by entry name rather than keying a dict on ddg, so that two entries
# sharing the same ddg value are both kept.
passed_names = sorted(
    (name for name, entry in scores.items() if entry['purple']),
    key=lambda name: scores[name]['ddg'])

# Report at most n names; fewer passing entries than n is not an error.
best_names = passed_names[:args['n']] if args['n'] > 0 else []

# The output file is always (re)created, even when nothing passed.
with open('extract_purples', 'w') as fout:
    if best_names:
        listing = '\n'.join(best_names)
        print(listing)
        fout.write(listing)
    else:
        print('found nothin')
def main():
    """
    Bin passing ("purple") sequences by charge pattern and print the best subset of bins.

    Each sequence in the fasta file whose score entry has a truthy ``purple``
    field is reduced to a "switches" string: one character per position in
    ``positions_dict`` (looked up by the part of the sequence name before the
    first ``_``), where D/E become ``n``, K/R become ``p`` and anything else
    becomes ``c``. Sequences sharing a switches string form a bin. Only
    patterns with exactly 7 charged positions and at least 2 of each sign are
    kept.

    The bin patterns are handed to ``bin_subsetter``; per the original author's
    notes it returns subsets of bins that differ from one another in at least
    one N <> P position (that helper is not defined in this function). The
    subset with the fewest single-structure bins is chosen, and for each of
    its bins the pattern (upper-cased) is printed followed by the
    ``description`` of its structures ordered by ascending ``ddg``.

    INPUT:
        1st cmd argument (``sys.argv[1]``): fasta file
        2nd cmd argument (``sys.argv[2]``): score file
    :return: None
    """
    import sys
    from rosetta_score_files import score2dict
    import operator

    ### this positions dict is for the 1st, 8 parts switches design
    # positions_dict = {'1anu': [36, 38, 114, 115, 117, 120, 124, 126], '1ohz': [37, 39, 115, 116, 118, 121, 125, 127],
    #                   '2ccl': [37, 39, 115, 116, 118, 121, 125, 127]}

    ### this positions dict is for the second, 10 parts switches design from 1-2.3.2015
    positions_dict = {'1anu': [32, 36, 62, 65, 69, 82, 115, 126], '1aoh': [33, 37, 63, 66, 70, 83, 119, 130],
                      '1ohz': [33, 37, 63, 66, 70, 83, 116, 127], '2ccl': [33, 37, 63, 66, 70, 83, 116, 127]}
    type_dict = {'D': 'n', 'E': 'n', 'K': 'p', 'R': 'p'}

    scores = score2dict(sys.argv[2])

    # Read the fasta file; the context manager closes the handle even on error.
    with open(sys.argv[1], 'r') as f:
        records = f.read().split('>')

    seq_dict = {}
    for record in records:
        if not record.strip():
            continue  # text before the first '>' or a blank record
        lines = record.split('\n')
        # Join every line after the header so wrapped (multi-line) fasta
        # sequences are read in full rather than cut at the first line.
        seq_dict[lines[0]] = ''.join(lines[1:])

    # switches pattern -> names of the passing sequences showing that pattern
    bins = {}
    for name, seq in seq_dict.items():
        if not scores[name]['purple']:
            continue
        coh_name = name.split('_')[0]
        # positions are 1-based, hence the -1 when indexing the sequence
        switches = ''.join(type_dict.get(seq[i - 1], 'c') for i in positions_dict[coh_name])
        negatives = switches.count('n')
        positives = switches.count('p')
        # Keep only patterns with exactly 7 charges and at least 2 of each
        # sign (i.e. a 5/2, 4/3, 3/4 or 2/5 split).
        if negatives + positives != 7 or negatives < 2 or positives < 2:
            continue
        bins.setdefault(switches, []).append(name)

    bin_subsets = bin_subsetter(list(bins))

    # Choose the subset with the fewest bins holding a single structure; on a
    # tie the earliest subset wins.
    chosen_bsss = None
    least_ones = None
    for index, subset in enumerate(bin_subsets):
        ones = sum(1 for pattern in subset if len(bins[pattern]) == 1)
        if least_ones is None or ones < least_ones:
            chosen_bsss = index
            least_ones = ones

    if chosen_bsss is None:
        # Nothing to report; previously this case crashed with a TypeError
        # because the sentinel string was used as a list index.
        print('no bin subsets')
        return

    for best_bin in bin_subsets[chosen_bsss]:
        score_list = sorted((scores[i] for i in bins[best_bin]), key=operator.itemgetter('ddg'))
        print(best_bin.upper())
        for entry in score_list:
            print(entry['description'])
        print('\n')
#!/usr/bin/env python
# Write the names of the best-scoring "purple" entries to ``extract_purples``.
#
# Reads a score file with ``score2dict``, keeps the entries whose ``purple``
# field is truthy, orders them by ascending ``ddg`` and prints/writes the first
# ``-n`` names (one per line, no trailing newline).
#
# Command-line options:
#     -score_file  score file to read; defaults to the first file in the
#                  current directory whose name contains ".score" and does not
#                  start with "purple".
#     -n           maximum number of names to report (default 5).
import argparse
import os
import re

from rosetta_score_files import score2dict

# Candidate default score files. Resolved lazily below so that an explicit
# -score_file still works when the current directory holds no score file.
score_file_candidates = [
    x for x in os.listdir('./') if re.match(r'^(?!purple).*\.score', x)
]

parser = argparse.ArgumentParser()
parser.add_argument(
    '-score_file',
    default=score_file_candidates[0] if score_file_candidates else None)
parser.add_argument('-n', default=5, type=int)
args = vars(parser.parse_args())
if args['score_file'] is None:
    parser.error('no score file found in the current directory; '
                 'pass one with -score_file')

scores = score2dict(args['score_file'])

# Rank by entry name rather than keying a dict on ddg, so that two entries
# sharing the same ddg value are both kept.
passed_names = sorted(
    (name for name, entry in scores.items() if entry['purple']),
    key=lambda name: scores[name]['ddg'])

# Report at most n names; fewer passing entries than n is not an error.
best_names = passed_names[:args['n']] if args['n'] > 0 else []

# The output file is always (re)created, even when nothing passed.
with open('extract_purples', 'w') as fout:
    if best_names:
        listing = '\n'.join(best_names)
        print(listing)
        fout.write(listing)
    else:
        print('found nothin')