def load_distmap(fname):
    """Load a distance map from ``fname`` using ``DistanceMap.from_file``.

    The argument is forwarded unchanged and the object returned by
    ``DistanceMap.from_file`` is handed back to the caller as-is.
    """
    distance_map = DistanceMap.from_file(fname)
    return distance_map
def _ecs_between_strands(ecs, strand_a, strand_b):
    """Return the rows of ``ecs`` linking a residue of ``strand_a`` to one of ``strand_b``.

    A row matches when one of its ``i``/``j`` positions lies inside
    ``strand_a`` and the other inside ``strand_b`` (bounds inclusive), in
    either column order. Row order of ``ecs`` is preserved.
    """
    a_start, a_end = strand_a[0], strand_a[1]
    b_start, b_end = strand_b[0], strand_b[1]
    i_col = ecs['i']
    j_col = ecs['j']
    forward = i_col.between(a_start, a_end) & j_col.between(b_start, b_end)
    backward = j_col.between(a_start, a_end) & i_col.between(b_start, b_end)
    return ecs[forward | backward]


def _guess_strand_orientation(ec_pair, ecs, strand_a, strand_b):
    """Guess 'parallel' / 'antiparallel' from the next-best EC between two strands.

    ``ec_pair`` is the ``(i, j)`` of the top EC between the strands. The
    highest-``cn`` EC between the same strands that shares neither ``i`` nor
    ``j`` with it is looked up; if both positions shift in the same direction
    the pairing is called parallel, otherwise antiparallel. When no such
    second EC exists a descriptive string is returned instead.
    """
    i_val, j_val = ec_pair
    between = _ecs_between_strands(ecs, strand_a, strand_b)
    others = between[(between['i'] != i_val) & (between['j'] != j_val)]
    others = others.sort_values(by='cn', ascending=False)
    if len(others) < 1:
        return 'not enough info to determine strand orientation'

    new_i = others.iloc[0]['i']
    new_j = others.iloc[0]['j']
    direction = (i_val - new_i) * (j_val - new_j)
    if direction < 0:
        return 'antiparallel'
    elif direction > 0:
        return 'parallel'
    else:
        # Kept from the original implementation: dump the offending rows.
        print(others)
        return 'Error: dx 0'


def get_strand_pairings(list_beta_pairs, ec_file, init_struct=False, chain_id=' '):
    """Propose up to two partner strands for every beta strand.

    Parameters
    ----------
    list_beta_pairs : list of (start, end)
        Residue ranges of the beta strands, e.g. ``[(1, 10), (23, 43)]``.
        They are sorted by start position before use.
    ec_file : str or file-like
        CSV readable by ``pandas.read_csv`` with columns ``i``, ``j``, ``cn``.
        NOTE(review): the first matching row is taken as the "top" EC and a
        positional row is used as score cutoff, so the table is presumably
        expected to be sorted by decreasing ``cn`` - confirm with the caller.
    init_struct : str or False
        Optional structure file. When given it is loaded and compared with the
        ECs. NOTE(review): the result of that comparison is not used by the
        pairing logic below.
    chain_id : str
        Chain to extract from ``init_struct``.

    Returns
    -------
    (dict, list)
        ``strand_pairs`` maps a strand index (position in the sorted strand
        list) to a list of ``(partner_index, (orientation, evidence))``
        tuples, where ``evidence`` is ``'linker'``, the top EC score,
        ``'below_cutoff'`` or ``'one_directional'``; and the sorted strand
        list. Each strand gets two partners when at least three strands are
        given, otherwise as many as exist.

    Raises
    ------
    ValueError
        If the EC table is empty.
    """
    ecs = pd.read_csv(ec_file)
    if ecs.empty:
        raise ValueError('EC table contains no couplings')

    # Strands separated by fewer than this many residues are forced to pair.
    max_linker = 6

    # Score of the EC at rank ~(covered sequence length). The row index is
    # clamped to the last row; the original indexed one past the end whenever
    # the table was shorter than the covered span.
    span = int(ecs['j'].max()) - int(ecs['i'].min()) + 1
    cutoff_row = min(len(ecs) - 1, span)
    ec_cutoff = ecs.iloc[cutoff_row]['cn']

    # Sort beta strands in case they're out of order.
    list_beta_pairs = sorted(list_beta_pairs, key=lambda x: x[0])
    num_betas = len(list_beta_pairs)

    if init_struct:
        # NOTE(review): kept so that a bad structure file still fails here as
        # before, but the comparison result is currently unused.
        coords = ClassicPDB.from_file(init_struct).get_chain(chain_id)
        dist_map = DistanceMap.from_coords(coords)
        coupling_scores_compared(ecs, dist_map)

    strand_pairs = {}

    # Consecutive strands joined by a short linker must form a hairpin.
    for idx in range(num_betas - 1):
        length_linker = list_beta_pairs[idx + 1][0] - list_beta_pairs[idx][1] - 1
        if length_linker < max_linker:
            strand_pairs.setdefault(idx, []).append(
                (idx + 1, ('antiparallel', 'linker')))
            strand_pairs.setdefault(idx + 1, []).append(
                (idx, ('antiparallel', 'linker')))

    # Fill the remaining partner slots from the top EC between strand pairs.
    for idx in range(num_betas):
        existing = strand_pairs.get(idx, [])
        if len(existing) >= 2:
            # Both partners already fixed by linker constraints.
            continue

        # One candidate per other strand: (strand index, top score, (i, j)).
        candidates = []
        for other in range(num_betas):
            if other == idx:
                continue
            between = _ecs_between_strands(
                ecs, list_beta_pairs[idx], list_beta_pairs[other])
            if len(between) > 0:
                top_row = between.iloc[0]
                candidates.append(
                    (other, top_row['cn'], (top_row['i'], top_row['j'])))
            else:
                candidates.append((other, 0, (0, 0)))

        # Rank candidates by score. sorted() is stable, so tied scores keep
        # strand order and best / second best are always distinct strands.
        ranked = sorted(candidates, key=lambda c: c[1], reverse=True)[:2]

        proposals = []
        for partner, score, ec_pair in ranked:
            orientation = _guess_strand_orientation(
                ec_pair, ecs, list_beta_pairs[idx], list_beta_pairs[partner])
            if score < ec_cutoff:
                score = 'below_cutoff'
            proposals.append((partner, (orientation, score)))

        if not existing:
            strand_pairs[idx] = proposals
        else:
            # Exactly one linker partner: add the best EC partner, or the
            # second best when the best is the strand already linked.
            linked_partner = existing[0][0]
            if proposals and proposals[0][0] == linked_partner:
                extra = proposals[1:2]
            else:
                extra = proposals[:1]
            strand_pairs[idx] = existing + extra

    # Flag pairings that are not reciprocated, e.g. 1->3 but not 3->1.
    partners_of = {
        idx: [partner for partner, _ in pairs]
        for idx, pairs in strand_pairs.items()
    }
    checked_pairs = {}
    for idx, pairs in strand_pairs.items():
        checked = []
        for partner, label in pairs:
            if idx in partners_of[partner]:
                checked.append((partner, label))
            else:
                checked.append((partner, (label[0], 'one_directional')))
        checked_pairs[idx] = checked

    return checked_pairs, list_beta_pairs