Example #1
0
def load_distmap(fname):
    """Return the distance map produced by ``DistanceMap.from_file(fname)``."""
    distance_map = DistanceMap.from_file(fname)
    return distance_map
def _strand_orientation(ec_pair, ecs, list_beta_pairs, istrand, jstrand):
    """Guess the relative orientation of two strands from their two top ECs.

    ec_pair is the (i, j) residue pair of the top-scoring EC between strands
    ``istrand`` and ``jstrand`` (indices into ``list_beta_pairs``).  The
    highest-``cn`` EC between the same two strands that shares neither
    residue with ``ec_pair`` is looked up, and the sign of the product of the
    two residue offsets decides the orientation.

    Returns one of 'antiparallel', 'parallel',
    'not enough info to determine strand orientation' (no second EC found) or
    'Error: dx 0' (the offset product is zero).
    """
    # These locals are referenced from the query string through the @ syntax.
    b1_start = list_beta_pairs[istrand][0]
    b1_end = list_beta_pairs[istrand][1]
    b2_start = list_beta_pairs[jstrand][0]
    b2_end = list_beta_pairs[jstrand][1]

    i_val = ec_pair[0]
    j_val = ec_pair[1]

    # Stable sort so that tied scores keep the order they have in ``ecs``.
    strand_ecs = ecs.query(
        '(i>=@b1_start and i<=@b1_end and i!=@i_val and j>=@b2_start and j<=@b2_end and j!=@j_val) or (j>=@b1_start and j<=@b1_end and j!=@j_val and i>=@b2_start and i<=@b2_end and i!=@i_val )'
    ).sort_values(by='cn', ascending=False, kind='mergesort')
    if len(strand_ecs) < 1:
        return 'not enough info to determine strand orientation'

    new_i = strand_ecs.iloc[0]['i']
    new_j = strand_ecs.iloc[0]['j']

    # Offsets with opposite signs: one strand runs backwards relative to the
    # other.  Offsets with the same sign: both run in the same direction.
    direction = (i_val - new_i) * (j_val - new_j)
    if direction < 0:
        return 'antiparallel'
    if direction > 0:
        return 'parallel'
    print(strand_ecs)
    return 'Error: dx 0'
    # TODO:  add check if this goes beyond length of protein sequence


def get_strand_pairings(list_beta_pairs,
                        ec_file,
                        init_struct=False,
                        chain_id=' '):
    """Propose up to two partner strands for every beta strand.

    Strands separated by a linker shorter than 6 residues are paired as
    ('antiparallel', 'linker').  The remaining partner slots are filled from
    the top-scoring EC between each pair of strands; scores under the cutoff
    are reported as 'below_cutoff', and a pairing that the partner strand
    does not reciprocate has its score replaced by 'one_directional'.

    Parameters:
        list_beta_pairs: (start, end) residue ranges of the strands,
            e.g. [(1, 10), (23, 43)].  They are sorted by start internally.
        ec_file: anything ``pandas.read_csv`` accepts; the table must have
            the columns 'i', 'j' and 'cn'.
        init_struct: optional structure file.  When truthy it is loaded and
            compared against the ECs, but the result is currently not used
            for the pairing decision.
        chain_id: chain to read from ``init_struct``.

    Returns:
        (strand_pairs, list_beta_pairs): ``strand_pairs`` maps the index of
        each strand in the sorted ``list_beta_pairs`` to a list of
        ``(partner_index, (orientation, score))`` tuples.  Each strand gets
        two entries when at least two other strands exist, fewer otherwise.
    """
    # Load in ecs file.  The positional cutoff and the "top EC" lookups below
    # take rows by position, so make the descending-cn order explicit.  The
    # sort is stable, so an already sorted file keeps its exact row order.
    ecs = pd.read_csv(ec_file)
    ecs = ecs.sort_values(by='cn', ascending=False, kind='mergesort')

    max_linker = 6

    # Score cutoff: cn of the row at position (residue span covered by the
    # ECs), clamped to the last row so short tables do not raise IndexError.
    span = max(ecs['j']) - min(ecs['i']) + 1
    ec_cutoff = ecs.iloc[min(len(ecs) - 1, span)]['cn']

    num_betas = len(list_beta_pairs)

    # strand index -> list of (partner index, (orientation, score))
    strand_pairs = {}

    # Sort beta strands in case they're out of order
    list_beta_pairs = sorted(list_beta_pairs, key=lambda x: x[0])

    # Load the predicted initial structure and compare it with the ECs.
    # NOTE: ecs_with_dist is not used further down; the calls are kept so an
    # unreadable structure file is still reported to the caller.
    if init_struct:
        coords = ClassicPDB.from_file(init_struct).get_chain(chain_id)
        dist_map = DistanceMap.from_coords(coords)
        ecs_with_dist = coupling_scores_compared(ecs, dist_map)

    # Neighbouring strands joined by a short linker must be coupled.
    for idx in range(num_betas - 1):
        length_linker = (list_beta_pairs[idx + 1][0] -
                         list_beta_pairs[idx][1] - 1)
        if length_linker < max_linker:
            strand_pairs.setdefault(idx, []).append(
                (idx + 1, ('antiparallel', 'linker')))
            strand_pairs.setdefault(idx + 1, []).append(
                (idx, ('antiparallel', 'linker')))

    # Now go through the top ECs and choose pairings on that basis.
    for i in range(num_betas):
        b1_start = list_beta_pairs[i][0]
        b1_end = list_beta_pairs[i][1]

        # One (partner index, top cn, (ec_i, ec_j)) record per other strand.
        candidates = []
        for j in range(num_betas):
            # Don't compare same-strand ecs
            if j == i:
                continue

            b2_start = list_beta_pairs[j][0]
            b2_end = list_beta_pairs[j][1]

            between = ecs.query(
                '(i >= @b1_start and i <= @b1_end and j >= @b2_start and j<= @b2_end) or (j >= @b1_start and j <= @b1_end and i >= @b2_start and i<=@b2_end)'
            )
            if len(between) > 0:
                top_row = between.iloc[0]
                candidates.append(
                    (j, top_row['cn'], (top_row['i'], top_row['j'])))
            else:
                candidates.append((j, 0, (0, 0)))

        # Best and second-best partner.  sorted() is stable, so tied scores
        # resolve to distinct strands in index order instead of returning
        # the same strand twice.
        ranked = sorted(candidates, key=lambda c: c[1], reverse=True)[:2]

        entries = []
        for partner, score, ec_pair in ranked:
            orientation = _strand_orientation(ec_pair, ecs, list_beta_pairs,
                                              i, partner)
            # If EC score is below cutoff, don't report the number
            if score < ec_cutoff:
                score = 'below_cutoff'
            entries.append((partner, (orientation, score)))

        if i not in strand_pairs:
            # No linker constraint: take both EC-based partners.
            strand_pairs[i] = entries
        elif len(strand_pairs[i]) == 1:
            # One linker partner already present: add the best EC-based
            # partner that is not that same strand.
            linked_partner = strand_pairs[i][0][0]
            extra = [e for e in entries if e[0] != linked_partner][:1]
            strand_pairs[i] = strand_pairs[i] + extra
        # Two linker partners already fill both slots: add nothing.

    # Flag one-directional pairings, e.g. 1->3 but not 3->1
    for i in range(num_betas):
        for slot in range(len(strand_pairs[i])):
            partner, (orientation, _score) = strand_pairs[i][slot]
            partners_of_partner = [p for p, _ in strand_pairs[partner]]
            if i not in partners_of_partner:
                strand_pairs[i][slot] = (partner,
                                         (orientation, 'one_directional'))

    return strand_pairs, list_beta_pairs