def minimum_contact_distance(a_H, b_H, return_indices=False, strip_H=True):
    """
    Find the smallest pairwise distance between two sets of atoms.

    :param a_H: prody object of the first set (rows of the distance matrix)
    :param b_H: prody object of the second set (columns of the distance matrix)
    :param return_indices: boolean, if True also return the row and column
        indices of the closest pair inside the distance matrix
    :param strip_H: boolean, if True only the 'not hydrogen' selection of each
        set is used; returned indices then refer to those selections
    :return: minimum distance in angstroms, or the tuple
        (distance, row index, column index) when return_indices is True
    """
    # Gather the coordinates of both sets, in argument order.
    coordinate_sets = []
    for atoms in (a_H, b_H):
        if strip_H:
            atoms = atoms.select('not hydrogen')
        coordinate_sets.append(atoms.getCoords())

    dist_matrix = prody.buildDistMatrix(*coordinate_sets)

    # Locate the column holding the overall minimum first, then the row that
    # carries the minimum within that column.
    col = np.argmin(np.amin(dist_matrix, axis=0), axis=0)
    row = np.argmin(dist_matrix, axis=0)[col]

    closest = dist_matrix.item(row, col)
    if return_indices:
        return closest, row, col
    return closest
Exemple #2
0
def set_bonds(prody_pdb):
    """Bond backbone atoms (N, C, CA) of the protein that lie close together.

    Every pair of selected atoms whose distance is greater than 0 and smaller
    than 1.7 is handed to ``prody_pdb.setBonds`` as a pair of atom indices.
    """
    backbone = prody_pdb.select('protein and name N C CA')

    # Only the lower triangle is inspected so that each pair appears once;
    # the zeros left on and above the diagonal are rejected by the "> 0" test.
    lower = np.tril(pr.buildDistMatrix(backbone))
    hits = np.where((lower > 0) & (lower < 1.7))

    # Translate positions within the selection into atom indices.
    indices = backbone.getIndices()
    bonds = []
    for first, second in zip(hits[0], hits[1]):
        bonds.append((indices[first], indices[second]))
    prody_pdb.setBonds(bonds)
Exemple #3
0
 def set_bonds(self):
     """Bond nearby backbone atoms (N, C, CA) of ``self.prody_pdb``.

     Used for vdM fragment selection. The selection spans the whole protein
     because vdMs can reach across chains.
     """
     backbone = self.prody_pdb.select('protein and name N C CA')
     # Lower triangle only: each pair is seen once, and the zero entries on
     # and above the diagonal fail the "> 0" test below.
     lower_dists = np.tril(pr.buildDistMatrix(backbone))
     close_pairs = np.where((lower_dists > 0) & (lower_dists < 1.7))
     atom_indices = backbone.getIndices()
     bond_list = [(atom_indices[a], atom_indices[b])
                  for a, b in zip(close_pairs[0], close_pairs[1])]
     self.prody_pdb.setBonds(bond_list)
Exemple #4
0
def calcSpectrusSims(distFlucts, pdb, cutoff=10., sigma='MRSDF', **kwargs):
    """Compute sparse Gaussian similarities from distance fluctuations.

    For every pair of atoms kept as neighbours the similarity is
    ``exp(-distFlucts / (2 * sigma**2))``; every other off-diagonal entry is
    zero and the diagonal is set to 1.

    :param distFlucts: square array of distance fluctuations; its shape must
        be ``(n, n)`` where ``n`` is the number of coordinates in *pdb*
    :param pdb: object providing ``getCoords()``
    :param cutoff: an ``int``/``float`` distance threshold (pairs with
        distance ``<= cutoff`` are neighbours), or ``None`` to treat every
        off-diagonal pair as a neighbour
    :param sigma: ``'MRSDF'`` (mean of the square roots of the neighbour
        fluctuations), ``'RMSDF'`` (square root of their mean), or a number
        used directly
    :param kwargs: accepted but not used by this function
    :return: tuple ``(sparseSims, sigma)`` with the similarities as a
        ``scipy.sparse.csr_matrix`` and the sigma value actually used
    :raises ValueError: if *distFlucts* has the wrong shape, if *cutoff* is
        neither a number nor ``None``, or if *sigma* cannot be squared
    """
    coords = pdb.getCoords()
    n = coords.shape[0]

    if distFlucts.shape != (n, n):
        raise ValueError('distFlucts and atoms must have same linear '
                         'size (now %d and %d)' % (distFlucts.shape[0], n))

    # identify atom pairs within cutoff and store relative dist. flucts
    nearestNeighs = np.full((n, n), True, dtype=bool)
    np.fill_diagonal(nearestNeighs, False)
    if isinstance(cutoff, (int, float)):
        # compute inter-atomic distances
        dist = buildDistMatrix(coords)
        nearestNeighs &= (dist <= cutoff)
    elif cutoff is not None:
        raise ValueError('cutoff must be either a number or None. '
                         'Got: {0}'.format(type(cutoff)))
    nnDistFlucts = distFlucts[nearestNeighs]

    # set the sigma parameter for the Gaussian weights
    if sigma == 'MRSDF':
        # sigma is computed as the average of the root distance fluctuations
        # between residues within the distance cutoff, as defined in the
        # SPECTRUS algorithm
        sigma = np.mean(np.sqrt(nnDistFlucts))
    elif sigma == 'RMSDF':
        # sigma is computed as the root mean squared dist. fluctuations
        # (faster to compute than MRSDF)
        sigma = np.sqrt(np.mean(nnDistFlucts))

    # check if sigma is a number: anything that cannot be squared (e.g. an
    # unrecognised string or None) raises TypeError here. Only that error is
    # translated, so interrupts and unrelated failures are not swallowed.
    try:
        ss = 2. * sigma**2
    except TypeError as err:
        raise ValueError(
            'sigma must be \'MRSDF\', \'RMSDF\' or a number.') from err

    # compute the Gaussian weights only for residue pairs
    # within the distance cutoff
    reducedSims = np.where(nearestNeighs, np.exp(-distFlucts / ss), 0)
    np.fill_diagonal(reducedSims, 1.)
    sparseSims = sparse.csr_matrix(reducedSims)
    # drop explicitly stored zeros so only non-zero similarities remain
    sparseSims.eliminate_zeros()

    return sparseSims, sigma
Exemple #5
0
def pathAnalysisApp():
    """
    Run the 'paths' command-line app.

    Settings come from ``handle_arguments_pathAnalysisApp()``. The function
    checks that the data file and the PDB file exist, parses the PDB file,
    loads the data matrix according to the (case-insensitive) data type,
    builds a network, runs ``pathAnalysis`` between the source and target
    residues and writes the result through ``writePath2VMDFile`` (output name
    ending in ``.tcl``) and ``writePath2PMLFile`` (output name ending in
    ``.pml``).

    Calls ``sys.exit(-1)`` after printing an error when either input file is
    missing or the data type is not recognised.
    """
    (inp_file, out_file, sel_type, pdb_file, val_fltr,
     dis_fltr, src_res, trgt_res, num_paths) = \
        handle_arguments_pathAnalysisApp()

    print(f"""
@> Running 'paths' app

@> Input file     : {inp_file}
@> PDB file       : {pdb_file}
@> Data type      : {sel_type}
@> Output         : {out_file}
@> Value filter   : {val_fltr}
@> Distance filter: {dis_fltr}
@> Source residue : {src_res}
@> Target residue : {trgt_res}
@> Number of paths: {num_paths}""")

    if not os.path.isfile(inp_file):
        print("@> ERROR: Could not find the correlation matrix: " + inp_file +
              "!")
        print(
            "@>        The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    if not os.path.isfile(pdb_file):
        print("@> ERROR: Could not find the pdb file: " + pdb_file + "!")
        print(
            "@>        The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    ##########################################################################
    # Read PDB file
    # TODO: This is the only place where I use Prody.
    # Maybe, I can replace it with a library that only parses
    # PDB files. Prody does a lot more!
    selectedAtoms = parsePDB(pdb_file, subset='ca')

    def is_sparse_format():
        # A first line with exactly three whitespace-separated columns marks
        # the sparse format. Only that line is read, and the context manager
        # closes the file even if reading fails. An empty file is treated as
        # "not sparse" and left to the full-matrix loader.
        with open(inp_file, 'r') as data_file:
            return len(data_file.readline().split()) == 3

    def load_sparse_matrix():
        # Shared call for every data type that accepts the sparse format.
        return parseSparseCorrData(inp_file, selectedAtoms,
                                   Ctype=True,
                                   symmetric=True,
                                   writeAllOutput=False)

    ##########################################################################
    # Read data file and assign to a numpy array
    data_type = sel_type.lower()
    if data_type in ("ndcc", "generic"):
        if is_sparse_format():
            ccMatrix = load_sparse_matrix()
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)
    elif data_type == "absndcc":
        if is_sparse_format():
            ccMatrix = np.absolute(load_sparse_matrix())
        else:
            ccMatrix = np.absolute(np.loadtxt(inp_file, dtype=float))
    elif data_type == "lmi":
        if is_sparse_format():
            ccMatrix = load_sparse_matrix()
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
    elif data_type == "coeviz":
        ccMatrix = np.loadtxt(inp_file, dtype=float)
    elif data_type == "evcouplings":
        ccMatrix = parseEVcouplingsScores(inp_file, selectedAtoms, False)
    elif data_type == "eg":
        # The data type is elasticity graph
        ccMatrix = parseElasticityGraph(inp_file, selectedAtoms,
                                        writeAllOutput=False)
    else:
        print(
            "@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n"
        )
        print(
            "@>        coeviz or evcouplings. If you have your data in full \n"
        )
        print(
            "@>        matrix format and your data type is none of the options\n"
        )
        print("@>        mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    distanceMatrix = buildDistMatrix(selectedAtoms)
    resDict = mapResid2ResIndex(selectedAtoms)

    # Convert the filters once; they are reused for the network and the paths.
    valueFilter = float(val_fltr)
    distanceFilter = float(dis_fltr)

    # These data types use the sequence network builder; every other type
    # uses the dynamics network builder.
    if data_type in ("evcouplings", "generic", "eg"):
        build_network = buildSequenceNetwork
    else:
        build_network = buildDynamicsNetwork
    network = build_network(ccMatrix, distanceMatrix,
                            valueFilter, distanceFilter,
                            selectedAtoms)

    # Look up the source/target entries of resDict once.
    sourceIndex = resDict[src_res]
    targetIndex = resDict[trgt_res]

    suboptimalPaths = pathAnalysis(network,
                                   valueFilter, distanceFilter,
                                   sourceIndex, targetIndex,
                                   selectedAtoms,
                                   int(num_paths))

    # Both output files share the same name stem and differ only in extension.
    out_file_stem = out_file + "-source" + src_res + "-target" + trgt_res
    writePath2VMDFile(suboptimalPaths, selectedAtoms,
                      sourceIndex, targetIndex,
                      pdb_file, out_file_stem + ".tcl")
    writePath2PMLFile(suboptimalPaths, selectedAtoms,
                      sourceIndex, targetIndex,
                      pdb_file, out_file_stem + ".pml")
Exemple #6
0
def centralityAnalysisApp():
    """
    Run the 'analyze' command-line app.

    Settings come from ``handle_arguments_centralityAnalysisApp()``. The
    function checks that the data file and the PDB file exist, parses the PDB
    file, loads the data matrix according to the (case-insensitive) data
    type, builds a network and calls ``centralityAnalysis`` for the requested
    centrality type. The type ``"all"`` runs degree, betweenness, closeness,
    current_flow_betweenness, current_flow_closeness and eigenvector, in that
    order.

    Calls ``sys.exit(-1)`` after printing an error when either input file is
    missing or the data type is not recognised.
    """
    (inp_file, out_file, sel_type, pdb_file, centrality_type,
     value_cutoff, distance_cutoff) = handle_arguments_centralityAnalysisApp()

    print(f"""
@> Running 'analyze' app

@> Input file     : {inp_file}
@> PDB file       : {pdb_file}
@> Data type      : {sel_type}
@> Output         : {out_file}
@> Centrality     : {centrality_type}
@> Value filter   : {value_cutoff}
@> Distance filter: {distance_cutoff}""")

    if not os.path.isfile(inp_file):
        print("@> ERROR: Could not find the correlation matrix: " + inp_file +
              "!")
        print(
            "@>        The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    if not os.path.isfile(pdb_file):
        print("@> ERROR: Could not find the pdb file: " + pdb_file + "!")
        print(
            "@>        The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    ##########################################################################
    # Read PDB file
    # TODO: This is the only place where I use Prody.
    # Maybe, I can replace it with a library that only parses
    # PDB files. Prody does a lot more!
    selectedAtoms = parsePDB(pdb_file, subset='ca')
    valueFilter = float(value_cutoff)
    distanceFilter = float(distance_cutoff)
    distanceMatrix = buildDistMatrix(selectedAtoms)

    def is_sparse_format():
        # A first line with exactly three whitespace-separated columns marks
        # the sparse format. Only that line is read, and the context manager
        # closes the file even if reading fails. An empty file is treated as
        # "not sparse" and left to the full-matrix loader.
        with open(inp_file, 'r') as data_file:
            return len(data_file.readline().split()) == 3

    def load_sparse_matrix():
        # Shared call for every data type that accepts the sparse format.
        return parseSparseCorrData(inp_file, selectedAtoms,
                                   Ctype=True,
                                   symmetric=True,
                                   writeAllOutput=False)

    ##########################################################################
    # Read data file and assign to a numpy array
    data_type = sel_type.lower()
    if data_type in ("ndcc", "generic"):
        if is_sparse_format():
            ccMatrix = load_sparse_matrix()
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)
    elif data_type == "absndcc":
        if is_sparse_format():
            ccMatrix = np.absolute(load_sparse_matrix())
        else:
            ccMatrix = np.absolute(np.loadtxt(inp_file, dtype=float))
    elif data_type == "lmi":
        if is_sparse_format():
            ccMatrix = load_sparse_matrix()
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
    elif data_type == "coeviz":
        ccMatrix = np.loadtxt(inp_file, dtype=float)
    elif data_type == "evcouplings":
        ccMatrix = parseEVcouplingsScores(inp_file, selectedAtoms, False)
    elif data_type == "eg":
        # The data type is elasticity graph
        ccMatrix = parseElasticityGraph(inp_file, selectedAtoms,
                                        writeAllOutput=False)
    else:
        print(
            "@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n"
        )
        print(
            "@>        coeviz or evcouplings. If you have your data in full \n"
        )
        print(
            "@>        matrix format and your data type is none of the options\n"
        )
        print("@>        mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    # These data types use the sequence network builder; every other type
    # uses the dynamics network builder.
    if data_type in ("evcouplings", "generic", "eg"):
        build_network = buildSequenceNetwork
    else:
        build_network = buildDynamicsNetwork
    network = build_network(ccMatrix, distanceMatrix,
                            valueFilter, distanceFilter,
                            selectedAtoms)

    if centrality_type == "all":
        # Community analysis is time consuming. Therefore, "community" is
        # deliberately not part of the default list.
        requested_types = ("degree",
                           "betweenness",
                           "closeness",
                           "current_flow_betweenness",
                           "current_flow_closeness",
                           "eigenvector")
    else:
        requested_types = (centrality_type,)

    for requested_type in requested_types:
        centralityAnalysis(network, valueFilter, distanceFilter, out_file,
                           requested_type, selectedAtoms)