Exemple #1
0
    def from_prody_atomgroup(
        cls,
        name: ProteinKey,
        protein: pd.AtomGroup,
        split_type: SplitType = SplitType.KMER,
        split_size: int = 16,
        selection: str = "calpha",
        upsample_rate: int = 50,
        moment_types: List[MomentType] = (
            MomentType.O_3,
            MomentType.O_4,
            MomentType.O_5,
            MomentType.F,
        ),
    ):
        """
        Construct MomentInvariants instance from a ProDy AtomGroup object.
        Selects according to `selection` string, (default = alpha carbons)
        `moment_types` determines which moments are calculated.

        Example
        --------
        >>> invariants = MomentInvariants.from_prody_atomgroup(atom_group, split_type=SplitType.RADIUS, moment_types=[MomentType.O_3, MomentType.F, MomentType.phi_7, MomentType.phi_12])
        """
        protein: pd.AtomGroup = protein.select("protein").select(selection)
        coordinates: np.ndarray = protein.getCoords()
        residue_splits = group_indices(protein.getResindices())
        shape = cls(
            name,
            len(residue_splits),
            coordinates,
            residue_splits,
            protein.getIndices(),
            sequence=protein.getSequence(),
            split_type=split_type,
            split_size=split_size,
            upsample_rate=upsample_rate,
            moment_types=moment_types,
        )
        shape._split(split_type)
        return shape
Exemple #2
0
def visualizemapApp(inp_file, out_file, sel_type, atom_group: prody.AtomGroup,
                    vmin_fltr, vmax_fltr, dmin_fltr, dmax_fltr, cyl_rad):

    ##########################################################################
    selectedAtoms = atom_group.select('protein and name CA')
    ##########################################################################
    minColorBarLimit = 0.0
    maxColorBarLimit = 1.0
    # Read data file and assign to a numpy array
    if sel_type.lower() == "ndcc":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms, \
                                            Ctype=True,
                                            symmetric=True,
                                            writeAllOutput=False)
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)
        # Check the data range in the matrix.
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        if minCorrelationValue < 0.0:
            # Assume that it is an nDCC file
            minColorBarLimit = -1.0
        if maxCorrelationValue > 1.00001:
            print("This correlation map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        else:
            maxColorBarLimit = 1.0
    elif sel_type.lower() == "dcc":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms, \
                                            Ctype=True,
                                            symmetric=True,
                                            writeAllOutput=False)
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)
        # Check the data range in the matrix.
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    elif sel_type.lower() == "absndcc":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = np.absolute(parseSparseCorrData(inp_file, selectedAtoms, \
                                                        Ctype=True,
                                                        symmetric=True,
                                                        writeAllOutput=False))
        else:
            ccMatrix = np.absolute(np.loadtxt(inp_file, dtype=float))
        minColorBarLimit = 0.0
        maxColorBarLimit = 1.0
    elif sel_type.lower() == "lmi":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms, \
                                            Ctype=True,
                                            symmetric=True,
                                            writeAllOutput=False)
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
        #minColorBarLimit = 0.0

    elif sel_type.lower() == "nlmi":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms, \
                                            Ctype=True,
                                            symmetric=True,
                                            writeAllOutput=False)
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
        #minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = 0.0

        # Ideally, it is supposed to be 1 but I used 1.00001 to avoid
        # rounding problems
        if maxCorrelationValue > 1.00001:
            print("This LMI map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        else:
            maxColorBarLimit = 1.0

    elif sel_type.lower() == "coeviz":
        ccMatrix = np.loadtxt(inp_file, dtype=float)
        minColorBarLimit = 0.0
        maxColorBarLimit = 1.0

    elif sel_type.lower() == "evcouplings":
        ccMatrix = parseEVcouplingsScores(inp_file, selectedAtoms, False)
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    elif sel_type.lower() == "generic":
        # Check if the data type is sparse matrix
        data_file = open(inp_file, 'r')
        allLines = data_file.readlines()
        data_file.close()

        # Read the first line to determine if the matrix is sparse format
        words = allLines[0].split()

        # Read the 1st line and check if it has three columns
        if (len(words) == 3):
            ccMatrix = parseSparseCorrData(inp_file, selectedAtoms, \
                                            Ctype=True,
                                            symmetric=True,
                                            writeAllOutput=False)
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)

        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    elif sel_type.lower() == "eg":
        # The data type is elasticity graph
        ccMatrix = parseElasticityGraph(inp_file, selectedAtoms, \
                                            writeAllOutput=False)

        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        minColorBarLimit = minCorrelationValue
        maxColorBarLimit = maxCorrelationValue
    else:
        print(
            "@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n"
        )
        print(
            "@>        coeviz or evcouplings. If you have your data in full \n"
        )
        print(
            "@>        matrix format and your data type is none of the options\n"
        )
        print("@>        mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    # Set vmin_fltr and vmax_fltr
    if (vmin_fltr == None):
        vmin_fltr = minColorBarLimit
    if (vmax_fltr == None):
        vmax_fltr = maxColorBarLimit

    print(f"""@> Min. value filter: {vmin_fltr}""")
    print(f"""@> Max. value filter: {vmax_fltr}""")

    ##########################################################################
    # Call overall correlation calculation

    overallCorrelationMap(ccMatrix, minColorBarLimit, maxColorBarLimit,
                          out_file, " ", selectedAtoms)

    plotDistributions = True
    VMDcylinderRadiusScale = 0.5
    PMLcylinderRadiusScale = 0.3

    if (cyl_rad == None):
        if sel_type.lower() == "evcouplings":
            VMDcylinderRadiusScale = 0.02
            PMLcylinderRadiusScale = 0.02
        else:
            VMDcylinderRadiusScale = 0.5
            PMLcylinderRadiusScale = 0.3
        print(f"""@> VMD Cylinder radius: {VMDcylinderRadiusScale}""")
        print(f"""@> PyMol Cylinder radius: {PMLcylinderRadiusScale}""")

    else:
        VMDcylinderRadiusScale = float(cyl_rad)
        PMLcylinderRadiusScale = float(cyl_rad)
        print(f"""@> Cylinder radius: {cyl_rad}""")

    if plotDistributions:
        if sel_type.lower() == "ndcc":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "nDCC",
                                 selectedAtoms,
                                 absoluteValues=False,
                                 writeAllOutput=True)
        elif sel_type.lower() == "dcc":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "DCC",
                                 selectedAtoms,
                                 absoluteValues=False,
                                 writeAllOutput=True)

        elif sel_type.lower() == "absndcc":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "Abs(nDCC)",
                                 selectedAtoms,
                                 absoluteValues=True,
                                 writeAllOutput=False)

        elif sel_type.lower() == "lmi":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "LMI",
                                 selectedAtoms,
                                 absoluteValues=True,
                                 writeAllOutput=True)
        elif sel_type.lower() == "nlmi":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "nLMI",
                                 selectedAtoms,
                                 absoluteValues=True,
                                 writeAllOutput=True)
        elif sel_type.lower() == "coeviz":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "CoeViz",
                                 selectedAtoms,
                                 absoluteValues=True,
                                 writeAllOutput=True)

        elif sel_type.lower() == "evcouplings":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "EVcoupling Score",
                                 selectedAtoms,
                                 absoluteValues=False,
                                 writeAllOutput=True)

        elif sel_type.lower() == "generic":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "Correlation",
                                 selectedAtoms,
                                 absoluteValues=False,
                                 writeAllOutput=True)
        elif sel_type.lower() == "eg":
            distanceDistribution(ccMatrix,
                                 out_file,
                                 "Force Constants",
                                 selectedAtoms,
                                 absoluteValues=False,
                                 writeAllOutput=True)
        else:
            print("Warning: Unknows correlation data.\n")
            print("         Correlations can be dcc, ndcc, absndcc, lmi,\n")
            print("         nlmi, coeviz or evcouplings!\n")

    ##########################################################################
    # Check number of chains. If there are multiple chains, plot inter and
    # intra chain correlations
    chains = Counter(selectedAtoms.getChids()).keys()
    saveMatrix = False
    plotChains = True
    if len(chains) > 1 and plotChains:
        intraChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)
        interChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)

    # Here, we can filter some correlation values closer than a distance.
    # Typically, it is supposed to filter out the correlation within the
    # same secondary structure etc.
    filterByDistance = True
    if filterByDistance:
        disMinValue = float(dmin_fltr)
        disMaxValue = float(dmax_fltr)
        ccMatrix = filterCorrelationMapByDistance(ccMatrix,
                                                  out_file,
                                                  " ",
                                                  selectedAtoms,
                                                  disMinValue,
                                                  disMaxValue,
                                                  absoluteValues=False,
                                                  writeAllOutput=False)

    # Overall projection
    projectCorrelationsOntoProteinVMD(
        out_file,
        ccMatrix,
        out_file,
        selectedAtoms,
        vminFilter=float(vmin_fltr),
        vmaxFilter=float(vmax_fltr),
        cylinderRadiusScaler=VMDcylinderRadiusScale,
        absoluteValues=True,
        writeAllOutput=True)

    projectCorrelationsOntoProteinPyMol(
        out_file,
        ccMatrix,
        out_file,
        selectedAtoms,
        vminFilter=float(vmin_fltr),
        vmaxFilter=float(vmax_fltr),
        cylinderRadiusScaler=PMLcylinderRadiusScale,
        absoluteValues=True,
        writeAllOutput=True)