def from_prody_atomgroup(
    cls,
    name: ProteinKey,
    protein: pd.AtomGroup,
    split_type: SplitType = SplitType.KMER,
    split_size: int = 16,
    selection: str = "calpha",
    upsample_rate: int = 50,
    moment_types: List[MomentType] = (
        MomentType.O_3,
        MomentType.O_4,
        MomentType.O_5,
        MomentType.F,
    ),
):
    """
    Build an instance of ``cls`` from a ProDy AtomGroup.

    The atom group is first narrowed to ``"protein"`` and then to
    `selection` (default ``"calpha"``); coordinates, residue indices, atom
    indices and the sequence are all read from that narrowed selection.
    `moment_types` determines which moments are calculated.

    Parameters
    ----------
    name
        Key forwarded to the constructor as its first argument.
    protein
        ProDy atom group to read from.
    split_type, split_size, upsample_rate, moment_types
        Forwarded unchanged to the constructor as keyword arguments;
        `split_type` is additionally passed to ``_split`` afterwards.
    selection
        ProDy selection string applied after the ``"protein"`` selection.

    Returns
    -------
    The new instance, after ``_split(split_type)`` has been called on it.

    Example
    --------
    >>> invariants = MomentInvariants.from_prody_atomgroup(atom_group, split_type=SplitType.RADIUS, moment_types=[MomentType.O_3, MomentType.F, MomentType.phi_7, MomentType.phi_12])
    """
    # Two-step narrowing: protein atoms first, then the caller's selection.
    protein_atoms = protein.select("protein")
    selected = protein_atoms.select(selection)

    coords: np.ndarray = selected.getCoords()
    splits = group_indices(selected.getResindices())
    atom_indices = selected.getIndices()

    options = {
        "sequence": selected.getSequence(),
        "split_type": split_type,
        "split_size": split_size,
        "upsample_rate": upsample_rate,
        "moment_types": moment_types,
    }
    instance = cls(name, len(splits), coords, splits, atom_indices, **options)
    instance._split(split_type)
    return instance
# Per data type: (label, absoluteValues, writeAllOutput) handed to
# distanceDistribution(). The keys double as the set of accepted data types.
_VISUALIZEMAP_DISTRIBUTION_ARGS = {
    "ndcc": ("nDCC", False, True),
    "dcc": ("DCC", False, True),
    "absndcc": ("Abs(nDCC)", True, False),
    "lmi": ("LMI", True, True),
    "nlmi": ("nLMI", True, True),
    "coeviz": ("CoeViz", True, True),
    "evcouplings": ("EVcoupling Score", False, True),
    "generic": ("Correlation", False, True),
    "eg": ("Force Constants", False, True),
}

# Data types whose input may be either a three-column sparse file or a
# full matrix file; the first line of the file decides which parser is used.
_VISUALIZEMAP_SPARSE_CAPABLE = ("ndcc", "dcc", "absndcc", "lmi", "nlmi",
                                "generic")


def _visualizemap_is_sparse(inp_file):
    """Return True when the first line of `inp_file` has exactly three
    whitespace-separated columns (treated as the sparse format)."""
    # Only the first line is needed, so avoid reading the whole file, and
    # let the context manager close the handle even if reading fails.
    with open(inp_file, 'r') as data_file:
        first_line = data_file.readline()
    return len(first_line.split()) == 3


def _visualizemap_load_matrix(inp_file, data_type, selectedAtoms):
    """Load the matrix for an already validated, lower-case `data_type`."""
    if data_type in _VISUALIZEMAP_SPARSE_CAPABLE:
        if _visualizemap_is_sparse(inp_file):
            matrix = parseSparseCorrData(inp_file, selectedAtoms,
                                         Ctype=True, symmetric=True,
                                         writeAllOutput=False)
        elif data_type in ("lmi", "nlmi"):
            matrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
        else:
            matrix = np.loadtxt(inp_file, dtype=float)
        if data_type == "absndcc":
            matrix = np.absolute(matrix)
        return matrix

    if data_type == "coeviz":
        return np.loadtxt(inp_file, dtype=float)
    if data_type == "evcouplings":
        return parseEVcouplingsScores(inp_file, selectedAtoms, False)
    # Only "eg" is left: the data type is elasticity graph.
    return parseElasticityGraph(inp_file, selectedAtoms,
                                writeAllOutput=False)


def _visualizemap_color_bar_limits(ccMatrix, data_type):
    """Return (min, max) color bar limits for `data_type`; exits the
    program when an ndcc/nlmi matrix is not normalized."""
    if data_type in ("absndcc", "coeviz"):
        return 0.0, 1.0

    if data_type == "ndcc":
        minCorrelationValue = np.min(ccMatrix)
        maxCorrelationValue = np.max(ccMatrix)
        # Negative entries: assume that it is an nDCC file.
        lowerLimit = -1.0 if minCorrelationValue < 0.0 else 0.0
        if maxCorrelationValue > 1.00001:
            print("This correlation map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        return lowerLimit, 1.0

    if data_type == "nlmi":
        # Ideally, it is supposed to be 1 but 1.00001 is used to avoid
        # rounding problems.
        if np.max(ccMatrix) > 1.00001:
            print("This LMI map is not normalized!")
            # TODO: At this point, one can ask the user if s/he wants to normalize it!
            sys.exit(-1)
        return 0.0, 1.0

    # dcc, lmi, evcouplings, generic, eg: use the data range itself.
    return np.min(ccMatrix), np.max(ccMatrix)


def visualizemapApp(inp_file, out_file, sel_type, atom_group: prody.AtomGroup,
                    vmin_fltr, vmax_fltr, dmin_fltr, dmax_fltr, cyl_rad):
    """
    Load a correlation-type matrix and produce maps, distributions and
    VMD/PyMol projections for the C-alpha atoms of `atom_group`.

    Parameters
    ----------
    inp_file
        Path of the data file to read.
    out_file
        Output name passed on to every plotting/projection routine.
    sel_type
        Data type, case-insensitive. One of: ndcc, dcc, absndcc, lmi, nlmi,
        coeviz, evcouplings, generic, eg.
    atom_group
        Structure; the selection 'protein and name CA' is used throughout.
    vmin_fltr, vmax_fltr
        Value filters for the projections. When None, the color bar
        minimum/maximum determined for the data type is used instead.
    dmin_fltr, dmax_fltr
        Distance filter bounds; converted with float() and passed to
        filterCorrelationMapByDistance.
    cyl_rad
        Cylinder radius for both VMD and PyMol output. When None, 0.02 is
        used for evcouplings and 0.5 (VMD) / 0.3 (PyMol) otherwise.

    Notes
    -----
    Calls sys.exit(-1) for an unknown `sel_type`, or when ndcc/nlmi data
    contain a value above 1.00001 (not normalized).
    """
    ##########################################################################
    selectedAtoms = atom_group.select('protein and name CA')
    ##########################################################################
    data_type = sel_type.lower()

    if data_type not in _VISUALIZEMAP_DISTRIBUTION_ARGS:
        # NOTE(review): this message omits dcc, nlmi and eg, which are
        # accepted as well; text kept unchanged.
        print(
            "@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n"
        )
        print(
            "@> coeviz or evcouplings. If you have your data in full \n"
        )
        print(
            "@> matrix format and your data type is none of the options\n"
        )
        print("@> mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    # Read data file and assign to a numpy array
    ccMatrix = _visualizemap_load_matrix(inp_file, data_type, selectedAtoms)
    minColorBarLimit, maxColorBarLimit = _visualizemap_color_bar_limits(
        ccMatrix, data_type)

    # Set vmin_fltr and vmax_fltr
    if vmin_fltr is None:
        vmin_fltr = minColorBarLimit
    if vmax_fltr is None:
        vmax_fltr = maxColorBarLimit

    print(f"""@> Min. value filter: {vmin_fltr}""")
    print(f"""@> Max. value filter: {vmax_fltr}""")

    ##########################################################################
    # Call overall correlation calculation
    overallCorrelationMap(ccMatrix, minColorBarLimit, maxColorBarLimit,
                          out_file, " ", selectedAtoms)

    if cyl_rad is None:
        if data_type == "evcouplings":
            VMDcylinderRadiusScale = 0.02
            PMLcylinderRadiusScale = 0.02
        else:
            VMDcylinderRadiusScale = 0.5
            PMLcylinderRadiusScale = 0.3
        print(f"""@> VMD Cylinder radius: {VMDcylinderRadiusScale}""")
        print(f"""@> PyMol Cylinder radius: {PMLcylinderRadiusScale}""")
    else:
        VMDcylinderRadiusScale = float(cyl_rad)
        PMLcylinderRadiusScale = float(cyl_rad)
        print(f"""@> Cylinder radius: {cyl_rad}""")

    # Distance distribution; data_type was validated against the same table
    # above, so the lookup cannot fail.
    label, useAbsolute, writeAll = _VISUALIZEMAP_DISTRIBUTION_ARGS[data_type]
    distanceDistribution(ccMatrix, out_file, label, selectedAtoms,
                         absoluteValues=useAbsolute,
                         writeAllOutput=writeAll)

    ##########################################################################
    # Check number of chains. If there are multiple chains, plot inter and
    # intra chain correlations
    saveMatrix = False
    if len(set(selectedAtoms.getChids())) > 1:
        intraChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)
        interChainCorrelationMaps(ccMatrix, minColorBarLimit, maxColorBarLimit,
                                  out_file, " ", selectedAtoms, saveMatrix)

    # Here, we can filter some correlation values closer than a distance.
    # Typically, it is supposed to filter out the correlation within the
    # same secondary structure etc.
    disMinValue = float(dmin_fltr)
    disMaxValue = float(dmax_fltr)
    ccMatrix = filterCorrelationMapByDistance(ccMatrix, out_file, " ",
                                              selectedAtoms,
                                              disMinValue, disMaxValue,
                                              absoluteValues=False,
                                              writeAllOutput=False)

    # Overall projection
    projectCorrelationsOntoProteinVMD(
        out_file, ccMatrix, out_file, selectedAtoms,
        vminFilter=float(vmin_fltr),
        vmaxFilter=float(vmax_fltr),
        cylinderRadiusScaler=VMDcylinderRadiusScale,
        absoluteValues=True, writeAllOutput=True)

    projectCorrelationsOntoProteinPyMol(
        out_file, ccMatrix, out_file, selectedAtoms,
        vminFilter=float(vmin_fltr),
        vmaxFilter=float(vmax_fltr),
        cylinderRadiusScaler=PMLcylinderRadiusScale,
        absoluteValues=True, writeAllOutput=True)