Code Example #1
File: trajectoryHandler.py  Project: lowks/pyProCT
    def __init__(self, parameters, observer):
        """
        Class creator. It parses the needed files and extracts info and coordinates.
        """
        super(TrajectoryHandler,self).__init__(observer)
        self.parameters = parameters


        matrix_parameters = parameters["data"]["matrix"]['parameters']
        parameters["data"]["files"] = self.expand_file_lists(parameters["data"]["files"])
        self.files = parameters["data"]["files"]
        self.pdbs = []

        if len(self.files) == 0:
            common.print_and_flush( "[ERROR] no pdbs. Exiting...\n")
            self.notify("SHUTDOWN","No pdbs defined in script.")
            exit()

        self.notify("Loading","Loading Trajectories")

        # Bookmarking structure
        self.bookmarking = {
                             "pdb": None,
                             "selections": {}
        }

        self.coordsets = self.getMergedStructure().getCoordsets()
        self.number_of_conformations = self.coordsets.shape[0]
        self.number_of_atoms = self.coordsets.shape[1]

        self.handle_selection_parameters(matrix_parameters)
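
A note on the structure expected above: the constructor only reads two branches of the parameters mapping, "data" -> "files" and "data" -> "matrix" -> "parameters". A minimal sketch of a script dictionary with that shape follows; the concrete file names and the empty matrix options are hypothetical placeholders, not taken from pyProCT.

# Minimal sketch of the parameters layout read by TrajectoryHandler.__init__
# (file names and matrix options are hypothetical placeholders).
parameters = {
    "data": {
        "files": ["part_1.pdb", "part_2.pdb"],   # expanded by expand_file_lists
        "matrix": {
            "parameters": {}                     # forwarded to handle_selection_parameters
        }
    }
}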
Code Example #2
File: dataLoader.py  Project: migonsu/pyProCT
 def close(self):
     """
     Must return the result of merging all the previously loaded data pieces as 
     a Data Object.
     If the number of loaded elements is 0 the program must exit (we 
     cannot perform any useful analysis without data!)  
     """
     if self.number_of_elements == 0:
         common.print_and_flush("[ERROR DataLoader:close] No loaded data. Exiting...\n")
         exit()
Code Example #3
 def close(self):
     """
     Must return the result of merging all the previously loaded data pieces as 
     a Data Object.
     If the number of loaded elements is 0 the program must exit (we 
     cannot perform any useful analysis without data!)  
     """
     if self.number_of_elements == 0:
         common.print_and_flush(
             "[ERROR DataLoader:close] No loaded data. Exiting...\n")
         exit()
Code Example #4
File: trajectoryHandler.py  Project: lowks/pyProCT
    def check_extension(self, ext):
        """
        Helper function to check if the file extension is allowed. If not it shuts down the program.

        @param ext: The extension string (with the separating period!)

        @return: Nothing (exits if the condition is not fulfilled)
        """
        if not ext in [".dcd",".pdb"]:
            common.print_and_flush( "[ERROR] pyProCT cannot read this file format.\n")
            self.notify("SHUTDOWN","Wrong file format.")
            exit()
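
The extension string passed to check_extension is expected to keep its leading period, exactly as os.path.splitext returns it. A small self-contained sketch of that contract, with a made-up file name:

import os.path

# os.path.splitext keeps the separating period, which is what check_extension expects.
name, ext = os.path.splitext("trajectories/protein.dcd")
print(ext)                      # ".dcd"
print(ext in [".dcd", ".pdb"])  # True -> check_extension would return without exiting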
Code Example #5
File: stateGraphTools.py  Project: lowks/pyProCT
def do_graph(clustering,num_elems_of_traj_2,std_deviations,filename):
    """
    """
    graph = digraph()
    labels = populate_nodes_with_labels(clustering, num_elems_of_traj_2,std_deviations, graph)
    prob_matrix = calculate_probability_matrix(clustering)
    add_graph_edges(graph,labels,clustering,prob_matrix)
    tmp_file = open("tmp_dot","w")
    tmp_file.write(write(graph))
    tmp_file.close()
    common.print_and_flush("delegating to dot...")
    os.system("cat tmp_dot|dot -Tpng -o "+filename+";rm tmp_dot")
Code Example #6
def do_graph(clustering,num_elems_of_traj_2,std_deviations,filename):
    """
    """
    graph = digraph()
    labels = populate_nodes_with_labels(clustering, num_elems_of_traj_2,std_deviations, graph)
    prob_matrix = calculate_probability_matrix(clustering)
    add_graph_edges(graph,labels,clustering,prob_matrix)
    tmp_file = open("tmp_dot","w")
    tmp_file.write(write(graph))
    tmp_file.close()
    common.print_and_flush("delegating to dot...")
    os.system("cat tmp_dot|dot -Tpng -o "+filename+";rm tmp_dot")
Code Example #7
File: trajectoryHandler.py  Project: gabocic/python
    def check_extension(self, ext):
        """
        Helper function to check if the file extension is allowed. If not it shuts down the program.

        @param ext: The extension string (with the separating period!)

        @return: Nothing (exits if the condition is not fulfilled)
        """
        if not ext in [".dcd", ".pdb"]:
            common.print_and_flush(
                "[ERROR] pyProCT cannot read this file format.\n")
            self.notify("SHUTDOWN", "Wrong file format.")
            exit()
Code Example #8
File: stateGraphTools.py  Project: lowks/pyProCT
def purge_mixed_clusters_and_do_graph(mixed, pure_clusters_traj1,condensed_distance_matrix,std_devs_from_A,path):
    """
    """
    common.print_and_flush( "Purging clusters...")
    # Purge all mixed clusters of elements from traj2
    purged = []
    num_elems_of_traj_2 = []
    for i in range(len(mixed)):
        cluster, elems_in_traj1, elems_in_traj2 = mixed[i] #@UnusedVariable
        num_elems_of_traj_2.append(len(elems_in_traj2))
        # We rebuild the cluster with only elements of traj 1
        purged.append(Cluster(prototype=None,elements = elems_in_traj1))
#        print "l ",len(elems_in_traj1)," ",len(elems_in_traj2)
    
    # we also need to have traj 1 pure clusters
    purged.extend(pure_clusters_traj1)
    
    # Those don't have any element of traj 2, so we put 0s in the number of 
    # elements list
    num_elems_of_traj_2.extend([0]*len(pure_clusters_traj1))
    
    #Calculate statistics for the remaining clusters
    for i in range(len(pure_clusters_traj1)):
        medoid = pure_clusters_traj1[i].calculate_medoid(condensed_distance_matrix)
        std_devs_from_A.append(get_distance_std_dev_for_elems(pure_clusters_traj1[i].all_elements,medoid,condensed_distance_matrix))
    common.print_and_flush( "Done.\n")
    
    common.print_and_flush("Trying to draw state graph...")
    do_graph(Clustering(purged,sort =  False),num_elems_of_traj_2,std_devs_from_A,path)
    common.print_and_flush("Done.\n")
Code Example #9
def purge_mixed_clusters_and_do_graph(mixed, pure_clusters_traj1,condensed_distance_matrix,std_devs_from_A,path):
    """
    """
    common.print_and_flush( "Purging clusters...")
    # Purge all mixed clusters of elements from traj2
    purged = []
    num_elems_of_traj_2 = []
    for i in range(len(mixed)):
        cluster, elems_in_traj1, elems_in_traj2 = mixed[i] #@UnusedVariable
        num_elems_of_traj_2.append(len(elems_in_traj2))
        # We rebuild the cluster with only elements of traj 1
        purged.append(Cluster(prototype=None,elements = elems_in_traj1))
#        print "l ",len(elems_in_traj1)," ",len(elems_in_traj2)
    
    # we also need to have traj 1 pure clusters
    purged.extend(pure_clusters_traj1)
    
    # Those don't have any element of traj 2, so we put 0s in the number of 
    # elements list
    num_elems_of_traj_2.extend([0]*len(pure_clusters_traj1))
    
    #Calculate statistics for the remaining clusters
    for i in range(len(pure_clusters_traj1)):
        medoid = pure_clusters_traj1[i].calculate_medoid(condensed_distance_matrix)
        std_devs_from_A.append(get_distance_std_dev_for_elems(pure_clusters_traj1[i].all_elements,medoid,condensed_distance_matrix))
    common.print_and_flush( "Done.\n")
    
    common.print_and_flush("Trying to draw state graph...")
    do_graph(Clustering(purged,sort =  False),num_elems_of_traj_2,std_devs_from_A,path)
    common.print_and_flush("Done.\n")
Code Example #10
File: trajectoryHandler.py  Project: gabocic/python
    def __init__(self, parameters, observer):
        """
        Class creator. It parses the needed files and extracts info and coordinates.
        """

        super(TrajectoryHandler, self).__init__(observer)

        print "Reading conformations..."
        prody.confProDy(verbosity="none")

        self.parameters = parameters
        matrix_parameters = parameters.get_value(
            "data.matrix.parameters", default_value=ProtocolParameters.empty())
        parameters["data"]["files"] = self.expand_file_lists(
            parameters["data"]["files"])
        self.files = parameters["data"]["files"]
        self.pdbs = []

        if len(self.files) == 0:
            common.print_and_flush("[ERROR] no pdbs. Exiting...\n")
            self.notify("SHUTDOWN", "No pdbs defined in script.")
            exit()

        self.notify("Loading", "Loading Trajectories")

        # Bookmarking structure
        self.bookmarking = {"pdb": None, "selections": {}}

        merged_structure = self.getMergedStructure()
        self.coordsets = merged_structure.getCoordsets()
        self.number_of_conformations = self.coordsets.shape[0]
        self.number_of_atoms = self.coordsets.shape[1]

        self.handle_selection_parameters(matrix_parameters)
        print "%d conformations of %d atoms were read." % (
            merged_structure.numCoordsets(), merged_structure.numAtoms())
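
Compared with code example #1, this version fetches the matrix options through a dotted-path accessor with a default value instead of indexing the dictionary directly. A toy sketch of that lookup idea on a plain dictionary (get_value below is illustrative only, not the ProtocolParameters API):

# Illustrative dotted-path lookup with a default, mirroring
# parameters.get_value("data.matrix.parameters", default_value=...).
def get_value(mapping, dotted_key, default_value=None):
    node = mapping
    for key in dotted_key.split("."):
        if not isinstance(node, dict) or key not in node:
            return default_value
        node = node[key]
    return node

parameters = {"data": {"matrix": {"parameters": {}}}}   # hypothetical, see the sketch after example #1
matrix_parameters = get_value(parameters, "data.matrix.parameters", default_value={})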
Code Example #11
    def load_data_from_source(self, source):
        """
        Loads a structure file (pdb or dcd) and updates source info.

        :param source: A DataSource object with one of these sets of keywords:
        
        - For 'pdb' files:

            {
                "source": ... ,
                "base_selection": ...
            }

        Where 'source' contains the path of the pdb file we want to load.

        - For 'dcd' files:

            {
                "source": ...,
                "atoms_source": ...,
                "base_selection": ...
            }

        Where 'source' contains the path of the 'dcd' file we want to load and 'atoms_source' the path of the pdb file
        containing the atomic information.

        In both cases 'base_selection' is a Prody selection string that performs an initial selection of the atoms. This is
        useful when loading several files with different numbers of atoms, as it allows selecting the subset of atoms they
        have in common. It is up to the user to maintain a 1 to 1 mapping between the atoms of each of the files.

        The source object will be enriched with some extra information from the loaded structure ensemble.

        :return: Prody's structure object with the loaded ensemble
        """
        _, ext = os.path.splitext(source.get_path())
        
        if ext == ".dcd":
            structure = prody.parsePDB(source.get_info("atoms_source"))
            # Leave only atomic information
            removeAllCoordsetsFromStructure(structure)
            dcd_data = prody.DCDFile(source.get_path())
            coordsets = dcd_data.getCoordsets()
            # Add all coordsets to atomic information
            for coordset in coordsets:
                structure.addCoordset(coordset)
            
        elif ext == ".pdb":
            structure = prody.parsePDB(source.get_path())
        else:
            print "[ERROR][ProteinStructureEnsembleData::get_structure] pyProCT does not know hot to load the file %s (unknown extension '%s')"%(source.get_path(),ext)
            exit()
        
        if source.has_info("base_selection"):
            structure = structure.select(source.get_info("base_selection")).copy()
            if structure is None:
                common.print_and_flush("[ERROR ProteinStructureEnsembleData::get_structure] Improductive base selection (%s). Exiting...\n"%source.get_info("base_selection"))
                exit()

        source.add_info("number_of_conformations", structure.numCoordsets())
        source.add_info("number_of_atoms", structure.numAtoms())
        
        self.model_numbers.extend(self.get_model_numbers(source, structure.numCoordsets()))
        self.model_remarks.extend(self.get_remarks(source, structure.numCoordsets()))
        
        return  structure, structure.numCoordsets()
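
The ".dcd" branch above follows a common ProDy pattern: parse a pdb for the atomic data, read the coordinate frames from the dcd and attach them one by one as coordinate sets (pyProCT additionally strips the pdb's own coordinate sets first with a project helper). A stripped-down sketch of that pattern, with hypothetical file names:

import prody

# Atomic data from a pdb, coordinates from a dcd; each frame becomes one coordinate set.
structure = prody.parsePDB("topology.pdb")
dcd_data = prody.DCDFile("trajectory.dcd")
for coordset in dcd_data.getCoordsets():
    structure.addCoordset(coordset)
print("%d conformations of %d atoms" % (structure.numCoordsets(), structure.numAtoms()))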
Code Example #12
File: trajectoryHandler.py  Project: gabocic/python
    def get_structure(self, file_info):
        """
        Loads a structure file (pdb or dcd) and fills its structure_info data for logging.

        @param file_info: Is a string containing the path of the file or a dictionary with this structure:
        'pdb' files:

            {
                "file": ... ,
                "base_selection": ...
            }

        Where 'file' contains the path of the pdb file we want to load.

        'dcd' files:

            {
                "file": ...,
                "atoms_file": ...,
                "base_selection": ...
            }

        Where 'file' contains the path of the 'dcd' file we want to load and 'atoms_file' the source of the pdb file containing
        the atomic information.

        In both cases 'base_selection' is a Prody selection string that performs an initial selection of the atoms. This is
        useful when loading several files with different numbers of atoms, as it allows selecting the subset of atoms they
        have in common. It is up to the user to maintain a 1 to 1 mapping between the atoms of each of the files.

        @return: A tuple containing the structure object and a structure_info dictionary.
        """
        structure_info = {
            "source": "",
            "source of atoms": "",
            "base selection": "",
            "number of conformations": "",
            "number of atoms": ""
        }

        if isinstance(file_info, basestring):
            # Then is a path, and must be a pdb
            path = file_info
            structure_info["source"] = path

            name, ext = os.path.splitext(path)

            self.check_extension(ext)

            if ext == ".dcd":
                common.print_and_flush(
                    "[ERROR TrajectoryHandler::get_structure] Path format can only be used with pdb files. Exiting...\n"
                )
                self.notify("SHUTDOWN", "Fatal error reading pdbs.")
                exit()
            else:
                structure = prody.parsePDB(path)
                structure_info["number of conformations"] = structure.numCoordsets()
                structure_info["number of atoms"] = structure.numAtoms()
                return structure, structure_info
        else:
            # {"file":  , "selection":  } object or
            # {"file": , "atoms_file":, "selection"} if the file is a dcd file
            path = file_info["file"]
            structure_info["source"] = path
            name, ext = os.path.splitext(path)
            self.check_extension(ext)

            if ext == ".dcd":
                structure_info["source of atoms"] = file_info["atoms_file"]

                structure = prody.parsePDB(file_info["atoms_file"])
                removeAllCoordsetsFromStructureLeavingFirst(structure)
                dcd_data = prody.DCDFile(path)
                coordsets = dcd_data.getCoordsets()

                for coordset in coordsets:
                    structure.addCoordset(coordset)
            else:
                structure = prody.parsePDB(path)

            if "base_selection" in file_info and file_info[
                    "base_selection"] != "":
                structure = structure.select(file_info["base_selection"])
                structure_info["base selection"] = file_info["base_selection"]

            structure_info["number of conformations"] = structure.numCoordsets(
            )
            structure_info["number of atoms"] = structure.numAtoms()
            return structure, structure_info
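
As the docstring describes, file_info can be either a bare path (pdb only) or a dictionary. A hedged sketch of the two accepted shapes; every path and the selection string below are made-up placeholders:

# A bare path: only valid for pdb files.
pdb_file_info = "models/ensemble.pdb"

# A dictionary: required for dcd files, which need a separate pdb for the atomic data.
dcd_file_info = {
    "file": "models/trajectory.dcd",
    "atoms_file": "models/topology.pdb",
    "base_selection": "name CA"
}
# Either shape would then be passed as: structure, info = handler.get_structure(file_info)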
Code Example #13
    def load_data_from_source(self, source):
        """
        Loads a structure file (pdb or dcd) and updates source info.

        :param source: A DataSource object with one of these sets of keywords:
        
        - For 'pdb' files:

            {
                "source": ... ,
                "base_selection": ...
            }

        Where 'source' contains the path of the pdb file we want to load.

        - For 'dcd' files:

            {
                "source": ...,
                "atoms_source": ...,
                "base_selection": ...
            }

        Where 'source' contains the path of the 'dcd' file we want to load and 'atoms_source' the path of the pdb file
        containing the atomic information.

        In both cases 'base_selection' is a Prody selection string that performs an initial selection of the atoms. This is
        useful when loading several files with different numbers of atoms, as it allows selecting the subset of atoms they
        have in common. It is up to the user to maintain a 1 to 1 mapping between the atoms of each of the files.

        The source object will be enriched with some extra information from the loaded structure ensemble.

        :return: Prody's structure object with the loaded ensemble
        """
        _, ext = os.path.splitext(source.get_path())
        
        if ext == ".dcd":
            structure = prody.parsePDB(source.get_info("atoms_source"))
            # Leave only atomic information
            removeAllCoordsetsFromStructure(structure)
            dcd_data = prody.DCDFile(source.get_path())
            coordsets = dcd_data.getCoordsets()
            # Add all coordsets to atomic information
            for coordset in coordsets:
                structure.addCoordset(coordset)
            
        elif ext == ".pdb":
            structure = prody.parsePDB(source.get_path())
        else:
            print "[ERROR][ProteinStructureEnsembleData::get_structure] pyProCT does not know how to load the file %s (unknown extension '%s')"%(source.get_path(),ext)
            exit()
        
        if source.has_info("base_selection"):
            structure = structure.select(source.get_info("base_selection")).copy()
            if structure is None:
                common.print_and_flush("[ERROR ProteinStructureEnsembleData::get_structure] Improductive base selection (%s). Exiting...\n"%source.get_info("base_selection"))
                exit()
        
        print "Loaded %d conformations with %d atoms from %s."%(structure.numCoordsets(), 
                                                                structure.numAtoms(),
                                                                source.get_path())
#         prody.writePDB("%s_test"%source.get_path(), structure, csets= [1])

        source.add_info("number_of_conformations", structure.numCoordsets())
        source.add_info("number_of_atoms", structure.numAtoms())
        
        self.model_numbers.extend(self.get_model_numbers(source, structure.numCoordsets()))
        self.model_remarks.extend(self.get_remarks(source, structure.numCoordsets()))
        
        return  structure, structure.numCoordsets()
Code Example #14
File: TestCommonTools.py  Project: migonsu/pyProCT
 def test_print_and_flush(self):
     handler = cStringIO.StringIO()
     print_and_flush("Hello", handler)
     self.assertEqual(handler.getvalue(), "Hello")
Code Example #15
File: trajectoryHandler.py  Project: lowks/pyProCT
    def get_structure(self, file_info):
        """
        Loads a structure file (pdb or dcd) and fills its structure_info data for logging.

        @param file_info: Is a string containing the path of the file or a dictionary with this structure:
        'pdb' files:

            {
                "file": ... ,
                "base_selection": ...
            }

        Where 'file' contains the path of the pdb file we want to load.

        'dcd' files:

            {
                "file": ...,
                "atoms_file": ...,
                "base_selection": ...
            }

        Where 'file' contains the path of the 'dcd' file we want to load and 'atoms_file' the source of the pdb file containing
        the atomic information.

        In both cases 'base_selection' is a Prody selection string that performs an initial selection of the atoms. This is
        useful when loading several files with different numbers of atoms, as it allows selecting the subset of atoms they
        have in common. It is up to the user to maintain a 1 to 1 mapping between the atoms of each of the files.

        @return: A tuple containing the structure object and a structure_info dictionary.
        """
        structure_info = {
              "source":"",
              "source of atoms":"",
              "base selection": "",
              "number of conformations": "",
              "number of atoms":  ""
        }

        if isinstance(file_info, basestring):
            # Then is a path, and must be a pdb
            path = file_info
            structure_info["source"] = path

            name, ext = os.path.splitext(path)

            self.check_extension(ext)

            if ext == ".dcd":
                common.print_and_flush( "[ERROR TrajectoryHandler::get_structure] Path format can only be used with pdb files. Exiting...\n")
                self.notify("SHUTDOWN", "Fatal error reading pdbs.")
                exit()
            else:
                structure = prody.parsePDB(path)
                structure_info["number of conformations"] = structure.numCoordsets()
                structure_info["number of atoms"] = structure.numAtoms()
                return  structure, structure_info
        else:
            # {"file":  , "selection":  } object or
            # {"file": , "atoms_file":, "selection"} if the file is a dcd file
            path = file_info["file"]
            structure_info["source"] = path
            name, ext = os.path.splitext(path)
            self.check_extension(ext)

            if ext == ".dcd":
                structure_info["source of atoms"] = file_info["atoms_file"]

                structure = prody.parsePDB(file_info["atoms_file"])
                removeAllCoordsetsFromStructureLeavingFirst(structure)
                dcd_data = prody.DCDFile(path)
                coordsets = dcd_data.getCoordsets()

                for coordset in coordsets:
                    structure.addCoordset(coordset)
            else:
                structure = prody.parsePDB(path)

            if "base_selection" in file_info and file_info["base_selection"] !=  "":
                structure = structure.select(file_info["base_selection"])
                structure_info["base selection"]=file_info["base_selection"]

            structure_info["number of conformations"] = structure.numCoordsets()
            structure_info["number of atoms"] = structure.numAtoms()
            return  structure, structure_info
Code Example #16
File: TestCommonTools.py  Project: ztypaker/pyProCT
 def test_print_and_flush(self):
     handler = cStringIO.StringIO()
     print_and_flush("Hello", handler)
     self.assertEqual(handler.getvalue(), "Hello")