예제 #1
0
    def _urls_for_tranches_2d(self, col_list: List[str], row_list: List[str],
                              fileformat: str = "smi") -> List[str]:
        """ Build the download urls of the 2D files for the requested tranches.

            A tranche name is a column name followed by a row name. Every tranche
            contributes sixteen urls, one for each two-letter file suffix.

            Parameters
            ----------
            col_list : list of str
                Column names of the tranches. Columns are named with letters from A
                to K and correspond to molecular weight.

            row_list : list of str
                Row names of the tranches. Rows are named with letters from A to K
                and correspond to LogP.

            fileformat : {"smi", "txt"}, default="smi"
                Extension of the files the urls point to.

            Returns
            -------
            url_list : list of str
                The urls, ordered by column, then row, then file suffix.

            Raises
            ------
            InvalidFileFormat
                If fileformat is neither "smi" nor "txt".

        """
        if fileformat not in ("smi", "txt"):
            raise InvalidFileFormat(f"{fileformat} is not a valid file format. Valid formats are smi or txt")

        # The file suffixes do not depend on the tranche: the first letter is
        # taken from "ABCE" and the second one from "ABCD".
        suffixes = [first + second
                    for first, second in itertools.product("ABCE", "ABCD", repeat=1)]

        url_list = []
        for tranch in [col + row for col in col_list for row in row_list]:
            prefix = self._tranches_2d_url + tranch + "/" + tranch
            url_list.extend(prefix + suffix + "." + fileformat for suffix in suffixes)

        return url_list
예제 #2
0
 def _validate_filters(self, fileformat: str,
                       availability: Optional[str] = None,
                       bioactive: Optional[str] = None,
                       biogenic: Optional[str] = None,
                       reactivity: Optional[str] = None) -> None:
     """ Check the file format and the filters given to the download methods.

         The file format is always checked against self.file_formats. A filter
         is only checked when a non-empty value was given for it, in which case
         it must be contained in the matching entry of self.filters.

         Parameters
         ----------
         fileformat : str
             The format of the files.

         availability : str, default is None
             The availability of the molecules.

         bioactive : str, default is None
             Subset of bioactivity and drugs.

         biogenic : str, default is None
             Subset of biogenic.

         reactivity: str, default is None
             The reactivity of the molecules.

         Raises
         ------
         InvalidFileFormat
             If fileformat is not in self.file_formats.

         InvalidAvailabilityError
             If availability is not in self.filters["Availability"].

         InvalidBioactiveError
             If bioactive is not in self.filters["BioactiveAndDrugs"].

         InvalidBiogenicError
             If biogenic is not in self.filters["Biogenic"].

         InvalidReactivityError
             If reactivity is not in self.filters["Reactivity"].
     """
     if fileformat not in self.file_formats:
         raise InvalidFileFormat(f"{fileformat} is not a valid fileformat.")

     # Each guard short-circuits on an empty filter, so self.filters is only
     # consulted for the filters that were actually supplied.
     if availability and availability not in self.filters["Availability"]:
         raise InvalidAvailabilityError(f"{availability} is not a valid availability.")

     if bioactive and bioactive not in self.filters["BioactiveAndDrugs"]:
         raise InvalidBioactiveError(f"{bioactive} is not a valid bioactivity.")

     if biogenic and biogenic not in self.filters["Biogenic"]:
         raise InvalidBiogenicError(f"{biogenic} is not a valid biogenic.")

     if reactivity and reactivity not in self.filters["Reactivity"]:
         raise InvalidReactivityError(f"{reactivity} is not a valid reactivity.")
예제 #3
0
    def from_ligand_file(cls, file_name: str, method: str, radius: float,
        feat_def: Callable, feat_list: Optional[List[str]] = None) -> "LigandBasedPharmacophore":
        """ Get a pharmacophore from a file of ligands.

        Accepted file formats: smi, mol2, sdf, pdb. The format is taken from the
        text after the last dot of the file name.

        Parameters
        ----------
        file_name : str
            Name or path of the file containing the ligands.

        method : str
            Name of method or algorithm to compute the ligand based pharmacophore.

        radius : float
            The radius in angstroms of the pharmacophoric points.

        feat_def
            Definitions of the pharmacophoric features. It is forwarded unchanged
            to from_ligand_list and to the class constructor.
            NOTE(review): the signature annotates this as Callable while the
            previous documentation described a dictionary with SMARTS strings
            as keys and feature names as values - confirm which one is expected.

        feat_list : list of str, optional
            List of features that will be used to derive the pharmacophore. If None is passed the
            default features will be used: donors, acceptors, aromatic rings, hydrophobics, positive
            and negative charges.

        Returns
        -------
        LigandBasedPharmacophore
            A new instance built from the pharmacophoric points and the ligands
            computed by from_ligand_list.

        Raises
        ------
        InvalidFileFormat
            If the file extension is not one of the accepted formats.

        AssertionError
            If no ligand could be read from the file.

        """
        fextension = file_name.split(".")[-1]

        if fextension == "smi":
            ligands = Chem.SmilesMolSupplier(file_name, delimiter='\t', titleLine=False)
        elif fextension == "mol2":
            ligands = load_mol2_file(file_name)
        elif fextension == "sdf":
            ligands = Chem.SDMolSupplier(file_name)
        elif fextension == "pdb":
            # MolFromPDBFile gives back one molecule (None when it cannot be
            # parsed) instead of a collection, so it is wrapped in a list to be
            # handled like the other formats.
            pdb_ligand = Chem.rdmolfiles.MolFromPDBFile(file_name)
            ligands = [] if pdb_ligand is None else [pdb_ligand]
        else:
            raise InvalidFileFormat(f"{fextension} is not a supported file format")

        # NOTE(review): the result is discarded; presumably this forces the
        # supplier to scan the file before it is iterated - confirm before removing.
        len(ligands)
        ligands = list(ligands)
        if not ligands:
            # Raised explicitly (instead of using an assert statement) so the
            # check is not dropped when Python runs with optimizations enabled.
            raise AssertionError(f"No ligands could be read from \"{file_name}\"")

        tmp_pharmacophore = LigandBasedPharmacophore().from_ligand_list(
                                                        ligands=ligands,
                                                        method=method,
                                                        radius=radius,
                                                        feat_list=feat_list,
                                                        feat_def=feat_def)
        return cls(pharmacophoric_points=tmp_pharmacophore.pharmacophoric_points, ligands=tmp_pharmacophore.ligands, feat_def=feat_def)
예제 #4
0
 def _urls_for_tranches_3d(self, col_list: List[str], row_list: List[str],
                           fileformat: str) -> List[str]:
     """ Build the download urls of the 3D files for the requested tranches.

         For every tranche (a column name followed by a row name) the packaged
         file "<tranche>.uri" is read and each of its lines is turned into one url.

         Parameters
         ----------
         col_list : list of str
             Column names of the tranches. Columns are named with letters from A
             to K and correspond to molecular weight.

         row_list : list of str
             Row names of the tranches. Rows are named with letters from A to K
             and correspond to LogP.

         fileformat : {"sdf", "mol2", "db2"}
             The format of the files.

         Returns
         -------
         url_list : list of str
             The urls, ordered by column, then row, then line of the uri file.

         Raises
         ------
         InvalidFileFormat
             If fileformat is not one of the 3D formats.

     """
     formats_3d = ["sdf", "mol2", "db2"]
     if fileformat not in formats_3d:
         raise InvalidFileFormat(f"{fileformat} is not a valid 3D file format. Valid formats are: {formats_3d}")

     tranche_names = [column + row for column in col_list for row in row_list]
     # Every url ends with the requested format followed by the gzip extension.
     url_suffix = "." + fileformat + ".gz"

     url_list = []
     for tranch in tranche_names:
         uri_path = pkg_resources.resource_filename(
             "openpharmacophore", "./data/zinc/urls3d/" + tranch + ".uri")
         url_prefix = "http://files.docking.org/3D/" + tranch + "/"
         with open(uri_path, "r") as uri_file:
             url_list.extend(url_prefix + entry.rstrip() + url_suffix
                             for entry in uri_file)

     return url_list
예제 #5
0
    def from_file(cls,
                  file_name: str,
                  load_mol_sys: bool = True) -> "StructuredBasedPharmacophore":
        """ Class method to load a structure based pharmacophore from a file.

        Currently supports only json format from pharmer. The format is taken
        from the text after the last dot of the file name.

        Parameters
        ----------
        file_name : str
            Name of the file containing the pharmacophore.

        load_mol_sys : bool, default=True
            Forwarded unchanged to from_pharmer.

        Returns
        -------
        StructuredBasedPharmacophore
            Instance built from the points, receptor and ligand returned by
            from_pharmer.

        Raises
        ------
        InvalidFileFormat
            If the file extension is not "json".

        """
        # Reject unsupported formats first so that the loading code is not nested.
        if file_name.rpartition(".")[2] != "json":
            raise InvalidFileFormat(
                f"Invalid file type, \"{file_name}\" is not a supported file format"
            )

        points, receptor, ligand = from_pharmer(file_name, load_mol_sys)
        return cls(points, receptor, ligand)
예제 #6
0
 def download_predifined_subset(self, download_path: str, subset: str, fileformat: str,
                                tree: bool = True, ignore_failures: bool = True) -> None:
     """ Download one of ZINC's predefined subsets.

         Predefined subsets can only be downloaded in the following formats:
         smi, txt, sdf, mol2 and db2. The first two are downloaded from the 2D
         tranches and the other three from the 3D tranches.

         Parameters
         ----------
         download_path : str
             The path where files will be downloaded.

         subset : str
             Name of the subset.

         fileformat : str
             The format of the files that will be downloaded.

         tree : bool
             Whether to use a tree directory structure or to download all the
             files to a single folder.

         ignore_failures : bool
             Whether to raise an exception if a file could not be downloaded.

         Raises
         ------
         InvalidFileFormat
             If fileformat is not one of the formats listed above.
     """
     col_list, row_list = self._predefined_subset_tranches(subset)

     # Select the url builder and the label handed to the downloader; the
     # download call itself is identical for both kinds of files.
     if fileformat in ("smi", "txt"):
         build_urls, label = self._urls_for_tranches_2d, "2D"
     elif fileformat in ("sdf", "mol2", "db2"):
         build_urls, label = self._urls_for_tranches_3d, "3D"
     else:
         raise InvalidFileFormat(f"{fileformat} is not a valid fileformat")

     url_list = build_urls(col_list, row_list, fileformat)
     self._download_batch_of_files(url_list, download_path, fileformat, label, tree, ignore_failures)
예제 #7
0
    def from_file(cls, file_name: str) -> "Pharmacophore":
        """
        Class method to load a pharmacophore from a file.

        The loader is chosen from the text after the last dot of the file name:
        "json" files are read with from_pharmer, "ph4" files with from_moe and
        "pml" files with from_ligandscout.

        Parameters
        ----------
        file_name : str
            Name of the file containing the pharmacophore

        Returns
        -------
        Pharmacophore
            Instance built from the pharmacophoric points read from the file.

        Raises
        ------
        InvalidFileFormat
            If the file extension is not one of the three listed above.

        """
        extension = file_name.rpartition(".")[2]

        if extension == "ph4":
            points = from_moe(file_name)
        elif extension == "pml":
            points = from_ligandscout(file_name)
        elif extension == "json":
            # from_pharmer returns three values; only the points are kept.
            points, _, _ = from_pharmer(file_name, False)
        else:
            raise InvalidFileFormat(f"Invalid file format, \"{file_name}\" is not a supported file format")

        return cls(pharmacophoric_points=points)
예제 #8
0
 def download_custom_subset(self, download_path: str, fileformat: str,
                            mw_range: Tuple[float, float],
                            logp_range: Tuple[float, float],
                            tree: bool = True,
                            ignore_failures: bool = True,
                            availability: Optional[str] = None,
                            bioactive: Optional[str] = None,
                            biogenic: Optional[str] = None,
                            reactivity: Optional[str] = None) -> None:
     """ Download subset with a custom molecular weight range and logP range from ZINC.

         The urls are built in one of three ways:

         - with the filter url builder when any filter is given, or when the
           format is one of xml, csv, js, json, db or solv;
         - from the 2D tranches for smi and txt when no filter is given;
         - from the 3D tranches for sdf, mol2 and db2 when no filter is given.

         Parameters
         ----------
         download_path : str
             The path where files will be downloaded.

         fileformat : str
             The format of the files that will be downloaded.

         mw_range : 2-tuple of float
             Range of molecular weight in daltons for the downloaded molecules.

         logp_range : 2-tuple of float
             Range of logP for the downloaded molecules.

         tree : bool
             Whether to use a tree directory structure or to download all the
             files to a single folder.

         ignore_failures : bool
             Whether to raise an exception if a file could not be downloaded.

         availability : str, default is None
             The availability of the molecules.

         bioactive : str, default is None
             Subset of bioactivity and drugs.

         biogenic : str, default is None
             Subset of biogenic.

         reactivity: str, default is None
             The reactivity of the molecules.

         Raises
         ------
         InvalidFileFormat
             If no filter is given and fileformat is not one of the formats
             listed above.
     """
     col_list, row_list = self._mw_and_logp_tranches(mw_range, logp_range)

     has_filters = any((availability, bioactive, biogenic, reactivity))

     # Filtered downloads and the formats below share the same url builder.
     # None of these formats is "smi" or "txt", so testing them before the
     # plain 2D formats selects the same branch as testing them afterwards.
     if has_filters or fileformat in ("xml", "csv", "js", "json", "db", "solv"):
         label = "CS"
         url_list = self._tranche_with_filters_url_list(col_list, row_list, availability,
                                                        bioactive, biogenic, reactivity,
                                                        fileformat=fileformat)
     elif fileformat in ("smi", "txt"):
         label = "2D"
         url_list = self._urls_for_tranches_2d(col_list, row_list, fileformat)
     elif fileformat in ("sdf", "mol2", "db2"):
         label = "3D"
         url_list = self._urls_for_tranches_3d(col_list, row_list, fileformat)
     else:
         raise InvalidFileFormat(f"{fileformat} is not a valid file format.")

     self._download_batch_of_files(url_list, download_path, fileformat, label, tree, ignore_failures)
예제 #9
0
    def draw(self,
             file_name: str,
             img_size: Tuple[int, int] = (500, 500),
             legend: str = "") -> None:
        """ Draw a 2d representation of the pharmacophore.

            This is a drawing of the ligand with the pharmacophoric features highlighted.
            Every atom of a pharmacophoric point is highlighted, and the short name
            of the point is written as a note on the last atom visited for that
            point. When that atom already carries a note, the names are joined
            with "|". Points without atom indices are skipped.

            Parameters
            ----------
            file_name : str
                File where the drawing will be saved. Must be a png file.

            img_size : 2-tuple of int, optional
                The size of the image. (Default=(500,500))

            legend : str, optional
                Image legend.

            Raises
            ------
            NoLigandsError
                If the pharmacophore has no ligand.

            InvalidFileFormat
                If file_name does not end with ".png".

        """
        if self.ligand is None:
            raise NoLigandsError(
                "Cannot draw pharmacophore if there is no ligand")

        if not file_name.endswith(".png"):
            raise InvalidFileFormat("File must be a png.")

        # Work on a copy so the ligand stored in the pharmacophore is not modified.
        ligand = copy.deepcopy(self.ligand)
        ligand.RemoveAllConformers()
        ligand = Chem.RemoveHs(ligand)

        bond_colors = {}
        atom_highlights = defaultdict(list)
        highlight_radius = {}

        for point in self.pharmacophoric_points:

            indices = point.atom_indices
            # Remember the last atom explicitly instead of relying on the loop
            # variable, which would be unbound or stale for an empty point.
            last_idx = None
            for idx in indices:
                last_idx = idx

                atom_highlights[idx].append(
                    get_color_from_palette_for_feature(point.feature_name))
                highlight_radius[idx] = 0.6

                # Draw aromatic rings bonds
                if point.feature_name == "aromatic ring":
                    for neighbor in ligand.GetAtomWithIdx(idx).GetNeighbors():
                        nbr_idx = neighbor.GetIdx()
                        if nbr_idx not in indices:
                            continue
                        bond = ligand.GetBondBetweenAtoms(idx,
                                                          nbr_idx).GetIdx()
                        bond_colors[bond] = [
                            get_color_from_palette_for_feature("aromatic ring")
                        ]

            if last_idx is None:
                # The point has no atoms: nothing to highlight or label.
                continue

            # If an atom has more than one feature label will contain both names
            label_atom = ligand.GetAtomWithIdx(last_idx)
            if label_atom.HasProp("atomNote"):
                label = label_atom.GetProp("atomNote") + "|" + str(point.short_name)
            else:
                label = str(point.short_name)
            label_atom.SetProp("atomNote", label)

        drawing = rdMolDraw2D.MolDraw2DCairo(img_size[0], img_size[1])
        drawing.DrawMoleculeWithHighlights(ligand, legend,
                                           dict(atom_highlights), bond_colors,
                                           highlight_radius, {})
        drawing.FinishDrawing()
        drawing.WriteDrawingText(file_name)
예제 #10
0
    def draw(self,
             file_name: str,
             img_size: Tuple[int, int] = (500, 500),
             legend: str = "",
             freq_threshold: float = 0.2) -> None:
        """ Draw a 2d representation of the dynamic pharmacophore. This is a drawing of the
            ligand with the pharmacophoric features highlighted and the frequency of each
            one.

            The ligand of the first pharmacophore is used for the drawing. For every
            unique pharmacophoric point whose frequency reaches the threshold, its
            atoms are highlighted and its frequency is written as a percentage
            ("f=<percent>") on the last atom visited for that point. The note is not
            written when an atom of the point was already highlighted and carries a
            note with a higher percentage. Points without atom indices are skipped.

            Parameters
            ----------
            file_name : str
                Name or path of the file where the drawing will be saved. Must be a png file.

            img_size : 2-tuple of int, optional
                The size of the image (default=(500,500))

            legend : str, optional
                Image legend.

            freq_threshold : double , optional
                The minimum frequency of a pharmacophoric point to be drawn. Number
                between 0.0 and 1.0 (default=0.2).

            Raises
            ------
            ValueError
                If freq_threshold is not between 0 and 1.

            InvalidFileFormat
                If file_name does not end with ".png".

            NoLigandsError
                If the first pharmacophore has no ligand.
        """
        if freq_threshold < 0.0 or freq_threshold > 1.0:
            raise ValueError(
                "Freqency threshold must be a value between 0 and 1")

        if not file_name.endswith(".png"):
            raise InvalidFileFormat("File must be a png.")

        # Extract a ligand
        if self.pharmacophores[0].ligand is None:
            raise NoLigandsError("Ligand could not be extracted")
        ligand = copy.deepcopy(self.pharmacophores[0].ligand)
        ligand.RemoveAllConformers()

        highlighted = set()
        bond_colors = {}
        atom_highlights = defaultdict(list)
        highlight_radius = {}

        for up in self.unique_pharmacophoric_points:

            if up.frequency < freq_threshold:
                continue

            # Notes store the frequency as an integer percentage, so the
            # comparison with an existing note is done in the same unit.
            frequency = int(up.frequency * 100)

            if "hydrophobicity" in up.feature_name:
                feat_name = "hydrophobicity"
            else:
                feat_name = " ".join(up.feature_name.split()[0:2])

            indices = up.atom_indices
            update_freq = True
            # Remember the last atom explicitly instead of relying on the loop
            # variable, which would be unbound or stale for an empty point.
            last_idx = None
            for idx in indices:
                last_idx = idx
                atom = ligand.GetAtomWithIdx(idx)

                # If an atom has more than one feature keep higher frequency value
                if idx in highlighted and atom.HasProp("atomNote"):
                    # The note has the form "f=<percent>"; drop the "f=" prefix.
                    noted_frequency = int(atom.GetProp("atomNote")[2:])
                    if noted_frequency > frequency:
                        update_freq = False

                highlighted.add(idx)
                atom_highlights[idx].append(
                    get_color_from_palette_for_feature(feat_name))
                highlight_radius[idx] = 0.6

                # Draw aromatic rings bonds
                if up.short_name == "R":
                    for neighbor in atom.GetNeighbors():
                        nbr_idx = neighbor.GetIdx()
                        if nbr_idx not in indices:
                            continue
                        bond = ligand.GetBondBetweenAtoms(idx,
                                                          nbr_idx).GetIdx()
                        bond_colors[bond] = [
                            get_color_from_palette_for_feature("aromatic ring")
                        ]

            if update_freq and last_idx is not None:
                ligand.GetAtomWithIdx(last_idx).SetProp("atomNote",
                                                        f"f={frequency}")

        drawing = rdMolDraw2D.MolDraw2DCairo(img_size[0], img_size[1])
        drawing.DrawMoleculeWithHighlights(ligand, legend,
                                           dict(atom_highlights), bond_colors,
                                           highlight_radius, {})
        drawing.FinishDrawing()
        drawing.WriteDrawingText(file_name)