def test_empty_fprints(self):
    """Comparing two all-zero ("empty") fingerprints must raise ValueError."""
    first_empty = Descriptor([0])
    second_empty = Descriptor([0])
    tanimoto = SimilarityMeasure("tanimoto")
    # Callable form of assertRaises: invokes tanimoto(first_empty, second_empty).
    self.assertRaises(ValueError, tanimoto, first_empty, second_empty)
def showAllDescriptorsCallback(self):
    """Update the descriptor dropdown to match the 'show all' checkbutton.

    When the state of ``self.showAllDescriptorsButton`` contains
    ``"selected"``, the dropdown lists everything returned by
    ``Descriptor.get_all_supported_descriptors()``; otherwise it lists only
    ``Descriptor.get_supported_fprints()``.
    """
    if "selected" in self.showAllDescriptorsButton.state():
        choices = Descriptor.get_all_supported_descriptors()
    else:
        choices = Descriptor.get_supported_fprints()
    # Single assignment point; the original also bound an unused local here.
    self.molecularDescriptorCombobox["values"] = choices
def test_binary_only_metrics(self):
    """Binary-only metrics must reject non-bit-vector descriptors.

    Every metric reported by
    ``SimilarityMeasure.get_supported_binary_metrics()`` is expected to
    raise ValueError when called on descriptors holding values other
    than 0/1.
    """
    non_binary_a = Descriptor([1, 2])
    non_binary_b = Descriptor([3, 4])
    for metric_name in SimilarityMeasure.get_supported_binary_metrics():
        measure = SimilarityMeasure(metric_name)
        self.assertRaises(ValueError, measure, non_binary_a, non_binary_b)
def __init__(
    self,
    mol_graph=None,
    mol_text=None,
    mol_property_val=None,
    mol_descriptor_val=None,
    mol_src=None,
    mol_smiles=None,
):
    """Constructor

    Args:
        mol_graph (RDKIT mol object): Graph-level information of molecule.
            Implemented as an RDKIT mol object. Default is None.
        mol_text (str): Text identifier of the molecule. Default is None.
            Identifiers can be:
            ------------------
            1. Name of the molecule.
            2. SMILES string representing the molecule.
        mol_property_val (float): Some property associated with the
            molecule. This is typically the response being studied.
            E.g. Boiling point, Selectivity etc. Default is None.
        mol_descriptor_val (numpy ndarray): Descriptor value for the
            molecule. Must be numpy array or list. It is converted with
            ``np.array`` before being wrapped in a ``Descriptor``.
            Default is None, which yields an empty ``Descriptor()``.
        mol_src (str): Source file or SMILES string to load molecule.
            Acceptable files:
              -> .pdb file
              -> .txt file with SMILE string in first column, first row
                 and (optionally) property in second column, first row.
            Default is None. If provided, it is handed to
            ``self._set_molecule_from_file``.
        mol_smiles (str): SMILES string for molecule. If provided, it is
            handed to ``self._set_molecule_from_smiles``. If mol_text is
            not set in keyword argument, this string is used to set it
            (NOTE(review): handled inside the helper, not visible here).

    Raises:
        LoadingError: Propagated unchanged from either loader helper
            (presumably when the molecule cannot be loaded).
    """
    self.mol_graph = mol_graph
    self.mol_text = mol_text
    self.mol_property_val = mol_property_val
    if mol_descriptor_val is None:
        self.descriptor = Descriptor()
    else:
        self.descriptor = Descriptor(value=np.array(mol_descriptor_val))

    # The loaders are called directly: catching LoadingError only to
    # re-raise the same object added nothing but an extra traceback frame.
    if mol_src is not None:
        self._set_molecule_from_file(mol_src)
    if mol_smiles is not None:
        self._set_molecule_from_smiles(mol_smiles)
def test_vectornorm_length_errors(self):
    """Vector-norm similarities must reject descriptors of unequal length.

    The l0, l1 and l2 similarity measures are each expected to raise
    ValueError when given a length-2 and a length-1 descriptor.
    """
    longer = Descriptor([1, 2])
    shorter = Descriptor([3])
    # Build every measure up front, then exercise them in order.
    norm_measures = [
        SimilarityMeasure(metric_name)
        for metric_name in ("l0_similarity", "l1_similarity", "l2_similarity")
    ]
    for measure in norm_measures:
        self.assertRaises(ValueError, measure, longer, shorter)
def __init__(
    self,
    molecule_database_src,
    molecule_database_src_type,
    is_verbose,
    similarity_measure,
    n_threads=1,
    fingerprint_type=None,
    fingerprint_params=None,
    sampling_ratio=1.0,
    sampling_random_state=42,
):
    """Constructor for the MoleculeSet class.

    Loads the molecule database, optionally subsamples it, sets the
    descriptor, and computes the similarity matrix.

    Args:
        molecule_database_src: Source of the molecule database. Passed
            to ``self._get_molecule_database``.
        molecule_database_src_type: Type of the database source. Passed
            to ``self._get_molecule_database``.
        is_verbose (bool): If truthy, progress messages are printed.
        similarity_measure: Passed to ``SimilarityMeasure`` to build the
            measure stored on ``self.similarity_measure``.
        n_threads (int or str): Number of workers stored on
            ``self.n_threads``. If ``'auto'``, the value is chosen from
            the physical core count and the database size (see below).
            Default is 1.
        fingerprint_type: If not None, passed to ``self._set_descriptor``
            and overrides any descriptor set from database features.
            Default is None.
        fingerprint_params: Passed to ``self._set_descriptor`` together
            with ``fingerprint_type``. Default is None.
        sampling_ratio (float): Fraction of the molecules to keep.
            Useful for selection subset of dataset for quick
            computations. Subsampling only happens when the value is
            strictly between 0 and 1. Default is 1.0.
        sampling_random_state (int): Random state used for sampling.
            Default is 42.
    """
    self.is_verbose = is_verbose
    self.molecule_database = None
    self.descriptor = Descriptor()
    self.molecule_database, features = self._get_molecule_database(
        molecule_database_src, molecule_database_src_type)
    if features is not None:
        self._set_descriptor(arbitrary_descriptor_vals=features)
    if 0.0 < sampling_ratio < 1.0:
        if self.is_verbose:
            print(f"Using {int(sampling_ratio * 100)}% of the database...")
        self._subsample_database(sampling_ratio=sampling_ratio,
                                 random_state=sampling_random_state)
    if fingerprint_type is not None:
        # overrides the descriptor if one was set from database features
        self._set_descriptor(fingerprint_type=fingerprint_type,
                             fingerprint_params=fingerprint_params)
    self.similarity_measure = SimilarityMeasure(similarity_measure)

    if n_threads == 'auto':
        def speedup_eqn(n_mols, n_procs):
            # Linear estimate of the multiprocessing speedup
            # (presumably relative to a single worker - TODO confirm).
            return 1.8505e-4 * n_mols + 2.235e-1 * n_procs + 7.082e-2

        # psutil may return None when the core count is undeterminable.
        n_cores = psutil.cpu_count(logical=False) or 1
        n_mols = len(self.molecule_database)
        # The estimate grows with n_procs, so if all physical cores are
        # not predicted to help, no smaller worker count will either.
        # The original fallback assigned n_cores again, which made
        # 'auto' always select every core; fall back to one worker.
        if speedup_eqn(n_mols, n_cores) > 1.0:
            self.n_threads = n_cores
        else:
            self.n_threads = 1
    else:
        self.n_threads = n_threads

    self.similarity_matrix = None
    self._set_similarity_matrix()
def __init__(self, master=None):
    """Constructor for AIMSim.

    Builds the whole user interface: the window, its icon and title
    label, the main frame, and every entry, label, checkbutton, combobox
    and button placed inside it. Widgets are positioned with ``place``
    using relative coordinates.

    Args:
        master (tk, optional): tk window. Defaults to None. When None a
            new ``tk.Tk`` root is created; otherwise the UI is built in
            a ``tk.Toplevel`` child of ``master``.
    """
    # build ui
    self.window = tk.Tk() if master is None else tk.Toplevel(master)
    self.window.title("AIMSim")
    # Locate the logo file relative to this module and use it as the icon.
    resource_path = pkg_resources.resource_filename(
        __name__,
        "AIMSim-logo.png",
    )
    self.window.iconphoto(False, tk.PhotoImage(file=resource_path))
    # String variables bound to the entries and comboboxes created below.
    self.databaseFile = tk.StringVar(self.window)
    self.targetMolecule = tk.StringVar(self.window)
    self.similarityMeasure = tk.StringVar(self.window)
    self.molecularDescriptor = tk.StringVar(self.window)
    # Title label sits directly on the window, above the main frame.
    self.titleLabel = ttk.Label(self.window)
    self.titleLabel.configure(font="TkDefaultFont 14 bold",
                              text="AI Molecular Similarity")
    self.titleLabel.place(anchor="center",
                          relx="0.5",
                          rely="0.05",
                          x="0",
                          y="0")
    # All remaining widgets are children of the main frame.
    self.mainframe = ttk.Frame(self.window)
    self.verboseCheckbutton = ttk.Checkbutton(self.mainframe)
    self.verboseCheckbutton.configure(compound="top",
                                      cursor="arrow",
                                      offvalue="False",
                                      onvalue="True")
    self.verboseCheckbutton.configure(state="normal", text="Verbose")
    self.verboseCheckbutton.place(anchor="center",
                                  relx="0.1",
                                  rely="0.95",
                                  x="0",
                                  y="0")
    # Database file entry, pre-filled with a default file name.
    self.databaseFileEntry = ttk.Entry(self.mainframe,
                                       textvariable=self.databaseFile)
    _text_ = """smiles_responses.txt"""
    self.databaseFileEntry.delete("0", "end")
    self.databaseFileEntry.insert("0", _text_)
    self.databaseFileEntry.place(anchor="center",
                                 relx="0.5",
                                 rely="0.03",
                                 x="0",
                                 y="0")
    self.databaseFileLabel = ttk.Label(self.mainframe)
    self.databaseFileLabel.configure(text="Database File:")
    self.databaseFileLabel.place(anchor="center",
                                 relx=".2",
                                 rely="0.03",
                                 x="0",
                                 y="0")
    # Target molecule entry, pre-filled with the default text "CO".
    self.targetMoleculeEntry = ttk.Entry(self.mainframe,
                                         textvariable=self.targetMolecule)
    _text_ = """CO"""
    self.targetMoleculeEntry.delete("0", "end")
    self.targetMoleculeEntry.insert("0", _text_)
    self.targetMoleculeEntry.place(anchor="center",
                                   relx="0.5",
                                   rely="0.27",
                                   x="0",
                                   y="0")
    self.targetMoleculeLabel = ttk.Label(self.mainframe)
    self.targetMoleculeLabel.configure(text="Target Molecule:")
    self.targetMoleculeLabel.place(anchor="center",
                                   relx="0.17",
                                   rely="0.27",
                                   x="0",
                                   y="0")
    # Checkbuttons labelled for the individual plot options.
    self.similarityPlotsCheckbutton = ttk.Checkbutton(self.mainframe)
    self.similarityPlotsCheckbutton.configure(text="Similarity Plots")
    self.similarityPlotsCheckbutton.place(anchor="center",
                                          relx="0.5",
                                          rely="0.1",
                                          x="0",
                                          y="0")
    self.propertySimilarityCheckbutton = ttk.Checkbutton(self.mainframe)
    self.propertySimilarityCheckbutton.configure(
        text="Property Similarity Plot")
    self.propertySimilarityCheckbutton.place(anchor="center",
                                             relx="0.5",
                                             rely="0.15",
                                             x="0",
                                             y="0")
    self.similarityPlotCheckbutton = ttk.Checkbutton(self.mainframe)
    self.similarityPlotCheckbutton.configure(text="Similarity Plot")
    self.similarityPlotCheckbutton.place(anchor="center",
                                         relx="0.5",
                                         rely="0.35",
                                         x="0",
                                         y="0")
    # Read-only dropdown of similarity metrics; the first entry is
    # selected initially.
    self.similarityMeasureCombobox = ttk.Combobox(
        self.mainframe,
        textvariable=self.similarityMeasure,
        state="readonly")
    self.similarityMeasureCombobox.configure(
        takefocus=False, values=SimilarityMeasure.get_supported_metrics())
    self.similarityMeasureCombobox.current(0)
    self.similarityMeasureCombobox.place(anchor="center",
                                         relx="0.55",
                                         rely="0.45",
                                         x="0",
                                         y="0")
    self.similarityMeasureLabel = ttk.Label(self.mainframe)
    self.similarityMeasureLabel.configure(text="Similarity Measure:")
    self.similarityMeasureLabel.place(anchor="center",
                                      relx="0.2",
                                      rely="0.45",
                                      x="0",
                                      y="0")
    self.molecularDescriptorLabel = ttk.Label(self.mainframe)
    self.molecularDescriptorLabel.configure(text="Molecular Descriptor:")
    self.molecularDescriptorLabel.place(anchor="center",
                                        relx="0.18",
                                        rely="0.55",
                                        x="0",
                                        y="0")
    # Read-only dropdown of descriptors; initially lists only the
    # supported fingerprints (see showAllDescriptorsCallback for the
    # full list).
    self.molecularDescriptorCombobox = ttk.Combobox(
        self.mainframe,
        textvariable=self.molecularDescriptor,
        state="readonly")
    self.molecularDescriptorCombobox.configure(
        cursor="arrow",
        justify="left",
        takefocus=False,
        # values=Descriptor.get_all_supported_descriptors(),
        values=Descriptor.get_supported_fprints(),
    )

    # define the callback for the descriptor
    def updateCompatibleMetricsListener(event):
        """Show only compatible metrics, given a descriptor.

        Looks up the currently selected descriptor in the mapping
        returned by ``SimilarityMeasure.get_compatible_metrics()`` and
        uses the result as the metric dropdown's values, falling back
        to "Error" when the descriptor has no entry. The first value is
        then selected.

        Args:
            event: Tk event passed by the binding; unused.
        """
        self.similarityMeasureCombobox[
            "values"] = SimilarityMeasure.get_compatible_metrics().get(
                self.molecularDescriptor.get(), "Error")
        self.similarityMeasureCombobox.current(0)
        return

    # bind this listener to the combobox
    self.molecularDescriptorCombobox.bind("<<ComboboxSelected>>",
                                          updateCompatibleMetricsListener)
    self.molecularDescriptorCombobox.place(anchor="center",
                                           relx="0.55",
                                           rely="0.55",
                                           x="0",
                                           y="0")
    self.molecularDescriptorCombobox.current(0)
    # Action buttons, wired to the run and open-config callbacks.
    self.runButton = ttk.Button(self.mainframe)
    self.runButton.configure(text="Run")
    self.runButton.place(anchor="center",
                         relx="0.5",
                         rely="0.75",
                         x="0",
                         y="0")
    self.runButton.configure(command=self.runCallback)
    self.openConfigButton = ttk.Button(self.mainframe)
    self.openConfigButton.configure(text="Open Config")
    self.openConfigButton.place(anchor="center",
                                relx="0.5",
                                rely="0.85",
                                x="0",
                                y="0")
    self.openConfigButton.configure(command=self.openConfigCallback)
    # Toggling this checkbutton invokes showAllDescriptorsCallback.
    self.showAllDescriptorsButton = ttk.Checkbutton(self.mainframe)
    self.showAllDescriptorsButton.configure(
        compound="top",
        cursor="arrow",
        offvalue="False",
        onvalue="True",
        command=self.showAllDescriptorsCallback,
    )
    self.showAllDescriptorsButton.configure(
        state="normal", text="Show experimental descriptors")
    self.showAllDescriptorsButton.place(anchor="center",
                                        relx="0.45",
                                        rely="0.65",
                                        x="0",
                                        y="0")
    self.multiprocessingCheckbutton = ttk.Checkbutton(self.mainframe)
    self.multiprocessingCheckbutton.configure(compound="top",
                                              cursor="arrow",
                                              offvalue="False",
                                              onvalue="True")
    self.multiprocessingCheckbutton.configure(
        state="normal", text="Enable Multiple Workers")
    self.multiprocessingCheckbutton.place(anchor="center",
                                          relx="0.78",
                                          rely="0.95",
                                          x="0",
                                          y="0")
    self.identifyOutliersCheckbutton = ttk.Checkbutton(self.mainframe)
    self.identifyOutliersCheckbutton.configure(compound="top",
                                               cursor="arrow",
                                               offvalue="False",
                                               onvalue="True")
    self.identifyOutliersCheckbutton.configure(state="normal",
                                               text="Outlier Check")
    self.identifyOutliersCheckbutton.place(anchor="center",
                                           relx="0.4",
                                           rely="0.95",
                                           x="0",
                                           y="0")
    # Size and place the main frame below the title area, then size the
    # window itself.
    self.mainframe.configure(height="400", width="400")
    self.mainframe.place(anchor="nw",
                         relheight="0.9",
                         rely="0.1",
                         x="0",
                         y="0")
    self.window.configure(cursor="arrow",
                          height="400",
                          relief="flat",
                          takefocus=False)
    self.window.configure(width="400")
    # Main widget
    self.mainwindow = self.window