Ejemplo n.º 1
0
    def set_reaction(self,product_smiles):
        """Run ``self.rxn`` on a product and keep the ring-consistent results.

        Parameters
        ----------
        product_smiles : str
            Text passed to ``SM`` to build the product molecule.
            NOTE(review): ``SM``/``MS`` are aliases defined elsewhere in the
            file, presumably RDKit's MolFromSmiles/MolToSmiles -- confirm.

        Returns
        -------
        list
            The entries yielded by ``self.rxn.RunReactants`` whose members all
            sanitize and whose summed ring count minus the product's ring
            count equals ``self.ring_change_count``. An empty list is
            returned when the product itself cannot be processed.

        Raises
        ------
        SystemExit
            If ``RunReactants`` fails. The process exits with status 1 so the
            failure is visible to the calling shell.
        """
        try:
            product_mol = SM(product_smiles)
            prod_num_rings = CalcNumRings(product_mol)
        except Exception:
            # ``Exception`` (not a bare except) so Ctrl-C still interrupts.
            print("error in the product",product_smiles)
            return []

        try:
            reactant_list = self.rxn.RunReactants([product_mol])
        except Exception:
            print("Reaction failed")
            print(self.reaction_name,self.smarts,product_smiles)
            # Raised directly instead of calling the site-provided ``exit()``
            # helper, which is not guaranteed to exist (e.g. ``python -S``).
            raise SystemExit(1)

        approved_reactants = []
        for reactant_mol in reactant_list:
            # condition 1 - conserved ring count
            try:
                for r in reactant_mol:
                    Chem.SanitizeMol(r)
                ring_delta = sum(CalcNumRings(r) for r in reactant_mol) - prod_num_rings
                if ring_delta == self.ring_change_count:
                    approved_reactants.append(reactant_mol)
            except Exception:
                print("could not sanitize ",product_smiles,".".join([MS(r) for r in reactant_mol]))
        return approved_reactants
Ejemplo n.º 2
0
    def preprocess(dataset, dir_input):
        """Featurize a CCS dataset and write the resulting arrays to disk.

        Parameters
        ----------
        dataset : mapping of column name -> sequence
            Must provide the 'SMILES', 'Adducts' and 'CCS' columns.
            NOTE(review): presumably a pandas DataFrame -- confirm with caller.
        dir_input : str
            Output location. File names are appended by plain string
            concatenation, so it must already end with a path separator.

        Notes
        -----
        Rows whose SMILES contains '.' are skipped. The encoded adducts are
        filtered with the same row selection so that every saved array has
        one entry per kept molecule.

        The module-level names ``radius`` and ``fingerprint_dict`` are read
        here; they are defined outside this function.
        """
        train_smiles = list(dataset['SMILES'])
        train_adducts = dataset['Adducts']
        train_ccs = list(dataset['CCS'])

        adducts_encoder = AdductToOneHotEncoder()
        adducts_encoder.fit(train_adducts)
        adducts = adducts_encoder.transform(train_adducts)

        kept_rows, smiles_lines = [], []
        molecules, adjacencies, properties, descriptors = [], [], [], []
        for i, smi in enumerate(train_smiles):
            if '.' in smi:
                continue
            # Round-trip through RDKit so the stored SMILES is the one RDKit
            # itself writes out.
            smi = Chem.MolToSmiles(Chem.MolFromSmiles(smi))
            mol = Chem.MolFromSmiles(smi)
            mol = Chem.AddHs(mol)
            atoms = create_atoms(mol)
            i_jbond_dict = create_ijbonddict(mol)

            fingerprints = extract_fingerprints(atoms, i_jbond_dict, radius)
            adjacency = create_adjacency(mol)

            kept_rows.append(i)
            smiles_lines.append(smi + '\n')
            molecules.append(fingerprints)
            adjacencies.append(adjacency)
            properties.append([[train_ccs[i]]])
            descriptors.append([
                ExactMolWt(mol),
                MolLogP(mol),
                GetFormalCharge(mol),
                CalcNumRings(mol),
                CalcNumRotatableBonds(mol),
                CalcLogS(mol),
                AcidCount(mol),
                BaseCount(mol),
                APolar(mol),
                BPolar(mol)
            ])

        # Drop the adduct rows of skipped molecules; untouched when nothing
        # was skipped so the encoder's output is saved exactly as returned.
        if len(kept_rows) != len(train_smiles):
            adducts = np.asarray(adducts)[kept_rows]

        # Standardize the CCS targets; mean/std are saved for later use.
        properties = np.array(properties)
        mean, std = np.mean(properties), np.std(properties)
        properties = np.array((properties - mean) / std)

        os.makedirs(dir_input, exist_ok=True)

        with open(dir_input + 'Smiles.txt', 'w') as f:
            f.write(''.join(smiles_lines))
        np.save(dir_input + 'molecules', molecules)
        np.save(dir_input + 'adducts', adducts)
        np.save(dir_input + 'adjacencies', adjacencies)
        np.save(dir_input + 'properties', properties)
        np.save(dir_input + 'descriptors', descriptors)
        np.save(dir_input + 'mean', mean)
        np.save(dir_input + 'std', std)
        dump_dictionary(fingerprint_dict,
                        dir_input + 'fingerprint_dict.pickle')
Ejemplo n.º 3
0
    def _construct(self,
                   molecules,
                   ring_cutoff=10,
                   progress=False,
                   annotate=True):
        """Private method for graph construction, called by constructors.

        RDKit logging is turned down for the duration of the call and is
        restored afterwards, including when processing raises.

        Parameters
        ----------
        molecules : iterable
            An iterable of rdkit molecules for processing. ``None`` entries
            are skipped.
        ring_cutoff : int, optional
            Ignore molecules with more than the specified number of rings to avoid
            extended processing times. The default is 10.
        progress : bool, optional
            If True show a progress bar monitoring progress. The default is False.
        annotate : bool, optional
            If True write an annotated murcko scaffold SMILES string to each
            molecule edge (molecule --> scaffold). The default is True.

        """
        rdlogger.setLevel(4)  # Suppress the RDKit logs
        try:
            # The bar is disabled only for a literal ``False``.
            hide_bar = progress is False
            desc = self.__class__.__name__
            for molecule in tqdm(molecules,
                                 disable=hide_bar,
                                 desc=desc,
                                 miniters=1,
                                 dynamic_ncols=True):
                if molecule is None:  # logged in suppliers
                    continue
                init_molecule_name(molecule)
                if CalcNumRings(molecule) > ring_cutoff:
                    name = molecule.GetProp('_Name')
                    logger.warning(
                        f'Molecule {name} filtered (> {ring_cutoff} rings)')
                    continue
                rdmolops.RemoveStereochemistry(molecule)
                scaffold = Scaffold(get_murcko_scaffold(molecule))
                if scaffold:  # Checks that a scaffold has at least 1 atom
                    annotation = None
                    if annotate:
                        annotation = get_annotated_murcko_scaffold(
                            molecule, scaffold.mol, False)
                    self.add_scaffold_node(scaffold)
                    self.add_molecule_node(molecule)
                    self.add_molecule_edge(molecule,
                                           scaffold,
                                           annotation=annotation)
                    # Only multi-ring scaffolds are handed to the recursive
                    # constructor.
                    if scaffold.rings.count > 1:
                        self._recursive_constructor(scaffold)
                else:
                    name = molecule.GetProp('_Name')
                    logger.warning(f'No top level scaffold for molecule {name}')
        finally:
            # Restore logging even if a molecule raised part-way through.
            rdlogger.setLevel(3)  # Enable the RDKit logs
Ejemplo n.º 4
0
    def _construct(self, molecules, init_args, ring_cutoff=10, progress=False):
        """Build the graph from an iterable of molecules.

        Each usable molecule is turned into a scaffold by
        ``_initialize_scaffold`` and, when that yields a scaffold, passed on
        to ``_hierarchy_constructor``. Subclasses customise construction by
        overriding those hooks.

        Parameters
        ----------
        molecules : iterable
            An iterable of rdkit molecules for processing. ``None`` entries
            are skipped.
        init_args : dict
            Arguments forwarded unchanged to ``_initialize_scaffold``.
        ring_cutoff : int, optional
            Molecules with more rings than this are skipped, logged and
            counted under ``self.graph['num_filtered']``. The default is 10.
        progress : bool, optional
            If True show a progress bar monitoring progress. The default is False.

        See Also
        --------
        _initialize_scaffold
        _hierarchy_constructor

        """
        bar_label = self.__class__.__name__
        # The bar is disabled only for a literal ``False``.
        hide_bar = progress is False
        monitored = tqdm(
            molecules,
            disable=hide_bar,
            desc=bar_label,
            miniters=1,
            dynamic_ncols=True,
        )
        for molecule in monitored:
            if molecule is None:
                continue
            init_molecule_name(molecule)
            if CalcNumRings(molecule) > ring_cutoff:
                name = molecule.GetProp('_Name')
                logger.warning(
                    f'Molecule {name} filtered (> {ring_cutoff} rings)')
                already_filtered = self.graph.get('num_filtered', 0)
                self.graph['num_filtered'] = already_filtered + 1
                continue
            scaffold = self._initialize_scaffold(molecule, init_args)
            if scaffold is None:
                continue
            self._hierarchy_constructor(scaffold)
Ejemplo n.º 5
0
def get_numRings(mol):
    """Return the value of ``CalcNumRings`` for ``mol``."""
    ring_count = CalcNumRings(mol)
    return ring_count
Ejemplo n.º 6
0
def predict():
    """Generate candidate molecules for the requested target properties.

    Reads ``conditions``, ``num_rounds``, ``loyality`` and ``num_of_mols``
    from the JSON request body. In each round it samples candidates from
    ``model``, standardizes them, obtains property predictions from the
    remote prediction service and scores every candidate against twelve
    rules. Candidates with a non-zero score whose fraction of passed rules
    is at least ``loyality`` are kept.

    Kept candidates are ranked by the mean squared error between their scaled
    (melting point, boiling point, water solubility) predictions and the
    scaled ``conditions``; the best ``num_of_mols`` are returned as JSON.

    NOTE(review): sampling calls ``.cuda()``, so a CUDA device is required.
    NOTE(review): the remote call has no timeout; consider adding one.
    """
    req_data = request.get_json()
    print("Data requested")
    print(req_data)
    conditions = req_data["conditions"]
    num_rounds = req_data["num_rounds"]
    loyality = req_data["loyality"]
    num_of_mols = req_data["num_of_mols"]

    number_generate = 100
    # The scaled target does not change between rounds or candidates.
    target_scaled = scaler.transform(np.array([conditions]))

    result_arr = []
    for round_idx in range(num_rounds):
        print(f"round {round_idx}")
        endp = torch.tensor(target_scaled)
        print(endp.shape)

        # One condition row per sample, then stacked 3 times along a new
        # leading axis. NOTE(review): the 3 presumably matches a model
        # dimension -- confirm.
        endp = endp.repeat(number_generate, 1)
        endp = endp.unsqueeze(0)
        endp = endp.repeat(3, 1, 1)

        endp = endp.float()
        endp = endp.cuda()
        res = model.sample(endp, number_generate, dataset.model)
        valid = len(res) * 100 / number_generate
        print("valid : {} %".format(valid))
        res = [robust_standardizer(mol) for mol in res]
        mols = [mol for mol in res if mol is not None]
        print("Mols obtained")
        print(mols)
        vals_another = requests.post("https://backend.syntelly.com/tempSmilesArrToPredict",
                                     json={'smiles': mols}).json()
        # Replace numeric endpoint ids by their names (in place).
        for entry in vals_another:
            for e in entry['data']:
                e["endpoint_id"] = endpoints_id2name[e["endpoint_id"]]
        # One {endpoint name: predicted value} mapping per molecule.
        e2v = [{e['endpoint_id']: e['value'] for e in entry['data']}
               for entry in vals_another]
        smiles = [val['smiles'] for val in vals_another]
        mols = [robust_standardizer(mol) for mol in smiles]
        mols = [Chem.MolFromSmiles(mol) for mol in mols]
        molecular_weights = [CalcExactMolWt(mol) for mol in mols]
        logp = [MolLogP(mol) for mol in mols]
        atom_count = [mol.GetNumAtoms() for mol in mols]
        molar_reflactivity = [MolMR(mol) for mol in mols]
        numRings = [CalcNumRings(mol) for mol in mols]
        numRotBonds = [CalcNumRotatableBonds(mol) for mol in mols]
        numHAcceptors = [NumHAcceptors(mol) for mol in mols]
        numHDonors = [NumHDonors(mol) for mol in mols]
        bcf = [e['Bioconcentration factor'] for e in e2v]
        dev_tox = [e['Developmental toxicity'] for e in e2v]
        flash_point = [e['Flash point'] for e in e2v]
        boiling_point = [e['Boiling point'] for e in e2v]
        melting_points = [e['Melting point'] for e in e2v]
        water_solubility = [e['Water Solubility'] for e in e2v]

        result = [0] * len(smiles)
        for idx in range(len(smiles)):
            checks = (
                160 <= molecular_weights[idx] <= 480,
                -0.4 <= logp[idx] <= 5.6,
                20 <= atom_count[idx] <= 70,
                40 <= molar_reflactivity[idx] <= 130,
                bcf[idx] < 3,
                dev_tox[idx] == 'Negative',
                flash_point[idx] > (350 - 273.15),
                boiling_point[idx] > (300 - 273.15),
                numRings[idx] > 0,
                numRotBonds[idx] < 5,
                numHAcceptors[idx] <= 10,
                numHDonors[idx] <= 5,
            )
            # Count as a plain int so the score stays JSON-serializable.
            val = sum(1 for passed in checks if passed)

            if val / len(checks) >= loyality:
                result[idx] = val

        print(result)
        for idx, score in enumerate(result):
            if score > 0:
                predicted = (melting_points[idx], boiling_point[idx], water_solubility[idx])
                distance = mean_squared_error(
                    scaler.transform(np.array([list(predicted)])),
                    target_scaled,
                )
                result_arr.append((smiles[idx], score, predicted, distance))

    # Smallest distance to the requested conditions first.
    result_arr.sort(key=lambda x: x[3])

    print(result_arr[:num_of_mols])
    return jsonify(result_arr[:num_of_mols])
Ejemplo n.º 7
0
 def _calculate_phys_chem_property(self, mol):
     """Return the value of ``CalcNumRings`` for ``mol``."""
     ring_count = CalcNumRings(mol)
     return ring_count