def compound_identity(
        query: Molmap,
        target: Optional[Mapping[str, Mol]]) -> Mapping[str, List[str]]:
    """Find, for every query compound, the targets considered identical to it.

    A target name survives for a query only if it is reported by
    ``find_similarity_matches`` at ``threshold=1`` for both the "morgan" and
    the "topological" fingerprint types, and its ``MolWt`` is within a
    relative tolerance of 0.001 of the query's ``MolWt``.

    :param query: mapping of query names to molecules
    :param target: mapping of target names to molecules; when ``None`` the
        candidate names and weights are taken from ``query`` and ``None`` is
        passed on as the target arena
    :return: mapping of each query name to the list of matching target names
    """
    has_target = target is not None
    candidate_names = set((target if has_target else query).keys())
    # Every query starts with all candidates; each stage below narrows the set.
    match_sets = {name: candidate_names.copy() for name in query.keys()}

    # Stage 1: keep only candidates matched under every fingerprint type.
    for fp_type in ("morgan", "topological"):
        query_arena = make_fingerprint_arena(query, fingerprint_type=fp_type)
        if has_target:
            target_arena = make_fingerprint_arena(
                target, fingerprint_type=fp_type)
        else:
            target_arena = None
        matches = find_similarity_matches(query_arena,
                                          target_arena,
                                          threshold=1)
        for name in match_sets.keys():
            match_sets[name] &= set(matches.get(name, {}).keys())

    # Stage 2: additionally require (almost) equal molecular weight.
    query_weights = {name: MolWt(mol) for name, mol in query.items()}
    if has_target:
        target_weights = {name: MolWt(mol) for name, mol in target.items()}
    else:
        target_weights = query_weights
    for name, candidates in match_sets.items():
        same_weight = set()
        for candidate in candidates:
            if isclose(query_weights[name],
                       target_weights[candidate],
                       rel_tol=0.001):
                same_weight.add(candidate)
        # In-place intersection: ``candidates`` is the set stored in match_sets.
        candidates &= same_weight

    return {name: list(found) for name, found in match_sets.items()}
Esempio n. 2
0
    def keep_biggest(cls, mol_in):
        """Strip small fragments from compound.

        Returns a new compound where only the "biggest" fragment is conserved
        according to (i) the number of non-Hs atoms and if there is tie then
        according to (ii) the molecular weight.

        When ``mol_in`` has a single connected component it is returned
        unchanged (the same object, no copy).

        :param  mol_in:  RDKit Mol
        :return mol_out: new RDKit Mol having only one connected component
        """
        def count_non_hs_atom(mol):
            """Count the atoms of ``mol`` whose atomic number is not 1."""
            return sum(1 for atm in mol.GetAtoms() if atm.GetAtomicNum() != 1)

        # Remove "other" molecules
        molfrag = GetMolFrags(mol_in, asMols=True, sanitizeFrags=False)
        mol_out = mol_in
        if len(molfrag) > 1:
            accepted_nbr_atm = 0  # heavy-atom count of the best fragment so far
            # Weight of the best fragment so far. This must be the same name
            # the tie-break below reads; it used to be initialised under a
            # different name, so a first fragment with zero heavy atoms
            # raised UnboundLocalError.
            accepted_mass = 0
            for f in molfrag:
                nbr_atm = count_non_hs_atom(f)
                if nbr_atm > accepted_nbr_atm or (
                        nbr_atm == accepted_nbr_atm
                        and MolWt(f) > accepted_mass):
                    accepted_nbr_atm = nbr_atm
                    accepted_mass = MolWt(f)
                    mol_out = f  # keep only the biggest fragment
            cls._copy_properties(mol_in, mol_out)  # save the name and stuff
        return mol_out
Esempio n. 3
0
def calculate_mass_route():
    """Calculate compound molecular mass.
    ---
    post:
      summary: Calculate compound molecular mass.
      requestBody:
        required: true
        content:
          application/json:
            schema: CalculateMassSchema
      responses:
        '200':
          content:
            application/json:
              schema: CalculateMassResultSchema
    """
    # The docstring above is left untouched: everything after "---" looks like
    # an OpenAPI fragment that tooling may parse at runtime.
    payload = CalculateMassSchema().load(request.json)
    compounds, skipped = convert_compound_request(payload["compounds"])

    mass_out = {}
    for name, mol in compounds.items():
        try:
            weight = MolWt(mol)
        except Exception:
            # Best effort: a compound whose weight cannot be computed is
            # reported as skipped instead of failing the whole request.
            skipped.append(name)
        else:
            mass_out[name] = weight

    out = {"mass": mass_out, "skipped": skipped}
    # NOTE(review): the value returned by validate() is discarded here --
    # confirm whether validation problems are meant to be surfaced.
    CalculateMassResultSchema().validate(out)
    return out
Esempio n. 4
0
 def evaluate(self, lst_in):
     """
     Evaluate structure alerts for a single [SMILES, Name] pair.

     :param lst_in: two-element sequence [SMILES, Name]
     :return: [SMILES, Name, status] followed by six descriptor values
         (MolWt, MolLogP, NumHDonors, NumHAcceptors, TPSA,
         CalcNumRotatableBonds). status is 'INVALID' when the SMILES cannot
         be parsed (the descriptors are then all -999), the description of
         the first rule whose match count exceeds its maximum followed by
         " > <max>", or "OK" when no rule is exceeded.
     """
     smiles, name = lst_in
     header = [smiles, name]
     mol = Chem.MolFromSmiles(smiles)
     if mol is None:
         return header + ['INVALID'] + [-999] * 6

     descriptors = [
         MolWt(mol),
         MolLogP(mol),
         NumHDonors(mol),
         NumHAcceptors(mol),
         TPSA(mol),
         CalcNumRotatableBonds(mol),
     ]
     # Rules are checked in order; the first one exceeded decides the status.
     for patt, max_val, desc in self.rule_list:
         n_hits = len(mol.GetSubstructMatches(patt))
         if n_hits > max_val:
             return header + [desc + " > %d" % (max_val)] + descriptors
     return header + ["OK"] + descriptors
    def __call__(self, smiles: str) -> bool:
        """Tell whether a SMILES string passes every property and alert rule.

        The molecule passes when each of MolWt, MolLogP, NumHDonors,
        NumHAcceptors and TPSA lies inside the inclusive ``[low, high]``
        range stored under "MW", "LogP", "HBD", "HBA" and "TPSA" in
        ``self.rule_dict``, and no pattern in ``self.rule_list`` matches more
        often than its allowed maximum.

        Args:
            smiles: SMILES string of the molecule to check.

        Returns:
            True if all checks pass, False otherwise. A SMILES string that
            cannot be parsed is rejected (False) instead of raising from
            inside the descriptor functions.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None.
            return False

        # Checked in this order; the first descriptor out of range rejects.
        descriptor_checks = (
            ("MW", MolWt),
            ("LogP", MolLogP),
            ("HBD", NumHDonors),
            ("HBA", NumHAcceptors),
            ("TPSA", TPSA),
        )
        for key, descriptor in descriptor_checks:
            bounds = self.rule_dict[key]
            if not (bounds[0] <= descriptor(mol) <= bounds[1]):
                return False

        for patt, max_val, _desc in self.rule_list:
            if len(mol.GetSubstructMatches(patt)) > max_val:
                return False

        return True
Esempio n. 6
0
    def evaluate(self, point: Any) -> float:
        """
        Score a point by the mean molecular weight of the SMILES generated from it.

        The point is wrapped in a tensor, repeated ``self.batch`` times and
        handed to ``self.generator.generate_smiles``. SMILES whose weight
        cannot be computed are logged and left out of the mean.

        Args:
            point: point to evaluate.

        Returns:
            ``1 - exp(-|target - mean weight|)``, or 1.0 when no weight
            could be computed at all.
        """
        batch_latent = torch.tensor([[point]]).repeat(1, self.batch, 1)
        generated = self.generator.generate_smiles(batch_latent)

        weights = []
        for smiles_string in generated:
            try:
                weight = MolWt(Chem.MolFromSmiles(smiles_string))
            except Exception:
                logger.warning("MW calculation failed.")
            else:
                weights.append(weight)

        if not weights:
            return 1.0
        mean_weight = sum(weights) / len(weights)
        return 1.0 - exp(-abs(self.target - mean_weight))
Esempio n. 7
0
def get_similar_molecules(query_descriptors: list, query_smiles: str,
                          mol_inchi: str, database_binary_path: str,
                          database_smiles_path: str, database_id: int,
                          num_to_keep: int):
    """Celery task that reads database binary files comparing query descriptors

    Runs the external USRCAT reader on the database, then writes one CSV row
    per returned candidate (candidate SMILES, USRCAT score, Morgan/Dice
    similarity to the query, candidate title with "_1" removed, molecular
    weight) to ``<QUERY_SIMILARS_DIRECTORY>/<mol_inchi>_<database_id>_<num_to_keep>.csv``.

    Args:
        query_descriptors (list): USRCAT descriptors of query; the first 60 entries are used.
        query_smiles (str): SMILES representation of the query molecule.
        mol_inchi (str): Identifier of the query, used as the first part of the output file name.
        database_binary_path (str): Binary file path - must be string, not path, as Path is non-serialisable.
        database_smiles_path (str): Smiles file path - same as above, must be a string. Not read by this function.
        database_id (int): Int representing database ID, used to cache results against specific databases
        num_to_keep (int): Number of best candidates requested from the reader.

    Raises:
        ValueError: if ``query_smiles`` cannot be parsed.
    """
    print("Worker running for " + query_smiles)
    mol = Chem.MolFromSmiles(query_smiles)
    if mol is None:
        # Fail before launching the reader instead of crashing on the first
        # candidate and leaving a half-written CSV behind.
        raise ValueError("Could not parse query SMILES: " + query_smiles)
    # The query fingerprint does not depend on the candidate: compute it once.
    query_fingerprint = GetMorganFingerprint(mol, 2)
    query_mol_identifier = mol_inchi + "_" + str(database_id) + "_" + str(
        num_to_keep)

    # Only the trailing ".bin" extension is dropped; a ".bin" occurring
    # elsewhere in the path (e.g. in a directory name) must be kept.
    if database_binary_path.endswith(".bin"):
        binary_stem = database_binary_path[:-len(".bin")]
    else:
        binary_stem = database_binary_path

    # CPP program bellow called for speed of processing
    ###################################################################################
    # Build the command line -    //Arguments must be
    # 0: Executable
    # 1: Binary file location without last .bin extension, so that .bin and .smi file locations can be derived
    # 2: Number of best to keep
    # 3-63: USRCAT descriptors of query
    command_line = [
        "/home/ubuntu/similarity_lab/utils/usrcat_binary_reader_similarity_lab",
        binary_stem,
        str(num_to_keep)
    ]
    command_line.extend(str(query_descriptors[i]) for i in range(60))
    process = Popen(command_line, stdout=PIPE)
    # communicate() already waits for the process to finish.
    output, _ = process.communicate()
    lines = output.decode("utf-8").splitlines()

    out_path = Path(app.config['QUERY_SIMILARS_DIRECTORY']) / (
        query_mol_identifier + ".csv")
    with open(out_path, "w") as outfile:
        outfile.write(
            "Candidate SMILES,USRCAT Score,Morgan Score,eMolecules ID,MW\n")
        for line in lines:
            stripped_line = line.strip()
            if not stripped_line:
                # A blank line carries no candidate; unpacking it would raise.
                continue
            # Each line is "<smiles> <title>,<score>".
            candidate_smiles, title_comma_score = stripped_line.split(
                " ", maxsplit=1)
            candidate_title, candidate_score = title_comma_score.split(",")
            candidate_score = float(candidate_score)
            candidate_mol = Chem.MolFromSmiles(candidate_smiles)
            candidate_morgan_score = DiceSimilarity(
                GetMorganFingerprint(candidate_mol, 2), query_fingerprint)
            mw = MolWt(candidate_mol)
            outfile.write(
                f'{candidate_smiles},{round(candidate_score,3)},{round(candidate_morgan_score,3)},{candidate_title.replace("_1","")},{round(mw,3)}\n'
            )
    print("Worker done")
Esempio n. 8
0
def check_lipinski(mol):
    """Apply a Lipinski-style check to ``mol`` and cap heavy molecules.

    The check passes when the molecule has at most 5 H-bond donors, at most
    5 H-bond acceptors and a logP below 5. A passing molecule whose weight is
    450 or more additionally gets a random 'terminal_fg' functional group
    joined onto it.

    :param mol: object exposing ``rdmol`` (the RDKit molecule) and ``join``
    :return: ``(passed, False)``; the second element is always False
    """
    fgs = load_functional_groups()
    h_donors = Lipinski.NumHDonors(mol.rdmol)
    h_acceptors = Lipinski.NumHAcceptors(mol.rdmol)
    log_p = MolLogP(mol.rdmol)
    wt = MolWt(mol.rdmol)

    passed = h_donors <= 5 and h_acceptors <= 5 and log_p < 5
    if not passed:
        return False, False
    if wt >= 450:
        mol.join(fgs['terminal_fg'].get_random())
    return True, False
Esempio n. 9
0
    def fragments(self) -> List[Fragment]:
        """Fragment the ligand with ``Reactor`` and wrap the results.

        The ligand's own RDKit molecule is included alongside the reaction
        products before sorting.

        Returns:
            List[Fragment]: Ordered by ``MolWt``, heaviest first

        """
        parent = self.rdkit_mol
        candidates = [parent, *Reactor().react(parent)]
        ranked = sorted(candidates, key=MolWt, reverse=True)
        return [Fragment(self.atomium_mol, rd_mol) for rd_mol in ranked]
    def __call__(self, mol):
        """
        Returns the molecular weight (``MolWt``) of a SMILES string or a RdKit molecule.

        :param mol: SMILES string or ``rdkit.Chem.rdchem.Mol`` (subclasses of
            either are accepted)
        :return: the value of ``MolWt`` for the molecule
        :raises ValueError: if a SMILES string cannot be parsed
        :raises TypeError: if ``mol`` is neither a string nor an RDKit molecule
        """
        # isinstance rather than an exact type comparison, so that subclasses
        # of str or of the RDKit Mol class are not rejected.
        if isinstance(mol, rdkit.Chem.rdchem.Mol):
            pass
        elif isinstance(mol, str):
            mol = Chem.MolFromSmiles(mol, sanitize=True)
            if mol is None:
                raise ValueError("Invalid SMILES string.")
        else:
            raise TypeError("Input must be from {str, rdkit.Chem.rdchem.Mol}")

        return MolWt(mol)
Esempio n. 11
0
def process_by_folder(fd, inpath):
    """Load the ranked designs of one cycle folder and annotate them.

    Reads ``<inpath>/<fd>/ranked_designs.sd`` and sets the 'Cycle',
    'MolWeight', 'LogP', 'QED' and 'SAS' properties on every molecule.

    :param fd: folder name of the cycle, e.g. ``"cycle_3"``
    :param inpath: directory that contains the cycle folder
    :return: ``(cir_mols, best_mol)`` where ``cir_mols`` is the list of
        annotated molecules and ``best_mol`` is the first of them. When the
        SD file does not exist or holds no molecule, ``([], None)`` is
        returned instead of failing on unbound or empty results.
    """
    # Remove the "cycle_" prefix only. str.strip("cycle_") would strip any of
    # the characters c, y, l, e, _ from *both* ends of the name.
    prefix = "cycle_"
    cycle = fd[len(prefix):] if fd.startswith(prefix) else fd
    sd = inpath + '/' + fd + '/ranked_designs.sd'

    cir_mols = []
    best_mol = None
    if os.path.exists(sd):
        cir_mols = [PropertyMol(m) for m in Chem.SDMolSupplier(sd)]
        for m in cir_mols:
            # Calculate properties for each mol
            m.SetProp('Cycle', cycle)
            m.SetProp('MolWeight', str(MolWt(m)))
            m.SetProp('LogP', str(LogP(m)))
            m.SetProp('QED', str(QED(m)))
            m.SetProp('SAS', str(SAS(m)))
        # Select the highest score design in the cycle
        # (the first one in the ranked sd file)
        if cir_mols:
            best_mol = cir_mols[0]
    return cir_mols, best_mol
Esempio n. 12
0
def drawmol(s):
    """Draw the molecule given by SMILES ``s`` with a caption and save it as PNG.

    The caption shows the SMILES, its ``MolWt`` and its ``TPSA``. The image is
    written to ``'xx' + s + '.png'`` in the current working directory.

    :param s: SMILES string of the molecule to draw
    """
    bsize = 200  # side of the square area used for the structure
    tsize = 80  # extra height reserved below it for the caption
    size = (bsize, bsize + tsize)
    m = Chem.MolFromSmiles(s)
    img, canvas, drawer = Draw.MolToImage(m, size=size, returnCanvas=True)
    font = Font(face='sans', size=12)
    pos = bsize / 2, bsize, 0
    canvas.addCanvasText('%s\r\nMolWt: %g\tTPSA: %g' % (s, MolWt(m), TPSA(m)),
                         pos, font)

    # PNG data is binary: the file has to be opened in 'wb' mode, saving an
    # image into a text-mode file object fails.
    with open('xx' + s + '.png', 'wb') as f:
        canvas.flush()
        img.save(f)
Esempio n. 13
0
def smiles_reaction_matrix(smarts, *sources, **kwargs):
    """Yield one text line per reaction product that passes the property filters.

    The reaction built from ``smarts`` is applied to the SMILES lists loaded
    from ``sources``. A product is reported when its ``MolWt`` is at most
    ``molValue`` and its ``MolLogP`` is below ``logValue``.

    :param smarts: reaction SMARTS
    :param sources: inputs handed one by one to ``load_smiles_file``
    :param kwargs: ``sep`` (field separator, default ' '), ``molValue``
        (weight limit, default 400), ``logValue`` (logP limit, default 4.0)
    :return: generator of lines "<smiles><sep><ids><sep><weight><sep><logp>\\n",
        where <ids> are the reactants' "_Name" properties joined by '.'
    """
    sep = kwargs.setdefault('sep', ' ')
    max_weight = int(kwargs.get('molValue', 400))
    max_logp = float(kwargs.get('logValue', 4.0))
    reaction = ReactionFromSmarts(smarts)
    smiles_lists = [load_smiles_file(source) for source in sources]
    for reactants, product in reaction_matrix(reaction, *smiles_lists):
        product_id = '.'.join([r.GetProp("_Name") for r in reactants])
        for mol in product:
            smiles = MolToSmiles(mol, isomericSmiles=True)
            mol.UpdatePropertyCache(strict=False)
            # The hydrogen-added copy is computed but not used afterwards.
            mh = AddHs(mol, addCoords=True)
            mwt = MolWt(mol)
            if not mwt <= max_weight:
                continue
            logp = MolLogP(mol)
            if not logp < max_logp:
                continue
            yield sep.join((smiles, product_id, str(mwt), str(logp)))+"\n"
Esempio n. 14
0
def cpd_inform(SMILES):
    """
    Collect basic composition information for a SMILES string.

    Returns a list of nine values, in this order: the results of count_C,
    count_H, count_O, count_N, count_P, count_S and count_X (each as a
    float), the degree of unsaturation, and the molecular weight (MolWt).
    """
    mol = Chem.rdmolfiles.MolFromSmiles(SMILES)
    counters = (count_C, count_H, count_O, count_N, count_P, count_S, count_X)
    info = [float(counter(mol)) for counter in counters]

    n_c, n_h, _n_o, n_n, n_p, _n_s, n_x = info
    # Degree of unsaturation: (2*C + 2 + N + P - X - H)/2
    info.append((2*n_c + 2 + n_n + n_p - n_x - n_h)/2)
    info.append(MolWt(mol))
    return info
Esempio n. 15
0
def get_global_features(mol):
    """Build the molecule-level feature vector.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    np.ndarray
        float32 array holding, in order: MolWt, CalcTPSA, MolLogP and
        NumHDonors of ``mol``
    """
    features = [MolWt(mol), CalcTPSA(mol), MolLogP(mol), NumHDonors(mol)]
    return np.array(features, dtype=np.float32)
Esempio n. 16
0
def cal_prop(q, return_dict_prop):
    """Worker loop: compute molecular properties for SMILES taken from a queue.

    Items are fetched with ``q.get()`` until the sentinel ``'DONE'`` arrives.
    Every other item is an ``(idx, smi)`` pair; the result
    ``[logP, SAS, QED, MW, TPSA]`` is stored in ``return_dict_prop[idx]``.

    :param q: queue-like object providing ``get()``
    :param return_dict_prop: mapping that receives the results, keyed by idx
    """
    while True:
        item = q.get()
        if item == 'DONE':
            return
        idx, smi = item

        mol = Chem.MolFromSmiles(smi)
        return_dict_prop[idx] = [
            MolLogP(mol),
            sascorer.calculateScore(mol),
            qed(mol),
            MolWt(mol),
            TPSA(mol),
        ]
Esempio n. 17
0
df["res_stnd_SMILES"] = res_stnd_smi_list  # Salt removed from standardized SMILES

# Rows whose standardized SMILES is "" or "-" are treated as failures.
error_mask2 = np.any(
    [df["res_stnd_SMILES"] == "", df["res_stnd_SMILES"] == "-"], axis=0)

df3 = df[np.logical_not(error_mask2)]
# Show every row whose "res_SMILES" is duplicated, to inspect them one by one.
print(df3[df3.duplicated("res_SMILES", keep=False)][["Result", "res_SMILES"]])
# De-duplicate the *filtered* frame. Calling drop_duplicates on ``df`` would
# bring the failed rows back and throw away the filtering done just above.
# drop_duplicates keeps the first occurrence of each SMILES; .copy() makes
# df3 an independent frame so the "MW" column can be added safely.
df3 = df3.drop_duplicates("res_stnd_SMILES").copy()

# 5. Filter by molecular weight
mw_lst = []
for smi in df3["res_stnd_SMILES"]:
    mol = MolFromSmiles(smi)
    mw = MolWt(mol)
    mw_lst.append(mw)
df3["MW"] = mw_lst
# Fraction of "P" results among the very light and the very heavy compounds.
print(np.mean(df3[df3["MW"] < 40]["Result"] == "P"),
      np.mean(df3[df3["MW"] > 800]["Result"] == "P"))
# Keep only compounds with 40 < MW < 800 (both bounds exclusive).
df3_mw = df3[(df3["MW"] > 40) & (df3["MW"] < 800)]
df3_mw = df3_mw.reset_index(drop=True)

seeds = [1043]  #np.random.randint(1,1e4,10)

for seed in seeds:
    trn, test = split_df(df3_mw, seed)  # df3,df3_mw
    trn.to_csv(f"{data_path}/TG471_train_all_stdn_curated_mw_{seed}.csv")
    test.to_csv(f"{data_path}/TG471_test_all_stdn_curated_mw_{seed}.csv")
Esempio n. 18
0
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

def str2mol(s):
    """Turn a SMILES string into a molecule; pass anything else through unchanged."""
    return Chem.MolFromSmiles(s) if isinstance(s, str) else s


# Property name -> scorer. Every scorer accepts either a SMILES string or an
# already-built molecule, because the argument goes through str2mol first.
rdkit_funcs = {
    "QED": lambda x: QED(str2mol(x)),
    "MOLWT": lambda x: MolWt(str2mol(x)),
    "SAS": lambda x: calculateScore(str2mol(x)),
    "LOGP": lambda x: LogP(str2mol(x)),
}


class AttnParams:
    _params = None

    def __init__(self):
        self._params = {
            "model": None,
            "data": None,  # Data stuff
            "len_limit": 120,
            "num_props": 4,
            "current_epoch": 1,  # Training params
            "epochs": 20,
Esempio n. 19
0
    canvas.addCanvasText('%s\r\nMolWt: %g\tTPSA: %g' % (s, MolWt(m), TPSA(m)),
                         pos, font)

    with open('xx' + s + '.png', 'w') as f:
        canvas.flush()
        img.save(f)


# Script entry point: render three example molecules to PNG files, then exit.
if __name__ == '__main__':
    drawmol('CN1CCC[C@H]1c2cccnc2')
    drawmol('CC(=O)OC1=CC=CC=C1C(=O)O')
    drawmol('O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5')
    sys.exit(0)

    # NOTE: everything below this point is unreachable because of the
    # sys.exit(0) above; it is kept as reference snippets only.
    # NOTE(review): m, molname, smiles and mol are not defined in the visible
    # part of this file -- these snippets would need them supplied to run.

    # sample code to use new drawing API (older rdkit do not have DrawString)
    from rdkit.Chem.AllChem import EmbedMolecule
    assert EmbedMolecule(m) >= 0
    x = Draw.rdMolDraw2D.MolDraw2DSVG(200, 250)
    x.DrawMolecule(m)
    x.DrawString('Test String', 20, 200)
    x.FinishDrawing()
    print(x.GetDrawingText())

    # sample code to generate a legend: optional molecule name on the first
    # line, then the SMILES and a summary of descriptors
    legstr = ''
    if molname:
        legstr += molname + '\n'
    legstr += '%s\nWt=%g LogP=%g TPSA=%g\nHBA=%d HBD=%d RotBond=%d\n' % \
        (smiles, MolWt(mol), MolLogP(mol), TPSA(mol),
         NumHAcceptors(mol), NumHDonors(mol), NumRotatableBonds(mol))
Esempio n. 20
0
def Girolami(smiles):
    """Estimate a density for the molecule given by ``smiles``.

    The base value is ``MolWt / (5 * sum of per-atom contributions)``, where
    each atom (explicit hydrogens included) contributes a value chosen by its
    atomic number. The base value is then scaled by ``1 + correction``, the
    correction being the sum of

    * 0.1 per alcohol, carboxylic acid, primary/secondary amine and sulfoxide
      match and per ring that is not fused to another ring,
    * 0.2 per sulfone match,
    * 0.075 per ring that is fused to at least one other ring (two rings
      count as fused when they share two or more atoms).

    :param smiles: SMILES string of the molecule
    :return: the estimated density as a float
    :raises ValueError: if the SMILES cannot be parsed or the molecule
        contains an atom for which no contribution is defined
    """
    # Get RDKit molecule
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError('Could not parse SMILES: %r' % (smiles,))
    mol = Chem.AddHs(mol)

    # Calculate molecular weight
    M = MolWt(mol)

    # Iterate over all atoms and get group contributions
    group_contributions = 0.0
    for atom in mol.GetAtoms():
        Z = atom.GetAtomicNum()
        if Z == 1:
            group_contributions += 1
        elif 3 <= Z <= 9:
            group_contributions += 2
        elif 11 <= Z <= 17:
            group_contributions += 4
        elif 19 <= Z <= 35:
            group_contributions += 5
        elif 37 <= Z <= 53:
            group_contributions += 7.5
        elif 55 <= Z <= 83:
            group_contributions += 9
        else:
            raise ValueError('The molecule contains atoms for whom contributions are not defined.')

    # Calculate initial density
    rho = M / (5 * group_contributions)

    # Define functional groups for correction
    alcohol = Chem.MolFromSmarts('[OX2H]')
    acid = Chem.MolFromSmarts('[CX3](=O)[OX2H1]')
    amine = Chem.MolFromSmarts('[NX3;H2,H1;!$(NC=O)]')
    sulfoxide = Chem.MolFromSmarts('[$([#16X3]=[OX1]),$([#16X3+][OX1-])]')
    sulfone = Chem.MolFromSmarts('[$([#16X4](=[OX1])=[OX1]),$([#16X4+2]([OX1-])[OX1-])]')
    n_alcohol = len(mol.GetSubstructMatches(alcohol))
    n_acid = len(mol.GetSubstructMatches(acid))
    n_amine = len(mol.GetSubstructMatches(amine))
    n_sulfoxide = len(mol.GetSubstructMatches(sulfoxide))
    n_sulfone = len(mol.GetSubstructMatches(sulfone))

    # Find rings and split them into fused ("condensed") and unfused ones.
    # Every distinct pair of rings is compared exactly once (a ring is never
    # compared with itself) and the shared atoms are actually counted; the
    # indices of all rings taking part in a fusion are collected in a set, so
    # a ring fused to several neighbours is still counted only once and the
    # number of fused rings can never exceed the number of rings.
    rings = [set(ring) for ring in Chem.GetSymmSSSR(mol)]
    fused_ring_indices = set()
    for first in range(len(rings)):
        for second in range(first + 1, len(rings)):
            if len(rings[first] & rings[second]) >= 2:
                fused_ring_indices.update((first, second))
    n_condrings = len(fused_ring_indices)
    n_rings = len(rings) - n_condrings

    # (count, correction per occurrence), applied in this order
    weighted_groups = (
        (n_alcohol, 0.1),
        (n_acid, 0.1),
        (n_amine, 0.1),
        (n_sulfoxide, 0.1),
        (n_rings, 0.1),
        (n_sulfone, 0.2),
        (n_condrings, 0.075),
    )

    # Add corrections
    # NOTE(review): the running correction is compared against 1.3 while the
    # capped result is 1.3 * rho (i.e. a correction of 0.3) -- confirm against
    # the published method whether the limit was meant to be 0.3.
    correction = 0.0
    for count, increment in weighted_groups:
        if correction + count * increment <= 1.3:
            correction += count * increment
        else:
            return 1.3 * rho

    return (1 + correction) * rho
Esempio n. 21
0
 def _calculate_phys_chem_property(self, mol):
     """Return ``MolWt(mol)`` as the property value for ``mol``."""
     molecular_weight = MolWt(mol)
     return molecular_weight
Esempio n. 22
0
def cal_fp_props(mol):
    """Compute the encoding and the property record of one molecule.

    Meant as a single-argument wrapper for multiprocessing.

    :param mol: molecule to process
    :return: ``(fp, mol_props)`` where ``fp`` is ``enc.encode_mol(mol)`` and
        ``mol_props`` is a ``Props`` built from the molecule's SMILES, MolWt,
        LogP, QED and SAS
    """
    descriptors = (MolToSmiles(mol), MolWt(mol), LogP(mol), QED(mol),
                   SAS(mol))
    mol_props = Props(*descriptors)
    fingerprint = enc.encode_mol(mol)
    return fingerprint, mol_props
def mol_weight_get(smiles):
    """Return the molecular weight of a SMILES string.

    :param smiles: SMILES string to parse
    :return: ``{"smiles": ..., "molWeight": ...}`` on success, or the tuple
        ``(error message, 500)`` when the SMILES cannot be parsed
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        return {"smiles": smiles, "molWeight": MolWt(parsed)}
    return ("Could not parse input: " + smiles, 500)
Esempio n. 24
0
    def calculate(self):
        """Return the weight of ``self.mol``.

        ``ExactMolWt`` is used when ``self._exact`` is set, ``MolWt``
        otherwise. When ``self._averaged`` is set, the weight is divided by
        the molecule's ``GetNumAtoms()``.
        """
        weight_fn = ExactMolWt if self._exact else MolWt
        w = weight_fn(self.mol)
        if not self._averaged:
            return w
        return w / self.mol.GetNumAtoms()
Esempio n. 25
0
def mol_wt(smiles):
    """Get molecular weight (in Daltons) of the molecule given as SMILES."""
    mol = Chem.MolFromSmiles(smiles)
    return MolWt(mol)
Esempio n. 26
0
    mws = []
    logps = []
    nhdonors = []
    values = []
    dataset = []

    for data in list(LABEL_GUIDE.keys()) + ["cyp"]:
        with open(os.path.join(DATA_PATH, data, f"data_{data}.pt"),
                  "rb") as handle:
            inchis, v = pickle.load(handle)

        values.extend(v)

        for inchi in tqdm(inchis):
            mol = MolFromInchi(inchi)
            mws.append(MolWt(mol))
            logps.append(MolLogP(mol))
            nhdonors.append(NumHDonors(mol))
            dataset.append(DATASET_GUIDE[data])

    df = pd.DataFrame({
        "Molecular weight (gr./mol)": mws,
        r"aLog$P$": logps,
        "No. hydrogen donors": nhdonors,
        "values": values,
        "dataset": dataset,
    })

    f, axs = plt.subplots(1, 3, figsize=(18, 6))

    axs[0].grid(alpha=0.5)
Esempio n. 27
0
    df = pd.read_hdf(H5_FILE, h5_table)
    idx = np.loadtxt(good_dirs, dtype=np.str)
    iidx = [int(i[2:]) for i in idx]
    smiles = df.loc[iidx]['smiles']
except:
    raise Exception('lock section error')
finally:
    lock.release()

# Compute the molecular weight (explicit hydrogens added) of every molecule,
# echoing the first ten to the console.
n = 0
for n, (i, smile) in enumerate(zip(idx, smiles), start=1):
    # can prepend MolWt with Exact
    m = Chem.AddHs(Chem.MolFromSmiles(smile))
    mw = MolWt(m)

    if n <= 10:
        print('molecule: {}  smile: {} mw: {}'.format(i, smile, mw))
    mws.append(mw)

print('processed {} molecules!'.format(len(smiles)))
# plot historgrams
from plot_settings import *


def plot_hist(save_path,
              data,
              xlabel=None,
              ylabel=None,
              label=None,