class Compound(models.Model):
    """Named compound stored with a structure column and optional fingerprints."""

    # Free-text label, limited to 256 characters.
    name = models.CharField(max_length=256)

    # Structure column (MolField; presumably provided by django-rdkit).
    molecule = models.MolField()

    # Bit-fingerprint columns. All three are nullable, so a row can be stored
    # before any fingerprint has been computed.
    # NOTE(review): the names suggest torsion / Morgan / feature-Morgan
    # fingerprints, but nothing in this class fills them in -- confirm.
    torsionbv = models.BfpField(null=True)
    mfp2 = models.BfpField(null=True)
    ffp2 = models.BfpField(null=True)
class Compound(rdkit_models.Model):
    """Registered compound with textual identifiers and optional related records."""

    # Textual structure representations.
    smiles = models.CharField(max_length=2048)
    molecule = rdkit_models.MolField(null=False)
    inchi = models.CharField(max_length=2048, null=True, blank=True)
    inchi_key = models.CharField(max_length=2048, null=True, blank=True)
    ctab = models.TextField(null=True, blank=True)

    # Set automatically when the row is first created (auto_now_add).
    datetime_loaded = models.DateTimeField(
        auto_now_add=True,
        verbose_name="Datetime When Compound Registered",
    )

    # Optional one-to-one links; deleting the related row only nulls the link.
    fingerprint = models.OneToOneField(
        Fingerprint,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        verbose_name="Compound Fingerprint",
    )
    # NOTE: this attribute name shadows the ``property`` builtin inside the
    # class body; it is kept because it is part of the model's interface.
    property = models.OneToOneField(
        Property,
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        verbose_name="Compound Property",
    )

    class Meta:
        db_table = 'compound'
        verbose_name = "Compound"
        verbose_name_plural = "Compounds"

    def __unicode__(self):
        """Return a label containing the row id and the registration time."""
        # NOTE(review): the '-' flag in '%-I' / '%-d' is a platform-specific
        # strftime extension -- confirm the deployment platform supports it.
        loaded_on = self.datetime_loaded.strftime('%-I:%M %p %B %-d %Y')
        return u"Compound ID {id} loaded on {date}".format(
            id=self.id, date=loaded_on)
class Compound(models.Model):
    """Compound whose descriptor columns are derived from an RDKit molecule.

    Construct it as ``Compound(mol_as_RDmol, description)`` (positionally or
    by keyword). Calls with more than two positional arguments are forwarded
    untouched to the base constructor.
    """

    # Generated identifier: 'MI-M-' plus 8 hex digits (13 characters).
    unique_id = models.CharField(max_length=13, unique=True, blank=False)
    description = models.TextField(blank=True)
    smiles = models.CharField(max_length=4096, blank=False, unique=True)
    inchi = models.CharField(max_length=4096, blank=False, unique=True)
    inchi_key = models.CharField(max_length=27, blank=False, unique=True)
    mol_weight_exact = models.FloatField(blank=False)
    heavy_atoms_count = models.IntegerField(blank=False)
    ring_count = models.IntegerField(blank=False)
    mol = models.MolField()

    # Nullable bit-fingerprint columns; not populated by this class.
    torsionbv = models.BfpField(null=True)
    mfp2 = models.BfpField(null=True)
    ffp2 = models.BfpField(null=True)

    class MoleculeAlreadyExists(Exception):
        """Raised by ``save()`` when another row already uses the InChIKey."""

    def _generate_id(self):
        """Return 'MI-M-' followed by 8 hex digits derived from the clock.

        The current timestamp is scaled by ``10e6`` (i.e. 1e7, so it counts
        100 ns ticks, not seconds), packed into 8 big-endian bytes and hashed
        with MD5; only the first 8 hex digits of the digest are kept.
        """
        ticks = int(datetime.now().timestamp() * 10e6)
        digest = hashlib.md5(ticks.to_bytes(8, byteorder='big')).hexdigest()
        return 'MI-M-' + digest[:8]

    def __init__(self, *args, **kwargs):
        """Build a compound from an RDKit molecule and optional description.

        Args:
            *args: ``(mol_as_RDmol[, description])``. More than two positional
                arguments are passed straight to the base constructor.
            **kwargs: ``mol_as_RDmol`` and ``description`` are honoured when
                the corresponding positional argument is missing or falsy.

        Raises:
            RuntimeError: if no molecule was supplied either way.
        """
        if len(args) > 2:
            # Not the (mol, description) form: hand everything to the base
            # constructor unchanged.
            super(Compound, self).__init__(*args, **kwargs)
            return

        mol_as_RDmol = args[0] if args else None
        if not mol_as_RDmol:
            mol_as_RDmol = kwargs.get('mol_as_RDmol')
        if not mol_as_RDmol:
            raise RuntimeError("No RDMol specified")

        description = args[1] if len(args) > 1 else None
        if not description:
            description = kwargs.get('description', '')

        # Every stored descriptor is computed from the molecule itself.
        inchi = Chem.MolToInchi(mol_as_RDmol)
        new_kwargs = {
            'unique_id': self._generate_id(),
            'smiles': Chem.MolToSmiles(
                mol_as_RDmol, isomericSmiles=True, canonical=True),
            'inchi': inchi,
            'inchi_key': Chem.InchiToInchiKey(inchi),
            'mol_weight_exact': Descriptors.ExactMolWt(mol_as_RDmol),
            'heavy_atoms_count': Lipinski.HeavyAtomCount(mol_as_RDmol),
            'ring_count': Lipinski.RingCount(mol_as_RDmol),
            'mol': mol_as_RDmol,
        }
        super(Compound, self).__init__(description=description, **new_kwargs)

    def save(self, *args, **kwargs):
        """Persist the compound unless another row has the same InChIKey.

        Raises:
            Compound.MoleculeAlreadyExists: if a different row already stores
                this ``inchi_key``.
        """
        duplicates = Compound.objects.filter(inchi_key=self.inchi_key)
        if self.pk is not None:
            # An already stored row must not collide with itself; without
            # this exclusion every re-save of an existing compound raised.
            duplicates = duplicates.exclude(pk=self.pk)
        if duplicates.exists():
            raise Compound.MoleculeAlreadyExists(
                "Molecule with the same InchiKey was found. Cannot save.")
        super(Compound, self).save(*args, **kwargs)

    def __str__(self):
        """Return the generated ``unique_id``."""
        return self.unique_id
class ChemicalEntity(models.Model):
    """Chemical structure keyed by canonical SMILES, InChI and InChIKey."""

    # Each identifier is individually unique and may not be blank.
    canonicalSMILES = models.CharField(
        max_length=65536, blank=False, unique=True)
    inchi = models.CharField(max_length=65536, blank=False, unique=True)
    inchiKey = models.CharField(max_length=65536, blank=False, unique=True)

    # from django-rdkit
    rdMol = models.MolField()
    morganFP = models.BfpField(null=True)

    class Meta:
        unique_together = ('canonicalSMILES', 'inchiKey')

    def __str__(self):
        """Return ``'<ClassName> object <inchiKey>'``."""
        return '{} object <{}>'.format(type(self).__name__, self.inchiKey)

    @property
    def fingerprint(self):
        """Return ``morganFP``, computing it from ``rdMol`` when it is falsy.

        The computed value (radius 2, 512 bits) is only assigned to the
        instance attribute; this property does not save the row.
        """
        if self.morganFP:
            return self.morganFP
        self.morganFP = AllChem.GetMorganFingerprintAsBitVect(
            self.rdMol, radius=2, nBits=512)
        return self.morganFP
class Molecule(BaseModel):
    """Molecule identified by its InChIKey, with a cached SMILES string."""

    name = models.CharField(max_length=255, default="")
    inchi_key = models.CharField(
        max_length=27, default=None, unique=True, db_index=True)
    mol_rdkit = rd_models.MolField(default=None)
    # Canonical SMILES, stored only when it fits the column; "" otherwise.
    smiles_with_limit = models.CharField(
        max_length=255, default="", db_index=True)
    chemdoodle_json = JSONField(default=None, null=True, blank=True)

    def __str__(self):
        """Return the (non-kekulized) SMILES of the molecule."""
        return self.smiles()

    def __init__(self, *args, **kwargs):
        """Initialise the row and fill in ``chemdoodle_json`` when missing.

        NOTE: when ``chemdoodle_json`` is None the instance is saved right
        here, so constructing such a Molecule writes to the database.
        """
        super().__init__(*args, **kwargs)
        if self.chemdoodle_json is None:
            self.chemdoodle_json = ChemDoodle().mol_to_json(self)
            self.save()

    class JSONAPIMeta:
        resource_name = "molecules"

    def smiles(self, kekulize=False):
        """Return the SMILES string of the molecule.

        The stored ``smiles_with_limit`` is used when it is non-empty and
        ``kekulize`` is false; otherwise the SMILES is generated from
        ``mol_rdkit``.
        """
        if not kekulize and self.smiles_with_limit != "":
            return self.smiles_with_limit
        return RDKit.mol_to_smiles(self.mol_rdkit, kekulize)

    def mass_exact(self):
        """Return the exact mass computed from ``mol_rdkit``."""
        return RDKit.mass_exact(self.mol_rdkit)

    def mass_exact_isotopes(self):
        """Return a list of masses including bromine isotopes.

        The list holds the exact mass plus 0..k times 1.9979535, where k is
        the number of "Br" occurrences in the SMILES string.
        """
        mass = self.mass_exact()
        br_count = self.smiles().count("Br")
        return [mass + 1.9979535 * inc for inc in range(br_count + 1)]

    @classmethod
    def create_from_smiles(cls, sm):
        """Create and save a Molecule from a SMILES string.

        Returns:
            The saved Molecule, or False when the SMILES cannot be parsed or
            any step fails.
        """
        try:
            m_rdkit = RDKit.mol_from_smiles(sm)
            if not m_rdkit:
                return False
            m = Molecule(
                mol_rdkit=m_rdkit,
                inchi_key=RDKit.mol_to_inchi_key(m_rdkit),
            )
            sm_can = RDKit.mol_to_smiles(m_rdkit)
            # Only cache the canonical SMILES when it fits the column.
            if len(sm_can) < 255:
                m.smiles_with_limit = sm_can
            m.save()
            return m
        except Exception:
            # Best effort by contract: any failure is reported as False.
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return False

    @classmethod
    def find_from_smiles(cls, sm):
        """Find an existing Molecule matching a SMILES string.

        Lookup order: the raw string against ``smiles_with_limit``, then the
        canonical SMILES, then the InChIKey.

        Returns:
            The first matching Molecule, or False when nothing matches or an
            error occurs.
        """
        try:
            found = Molecule.objects.filter(smiles_with_limit=sm).first()
            if found is not None:
                return found

            # Parse once and reuse the molecule for both derived lookups.
            m_rdkit = RDKit.mol_from_smiles(sm)
            found = Molecule.objects.filter(
                smiles_with_limit=RDKit.mol_to_smiles(m_rdkit)).first()
            if found is not None:
                return found

            found = Molecule.objects.filter(
                inchi_key=RDKit.mol_to_inchi_key(m_rdkit)).first()
            if found is not None:
                return found
            return False
        except Exception:
            return False

    @classmethod
    def load_from_smiles(cls, sm):
        """Return the Molecule for a SMILES string, creating it if needed.

        Uses ``find_from_smiles`` first and ``create_from_smiles`` as the
        fallback. Returns False for an empty string or on any error.
        """
        if sm == "":
            return False
        m = Molecule.find_from_smiles(sm)
        if not m:
            m = Molecule.create_from_smiles(sm)
        return m if m else False

    @classmethod
    def load_from_rdkit(cls, mol):
        """Return the Molecule corresponding to an RDKit mol, creating it
        when it does not exist yet. Returns False on any error.
        """
        from rdkit import Chem
        try:
            Chem.SanitizeMol(mol)
            return Molecule.load_from_smiles(RDKit.mol_to_smiles(mol))
        except Exception:
            return False

    @classmethod
    def gen_molecules(cls, file_path, queryset):
        """Write ``molecule_id,inchi_key,smiles`` lines for a queryset.

        Rows are written in ``id`` order after a header line. Values are
        joined with commas without any quoting or escaping.
        """
        with open(file_path, "w") as fw:
            fw.write(",".join([
                "molecule_id",
                "inchi_key",
                "smiles",
            ]) + "\n")
            for m in queryset.order_by("id"):
                fw.write(",".join([
                    str(m.id),
                    m.inchi_key,
                    m.smiles(),
                ]) + "\n")
class MoleculeModel(models.Model):
    """Minimal model holding a single, nullable molecule column."""

    # May be NULL, so rows without a structure are allowed.
    molecule = models.MolField(null=True)
class SmilesModel(models.Model):
    """Model pairing a SMILES string with a nullable molecule column."""

    # May be left blank in forms but is never NULL in the database.
    smiles = models.CharField(max_length=2048, null=False, blank=True)

    # May be NULL.
    molecule = models.MolField(null=True)
class Molecule(models.Model):
    """
    Represents one molecule.

    Most columns are derived in ``save()`` from a molfile, SMILES string,
    RDKit molecule or InChI string passed to that method.
    """

    # fields, which can be calculated on save():
    rdmol = models.MolField()
    internal_id = models.CharField(max_length=32, db_index=True)
    image_svg = models.TextField(null=True)
    mw = models.FloatField(db_index=True)
    sum_formula = models.CharField(max_length=32, db_index=True)
    fingerprint = models.CharField(max_length=1024, db_index=True)
    inchi = models.TextField(db_index=True)
    inchi_key = models.CharField(max_length=27, db_index=True)
    name = models.TextField(db_index=True, null=True)
    smiles = models.TextField(db_index=True)

    amount = models.FloatField()
    created = models.DateTimeField(auto_now_add=True)

    # SMILES of molecules for which no SVG image is generated
    # (they cause RDKit to hang).
    EXCLUDED_MOLECULES = ["C", "CH3", "CH4", "[CH3]", "[C]", "[CH4]"]

    def __str__(self):
        return "Molecule ({id}): '{name}', formula: '{formula}'".format(
            id=self.internal_id, name=self.name, formula=self.sum_formula)

    def save(self, smiles=None, molfile=None, rdmol=None, inchi=None,
             name=None, update=False, *args, **kwargs):
        """Derive all computed columns from a structure and store the row.

        Args:
            smiles, molfile, rdmol, inchi: structure input. The first truthy
                one is used, in the order molfile, smiles, rdmol, inchi.
            name: optional name; when omitted the molecule's "LONGNAME"
                property is used if present, otherwise None.
            update: when true, skip every derivation and just call the base
                ``save()``.

        Raises:
            MoleculeCreationError: if no structure input was given or it
                could not be turned into a molecule.
            MoleculeExistsInDatabase: if the InChI check fails (see below).
        """
        if update:
            super(Molecule, self).save(*args, **kwargs)
            return

        # Start from None so that a call without any structure input raises
        # MoleculeCreationError instead of an UnboundLocalError.
        mol = None
        if molfile:
            mol = AllChem.MolFromMolBlock(molfile)
        elif smiles:
            mol = AllChem.MolFromSmiles(smiles)
        elif rdmol:
            mol = rdmol
        elif inchi:
            mol = AllChem.MolFromInchi(inchi)

        if not mol:
            raise self.MoleculeCreationError

        # From here on both identifiers come from the parsed molecule, not
        # from the caller's arguments.
        inchi = AllChem.MolToInchi(mol)
        smiles = AllChem.MolToSmiles(mol)

        # NOTE: an empty or one-character InChI is reported the same way as
        # a duplicate; kept for callers that catch this exception type.
        is_new = (
            inchi
            and len(inchi) > 1
            and not Molecule.objects.filter(inchi=inchi).exists()
        )
        if not is_new:
            raise self.MoleculeExistsInDatabase(smiles)

        self.inchi = inchi
        self.mw = float("{0:.2f}".format(AllChem.CalcExactMolWt(mol)))
        self.sum_formula = AllChem.CalcMolFormula(mol)
        self.fingerprint = AllChem.GetMorganFingerprintAsBitVect(
            mol, 4, nBits=1024).ToBitString()
        self.inchi_key = AllChem.InchiToInchiKey(self.inchi)
        # Plain instance attribute: this class declares no ``molfile`` field.
        self.molfile = AllChem.MolToMolBlock(mol)
        self.smiles = smiles
        self.rdmol = mol

        # generating SVG image
        if self.smiles not in self.EXCLUDED_MOLECULES:
            binMol = AllChem.Mol(self.rdmol.ToBinary())
            if not binMol.GetNumConformers():
                rdDepictor.Compute2DCoords(self.rdmol)
            drawer = rdMolDraw2D.MolDraw2DSVG(100, 100)
            drawer.DrawMolecule(self.rdmol)
            drawer.FinishDrawing()
            svg = drawer.GetDrawingText().replace('svg:', '')
            # remove first line containing XML meta information
            self.image_svg = "\n".join(svg.split("\n")[1:]).strip()
        else:
            self.image_svg = None

        if name:
            self.name = name
        else:
            try:
                self.name = mol.GetProp("LONGNAME")
            except KeyError:
                self.name = None

        # NOTE(review): the id is derived from the latest row without any
        # locking, so concurrent saves could pick the same value -- confirm
        # whether that matters here.
        if Molecule.objects.exists():
            self.internal_id = "MI-J-{}".format(
                Molecule.objects.latest("id").id + 1)
        else:
            self.internal_id = "MI-J-1"

        super(Molecule, self).save(*args, **kwargs)

    class Meta:
        ordering = ['id']

    class MoleculeExistsInDatabase(Exception):
        """Raised by ``save()`` when the InChI check fails."""

        def __init__(self, smiles):
            # Call the direct base explicitly; ``super(Exception, self)``
            # skipped ``Exception`` in the MRO.
            Exception.__init__(self, smiles)
            self.smiles = smiles
            self.message = "Cannot add the molecule: it already exists in database."

    class MoleculeCreationError(Exception):
        """Raised by ``save()`` when no molecule could be built."""

        def __init__(self):
            Exception.__init__(self)
            self.message = "Cannot add the molecule: check your structure (valence etc.)."
class MoleculeModel(models.Model):
    """Minimal model holding a single molecule column."""

    # Declared with the field's default options (no ``null=True``).
    molecule = models.MolField()