class ChEMBLTargetData(models.Model):
    """Target metadata: UniProt accession, ChEMBL id, organism, name, description.

    New objects built from keyword arguments get a generated ``unique_id`` of
    the form ``MI-T-xxxxxxxx`` unless the caller supplies one.
    """

    # 'MI-T-' (5 chars) + 8 hex digits == 13 characters.
    unique_id = models.CharField(max_length=13, unique=True, blank=False)
    uniprot_accession = models.CharField(max_length=6, blank=False)
    chembl_id = models.CharField(max_length=16, blank=False)
    organism = models.CharField(max_length=128, blank=False)
    # NOTE: the misspelling is part of the field (and column) name; renaming it
    # would require a schema migration and caller updates.
    preffered_name = models.CharField(max_length=128, blank=False)
    description = models.TextField(blank=True)

    def _generate_id(self):
        """Return a fresh identifier: ``'MI-T-'`` followed by 8 hex digits.

        The digits are the first 8 characters of the MD5 hex digest of the
        current timestamp, scaled and packed as an 8-byte big-endian integer.
        """
        # 10e6 == 1e7, so this is the epoch time in 100 ns ticks (not seconds).
        ticks = int(datetime.now().timestamp() * 10e6)
        digest = hashlib.md5(ticks.to_bytes(8, byteorder='big')).hexdigest()
        return 'MI-T-' + digest[:8]

    def __init__(self, *args, **kwargs):
        """Build the instance, generating ``unique_id`` for new objects only.

        Args:
            *args: Positional field values, forwarded unchanged to Django.
            **kwargs: Keyword field values. ``unique_id`` is generated when it
                is absent and no positional values were given.
        """
        # Django rebuilds rows loaded from the database by passing the column
        # values positionally. Injecting a ``unique_id`` keyword in that case
        # duplicates a positional field (rejected by recent Django versions)
        # and must never replace the stored identifier.
        if not args:
            kwargs.setdefault('unique_id', self._generate_id())
        super().__init__(*args, **kwargs)

    def __str__(self):
        """Return the ``unique_id`` as the display string."""
        return self.unique_id

    class Meta:
        verbose_name = 'ChEMBL Target Data'
        # Same label for singular and plural.
        verbose_name_plural = verbose_name
class Compound(models.Model):
    """Chemical compound whose identifiers and descriptors are derived from an RDKit molecule.

    Constructing ``Compound(mol_as_RDmol, description)`` computes SMILES,
    InChI, InChIKey, exact molecular weight, heavy-atom count and ring count
    from the molecule and assigns a generated ``unique_id`` (``MI-M-xxxxxxxx``).
    """

    # 'MI-M-' (5 chars) + 8 hex digits == 13 characters.
    unique_id = models.CharField(max_length=13, unique=True, blank=False)
    description = models.TextField(blank=True)
    smiles = models.CharField(max_length=4096, blank=False, unique=True)
    inchi = models.CharField(max_length=4096, blank=False, unique=True)
    inchi_key = models.CharField(max_length=27, blank=False, unique=True)
    mol_weight_exact = models.FloatField(blank=False)
    heavy_atoms_count = models.IntegerField(blank=False)
    ring_count = models.IntegerField(blank=False)
    mol = models.MolField()
    # Fingerprint columns; nullable, and not populated by ``__init__``.
    torsionbv = models.BfpField(null=True)
    mfp2 = models.BfpField(null=True)
    ffp2 = models.BfpField(null=True)

    class MoleculeAlreadyExists(Exception):
        """Raised by ``save()`` when another row already has the same InChIKey."""

    def _generate_id(self):
        """Return a fresh identifier: ``'MI-M-'`` followed by 8 hex digits.

        The digits are the first 8 characters of the MD5 hex digest of the
        current timestamp, scaled and packed as an 8-byte big-endian integer.
        """
        # 10e6 == 1e7, so this is the epoch time in 100 ns ticks (not seconds).
        ticks = int(datetime.now().timestamp() * 10e6)
        digest = hashlib.md5(ticks.to_bytes(8, byteorder='big')).hexdigest()
        return 'MI-M-' + digest[:8]

    def __init__(self, *args, **kwargs):
        """Build a compound from an RDKit molecule, or pass raw field values through.

        Args:
            *args: With more than two positional values they are forwarded
                unchanged to Django's constructor (presumably rows being
                rebuilt from the database). Otherwise ``args[0]`` is the RDKit
                molecule and ``args[1]`` the description.
            **kwargs: ``mol_as_RDmol`` and ``description`` are used as
                fallbacks when the corresponding positional value is missing
                or falsy.

        Raises:
            RuntimeError: If no molecule is supplied either way.
        """
        if len(args) > 2:
            super().__init__(*args, **kwargs)
            return

        mol_as_RDmol = (args[0] if args else None) or kwargs.get('mol_as_RDmol')
        if not mol_as_RDmol:
            raise RuntimeError("No RDMol specified")

        description = (
            (args[1] if len(args) > 1 else None)
            or kwargs.get('description', '')
        )

        inchi = Chem.MolToInchi(mol_as_RDmol)
        super().__init__(
            description=description,
            unique_id=self._generate_id(),
            smiles=Chem.MolToSmiles(
                mol_as_RDmol, isomericSmiles=True, canonical=True),
            inchi=inchi,
            inchi_key=Chem.InchiToInchiKey(inchi),
            mol_weight_exact=Descriptors.ExactMolWt(mol_as_RDmol),
            heavy_atoms_count=Lipinski.HeavyAtomCount(mol_as_RDmol),
            ring_count=Lipinski.RingCount(mol_as_RDmol),
            mol=mol_as_RDmol,
        )

    def save(self, *args, **kwargs):
        """Save the compound unless a different row has the same InChIKey.

        Raises:
            Compound.MoleculeAlreadyExists: If another stored compound shares
                this instance's ``inchi_key``.
        """
        duplicates = Compound.objects.filter(inchi_key=self.inchi_key)
        if self.pk is not None:
            # An already-stored compound must not collide with its own row,
            # otherwise every update of an existing object would be rejected.
            duplicates = duplicates.exclude(pk=self.pk)
        if duplicates.exists():
            raise Compound.MoleculeAlreadyExists(
                "Molecule with the same InchiKey was found. Cannot save.")
        super().save(*args, **kwargs)

    def __str__(self):
        """Return the ``unique_id`` as the display string."""
        return self.unique_id
class CtabModel(models.Model):
    """Model with a single text column, ``ctab``."""

    # May be left blank, but is never stored as NULL.
    ctab = models.TextField(
        null=False,
        blank=True,
    )
class Molecule(models.Model):
    """Represents one molecule.

    Most fields are derived from the structure passed to ``save()``.
    """

    # fields, which can be calculated on save():
    rdmol = models.MolField()
    internal_id = models.CharField(max_length=32, db_index=True)
    image_svg = models.TextField(null=True)
    mw = models.FloatField(db_index=True)
    sum_formula = models.CharField(max_length=32, db_index=True)
    fingerprint = models.CharField(max_length=1024, db_index=True)
    inchi = models.TextField(db_index=True)
    inchi_key = models.CharField(max_length=27, db_index=True)
    name = models.TextField(db_index=True, null=True)
    smiles = models.TextField(db_index=True)
    amount = models.FloatField()
    created = models.DateTimeField(auto_now_add=True)

    # SMILES of molecules that are not rendered to SVG (they make RDKit hang).
    EXCLUDED_MOLECULES = ["C", "CH3", "CH4", "[CH3]", "[C]", "[CH4]"]

    def __str__(self):
        """Return a summary with the internal id, name and sum formula."""
        return "Molecule ({id}): '{name}', formula: '{formula}'".format(
            id=self.internal_id, name=self.name, formula=self.sum_formula)

    def _render_svg(self):
        """Return a 100x100 SVG drawing of ``self.rdmol`` without its first line."""
        # Check a copy for existing conformers; 2D coordinates are computed on
        # the real molecule only when it has none.
        probe = AllChem.Mol(self.rdmol.ToBinary())
        if not probe.GetNumConformers():
            rdDepictor.Compute2DCoords(self.rdmol)
        drawer = rdMolDraw2D.MolDraw2DSVG(100, 100)
        drawer.DrawMolecule(self.rdmol)
        drawer.FinishDrawing()
        svg = drawer.GetDrawingText().replace('svg:', '')
        # Remove the first line, which contains the XML meta information.
        return "\n".join(svg.split("\n")[1:]).strip()

    def save(self, smiles=None, molfile=None, rdmol=None, inchi=None,
             name=None, update=False, *args, **kwargs):
        """Derive all calculated fields from a structure and store the molecule.

        The structure is taken from the first truthy input in the order
        ``molfile``, ``smiles``, ``rdmol``, ``inchi``.

        Args:
            smiles: SMILES string of the structure.
            molfile: Mol block of the structure.
            rdmol: Ready-made RDKit molecule.
            inchi: InChI string of the structure.
            name: Name to store; when falsy, the molecule's ``LONGNAME``
                property is used, or ``None`` if it has none.
            update: When true, skip all calculations and the duplicate check
                and save the instance as it is.
            *args: Forwarded to Django's ``save()``.
            **kwargs: Forwarded to Django's ``save()``.

        Raises:
            Molecule.MoleculeCreationError: If no structure input was given or
                it could not be turned into a molecule.
            Molecule.MoleculeExistsInDatabase: If a molecule with the same
                InChI is already stored (also raised when no usable InChI
                could be generated).
        """
        if update:
            super().save(*args, **kwargs)
            return

        # Start from None so that a call without any structure input raises
        # MoleculeCreationError below instead of an UnboundLocalError.
        mol = None
        if molfile:
            mol = AllChem.MolFromMolBlock(molfile)
        elif smiles:
            mol = AllChem.MolFromSmiles(smiles)
        elif rdmol:
            mol = rdmol
        elif inchi:
            mol = AllChem.MolFromInchi(inchi)
        if not mol:
            raise self.MoleculeCreationError()

        # From here on, work with the representations derived from the molecule.
        inchi = AllChem.MolToInchi(mol)
        smiles = AllChem.MolToSmiles(mol)

        # NOTE(review): an empty or one-character InChI is reported as
        # "exists in database" too; kept so callers see the same exception type.
        if (not inchi or len(inchi) <= 1
                or Molecule.objects.filter(inchi=inchi).exists()):
            raise self.MoleculeExistsInDatabase(smiles)

        self.inchi = inchi
        self.mw = float("{0:.2f}".format(AllChem.CalcExactMolWt(mol)))
        self.sum_formula = AllChem.CalcMolFormula(mol)
        self.fingerprint = AllChem.GetMorganFingerprintAsBitVect(
            mol, 4, nBits=1024).ToBitString()
        self.inchi_key = AllChem.InchiToInchiKey(self.inchi)
        # Not one of the fields declared above, so this is a plain attribute.
        self.molfile = AllChem.MolToMolBlock(mol)
        self.smiles = smiles
        self.rdmol = mol

        if self.smiles not in self.EXCLUDED_MOLECULES:
            self.image_svg = self._render_svg()
        else:
            self.image_svg = None

        if name:
            self.name = name
        else:
            try:
                self.name = mol.GetProp("LONGNAME")
            except KeyError:
                self.name = None

        # NOTE(review): the id is computed from the latest row without any
        # locking; concurrent saves could presumably get the same value.
        if not Molecule.objects.exists():
            self.internal_id = "MI-J-1"
        else:
            self.internal_id = "MI-J-{}".format(
                Molecule.objects.latest("id").id + 1)

        super().save(*args, **kwargs)

    class Meta:
        ordering = ['id']

    class MoleculeExistsInDatabase(Exception):
        """Raised by ``save()`` when the molecule is already stored."""

        def __init__(self, smiles):
            super().__init__(smiles)
            self.smiles = smiles
            self.message = "Cannot add the molecule: it already exists in database."

    class MoleculeCreationError(Exception):
        """Raised by ``save()`` when no molecule could be built from the input."""

        def __init__(self):
            super().__init__()
            self.message = "Cannot add the molecule: check your structure (valence etc.)."
class MolSetExport(models.Model):
    """A named export that references one ``MolSet`` and one ``MolSetExporter``."""

    name = models.CharField(
        null=False,
        blank=False,
        max_length=128,
    )
    description = models.TextField(
        blank=True,
        max_length=10000,
    )
    # Reachable from the molecule set through the ``exports`` reverse accessor;
    # deleted together with the set (CASCADE).
    molset = models.ForeignKey(
        MolSet,
        null=False,
        related_name='exports',
        on_delete=models.CASCADE,
    )
    # Deleted together with the exporter (CASCADE).
    exporter = models.ForeignKey(
        MolSetExporter,
        null=False,
        on_delete=models.CASCADE,
    )