예제 #1
0
 def testSameCanSpiro(self):
     """Equivalent spellings of one spiro molecule must share a single
     canonical SMILES."""
     def canonical(smiles):
         # First whitespace-separated token of the "can" output.
         return pybel.readstring("smi", smiles).write("can").split()[0]

     expected = canonical("C1CN[C@]12CCCN2")
     variants = ('C1CN[C@]12CCCN2',
                 'C1CN[C@@]21CCCN2',
                 'C1CN[C@@]2(C1)CCN2')
     for variant in variants:
         self.assertEqual(expected, canonical(variant), variant)
예제 #2
0
 def testSquarePlanar(self):
     """Tighten up the parsing of SP stereochemistry in SMILES"""
     accepted = (
             "C[S@SP1](Cl)(Br)I",
             "C[S@SP2](Cl)(Br)I",
             "C[S@SP3](Cl)(Br)I",
             )
     # Reading any of these raises an error.
     rejected = (
             "C[S@SP0](Cl)(Br)I",
             "C[S@SP4](Cl)(Br)I",
             "C[S@@SP1](Cl)(Br)I",
             "C[S@SP11](Cl)(Br)I",
             "C[S@SO1](Cl)(Br)I",
             )
     # These only produce a warning; the molecule is still read.
     tolerated = (
             "C[S@SP1](Cl)(Br)(F)I",
             "C[S@SP1](Cl)(Br)(F)1CCCC1",
             )
     # Each group is paired with whether reading is expected to succeed.
     groups = ((accepted, True), (rejected, False), (tolerated, True))
     for smiles_group, should_parse in groups:
         for smi in smiles_group:
             if should_parse:
                 mol = pybel.readstring("smi", smi)
                 self.assertTrue(mol.OBMol.GetData(ob.StereoData))
             else:
                 self.assertRaises(IOError, pybel.readstring, "smi", smi)
예제 #3
0
def canonicalize(lig, preserve_bond_order=False):
    """Get the canonical atom order for the ligand.

    A new OBMol is constructed from ``lig.OBMol``. Unless
    ``preserve_bond_order`` is true, every bond of that molecule is set to
    order 1; its stereo data is deleted. The result is written as canonical
    SMILES, read back as a reference molecule (hydrogens removed) and
    matched against the working molecule with ``get_isomorphisms``.

    :param lig: pybel molecule; only its ``OBMol`` attribute is read here
    :param preserve_bond_order: keep the bond orders instead of setting
        them all to 1
    :return: list of 1-based atom indices built from the first isomorphism,
        or ``None`` when the canonical SMILES is empty, cannot be read back,
        or no isomorphism is found
    """
    obmol = pybel.ob.OBMol(lig.OBMol)
    if not preserve_bond_order:
        for bond in pybel.ob.OBMolBondIter(obmol):
            if bond.GetBondOrder() != 1:
                bond.SetBondOrder(1)
    obmol.DeleteData(pybel.ob.StereoData)
    lig = pybel.Molecule(obmol)

    testcan = lig.write(format='can')
    try:
        # Parse once; the original parsed the same string twice.
        reference = pybel.readstring('can', testcan)
    except IOError:
        return None
    if testcan == '':
        return None

    reference.removeh()
    # All isomorphisms between the reference and the working molecule.
    isomorphs = get_isomorphisms(reference, lig)
    if len(isomorphs) == 0:
        return None

    # Each pair is stored as pair[1] -> pair[0], both shifted to 1-based.
    # NOTE(review): presumably pair[0] indexes the reference and pair[1]
    # the ligand - confirm against get_isomorphisms.
    smi_dict = {int(pair[1]) + 1: int(pair[0]) + 1 for pair in isomorphs[0]}
    return [smi_dict[idx + 1] for idx in range(len(lig.atoms))]
예제 #4
0
 def testCan(self):
     """The canonical SMILES of ``self.mol`` must be reproduced when it is
     regenerated from its plain SMILES and from its canonical SMILES."""
     expected = self.mol.write("can").split()[0]
     plain = self.mol.write("smi").split()[0]
     for source in (plain, expected):
         regenerated = pybel.readstring("smi", source).write("can").split()[0]
         self.assertEqual(expected, regenerated)
예제 #5
0
    def testReadingMassDifferenceInMolfiles(self):
        """Previously we were rounding incorrectly when reading the mass diff"""
        template = """
 OpenBabel02181811152D

  1  0  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 %2s %2d  0  0  0  0  0  0  0  0  0  0  0
M  END
"""
        # Positive test cases:
        # These are the BIOVIA Draw answers for the first 50 elements for
        # a mass diff of 1
        answers = [2,5,8,10,12,13,15,17,20,21,24,25,28,29,32,33,36,41,40,41,46,49,52,53,56,57,60,60,65,66,71,74,76,80,81,85,86,89,90,92,94,97,99,102,104,107,109,113,116,120,123]
        for idx, answer in enumerate(answers):
            elem = idx + 1
            molfile = template % (ob.GetSymbol(elem), 1)
            mol = pybel.readstring("mol", molfile).OBMol
            iso = mol.GetAtom(1).GetIsotope()
            self.assertEqual(answer, iso)

        # Also test D and T - BIOVIA Draw ignores the mass diff
        for elem, answer in zip("DT", [2, 3]):
            molfile = template % (elem, 1)
            mol = pybel.readstring("mol", molfile).OBMol
            iso = mol.GetAtom(1).GetIsotope()
            self.assertEqual(answer, iso)

        # Negative test cases:
        # Test error message for out-of-range values
        for value in [5, -4]:
            molfile = template % ("C", value)
            mol = pybel.readstring("mol", molfile).OBMol
            iso = mol.GetAtom(1).GetIsotope()
            self.assertEqual(0, iso)
예제 #6
0
파일: views.py 프로젝트: katrakolsek/DoTS
def prediction(request):
    """
    Form for submitting user calculations.

    Without ``submitdocking`` in the POST data an empty form is rendered.
    With it, the submitted SMILES is test-parsed with pybel; on success a
    docking entry is registered under a random 10-letter id and the user is
    redirected to ``/docking/<id>/``. On failure the empty form is rendered
    again together with a list of error messages.

    :param request: the Django request being handled
    :return: a rendered ``prediction.html`` response or a redirect
    """
    allreceptors = Receptor.objects.all()

    if 'submitdocking' not in request.POST:
        return render(request, 'prediction.html',
                      {'form': SubmitDocking(), 'allreceptors': allreceptors})

    form = SubmitDocking(request.POST)
    # is_valid() populates cleaned_data; only the two fields used below are
    # required here, so other fields keep being ignored as before.
    form.is_valid()
    cleaned = form.cleaned_data

    error = []
    smiles = name = None
    if 'smiles' not in cleaned or 'name' not in cleaned:
        # Previously this case raised KeyError (HTTP 500).
        error.append("Invalid form data")
    else:
        name = cleaned['name']
        try:
            smiles = str(cleaned['smiles'])
            pybel.readstring("smi", smiles)
        except Exception:
            # Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            error.append("Error in SMILES or compound molecular weight too big")

    if error:
        return render(request, 'prediction.html',
                      {'form': SubmitDocking(), 'error': error,
                       'allreceptors': allreceptors})

    uniquestring = ''.join(random.choice(string.ascii_lowercase) for x in range(10))
    adddocking(uniquestring, smiles, name)
    return HttpResponseRedirect('/docking/%s/' % uniquestring)
예제 #7
0
    def _generate_conformers(self, input_sdf, n_conf=10, method="rmsd"):
        """Conformer generation.

        Given an input sdf string, call obabel to construct a specified
        number of conformers.

        :param input_sdf: SDF text of the molecule, fed to obabel on stdin
        :param n_conf: number of conformers requested; 0 skips obabel and
            returns the input molecule alone
        :param method: currently unused; scoring is always ``rmsd``
        :return: list of pybel molecules, one per record in obabel's output
        :raises subprocess.CalledProcessError: obabel exited non-zero
        :raises ValueError: obabel produced no SDF record
        """
        import os
        import re
        import subprocess
        import pybel as pb

        if n_conf == 0:
            return [pb.readstring("sdf", input_sdf)]

        # Argument list + stdin instead of interpolating the SDF text into
        # a shell command line: quotes, `$` or backticks in the input can
        # no longer be interpreted by a shell.
        command = ["obabel", "-i", "sdf", "-o", "sdf", "--conformer",
                   "--nconf", "%d" % n_conf, "--score", "rmsd",
                   "--writeconformers"]
        with open(os.devnull, "w") as devnull:
            proc = subprocess.Popen(command,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=devnull,
                                    universal_newlines=True)
            # `echo` used to append a newline; keep doing so.
            sdf, _ = proc.communicate(input_sdf + "\n")
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, command,
                                                output=sdf)

        # Each molecule in the sdf output begins with the 'OpenBabel'
        # string; anything before the first occurrence is discarded.
        starts = [match.start() for match in re.finditer('OpenBabel', sdf)]
        if not starts:
            raise ValueError("obabel produced no SDF records")
        bounds = starts + [len(sdf)]
        # The newline at the beginning is needed for obabel to recognize
        # the sdf format.
        return [pb.readstring("sdf", '\n' + sdf[lo:hi])
                for lo, hi in zip(bounds, bounds[1:])]
예제 #8
0
 def testReadSmi(self):
     """Molecules re-read from the plain and from the canonical SMILES of
     ``self.mol`` must both write the same canonical output."""
     expected = self.mol.write("can")
     plain = self.mol.write("smi")
     reparsed = [pybel.readstring("smi", text) for text in (plain, expected)]
     for mol in reparsed:
         self.assertEqual(expected, mol.write("can"))
예제 #9
0
파일: logP.py 프로젝트: IU9-BMSTU/Synthesio
def print_results(results):
    """Print a table for the first five entries of `results`.

    :param results: sequence of ``(mol, predict)`` pairs, where ``mol`` is
        an iterable of strings that are joined into one SMILES string and
        ``predict`` is shown in the 'predicted quality' column
    """
    t = PrettyTable(['smiles', 'predicted quality', 'logP', 'molwt'])
    for mol, predict in results[:5]:
        smiles = ''.join(mol)
        # Parse each SMILES once and reuse it for both descriptors.
        parsed = pybel.readstring('smi', smiles)
        t.add_row([smiles, predict,
                   parsed.calcdesc(['logP'])['logP'],
                   parsed.molwt])
    # Single-argument call form behaves the same on Python 2 and 3.
    print(t)
예제 #10
0
    def testOBMolSeparatePreservesAtomOrder(self):
        """Originally Separate() preserved DFS order rather
        than atom order"""
        # First test
        smi = "C123.F3.Cl2.Br1"
        mol = pybel.readstring("smi", smi)
        atomicnums = [atom.OBAtom.GetAtomicNum() for atom in mol]
        mols = mol.OBMol.Separate()
        new_atomicnums = [atom.OBAtom.GetAtomicNum() for atom in pybel.Molecule(mols[0])]
        for x, y in zip(atomicnums, new_atomicnums):
            self.assertEqual(x, y) # check that the atoms have not been permuted
        # Second test
        xyz = """6
examples/water_dimer.xyz
O          0.12908       -0.26336        0.64798
H          0.89795        0.28805        0.85518
H          0.10833       -0.20468       -0.33302
O          0.31020        0.07569       -2.07524
H          0.64083       -0.57862       -2.71449
H         -0.26065        0.64232       -2.62218
"""
        mol = pybel.readstring("xyz", xyz)
        mols = mol.OBMol.Separate()
        allatoms = pybel.Molecule(mols[0]).atoms + pybel.Molecule(mols[1]).atoms
        for idx, atom in enumerate(allatoms):
            xcoord = atom.OBAtom.GetX()
            orig_xcoord = mol.OBMol.GetAtom(idx+1).GetX()
            self.assertEqual(xcoord, orig_xcoord)
예제 #11
0
 def testMOL(self):
     """Roundtrip thru MOL file"""
     smi = "C[CH3:6]"
     mol = pybel.readstring("smi", smi)
     molfile = mol.write("mol", opt={"a": True})
     # NOTE(review): the SMILES below is written from `mol`, not from the
     # re-read `molb`, so the assertion does not depend on what was read
     # back from the MOL text - confirm whether that is intended.
     molb = pybel.readstring("mol", molfile)
     smiles_options = {"a": True, "n": True, "nonewline": True}
     out = mol.write("smi", opt=smiles_options)
     self.assertEqual(smi, out)
예제 #12
0
 def testAtom4Refs(self):
     """For every molecule in ``self.mols`` the canonical SMILES must be
     reproduced from both its plain and its canonical SMILES."""
     for mol in self.mols:
         expected = mol.write("can")
         plain = mol.write("smi")
         for source in (plain, expected):
             regenerated = pybel.readstring("smi", source).write("can")
             self.assertEqual(expected, regenerated)
예제 #13
0
 def testSmilesParsingAndWritingOfLargeIsotopes(self):
     """Isotope labels of one to four digits round-trip through SMILES,
     a five-digit label is rejected, and an isotope of 65535 set directly
     on the atom is expected to be written as plain "[C]"."""
     for smi in ("[1C]", "[11C]", "[111C]", "[1111C]"):
         written = pybel.readstring("smi", smi).write("smi")
         self.assertEqual(written.rstrip(), smi)
     self.assertRaises(IOError, pybel.readstring, "smi", "[11111C]")
     carbon = pybel.readstring("smi", "[C]")
     carbon.atoms[0].OBAtom.SetIsotope(65535)
     self.assertEqual(carbon.write("smi").rstrip(), "[C]")
예제 #14
0
 def testSettingSpinMult(self):
     """Set spin and read/write it"""
     methane = pybel.readstring("smi", "C")
     methane.atoms[0].OBAtom.SetSpinMultiplicity(2)
     molfile = methane.write("mol")
     # The radical record is expected on the sixth line of the MOL text.
     lines = molfile.split("\n")
     self.assertEqual("M  RAD  1   1   2", lines[5])
     reread = pybel.readstring("mol", molfile)
     self.assertEqual(2, reread.atoms[0].OBAtom.GetSpinMultiplicity())
     self.assertEqual(4, reread.atoms[0].OBAtom.GetImplicitHCount())
예제 #15
0
 def testRGroup(self):
     """[*:1] is converted to R1 in MOL file handling"""
     smi = "[*:6]C"
     mol = pybel.readstring("smi", smi)
     molfile = mol.write("mol")
     self.assertTrue("M  RGP  1   1   6" in molfile)
     # NOTE(review): the SMILES below is written from `mol`, not from the
     # re-read `molb`, so the final assertion does not depend on what was
     # read back from the MOL text - confirm whether that is intended.
     molb = pybel.readstring("mol", molfile)
     smiles_options = {"a": True, "n": True, "nonewline": True}
     out = mol.write("smi", opt=smiles_options)
     self.assertEqual(smi, out)
예제 #16
0
 def testInChIIsotopes(self):
     """Ensure that we correctly set and read isotopes in InChIs"""
     datafile = os.path.join(here, "inchi", "inchi_isotopes.txt")
     with open(datafile) as inp:
         # Lines starting with "#" are skipped; every other line holds a
         # tab-separated SMILES / InChI pair.
         records = (line for line in inp if not line.startswith("#"))
         for record in records:
             smi, inchi = record.rstrip().split("\t")
             minchi = pybel.readstring("smi", smi).write("inchi").rstrip()
             self.assertEqual(minchi, inchi)
             msmi = pybel.readstring("inchi", minchi).write("smi").rstrip()
             self.assertEqual(msmi, smi)
예제 #17
0
 def testAtomMapsAfterDeletion(self):
     """Removing atoms/hydrogens should not mess up the atom maps"""
     def mapped_smiles(molecule):
         # SMILES written with the "a" option, trailing whitespace removed.
         return molecule.write("smi", opt={"a": True}).rstrip()

     for smi in ("C[NH2:2]", "[CH3:1][NH2:2]"):
         mol = pybel.readstring("smi", smi)
         obmol = mol.OBMol
         obmol.DeleteAtom(obmol.GetAtom(1))
         self.assertEqual(mapped_smiles(mol), "[NH2:2]")
     mol = pybel.readstring("smi", "[H]C[NH:2]")
     mol.removeh()
     self.assertEqual(mapped_smiles(mol), "C[NH:2]")
예제 #18
0
def main():
	"""Count ring-system fragments over all molecules of the input file.

	The input file is taken from ``sys.argv[1]`` and its format from the
	file extension. For each molecule the hydrogens are deleted, the SMILES
	is re-read, ring systems are collected and the fragments returned by
	``GetCanonicalFragments`` are counted. The fragments are then written
	to ``fragments.sdf`` in descending order of count, each carrying its
	count in the ``COUNT`` data field.

	Exits with status 1 when no input file is given, and with an error
	message when the file name has no extension.
	"""
	import os

	if len(sys.argv) < 2:
		print("No input file provided: Murcko.py filetosprocess.ext")
		print("The script will determine which file type to read from by the extension.")
		print("It is recommended you run your structures through,\nfor example, ChemAxon's Standardizer first.")
		sys.exit(1)

	infile = sys.argv[1]
	# Take the extension from the base name only, so dots in directory
	# names ("./mols.sdf") or in the stem ("a.b.sdf") no longer select the
	# wrong token. A trailing ".gz" is skipped so that "mols.sdf.gz" still
	# resolves to "sdf" as it did before.
	stem, ext = os.path.splitext(os.path.basename(infile))
	if ext.lower() == '.gz':
		ext = os.path.splitext(stem)[1]
	fileformat = ext[1:]
	if not fileformat:
		sys.exit("Cannot determine the file format from the extension of %s" % infile)

	molnum = 0
	Fragments = dict()
	for mol in pybel.readfile(fileformat, infile):
		molnum += 1
		if not (molnum % 10):
			print("Molecules processed: %s" % molnum)
		mol.OBMol.DeleteHydrogens()
		smiles = mol.write("smi").split("\t")[0]
		canmol = pybel.readstring("smi", smiles)
		FusedRingsMatrix = GetFusedRingsMatrix(canmol)
		FusedRings = GetFusedRings(FusedRingsMatrix, len(canmol.sssr))
		RingSystems = GetAtomsInRingSystems(canmol, FusedRings, inclexo=True)
		# Deleting all non-ring atoms is done in GetCanonicalFragments().
		# Get all rings/ring systems
		for frag in GetCanonicalFragments(smiles, RingSystems):
			Fragments[frag] = Fragments.get(frag, 0) + 1

	# Write results to file
	print("Writing results to file.")
	out = pybel.Outputfile("sdf", "fragments.sdf", overwrite=True)
	try:
		for k, v in sorted(Fragments.items(), key=itemgetter(1), reverse=True):
			mol = pybel.readstring("smi", k)
			mol.data["COUNT"] = v
			mol.OBMol.DeleteHydrogens()
			out.write(mol)
	finally:
		# Close the output even if writing a fragment fails.
		out.close()
예제 #19
0
 def testCML(self):
     """OB stores atom classes using _NN at the end of atom ids"""
     cases = ("[CH3:6]C",
              "[CH3:6][OH:6]",
              "O"+"[CH2:2]"*27+"O")
     smiles_options = {"a": True, "n": True, "nonewline": True}
     for smi in cases:
         mol = pybel.readstring("smi", smi)
         cml = mol.write("cml")
         # NOTE(review): the CML text is read back with the "mol" format id
         # and the result `molb` is never used; the SMILES below is written
         # from `mol` - confirm whether that is intended.
         molb = pybel.readstring("mol", cml)
         out = mol.write("smi", opt=smiles_options)
         self.assertEqual(smi, out)
예제 #20
0
 def testSmilesAtomOrder(self):
     """Ensure that SMILES atom order is written correctly"""
     key = "SMILES Atom Order"
     expectations = (("CC", "1 2"),
                     ("O=CCl", "3 2 1"))
     for smi, atomorder in expectations:
         mol = pybel.readstring("smi", smi)
         # Writing with the "O" option is expected to store the order
         # under `key` in the molecule's data.
         mol.write("can", opt={"O": True})
         self.assertEqual(mol.data[key], atomorder)
     # Without the option the key must be absent.
     plain = pybel.readstring("smi", "CC")
     plain.write("can")
     self.assertFalse(key in plain.data)
예제 #21
0
    def testFuzzingTestCases(self):
        """Ensure that fuzzing testcases do not cause crashes"""
        # rejected as invalid smiles
        rejected = (r"\0", "&0", "=&",
                    "[H][S][S][S@S00]0[S][S@S00H](0[S@S00][S])0n")
        for smi in rejected:
            self.assertRaises(IOError, pybel.readstring, "smi", smi)

        # warning and stereo ignored; reading must simply not raise
        tolerated = ("c0C[C@H](B)00O0",)
        for smi in tolerated:
            pybel.readstring("smi", smi)
예제 #22
0
 def testSmilesToMol(self):
     """Check the value in columns 48-53 of the first atom line of the MOL
     output for several carbon SMILES, and that the MOL text reads back to
     the same SMILES."""
     expectations = (("C", 0), ("[CH3]", 3), ("[CH2]", 2),
                     ("[CH2]C", 3), ("[C]", 15))
     for smi, valence in expectations:
         molfile = pybel.readstring("smi", smi).write("mol")
         atomline = molfile.split("\n")[4]
         self.assertEqual(valence, int(atomline[48:53]))
         # test molfile->smiles
         roundtrip = pybel.readstring("mol", molfile).write("smi")
         self.assertEqual(smi, roundtrip.rstrip())
예제 #23
0
 def testImplicitCisDblBond(self):
     """Ensure that dbl bonds in rings of size 8 or less are always
     implicitly cis"""
     def roundtrip(ring_smiles):
         return pybel.readstring("smi", ring_smiles).write("smi")

     chain = "C1/C=C/C"
     # Closing the chain gives a ring of `ringsize` atoms; one carbon is
     # appended after each check.
     for ringsize in range(4, 9):
         self.assertTrue("/" not in roundtrip(chain + "1"))
         chain += "C"
     # The chain now closes a 9-membered ring, where the bond symbols are
     # expected to be kept.
     self.assertTrue("/" in roundtrip(chain + "1"))
예제 #24
0
 def testSmiToSmi(self):
     """Stereo markers survive a SMILES round trip by default and are
     expected to be dropped when reading with the "S" option."""
     def rewrite(smiles, **read_kwargs):
         return pybel.readstring("smi", smiles, **read_kwargs).write("smi")

     tet = "[C@@H](Br)(Br)Br"
     cistrans = r"C/C=C(\C)/C"
     # Should preserve stereo
     self.assertTrue("@" in rewrite(tet))
     self.assertTrue("/" in rewrite(cistrans))
     # Should wipe stereo
     wipe = {"S": True}
     self.assertFalse("@" in rewrite(tet, opt=wipe))
     self.assertFalse("/" in rewrite(cistrans, opt=wipe))
예제 #25
0
 def __init__(self, smilesFrag):
     """Build a fragment from the SMILES fragment string `smilesFrag`.

     Break markers are first replaced by Asterisk. If the resulting
     molecule is not matched by its own SMARTS, or the number of
     ``Fragment._nh`` hits differs from the number of
     ``Fragment._nhString`` occurrences in the SMARTS string, the
     molecule is rebuilt with Break replaced by WildCard instead.
     """
     self.smiles = smilesFrag.replace(Break, Asterisk)
     self._molSmiles = self._removedAtom(self.smiles, Asterisk)
     self.mol = readstring('smi', self._molSmiles)

     # Atom count excluding every WildCard occurrence and every literal
     # 'H' character found in the molecule SMILES.
     wildcardCount = self._molSmiles.count(WildCard)
     hydrogenCount = self._molSmiles.count('H')
     self.atoms = len(self.mol.atoms) - wildcardCount - hydrogenCount

     self.smartsString = self._removedAtom(self._molSmiles, WildCard)
     self._smarts = Smarts(self.smartsString)

     consistent = self.match(self.mol)
     if consistent:
         # Only evaluated when the match succeeded, as in a short-circuit.
         nhHits = len(Fragment._nh.findall(self.mol))
         consistent = nhHits == self.smartsString.count(Fragment._nhString)
     if not consistent:
         self.smiles = smilesFrag.replace(Break, WildCard)
         self._molSmiles = self._removedAtom(self.smiles, Asterisk)
         self.mol = readstring('smi', self._molSmiles)

     self.cansmiles = Fragment._converter.getSmiles(self.mol)
     self._fingerprint = None
     self.target = None
     self._childs = set()
예제 #26
0
def calculate(ID, smiles):
	"""Compute fingerprint features and append them to a CSV file.

	For every SMILES the fp2, fp3, fp4 and maccs fingerprints are
	calculated and their set bits are marked in one feature vector made of
	four consecutive segments. One line per molecule,
	``<id>,<smiles>,<feature values...>,0``, is appended to
	'openbabel_rdkit_test.csv'.

	Fixes over the previous version:
	* the maccs segment started at 1024+301 and therefore overlapped the
	  fp4 segment; it now starts after fp4 (1024+210+301);
	* pybel reports set bits numbered from 1, so ``bit - 1`` is used as
	  the position inside a segment.

	:param ID: sequence of identifiers, indexed in parallel with `smiles`
	:param smiles: sequence of SMILES strings
	"""
	print("Calculating Features ...")
	mols = [pybel.readstring("smi", smile) for smile in smiles]
	# (fingerprint type, segment width), in vector order.
	# NOTE(review): the widths are the ones the original code and its
	# comments assumed; they were not checked against Open Babel here.
	layout = (('fp2', 1024), ('fp3', 210), ('fp4', 301), ('maccs', 166))
	total = sum(width for _, width in layout)

	print("Storing Features")
	features = []
	for mol in mols:
		feature = np.zeros(total)
		offset = 0
		for fptype, width in layout:
			for bit in mol.calcfp(fptype=fptype).bits:
				feature[offset + bit - 1] = 1
			offset += width
		features.append(feature)

	# Build every row before touching the file, so that a length mismatch
	# between ID and smiles fails before anything is written.
	rows = []
	for i in range(len(smiles)):
		values = ','.join([str(v) for v in features[i]])
		rows.append("%s,%s,%s,%s\n" % (ID[i], smiles[i], values, 0))

	print("Saving into file...")
	with open('openbabel_rdkit_test.csv', 'a') as f:
		f.writelines(rows)
예제 #27
0
	def fromCML(self, cmlstr):
		"""
		Load this Structure from the CML text `cmlstr`.

		Tab characters are removed from the text before it is parsed.
		"""
		tabless = cmlstr.replace('\t', '')
		parsed = pybel.readstring('cml', tabless)
		self.fromOBMol(parsed.OBMol)
    def pocketSection(self):
        """Build the PKT section for the protein atoms near the ligand.

        The cleaned PDB text is parsed with pybel and its lines are paired
        in order with the parsed atoms. A line is kept when its atom lies
        closer than ``self.threshold`` to any ligand atom; the residue
        number is read from columns 22-26 of each kept line.

        The ligand is the first molecule of the file at ``self.lig_path``
        when that is an existing path, or ``self.lig_path`` itself when it
        is a pybel Molecule.

        :return: the section text: a ``PKT <n_residues> 1000 <title>``
            header, the kept lines and a closing ``TER`` line
        :raises Exception: ``self.lig_path`` is neither of the above
        """
        cleaned = self.__cleanedPdb()
        prt = pybel.readstring("pdb", cleaned)
        if isinstance(self.lig_path, str) and os.path.exists(self.lig_path):
            suffix = self.lig_path.split('.')[-1]
            # next() works on Python 2 and 3, unlike the .next() method.
            lig = next(pybel.readfile(suffix, self.lig_path))
        elif isinstance(self.lig_path, pybel.Molecule):
            lig = self.lig_path
        else:
            raise Exception("Wrong input for ligand")

        # Fetch the ligand coordinates once instead of per protein atom.
        lig_coords = [a.coords for a in lig.atoms]

        pkt_lines = []
        residues = set()
        for line, atom in zip(cleaned.split("\n")[:-1], prt.atoms):
            coords = atom.coords
            # The generator stops at the first ligand atom within range.
            if any(euclidean(coords, lc) < self.threshold
                   for lc in lig_coords):
                pkt_lines.append(line)
                residues.add(int(line[22:26]))

        # An empty title falls back to the last path component of the
        # ligand's own title.
        if self.title == "":
            title = lig.title.split('/')[-1]
        else:
            title = self.title
        start_pkt_line = "\nPKT %d 1000 %s\n" % (len(residues), title)

        return start_pkt_line + "\n".join(pkt_lines) + "\nTER\n"
예제 #29
0
def convert(data, in_format, out_format, pretty=True, add_h=False):
    """Converts between two inputted chemical formats.

    :param data: the chemical data as a string in `in_format`
    :param in_format: input format id; "json" selects the custom json
        format handled by ``json_to_pybel``
    :param out_format: output format id; "json" selects the custom json
        format produced by ``pybel_to_json``
    :param pretty: use ``json.dumps`` when true, ``json.compress``
        otherwise (only relevant for json output)
    :param add_h: add hydrogens before writing
    :return: the converted data as a string
    """
    # Decide on a json formatter depending on desired prettiness
    dumps = json.dumps if pretty else json.compress

    # Not doing this can cause segfaults in the underlying openbabel C++.
    # The encoded values must be kept: the original discarded the results
    # of encode(), which left the inputs unchanged.
    if not IS_PY3:
        in_format = in_format.encode("ascii")
        out_format = out_format.encode("ascii")
        data = data.encode("ascii", "replace")

    # If it's a json string, load it. NOTE: This is a custom chemical format
    if in_format == "json" and isinstance(data, str if IS_PY3 else basestring):
        data = json.loads(data)

    # These use the open babel library to interconvert, with additions for json
    if in_format == "json":
        mol = json_to_pybel(data)
    else:
        mol = pybel.readstring(in_format, data)

    # Infer structure in cases where the input format has no specification
    # or the specified structure is small
    if not mol.OBMol.HasNonZeroCoords() or len(mol.atoms) < 50:
        mol.make3D(steps=500)
    mol.OBMol.Center()

    if add_h:
        mol.addh()

    if out_format == "json":
        return dumps(pybel_to_json(mol))
    return mol.write(out_format)
예제 #30
0
def write_input_file(par,name,file_name):
    """Write the input file for job `name` to `file_name`.

    The file receives the title, the settings read from `par` (method,
    basis, qc, conformer_search, reaction_search, barrier_threshold, ga,
    ngen, ppn), the reaction families in ``par.jobs[name][1]`` and
    molecule data derived from the SMILES in ``par.jobs[name][0]``.

    NOTE(review): the file handle is not closed in the code visible here;
    this snippet appears to be cut off after the last line - confirm
    against the full source.
    """
    # 'w+' truncates an existing file.
    f = open(file_name,'w+')
    f.write('title = \'%s\'\n\n'%name)
    f.write('method = \'%s\'\n'%par.method)
    f.write('basis = \'%s\'\n'%par.basis)
    f.write('qc = \'%s\'\n'%par.qc)
    f.write('conformer_search = %i\n'%par.conformer_search)
    f.write('reaction_search = %i\n'%par.reaction_search)
    f.write('barrier_threshold = %.1f\n'%par.barrier_threshold)
    # Families are written as a list of single-quoted names.
    f.write('families = [%s]\n'%','.join(["'%s'"%fi for fi in par.jobs[name][1]]))
    f.write('ga = %i\n'%par.ga)
    f.write('ngen = %i\n'%par.ngen)
    f.write('ppn = %i\n\n'%par.ppn)
    
    # Parse the SMILES and add hydrogens so that the atom count written
    # below includes them.
    smi = par.jobs[name][0]
    obmol = pybel.readstring('smi',smi)
    obmol.OBMol.AddHydrogens()
    
    # The charge is always written as 0; it is not taken from the molecule.
    charge = 0
    f.write('charge = %i\n'%charge)
    mult = obmol.spin
    f.write('mult = %i\n'%mult)
    natom = len(obmol.atoms)
    f.write('natom = %i\n'%natom)
    f.write('smiles = \'%s\'\n'%smi)
    
    # A structure line is written only for jobs listed in par.structures.
    if name in par.structures: 
        f.write('structure = %s\n\n'%par.structures[name])
       (i, j, k, l))
 dbfile = []
 fin = open(
     inpath + '/Neutral' + '/%s/%s/%s/%s.st' % (i, j, k, l),
     'r')
 for s in fin:
     ss = s.replace('\n', '').split('\t')
     dbfile.append(ss)
 fin.close()
 totalcount += len(dbfile)
 print('Reading complete')
 for s in dbfile:
     smiles = s[3]
     idx = s[4]
     fullidx = 'Neutral_%s_%s_%s_%s_%s' % (i, j, k, l, idx)
     mol = pybel.readstring('smi', smiles)
     mol.addh()
     model = pt.PybelModel_To_Fragmenter(mol)
     subcount += 1
     if subcount > 10000:
         subcount = 0
         fout.close()
         subindex += 1
         fout = open(
             outpath + '/fraginput_%s.txt' % (subindex),
             'w')
         #fout=open(outpath+'/%s/fraginput_%s.txt'%(batch,subindex),'w');
     fout.write('%s\n' % fullidx)
     print(fullidx)
     fout.write(
         '%s,%s,%s,%s,%s' %
예제 #32
0
def select_molecules(sdf_dir, out_dir, begin=0, end=1000_000):
    '''
    Filter molecules in sdf.gz files by charge, n_heavy, element, components
    Then write the selected molecules in sdf files

    A molecule is kept when all the PubChem data fields read below are
    present and parseable, its total charge is 0, its formula has at most
    19 heavy atoms, it contains C and at least one of H/F/Cl/Br, it has no
    element outside C/H/O/N/F/Cl/Br, its SMILES has a single component,
    and the molecule parsed from the SMILES agrees with the recorded
    formula and charge.

    :param sdf_dir: directory holding the ``*.sdf.gz`` input files
    :param out_dir: directory receiving one ``CHONFClBr-NNNN.sdf`` per input
    :param begin: index of the first input file to process
    :param end: index one past the last input file to process
    :return: None
    '''
    sdf_list = [name for name in os.listdir(sdf_dir)
                if name.endswith('.sdf.gz')]
    print(len(sdf_list))

    required_any = {'H', 'F', 'Cl', 'Br'}
    allowed = {'C', 'H', 'O', 'N', 'F', 'Cl', 'Br'}
    # Fields that must exist but whose values are not used further; a
    # record lacking any of them is skipped, as before.
    presence_only = ('PUBCHEM_IUPAC_NAME',
                     'PUBCHEM_IUPAC_INCHI',
                     'PUBCHEM_CACTVS_SUBSKEYS')  # the last is base64 encoded

    for i, sdf in enumerate(sdf_list[begin:end]):
        sys.stdout.write('\r\t%i / %i' % (i + begin, len(sdf_list)))
        sys.stdout.flush()
        sdf_out = pybel.Outputfile(
            'sdf', os.path.join(out_dir, 'CHONFClBr-%04i.sdf' % (i + 1)))
        try:
            for m in pybel.readfile('sdf',
                                    os.path.join(sdf_dir, sdf),
                                    opt={'P': None}):
                try:
                    cid = int(m.data['PUBCHEM_COMPOUND_CID'])
                    formula = m.data['PUBCHEM_MOLECULAR_FORMULA']
                    smiles = m.data['PUBCHEM_OPENEYE_ISO_SMILES']
                    for key in presence_only:
                        m.data[key]
                    float(m.data['PUBCHEM_MOLECULAR_WEIGHT'])
                    charge = int(m.data['PUBCHEM_TOTAL_CHARGE'])
                    int(m.data['PUBCHEM_HEAVY_ATOM_COUNT'])
                except (KeyError, ValueError):
                    # Missing field or unparseable number: skip the record.
                    # Narrowed from a bare except so that interrupts and
                    # programming errors are no longer swallowed.
                    continue

                # Ignore ion
                if charge != 0:
                    continue

                f = Formula(formula)
                # Ignore large molecule
                if f.n_heavy > 19:
                    continue
                # Limit element
                atom_set = set(f.atomdict.keys())
                if ('C' not in atom_set
                        or not (atom_set & required_any)
                        or not atom_set <= allowed):
                    continue
                # Kick out mixture
                if '.' in smiles:
                    continue

                mol = pybel.readstring('smi', smiles)
                if mol.formula != formula or mol.charge != charge:
                    print('SMILES formula Error:', cid)
                    continue

                sdf_out.write(m)
        finally:
            # Close the output file even if reading or filtering fails.
            sdf_out.close()
예제 #33
0
 def testReadingBenzyne(self):
     """Check that benzyne is read correctly"""
     benzyne = pybel.readstring("smi", "c1cccc#c1")
     written = benzyne.write("smi").rstrip()
     self.assertEqual("C1=CC=CC#C1", written)
예제 #34
0
def convert_inchi_to_formula(inchi_string):
  """
  Return the formula of the molecule described by an InChI string.
  Depends on/uses openbabel (through pybel).
  """
  # Always cast to str: some applications hand over unicode values
  # (such as Django model fields).
  inchi_text = str(inchi_string)
  molecule = pybel.readstring('inchi', inchi_text)
  return molecule.formula
예제 #35
0
# reading input file with the desription of the desired new fragment
fname = args.infile
print "Reading description of the new fragment from the file ", fname
fp = open(fname, "r")
fragname = fp.readline().strip()
corestring = fp.readline().strip()
fragstring = []
s = fp.readline()
while s:
    if (len(s.strip()) > 0):
        fragstring.append(s.strip())
    s = fp.readline()
fp.close()

core = pybel.readstring("smi", corestring)
coreSMARTS = pybel.Smarts(corestring)

# checking for self-consistency
for f in fragstring:
    fmol = pybel.readstring("smi", f)
    res = coreSMARTS.findall(fmol)
    if (len(res) == 0):
        sys.exit("ERROR: cannot find core " + corestring +
                 " in the structure " + f)
    for i in xrange(1, fmol.OBMol.NumAtoms() + 1):
        atm = fmol.OBMol.GetAtom(i)
        if (i not in res[0]) and (atm.IsHydrogen() == False):
            bonded = 0
            for at in openbabel.OBAtomAtomIter(fmol.OBMol.GetAtom(i)):
                if at.GetIndex() + 1 in res[0]:
예제 #36
0
 def test_make3d(self):
     """make3d() on an adaptor wrapping a molecule read from SMILES must
     leave the wrapped OBMol with dimension 3."""
     obmol = pb.readstring("smi", "CCCC").OBMol
     adaptor = BabelMolAdaptor(obmol)
     adaptor.make3d()
     self.assertEqual(obmol.GetDimension(), 3)
 Ac_CollisionEnergyRecord, MS_FocusedIon, Ac_IonType in cursor:
     print(EntryID);
     spectrum=MSSpectrum();
     if Ac_MassSpecIonMode=='P':
         spectrum.parameters['mode']=1;
     else:
         spectrum.parameters['mode']=-1;
     if Ac_MassSpecType=='MS' or Ac_MassSpecType=='MS1':
         spectrum.parameters['level']=1;
     elif Ac_MassSpecType=='MS2':
         spectrum.parameters['level']=2;
     elif Ac_MassSpecType=='MS3':
         spectrum.parameters['level']=3;
     elif Ac_MassSpecType=='MS4':
         spectrum.parameters['level']=4;
     mol=pybel.readstring('smi',str(Ch_SMILES));
     mol.addh();
     charge=mol.charge;
     Ch_ExactMass=mol.exactmass;
     spectrum.parameters['dbsource']=DB_Source;
     spectrum.parameters['formula']=Ch_Formula;
     spectrum.parameters['exactmass']=Ch_ExactMass;
     spectrum.parameters['charge']=charge;
     spectrum.parameters['smiles']=Ch_SMILES;
     spectrum.parameters['inchi']=Ch_InChi[3:];
     
     sinchi=Ch_InChi[3:].split('/');
     shortinchi=sinchi[0];
     for j in range(1,len(sinchi)):
                 if sinchi[j][0]=='c' or sinchi[j][0]=='h':
                     shortinchi+='/'+sinchi[j];
예제 #38
0
def extractdata(folder):
    """Summarise the gzipped calculation logs found in *folder*.

    The SMILES strings are read, one per line, from
    ``<folder>/<basename of folder>.txt``.  Every ``*.gz`` file in *folder*
    is expected to be named ``<number>.<something>.gz`` where ``<number>`` is
    the index of its SMILES in that list.  Logs that contain an
    "Excitation energies and oscillator strength" section are parsed and one
    tab-separated row per log is written to ``<folder>/zindo.txt``: file
    number, SMILES, HOMO and LUMO energies, followed by one
    (converted energy, oscillator strength) pair per transition.  An
    ``<number>.xyz`` file is also written for each parsed log, except for one
    hard-coded SMILES that is skipped.

    :param folder: directory holding the SMILES list and the ``*.gz`` logs.
    """
    smiles_path = os.path.join(folder, os.path.basename(folder) + ".txt")
    with open(smiles_path, "r") as smiles_file:
        smiles = [x.rstrip() for x in smiles_file.readlines()]
    print(smiles)

    def getnum(path):
        # Use the file name itself rather than a fixed path depth, so the
        # numbering works wherever *folder* lives.
        return int(os.path.basename(path).split(".")[0])

    homos = []
    lumos = []
    trans = []
    convert = 1.0 / utils.convertor(1, "eV", "cm-1")

    # The context manager guarantees the summary is flushed and closed even
    # if parsing one of the logs raises.
    with open(os.path.join(folder, "zindo.txt"), "w") as archivefile:
        archivefile.write("\t".join([
            "File ID", "SMILES", "HOMO (eV)", "LUMO (eV)", "Trans (eV)",
            "Osc", "..."
        ]) + "\n")

        for filename in sorted(glob.glob(os.path.join(folder, "*.gz")),
                               key=getnum):

            number = getnum(filename)
            smile = smiles[number]
            with gzip.open(filename, "r") as gzfile:
                text = gzfile.read()

            # Skip logs that never reached the excited-state section.
            if text.find("Excitation energies and oscillator strength") < 0:
                continue
            lines = iter(text.split("\n"))

            # Drop everything up to and including the " Initial command"
            # line; only the remainder is handed to the parser.
            for line in lines:
                if line.startswith(" Initial command"):
                    break
            text = StringIO.StringIO("\n".join(list(lines)))

            logfile = ccopen(text)
            data = logfile.parse()

            assert (len(data.homos) == 1)
            homo = data.homos[0]
            homos.append(data.moenergies[0][homo])
            lumos.append(data.moenergies[0][homo + 1])
            trans.append(list(zip(data.etenergies, data.etoscs)))

            archivefile.write("%d\t%s\t%f\t%f" %
                              (number, smile, homos[-1], lumos[-1]))
            for x in trans[-1]:
                archivefile.write("\t%f\t%f" % (x[0] * convert, x[1]))
            archivefile.write("\n")

            # This one structure is deliberately excluded from xyz export.
            if smile != "c(s1)c(SN=N2)c2c1c(s1)c(SN=N2)c2c1c(s1)c(SN=N2)c2c1c(s1)c(SN=N2)c2c1c(s1)c(SN=N2)c2c1c(s1)c(SN=N2)c2c1":
                mol = pybel.readstring('g09', text.getvalue())
                mol.write("xyz",
                          os.path.join(folder, "%d.xyz" % number),
                          overwrite=True)

    print("%s: Created zindo.txt, plus various xyz files." % folder)
예제 #39
0
    def draw_molecule(self,
                      context,
                      center=(0, 0, 0),
                      show_bonds=True,
                      join=True):
        """Draw the molecule described by the scene's SMILES string.

        The SMILES text is read from ``context.scene.molecule.smile_format``,
        given 3D coordinates with pybel, and rendered as one sphere per atom
        plus (optionally) one to three cylinders per bond.

        :param context: Blender context providing ``scene.molecule.smile_format``.
        :param center: offset added to every atom and bond position.
        :param show_bonds: draw bond cylinders (ball-and-stick); when False
            only enlarged spheres are drawn.
        :param join: join all created shapes into a single object.
        :returns: ``{'FINISHED'}``.
        """
        smile_text = context.scene.molecule.smile_format
        molecule = pybel.readstring("smi", smile_text)
        molecule.make3D()

        shapes = []
        # Spheres are enlarged when no bonds are drawn.
        scale = 1 if show_bonds else 2.5
        bond_radius = atom_data['bond']['radius']

        # Template sphere that every atom copies; deleted once drawing is done.
        bpy.ops.mesh.primitive_uv_sphere_add()
        sphere = bpy.context.object

        # Initialize bond material if it's going to be used.
        if show_bonds:
            bpy.data.materials.new(name='bond')
            bpy.data.materials['bond'].diffuse_color = atom_data['bond'][
                'color']
            bpy.data.materials['bond'].specular_intensity = 0.2
            bpy.ops.mesh.primitive_cylinder_add()
            cylinder = bpy.context.object
            cylinder.active_material = bpy.data.materials['bond']

        for atom in molecule.atoms:
            element = atom.type
            if element not in atom_data:
                element = 'undefined'

            # Create each element's material lazily, the first time it is seen.
            if element not in bpy.data.materials:
                bpy.data.materials.new(name=element)
                bpy.data.materials[element].diffuse_color = (
                    atom_data[element]['color'])
                bpy.data.materials[element].specular_intensity = 0.2

            atom_sphere = sphere.copy()
            atom_sphere.data = sphere.data.copy()
            atom_sphere.location = [l + c for l, c in zip(atom.coords, center)]
            atom_sphere.dimensions = [
                atom_data[element]['radius'] * scale * 2
            ] * 3
            atom_sphere.active_material = bpy.data.materials[element]
            bpy.context.scene.collection.objects.link(atom_sphere)
            shapes.append(atom_sphere)

        bonds = openbabel.OBMolBondIter(molecule.OBMol) if show_bonds else []
        for bond in bonds:
            start = molecule.atoms[bond.GetBeginAtom().GetIndex()].coords
            end = molecule.atoms[bond.GetEndAtom().GetIndex()].coords
            diff = [s - e for s, e in zip(start, end)]
            # Midpoint of the bond, shifted by the same offset as the atoms so
            # that bonds stay attached when ``center`` is non-zero.
            cent = [(s + e) / 2 + c for s, e, c in zip(start, end, center)]
            mag = sum([(e - s)**2 for s, e in zip(start, end)])**0.5

            v_axis = Vector(diff).normalized()
            v_obj = Vector((0, 0, 1))
            v_rot = v_obj.cross(v_axis)

            # This check prevents gimbal lock (ie. weird behavior when v_axis is
            # close to (0, 0, 1))
            if v_rot.length > 0.01:
                v_rot = v_rot.normalized()
                axis_angle = [acos(v_obj.dot(v_axis))] + list(v_rot)
            else:
                v_rot = Vector((1, 0, 0))
                axis_angle = [0] * 4

            order = bond.GetBondOrder()
            if order not in range(1, 4):
                sys.stderr.write(
                    "Improper number of bonds! Defaulting to 1.\n")
                # Reset the local order; assigning to ``bond.GetBondOrder``
                # would only shadow the method and leave ``order`` invalid.
                order = 1

            # Sideways offsets (in bond radii) for each cylinder of the bond.
            if order == 1:
                trans = [[0] * 3]
            elif order == 2:
                trans = [[1.4 * bond_radius * x for x in v_rot],
                         [-1.4 * bond_radius * x for x in v_rot]]
            else:
                trans = [
                    [0] * 3,
                    [2.2 * bond_radius * x for x in v_rot],
                    [-2.2 * bond_radius * x for x in v_rot]
                ]

            for offset in trans:
                bond_cylinder = cylinder.copy()
                bond_cylinder.data = cylinder.data.copy()
                bond_cylinder.dimensions = [bond_radius * scale * 2
                                            ] * 2 + [mag]
                bond_cylinder.location = [
                    c + scale * v for c, v in zip(cent, offset)
                ]
                bond_cylinder.rotation_mode = 'AXIS_ANGLE'
                bond_cylinder.rotation_axis_angle = axis_angle
                bpy.context.scene.collection.objects.link(bond_cylinder)
                shapes.append(bond_cylinder)

        # Remove the template primitives now that all copies exist.
        sphere.select_set(True)
        if show_bonds:
            cylinder.select_set(True)
        bpy.ops.object.delete()

        for shape in shapes:
            shape.select_set(True)
        bpy.context.view_layer.objects.active = shapes[0]
        bpy.ops.object.shade_smooth()
        if join:
            bpy.ops.object.join()

        bpy.ops.object.origin_set(type='ORIGIN_GEOMETRY', center='MEDIAN')
        # Blender 2.80+ API, matching the collection/view_layer calls above:
        # the dependency graph is refreshed through the view layer and the 3D
        # cursor position lives on ``scene.cursor``.
        bpy.context.view_layer.update()
        obj = bpy.context.selected_objects
        obj[0].name = smile_text
        obj[0].location = bpy.context.scene.cursor.location

        return {'FINISHED'}
        source = "MetaCyc"
        if (re.search("^[CR]\d{5}$", external_id)):
            source = "KEGG"
        for struct_stage in sorted(
                Structures_Dict[struct_type][external_id].keys()):
            file_string = "_".join((source, struct_type, struct_stage))
            for structure in sorted(Structures_Dict[struct_type][external_id]
                                    [struct_stage].keys()):
                mol = None
                mol_source = ""
                try:
                    if (struct_type == 'InChI'):
                        mol = AllChem.MolFromInchi(structure)
                        if (mol is None or external_id == 'FAD'):
                            mol = pybel.readstring("inchi", structure)
                            if (mol):
                                mol_source = "OpenBabel"
                        else:
                            mol_source = "RDKit"
                    elif (struct_type == 'SMILE'):
                        mol = AllChem.MolFromSmiles(structure)
                        if (mol == None):
                            mol = pybel.readstring("smiles", structure)
                            if (mol):
                                mol_source = "OpenBabel"
                        else:
                            mol_source = "RDKit"
                except Exception as e:
                    pass
예제 #41
0
def adddocking(uniquestring, smiles, molname):
    """Create a ``Docking`` record for *smiles* and queue the docking job.

    The SMILES is parsed with pybel.  If parsing raises ``IOError`` or the
    molecular weight exceeds 800, a ``Docking`` record carrying only an error
    status is saved and the string ``"Error"`` is returned.  Otherwise
    hydrogens are added, descriptors, a 2D MDL block, the rotatable-bond count
    and the PAINS result are computed, the full record is saved,
    ``dockingseq.delay`` is called with it, and its ``id`` is returned.

    :param uniquestring: identifier stored on the record.
    :param smiles: SMILES of the compound to dock.
    :param molname: compound name as a windows-1252 encoded byte string.
    :returns: the new record's ``id``, or ``"Error"`` when the job is refused.
    """
    molname = molname.decode("windows-1252").encode('utf-8', 'ignore')

    def _reject(status):
        # Persist a minimal record explaining why no calculation was started.
        dock = Docking(uniquestring=uniquestring,
                       smiles=smiles,
                       molname=molname,
                       status=status)
        dock.save()
        return "Error"

    try:
        mol = pybel.readstring("smi", str(smiles))
    except IOError:
        return _reject("Something went wrong..")

    if mol.molwt > 800:
        # Prevent people from docking too big compounds
        return _reject("Molecular weight too big, calculation aborted..")

    mol.OBMol.AddHydrogens(True, True, 7.4)
    smiles = mol.write(format='smi')
    descs = mol.calcdesc()

    # Generate 2D coordinates with Open Babel and keep them as an MDL block.
    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats("smi", "mdl")
    obmol = openbabel.OBMol()
    obConversion.ReadString(obmol, smiles)
    gen2d = openbabel.OBOp.FindType("gen2d")
    gen2d.Do(obmol)
    MDL = obConversion.WriteString(obmol)
    # Newlines are stored as the two-character sequence backslash + n.
    molfile = MDL.replace("\n", r"\n")

    CMW = descs["MW"]
    HBA = descs["HBA1"]
    HBD = descs["HBD"]
    logP = descs["logP"]
    tpsa = descs["TPSA"]

    # Number of rotatable bonds = number of matches of this SMARTS pattern.
    smarts = pybel.Smarts(
        r"[!$([NH]!@C(=O))&!D1&!$(*#*)]\&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]")
    nrb = len(smarts.findall(mol))

    # PAINS screening: run the detector once and substitute a readable
    # placeholder when it reports nothing.
    pains = detect_pains(mol)
    if pains == "":
        pains = "Not found"

    dock = Docking(uniquestring=uniquestring,
                   smiles=smiles,
                   molname=molname,
                   molfile=molfile,
                   CMW=CMW,
                   HBA=HBA,
                   HBD=HBD,
                   logP=logP,
                   tpsa=tpsa,
                   nrb=nrb,
                   pains=pains,
                   status="Calculating...",
                   results="")
    dock.save()
    dockingseq.delay(dock)
    return dock.id
예제 #42
0
def localopt(mol_with_cat_mopin, _):
    """Run pybel's built-in UFF local optimisation on a 'mopin' string.

    The second positional argument is accepted but not used.  The optimised
    pybel molecule is returned.
    """
    optimised = pybel.readstring('mopin', mol_with_cat_mopin)
    optimised.localopt('uff')
    return optimised
예제 #43
0
def run():
    """This method is run by typing `blender-chemicals` into a terminal.

    Parses the command line, reads the input either as a file or (when it
    cannot be opened) as a literal string, converts it with Open Babel and
    either prints the JSON representation (``--convert-only``) or launches
    Blender on ``draw.py`` with that JSON.

    Exits with status 1 when Open Babel is unusable, the format is unknown,
    the molecule cannot be read, or Blender cannot be found; exits with
    status 0 after ``--convert-only`` output.
    """
    parser = argparse.ArgumentParser(description="Imports chemicals into "
                                     "Blender with Open Babel.")
    parser.add_argument('input', help="The file or smiles string to draw.")
    parser.add_argument('--format',
                        type=str,
                        default='auto',
                        help="The "
                        "chemical format of the input file. Defaults to "
                        "'auto', which uses the file extension.")
    parser.add_argument('--convert-only',
                        action='store_true',
                        help="Converts "
                        "the input into a simplified JSON format and prints "
                        "to stdout. Does not draw.")
    parser.add_argument('--space-filling',
                        action='store_true',
                        help="Draws "
                        "a space-filling (instead of ball-and-stick) "
                        "representation.")
    parser.add_argument('--no-join',
                        dest='join',
                        action='store_false',
                        help="Skips joining the atoms/bonds into a single "
                        "mesh. Use if you want to individually edit atoms in "
                        "Blender, but note it will impair performance.")
    parser.add_argument('--no-hydrogens',
                        dest='hydrogens',
                        action='store_false',
                        help="Avoids drawing hydrogens.")
    parser.add_argument('--no-generate-coords',
                        dest='generate_coords',
                        action='store_false',
                        help="Skips generating 3D "
                        "coordinates.")
    parser.add_argument('--no-infer-bonds',
                        dest='infer_bonds',
                        action='store_false',
                        help="Skips inferring bonds.")

    args = parser.parse_args()

    # If the argument opens as a file, use its contents; otherwise treat the
    # argument itself as the chemical string.
    try:
        with open(args.input) as in_file:
            data = in_file.read()
        is_file = True
    except IOError:
        data = args.input
        is_file = False

    if args.format == 'auto':
        chemformat = os.path.splitext(args.input)[1][1:] if is_file else 'smi'
    else:
        chemformat = args.format
    prefix = "Inferred" if args.format == 'auto' else "Supplied"

    if not pybel.informats:
        sys.stderr.write("Open babel not properly installed. Exiting.\n")
        sys.exit(1)
    if chemformat not in pybel.informats:
        formats = ', '.join(pybel.informats.keys())
        sys.stderr.write(
            ("{} format '{}' not in available open babel formats."
             "\n\nSupported formats:\n{}\n").format(prefix, chemformat,
                                                    formats))
        sys.exit(1)

    try:
        mol = pybel.readstring(chemformat, data)
    except OSError:
        debug = ((" - Read input as file."
                  if is_file else " - Inferred input as string, not file.") +
                 "\n - {} format of '{}'.".format(prefix, chemformat))
        sys.stderr.write("Could not read molecule.\n\nDebug:\n" + debug + "\n")
        sys.exit(1)

    json_mol = process(mol, args.hydrogens, args.generate_coords,
                       args.infer_bonds, args.convert_only)
    if args.convert_only:
        print(json_mol)
        sys.exit()

    # Prefer a Blender on PATH, then fall back to the default macOS bundle.
    mac_path = '/Applications/blender.app/Contents/MacOS/./blender'
    if shutil.which('blender') is not None:
        blender = 'blender'
    elif os.path.isfile(mac_path):
        blender = mac_path
    else:
        sys.stderr.write("Could not find installed copy of Blender. Either "
                         "make sure it's on your path or copy the contents of "
                         "`drawer.py` into a running blender instance.\n")
        sys.exit(1)

    root = os.path.normpath(os.path.dirname(__file__))
    script = os.path.join(root, 'draw.py')
    command = [blender, '--python', script, '--', json_mol]
    if args.space_filling:
        command.append('--space-filling')
    if not args.join:
        command.append('--no-join')
    # Blender is started in the background with its output discarded; this
    # function does not wait for it to finish.
    subprocess.Popen(command,
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.DEVNULL)
예제 #44
0
 def add_hydrogen(self):
     """Check the site count after ``add_hydrogen`` is called on the adaptor."""
     # NOTE(review): the name lacks a ``test_`` prefix, so default unittest
     # discovery presumably does not collect this method - confirm intent.
     butane = pb.readstring("smi", "CCCC").OBMol
     atoms_before = pb.Molecule(butane).atoms
     # NOTE(review): an expected count of 2 looks inconsistent with a
     # four-carbon SMILES - confirm before enabling this check.
     self.assertEqual(len(atoms_before), 2)
     babel_adaptor = BabelMolAdaptor(butane)
     babel_adaptor.add_hydrogen()
     self.assertEqual(len(babel_adaptor.pymatgen_mol.sites), 14)
예제 #45
0
def eqn_interr(num_eqn, naked_list_eqn, rindx, rstoi, pindx, pstoi,
               chem_scheme_markers, reac_coef, spec_namelist, spec_name,
               spec_smil, spec_list, Pybel_objects, nreac, nprod, comp_num,
               phase):
    """Interrogate chemical-scheme equation strings and fill the index arrays.

    For every equation line the equation text and the rate-coefficient text
    are located using ``chem_scheme_markers[9]``, ``[10]`` and ``[11]``.  The
    converted rate expression is appended to ``reac_coef``; reactant and
    product indices and stoichiometries are written into row ``eqn_step`` of
    ``rindx``/``rstoi`` and ``pindx``/``pstoi`` (columns are appended when an
    equation has more components than the arrays can hold).  Each component
    name seen for the first time is appended to ``spec_namelist``, its SMILES
    (looked up through ``spec_name``/``spec_smil``) is appended to
    ``spec_list`` and a pybel object built from it is appended to
    ``Pybel_objects``.  A component that occurs more than once on one side of
    an equation is merged into a single entry with summed stoichiometry.

    The process is terminated with ``sys.exit`` when a component name is not
    found in ``spec_name``, or when the converted rate expression still
    contains the text ``EXP``.

    The ``phase`` argument is not read inside this function.

    Returns the tuple ``(rindx, rstoi, pindx, pstoi, reac_coef,
    spec_namelist, spec_list, Pybel_objects, nreac, nprod, comp_num)``.
    """

    # inputs: ----------------------------------------------------------------------------
    # num_eqn - number of equations (scalar)
    # naked_list_eqn - equations in strings
    # rindx - to hold indices of reactants
    # rstoi - to hold stoichiometries of reactants
    # pindx - to hold indices of products
    # pstoi - to hold stoichiometries of products
    # chem_scheme_markers - markers for separating sections of the chemical scheme
    # reac_coef - to hold reaction rate coefficients
    # spec_namelist - name strings of components present in the scheme (not SMILES)
    # spec_name - name string of components in xml file (not SMILES)
    # spec_smil - SMILES from xml file
    # spec_list - SMILES of components present in scheme
    # Pybel_objects - list containing pybel objects
    # nreac - to hold number of reactions per equation
    # nprod - number of products per equation
    # comp_num - number of unique components in reactions across all phases
    # phase - marker for the phase being considered: 0 for gas, 1 for particulates
    # ------------------------------------------------------------------------------------

    max_no_reac = 0.0  # log maximum number of reactants in a reaction
    max_no_prod = 0.0  # log maximum number of products in a reaction

    # Loop through equations line by line and extract the required information
    for eqn_step in range(num_eqn):

        line = naked_list_eqn[eqn_step]  # extract this line

        # work out whether equation or reaction rate coefficient part comes first
        eqn_start = str('.*\\' + chem_scheme_markers[10])
        rrc_start = str('.*\\' + chem_scheme_markers[9])
        # get index of these markers, note span is the property of the match object that
        # gives the location of the marker
        eqn_start_indx = (re.match(eqn_start, line)).span()[1]
        rrc_start_indx = (re.match(rrc_start, line)).span()[1]

        if eqn_start_indx > rrc_start_indx:
            eqn_sec = 1  # equation is second part
        else:
            eqn_sec = 0  # equation is first part

        # split the line into 2 parts: equation and rate coefficient
        # . means match with anything except a new line character., when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, so final part is stating the character(s) we
        # are specifically looking for, \\ ensures the marker is recognised
        if eqn_sec == 1:
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[11])
        else:  # end of equation part is start of reaction rate coefficient part
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[9])

        # extract the equation as a string ([0] extracts the equation section and
        # [1:-1] removes the bounding markers)
        eqn = re.findall(eqn_markers, line)[0][1:-1].strip()

        # components, '+' and '=' are expected to be whitespace-separated tokens
        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # with stoich number; rule out the photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        products = [t for t in eqn_split[eqmark_pos + 1:]
                    if t != '+']  # with stoich number

        # record maximum number of reactants across all equations
        max_no_reac = np.maximum(len(reactants), max_no_reac)
        # record maximum number of products across all equations
        max_no_prod = np.maximum(len(products), max_no_prod)

        # append columns if needed
        while max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1]):
            rindx = np.append(rindx, (np.zeros((num_eqn, 1))).astype(int),
                              axis=1)
            rstoi = np.append(rstoi, (np.zeros((num_eqn, 1))), axis=1)
        while max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1]):
            pindx = np.append(pindx, (np.zeros((num_eqn, 1))).astype(int),
                              axis=1)
            pstoi = np.append(pstoi, (np.zeros((num_eqn, 1))), axis=1)

        # the leading \\ escapes the marker so it is matched literally by the regex
        rate_coeff_start_mark = str('\\' + chem_scheme_markers[9])
        # . means match with anything except a new line character, when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, \\ ensures the marker
        # is recognised
        if eqn_sec == 1:  # end of reaction rate coefficient part is start of equation part
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10])
        else:  # end of reaction rate coefficient part is end of line
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11])

        # rate coefficient starts and end punctuation
        rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark)
        # rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()

        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        # stop the program if the text 'EXP' is still present after conversion
        # (presumably an untranslated exponential - confirm with formatting module)
        if (rate_ex.find('EXP') != -1):
            print(rate_ex)
            sys.exit()

        # store the reaction rate coefficient for this equation
        # (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # extract the stoichiometric number of the specii in current equation
        reactant_step = 0
        product_step = 0
        # leading decimal or integer number, if any, at the start of a component token
        stoich_regex = r"^\d*\.\d*|^\d*"
        numr = len(reactants)  # number of reactants in this equation (not used below)

        # left hand side of equations (losses)
        for reactant in reactants:

            if (re.findall(stoich_regex, reactant)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, reactant)[0])
                # name with no stoich number
                name_only = re.sub(stoich_regex, '', reactant)
            elif (re.findall(stoich_regex, reactant)[0] == ''):
                stoich_num = 1.0
                name_only = reactant

            # store stoichometry
            rstoi[eqn_step, reactant_step] = stoich_num

            if name_only not in spec_namelist:  # if new component encountered
                spec_namelist.append(
                    name_only)  # add to chemical scheme name list

                # convert MCM chemical names to SMILES
                if name_only in spec_name:
                    # index where xml file name matches reaction component name
                    name_indx = spec_name.index(name_only)
                    name_SMILE = spec_smil[name_indx]  # SMILES of component
                else:
                    sys.exit(
                        str('Error: inside eqn_parser, chemical scheme name ' +
                            str(name_only) + ' not found in xml file'))

                spec_list.append(name_SMILE)  # list SMILE names
                name_indx = comp_num  # allocate index to this species
                # Generate pybel
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered it will be in spec_namelist
                # existing index
                name_indx = spec_namelist.index(name_only)

            # store reactant index
            # check if index already present - i.e. component appears more than once
            if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0:
                # get pre-existing index of this component
                exist_indx = np.where(
                    rindx[eqn_step, 0:reactant_step] == (int(name_indx)))
                # add to pre-existing stoichiometry
                rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step]
                rstoi[eqn_step,
                      reactant_step] = 0  # remove stoichiometry added above
                reactant_step -= 1  # ignore this duplicate reactant
            else:
                rindx[eqn_step, reactant_step] = int(name_indx)

            reactant_step += 1

        # number of reactants in this equation
        nreac[eqn_step] = int(reactant_step)

        # right hand side of equations (gains)
        for product in products:

            if (re.findall(stoich_regex, product)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, product)[0])
                name_only = re.sub(stoich_regex, '',
                                   product)  # name with no stoich number

            elif (re.findall(stoich_regex, product)[0] == ''):
                stoich_num = 1.0
                name_only = product

            # store stoichometry
            pstoi[eqn_step, product_step] = stoich_num

            if name_only not in spec_namelist:  # if new component encountered
                spec_namelist.append(name_only)

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                if name_only in spec_name:
                    name_indx = spec_name.index(name_only)
                    name_SMILE = spec_smil[name_indx]
                else:
                    sys.exit(
                        str('Error: inside eqn_parser, chemical scheme name ' +
                            str(name_only) + ' not found in xml file'))

                spec_list.append(
                    name_SMILE)  # list SMILE string of parsed species
                name_indx = comp_num  # allocate index to this species
                # Generate pybel

                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered
                # index of component already listed
                name_indx = spec_namelist.index(name_only)

            # store product index
            # check if index already present - i.e. component appears more than once
            if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0:
                exist_indx = np.where(pindx[eqn_step, 0:product_step] == (int(
                    name_indx)))  # get pre-existing index of this component
                # add to pre-existing stoichometry
                pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step]
                pstoi[eqn_step,
                      product_step] = 0  # remove stoichometry added above
                product_step -= 1  # ignore this duplicate product
            else:
                pindx[eqn_step, product_step] = int(name_indx)
            product_step += 1

        # number of products in this equation
        nprod[eqn_step] = int(product_step)

    return (rindx, rstoi, pindx, pstoi, reac_coef, spec_namelist, spec_list,
            Pybel_objects, nreac, nprod, comp_num)
예제 #46
0
                           (0.48640239E-1 * temp) + (0.41764768E-4 *
                                                     (temp**2.0E0)) -
                           (0.14452093E-7 * (temp**3.0E0)) +
                           (0.65459673E1 * numpy.log(temp)))
 y_density_array.append(1000.0E0)  #Append density of water to array [kg/m3]
 y_mw.append(18.0E0)  #Append mw of water to array [g/mol]
 sat_vp.append(numpy.log10(sat_vap_water * 9.86923E-6))  #Convert Pa to atm
 Delta_H.append(40.66)
 Latent_heat_gas.append(
     Lv_water_vapour
 )  #Water vapour, taken from Paul Connolly's parcel model ACPIM
 num_species += 1  #We need to increase the number of species to account for water in the gas phase
 # Now also account for any change in species considered in condensed phase based on those that are ignored
 num_species_condensed = len(y_density_array)
 #Update the Pybel object libraries
 key = pybel.readstring('smi', 'O')
 Pybel_object_dict.update({'O': key})
 #Pybel_object_activity.update({key:Water_Abun})
 species_dict2array.update({'H2O': num_species - 1})
 include_index.append(num_species - 1)
 #pdb.set_trace()
 ignore_index_fortran = numpy.append(ignore_index_fortran, 0.0)
 #pdb.set_trace()
 #-------------------------------------------------------------------------------------
 # 6) Now calculate the additional properties that dictate gas-to-particle partitioning [inc water]
 #-------------------------------------------------------------------------------------
 property_dict2 = Property_calculation.Pure_component2(
     num_species_condensed, y_mw, R_gas, temp)
 alpha_d_org = property_dict2['alpha_d_org']
 DStar_org = property_dict2['DStar_org']
 mean_them_vel = property_dict2['mean_them_vel']
예제 #47
0
    "Pharm2D2point": CalculatePharm2D2pointFingerprint,
    "Pharm2D3point": CalculatePharm2D3pointFingerprint,
    "PubChem": CalculatePubChemFingerprint,
    "GhoseCrippen": CalculateGhoseCrippenFingerprint,
}
################################################################

if __name__ == "__main__":
    # Small demonstration: print a few fingerprints for hard-coded molecules.

    rule = "-" * 25
    print("-" * 10 + "START" + "-" * 10)

    rdkit_smiles = ("CCOC=N", "NC1=NC(=CC=N1)N1C=CC2=C1C=C(O)C=C2")
    ms = [Chem.MolFromSmiles(smi) for smi in rdkit_smiles]
    # Built but not used by the demo below.
    m2 = [pybel.readstring("smi", smi) for smi in ("CCOC=N", "CCO")]

    # ECFP4 fingerprints of the two RDKit molecules.
    for rdkit_mol in ms:
        print(CalculateECFP4Fingerprint(rdkit_mol))
        print(rule)

    # FP3 fingerprint, computed from a pybel molecule.
    mol = pybel.readstring("smi", "CCOC=N")
    print(CalculateFP3Fingerprint(mol))
    print(rule)

    # First element of the 2-point pharmacophore fingerprint result.
    mol = Chem.MolFromSmiles("O=C1NC(=O)NC(=O)C1(C(C)C)CC=C")
    print(CalculatePharm2D2pointFingerprint(mol)[0])
    print(rule)
예제 #48
0
파일: al_run.py 프로젝트: tyo-nu/SimAL
def dissim_run(org,
               ec,
               neg,
               k,
               pos=None,
               zinc=False,
               zinc_tol_l=1,
               zinc_tol_r=1,
               vl=None,
               simfp=fptr.integer_sim,
               target_bits=None,
               screen=None):
    """Rank ZINC candidates by dissimilarity to the known compounds and report.

    Known positives/negatives are loaded into an isozyme object, candidate
    compounds are pulled with ``dbq.zinc_pull``, optionally filtered by SMARTS,
    and two reports are written with ``dw.generate_report``: one scored by
    similarity to the centroid of the known compounds, one scored by that
    similarity's negative log2 weighted by ``dw.avg_proximity``.

    :param org: organism identifier passed to ``bi`` and used in file names.
    :param ec: EC number passed to ``bi`` and used in file names.
    :param neg: SDF source of negative examples.
    :param k: key under which the examples are stored on the isozyme object.
    :param pos: optional SDF source of positive examples.
    :param zinc: accepted for interface compatibility; not read here.
    :param zinc_tol_l: left tolerance forwarded to ``dbq.zinc_pull``.
    :param zinc_tol_r: right tolerance forwarded to ``dbq.zinc_pull``.
    :param vl: vendors list forwarded to ``dw.generate_report``.
    :param simfp: similarity function forwarded to ``dw.avg_proximity``.
    :param target_bits: forwarded to ``dbq.zinc_pull``.
    :param screen: optional SMARTS filter; one pattern, or two joined by
        ``|`` (a candidate is kept when it matches either).
    :raises IOError: if ``screen`` contains more than two SMARTS patterns.
    """
    # Collects isozyme data into the Isozyme class.
    a = bi(org, ec)
    # The return value is not needed here; the call is kept in case it
    # populates state on ``a``.
    a.analyze_reactions()
    if pos:
        a.add_from_sdf(pos, k, pos=True)

    a.add_from_sdf(neg, k, pos=False)

    # Candidate tuples: (smiles, fingerprint, vendors, id).
    res_ = [(page["smiles"], fptr.integer_fp(str(page["smiles"])),
             page["vendors"], page["_id"])
            for page in dbq.zinc_pull(target_bits,
                                      a.mass_avg[k],
                                      a.mass_std[k],
                                      zinc_tol_l=zinc_tol_l,
                                      zinc_tol_r=zinc_tol_r)
            if u'R' not in page["smiles"] and 'vendors' in page]
    # Drop candidates whose fingerprint could not be computed.
    res_s = [rr for rr in res_ if rr[1] is not None]
    if screen is not None:
        patt = [pybel.Smarts(smarts) for smarts in screen.split('|')]
        if len(patt) > 2:
            raise IOError(
                'al_run only supports OR filters for two SMARTS queries at this time.'
            )
        res = []
        for rr in res_s:
            # Parse each SMILES once and test it against however many
            # patterns were supplied, so a single SMARTS works too.
            candidate = pybel.readstring('smi', str(rr[0]))
            if any(len(p.findall(candidate)) > 0 for p in patt):
                res.append(rr)
    else:
        res = res_s

    x_pos_array = np.vstack(tuple([t[1] for t in a.pos[k]]))
    x_neg_array = np.vstack(tuple([t[1] for t in a.neg[k]]))
    x_array = np.vstack((x_pos_array, x_neg_array))
    centroid = np.mean(x_array, axis=0)

    # Every entry of ``res`` already has a non-None fingerprint (see res_s).
    test_a = np.vstack(tuple([np.array(x[1]) for x in res]))
    tc_u = dw.avg_proximity(test_a, test_a, f=simfp)

    xis_a = [(x[0], fptr.integer_sim(centroid, x[1]), 1, x[2], x[3])
             for x in res]
    xis_b = [(x[0], tc_u[i] * (-math.log(fptr.integer_sim(centroid, x[1]), 2)),
              1, x[2], x[3]) for i, x in enumerate(res)]

    name_parts = (org, ec.replace('.', '_'), str(zinc_tol_l).replace('.', '_'),
                  str(zinc_tol_r).replace('.', '_'))
    dw.generate_report(sorted(xis_a, key=lambda y: y[1]),
                       vendors_list=vl,
                       outfile="%s_ec%s_dissim_zinc%s%s.sdf" % name_parts)
    dw.generate_report(sorted(xis_b, key=lambda y: y[1]),
                       vendors_list=vl,
                       outfile="%s_ec%s_dissimcentral_zinc%s%s.sdf" %
                       name_parts)
예제 #49
0
def generateSvg(inchi, filename):
    """Write an SVG depiction of the molecule given as an InChI string.

    The file is only produced when ``filename`` does not exist yet; an
    existing file is left untouched.
    """
    if not os.path.exists(filename):
        molecule = pybel.readstring('inchi', inchi)
        molecule.write('svg', filename=filename)
예제 #50
0
파일: al_run.py 프로젝트: tyo-nu/SimAL
def al_run(org,
           ec,
           neg,
           k,
           beta=1,
           pos=None,
           ent=False,
           kernel='rbf',
           degree=3,
           zinc=True,
           zinc_tol_l=1,
           zinc_tol_r=1,
           greedy=False,
           vl=None,
           simfp=fptr.integer_sim,
           C=5,
           target_bits=None,
           screen=None):
    """Score candidate compounds for one isozyme and write ranked reports.

    Isozyme data for ``org``/``ec`` is collected through ``bi`` and extended
    with the SDF sources ``neg`` (and ``pos`` when given). Candidates are
    pulled from ZINC (``zinc=True``) or KEGG, classified with
    ``machines.svm_clf`` and weighted with ``dw.weight``. The ranked
    candidates are passed to ``dw.generate_report`` (``.sdf`` output) and the
    raw scores are written, one per line, to a ``.txt`` file that shares the
    report's base name.

    Args:
        org: Organism identifier; passed to ``bi`` and used in file names.
        ec: EC number string; dots are replaced by underscores in file names.
        neg: SDF source of negative examples for ``add_from_sdf``.
        k: Key selecting which ``a.pos`` / ``a.neg`` / ``a.mass_*`` entry is
            used.
        beta: Forwarded to ``dw.weight``.
        pos: Optional SDF source of positive examples.
        ent: If true, weight by ``dw.entropy`` of the second label element,
            otherwise by ``dw.hyper_distance`` of it.
        kernel: Forwarded to ``machines.svm_clf`` and used in file names.
        degree: Forwarded to ``machines.svm_clf``.
        zinc: Pull candidates from ZINC when true, from KEGG otherwise.
        zinc_tol_l: Forwarded to ``dbq.zinc_pull``; used in file names.
        zinc_tol_r: Forwarded to ``dbq.zinc_pull``; used in file names.
        greedy: If true, every weight is multiplied by the first label
            element.
        vl: Vendors list forwarded to ``dw.generate_report`` (ZINC only).
        simfp: Similarity function forwarded to ``dw.avg_proximity``.
        C: Forwarded to ``machines.svm_clf`` and used in ZINC file names.
        target_bits: Forwarded to the database pull.
        screen: Optional SMARTS filter for ZINC candidates: one pattern, or
            two patterns separated by ``'|'`` that are OR-ed together.

    Raises:
        IOError: If ``screen`` contains more than two SMARTS patterns.
    """
    # Collects isozyme data into the Isozyme class.
    a = bi(org, ec)
    if pos:
        a.add_from_sdf(pos, k, pos=True)
    a.add_from_sdf(neg, k, pos=False)

    # Two branches here; one pulls potential test data from ZINC, another
    # pulls from KEGG.
    if zinc:
        res_ = [(page["smiles"], fptr.integer_fp(str(page["smiles"])),
                 page["vendors"], page["_id"])
                for page in dbq.zinc_pull(target_bits,
                                          a.mass_avg[k],
                                          a.mass_std[k],
                                          zinc_tol_l=zinc_tol_l,
                                          zinc_tol_r=zinc_tol_r)
                if u'R' not in page["smiles"] and 'vendors' in page]
        res_s = [rr for rr in res_ if rr[1] is not None]
        if screen is not None:
            patt = [pybel.Smarts(smarts) for smarts in screen.split('|')]
            if len(patt) > 2:
                raise IOError(
                    'al_run only supports OR filters for two SMARTS queries at this time.'
                )
            # Iterate over the patterns actually supplied: a single SMARTS
            # used to hit patt[1] and raise IndexError whenever the first
            # pattern did not match. The molecule is parsed once per entry.
            res = []
            for rr in res_s:
                candidate = pybel.readstring('smi', str(rr[0]))
                if any(len(p.findall(candidate)) > 0 for p in patt):
                    res.append(rr)
        else:
            res = res_s

    else:
        # Test the raw fingerprint for None *before* wrapping it in an array:
        # ``np.array(None) is not None`` is always true, so the previous
        # filter never dropped anything. Each fingerprint is computed once.
        res = []
        for page in dbq.kegg_pull(target_bits):
            if u'R' in page["SMILES"]:
                continue
            fingerprint = fptr.integer_fp(str(page["SMILES"]))
            if fingerprint is None:
                continue
            res.append((page["SMILES"], np.array(fingerprint)))

    labels = machines.svm_clf(a.pos[k],
                              a.neg[k],
                              res,
                              kernel=kernel,
                              degree=degree,
                              ent=ent,
                              C=C)

    # NOTE(review): 313 is presumably the expected fingerprint length --
    # confirm against fptr.integer_fp.
    test_a = np.vstack(
        tuple([
            np.array(x[1]) for x in res
            if x[1] is not None and len(x[1]) == 313
        ]))

    tc_u = dw.avg_proximity(test_a, test_a, f=simfp)

    # Each label is a pair; its second element feeds the uncertainty measure
    # and, in greedy mode, its first element scales the resulting weight.
    uncertainty = dw.entropy if ent else dw.hyper_distance
    xis = []
    for i, (l, measure) in enumerate(labels):
        weight = dw.weight(uncertainty(measure), tc_u[i], beta=beta)
        xis.append(l * weight if greedy else weight)

    if zinc:
        base_name = "%s_ec%s_beta%s_%s_zinc%s%s_C%s" % (
            org, ec.replace('.', '_'), str(beta), kernel,
            str(zinc_tol_l).replace('.', '_'),
            str(zinc_tol_r).replace('.', '_'), str(C))
        dw.generate_report(
            sorted(zip([s for s, fp, vend, z in res if fp is not None], xis,
                       [lab[0] for lab in labels],
                       [vend for s, fp, vend, z in res if fp is not None],
                       [z for s, fp, vend, z in res if fp is not None]),
                   key=lambda y: y[1],
                   reverse=True),
            vendors_list=vl,
            outfile=base_name + ".sdf")

    else:
        base_name = "%s_ec%s_beta%s_%s" % (org, ec.replace('.', '_'),
                                           str(beta), kernel)
        dw.generate_report(sorted(zip([s for s, fp in res], xis,
                                      [lab[0] for lab in labels]),
                                  key=lambda y: y[1],
                                  reverse=True),
                           outfile=base_name + ".sdf",
                           zinc=False)

    # The context manager closes the score file even if a write fails.
    with open(base_name + ".txt", 'w') as f:
        for score in xis:
            f.write(str(score) + '\n')
예제 #51
0
 def testKekulizationOfcn(self):
     """Reading the SMILES 'cn' and writing it back must give 'C=N'.

     This input was previously not read correctly, or at least not the
     way Daylight would read it.
     """
     expected = "C=N"
     written = pybel.readstring("smi", "cn").write("smi")
     self.assertEqual(expected, written.rstrip())
예제 #52
0
            except:
                try:
                    df = pd.read_csv(file_path,
                                     sep=' ',
                                     dtype={
                                         '#smiles': str,
                                         'zinc_id': str
                                     })
                    smile_arr = smile_arr + df["#smiles"].tolist()
                    id_arr = id_arr + df["zinc_id"].tolist()
                except:
                    print("warning!!:", file_path)
        else:
            continue

# First pass: one row per distinct raw SMILES string.
data = {"smile": smile_arr, "name": id_arr}
df_out = pd.DataFrame(data).drop_duplicates("smile")
df_out.to_csv("ZINC_UNIQUE_SMILE.csv", index=False)

drug = df_out["smile"]

# Second pass: rewrite every remaining SMILES in pybel's "can" format so
# that different spellings of the same structure can be deduplicated.
canonical_smiles = list(
    pybel.readstring("smi", raw).write("can").rstrip() for raw in drug)

data = {"canonical_smile": canonical_smiles, "name": df_out['name']}
df_out = pd.DataFrame(data).drop_duplicates("canonical_smile")
df_out.to_csv("ZINC_UNIQUE_canonical.csv", index=False)
예제 #53
0
def enhance_structure_dict(structure_dict):
    """Add derived information to the structure dictionary.

    For every molecule entry the following keys are written:
    ``positions`` (converted to an array), ``distances``, ``angle``,
    ``bond_orders``, ``top_bonds``, ``bond_ids``, ``long_symbols``,
    ``sublabel_atom`` and the pybel-derived per-atom lists ``charges``,
    ``spins``, ``heavyvalences``, ``heterovalences``, ``valences`` and
    ``hyb_types``.

    Args:
        structure_dict: Output of :func:`make_structure_dict`. Each value
            must provide at least ``'positions'`` and ``'symbols'``.

    Returns:
        dict: The same, modified in-place, with derived information (e.g. atom distances).

    Caution: If torch is imported at the same time as this is run, you may get a segmentation fault. Complain to pybel or rdkit, I suppose.
    """
    import pybel
    for molecule_name in structure_dict:

        # positions - array (N,3) of Cartesian positions
        molecule = structure_dict[molecule_name]
        positions = np.array(molecule['positions'])
        n_atom = positions.shape[0]
        molecule['positions'] = positions

        # distances - array (N,N) of distances between atoms
        pos1 = np.tile(positions, (n_atom, 1, 1))
        pos2 = np.transpose(pos1, (1, 0, 2))
        dist = np.linalg.norm(pos1 - pos2, axis=-1)
        molecule['distances'] = dist

        # angle - array (N,) of angles to the 2 closest atoms, divided by pi.
        # Column 0 of the argsort is the atom itself (distance 0); columns 1
        # and 2 are its two nearest neighbours.
        sorted_j = np.argsort(dist, axis=-1)
        relpos1 = positions[sorted_j[:, 1], :] - positions[sorted_j[:, 0], :]
        relpos2 = positions[sorted_j[:, 2], :] - positions[sorted_j[:, 0], :]
        cos = np.sum(relpos1 * relpos2, axis=1) / (
            np.linalg.norm(relpos1, axis=1) * np.linalg.norm(relpos2, axis=1))
        # Clip guards arccos against rounding slightly outside [-1, 1].
        angle = np.arccos(np.clip(cos, -1.0, 1.0)).reshape((n_atom, 1)) / np.pi
        molecule['angle'] = angle[:, 0]

        # bond orders - array (N,N) of the bond order (0 for no chemical bond)
        # Note this relies on a few manual corrections
        molecule['bond_orders'] = np.zeros((n_atom, n_atom))
        atomicNumList = [
            atomic_num_dict[symbol] for symbol in molecule['symbols']
        ]
        if molecule_name in manual_bond_order_dict:
            molecule['bond_orders'] = np.array(
                manual_bond_order_dict[molecule_name], dtype=float)
        else:
            mol = x2m.xyz2mol(atomicNumList, 0, positions, True, True)
            for bond in mol.GetBonds():
                atom0, atom1 = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
                bond_order = bond.GetBondType()
                # Fill both halves so the matrix stays symmetric.
                molecule['bond_orders'][atom0,
                                        atom1] = bond_order_dict[bond_order]
                molecule['bond_orders'][atom1,
                                        atom0] = bond_order_dict[bond_order]

        # Supplementary information for tagging:
        # top_bonds: (N,4 or less) bond orders of the top 4 bonds, for each atom
        # bond_ids: (N,4): Label the atom with the following 4 linear transform of top_bonds:
        #   * total num bonds (valence), counting double as 2
        #   * total num bonded neighbors, counting double as 1
        #   * largest order
        #   * second largest order.
        molecule['top_bonds'] = np.sort(molecule['bond_orders'],
                                        axis=-1)[:, -1:-5:-1]
        molecule['bond_ids'] = np.hstack(
            (molecule['top_bonds'].sum(axis=-1)[:, np.newaxis],
             np.sum(molecule['top_bonds'] > 1e-3,
                    axis=-1)[:, np.newaxis], molecule['top_bonds'][:, :2]))
        # long_symbols (N,) string relabel of the symbol straight from bond_ids
        molecule['long_symbols'] = [
            '_'.join([molecule['symbols'][i]] +
                     [str(x) for x in molecule['bond_ids'][i]])
            for i in range(n_atom)
        ]
        # Sorted element symbols of the atoms each atom is bonded to.
        chem_bond_atoms = [
            sorted([
                molecule['symbols'][i]
                for i in molecule['bond_orders'][atom_index].nonzero()[0]
            ]) for atom_index in range(n_atom)
        ]
        molecule['sublabel_atom'] = [
            '-'.join([molecule['long_symbols'][atom_index]] +
                     chem_bond_atoms[atom_index])
            for atom_index in range(n_atom)
        ]

        # pybel information. I think we only end up using Gasteiger charges.
        # Each of these is (N,) arrays
        # Convert to xyz string for pybel's I/O
        xyz = str(n_atom) + '\n\n' + '\n'.join([
            ' '.join([
                str(molecule['symbols'][i]),
                str(molecule['positions'][i, 0]),
                str(molecule['positions'][i, 1]),
                str(molecule['positions'][i, 2])
            ]) for i in range(n_atom)
        ])
        mol = pybel.readstring('xyz', xyz)
        # ``Molecule.atoms`` is a property that rebuilds the full atom list
        # on every access; fetch it once instead of once per atom per field.
        atoms = mol.atoms
        molecule['charges'] = [
            atoms[i].partialcharge for i in range(n_atom)
        ]
        molecule['spins'] = [atoms[i].spin for i in range(n_atom)]
        molecule['heavyvalences'] = [
            atoms[i].heavyvalence for i in range(n_atom)
        ]
        molecule['heterovalences'] = [
            atoms[i].heterovalence for i in range(n_atom)
        ]
        molecule['valences'] = [atoms[i].valence for i in range(n_atom)]
        molecule['hyb_types'] = [atoms[i].type for i in range(n_atom)]
    return structure_dict
예제 #54
0
 def testNonexistentAtom(self):
     """CopySubstructure must report failure for this bit vector.

     NOTE(review): the vector presumably selects atom 9, which the
     three-atom molecule 'ICBr' does not have -- confirm against
     createBitVec.
     """
     source = pybel.readstring("smi", "ICBr")
     atom_bits = self.createBitVec(10, (9, ))
     target = ob.OBMol()
     self.assertFalse(source.OBMol.CopySubstructure(target, atom_bits))
예제 #55
0
def get_inchi_molecule(accession):
    """Parse ``accession`` as an InChI string and return pybel's result."""
    molecule = pybel.readstring("inchi", accession)
    return molecule
예제 #56
0
import csv

# Manually convert .db file to text file. Data file is a file which contains the output databases which have been
# converted to text files and merged into one large file.

dataFile1 = sys.argv[1]

all_monomer_pairs = []

# The context manager closes the input file even if a line fails to parse.
with open(dataFile1) as dataSet1:
    for line in dataSet1:
        # Pull SMILES: the text between the first pair of double quotes, up
        # to the first "_", holds the two monomers separated by "~".
        smiles = line.split("\"")[1].split("_")[0].split("~")
        # Convert SMILES to canonical SMILES
        mol_1 = pybel.readstring("smi", smiles[0])
        canmol1 = mol_1.write("can").split("\t")[0]
        mol_2 = pybel.readstring("smi", smiles[1])
        canmol2 = mol_2.write("can").split("\t")[0]
        # Make a set containing the 2 canonical SMILES and save the set to a
        # list of all monomer pairs
        all_monomer_pairs.append({canmol1, canmol2})

# Tally every unordered pair in a single pass. Counting with list.count()
# once per unique pair was quadratic in the number of input lines.
pair_totals = {}
for pair in all_monomer_pairs:
    pair_key = frozenset(pair)
    pair_totals[pair_key] = pair_totals.get(pair_key, 0) + 1

# List of unique pairs, each as a list of its canonical SMILES.
unique_data = [list(pair_key) for pair_key in pair_totals]

# Each entry is [pair, number of occurrences of that pair].
monomer_pair_counts = [[pair, pair_totals[frozenset(pair)]]
                       for pair in unique_data]

# Most frequent pairs first.
sorted_pairs = sorted(monomer_pair_counts, key=lambda tup: tup[1], reverse=True)
예제 #57
0
        ff.ConjugateGradients(250, 1.0e-3)
        ff.WeightedRotorSearch(250, 5)
        ff.WeightedRotorSearch(250, 10)
        ff.ConjugateGradients(100, 1.0e-5)
        ff.GetCoordinates(mol.OBMol)


if __name__ == "__main__":
    # Iterate through all the files and all the molecules in them. Each line
    # holds an InChIKey and a SMILES string separated by whitespace. A mol2
    # file is written for every molecule that does not have one yet.
    for argument in sys.argv[1:]:
        with open(argument) as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines such as a trailing newline
                ikey, smi = line.split()

                # Skip entries whose SMILES cannot be parsed.
                try:
                    mol = pybel.readstring("smi", smi)
                except IOError:
                    continue

                filename = "library/%s/%s/%s.mol2" % (ikey[0], ikey[1], ikey)
                if os.path.isfile(filename):
                    continue

                # Both branches below write into this directory, so create
                # it up front rather than only for the local fallback.
                mkpath('library/%s/%s' % (ikey[0], ikey[1]))

                # Keep the resolver record under its own name: reusing
                # ``mol`` here replaced the pybel molecule, so the fallback
                # branch called globalopt()/write() on the wrong object.
                cir_mol = cirpy.Molecule(ikey, ['inchikey'])
                if cir_mol.twirl_url is not None:
                    cir_mol.download(filename, 'mol2', True)
                else:
                    # No remote structure available: optimize the geometry
                    # locally and write it out with pybel.
                    globalopt(mol)
                    mol.write("mol2", filename)
예제 #58
0
def eqn_interr(num_eqn, eqn_list, aqeqn_list, chem_scheme_markers, comp_name,
               comp_smil, num_sb, wall_on):

    # inputs: ----------------------------------------------------------------------------
    # num_eqn - number of equations
    # eqn_list - gas-phase equations in list of strings
    # aqeqn_list - aqueous-phase equations in list of strings
    # chem_scheme_markers - markers for separating sections of the chemical scheme
    # comp_name - name string of components in xml file (not SMILES)
    # comp_smil - SMILES from xml file
    # num_sb - number of size bins
    # wall_on - marker for whether to include wall partitioning
    # ------------------------------------------------------------------------------------

    # preparatory part ----------------------------------------------------
    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = np.zeros((num_eqn[0], 1)).astype(int)
    # matrix of indices to arrange reactant concentrations when
    # reaction rate coefficient calculated
    y_arr = (np.ones((num_eqn[0], 1)).astype(int)) * -9999
    # array to arrange reaction rates so they align with reactant stoichiometries
    rr_arr = np.empty((0))
    # same but for products
    rr_arr_p = np.empty((0))
    # index array for extracting required reactant concentrations for the
    # reaction rate coefficient calculation
    y_rind = np.empty((0))
    # index array for identifying products when assigning gains from reactions
    y_pind = np.empty((0))
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn[0], 1)).astype(int)
    # matrix to record stoichiometries of reactants (cols) in each equation (rows)
    rstoi = np.zeros((num_eqn[0], 1))
    jac_stoi = np.zeros((num_eqn[0], 1))
    # 1D array to record stoichiometries of reactants per equarion
    rstoi_flat = np.empty((0))
    # 1D array to record stoichiometries of products per equarion
    pstoi_flat = np.empty((0))
    # matrix to record stoichiometries of products (cols) in each equation (rows)
    pstoi = np.zeros((num_eqn[0], 1))
    # arrays to store number of reactants and products in gas-phase equations
    nreac = np.empty(num_eqn[0], dtype=np.int8)
    nprod = np.empty(num_eqn[0], dtype=np.int8)
    # colptrs for sparse matrix
    reac_col = np.empty(num_eqn[0], dtype=np.int8)
    prod_col = np.empty(num_eqn[0], dtype=np.int8)
    # list for equation reaction rate coefficients
    reac_coef = []
    # matrix containing index of components who are denominators in the
    # calculation of equation derivatives in the Jacobian
    jac_den_indx = np.zeros((num_eqn[0], 1))
    # total number of Jacobian elements per equation
    njac = np.zeros((num_eqn[0], 1))
    # indices of Jacobian to affect per equation (rows)
    jac_indx = np.zeros((num_eqn[0], 1))
    # a new list for the name strings of components presented in the scheme (not SMILES)
    comp_namelist = []
    comp_list = [
    ]  # list for the SMILE strings of components present in the chemical scheme
    # list of Pybel objects of components in chemical scheme
    Pybel_objects = []
    comp_num = 0  # count the number of unique components in the chemical scheme
    RO_indx = []  # empty list for holding indices of alkoxy components
    # ---------------------------------------------------------------------

    max_no_reac = 0.  # log maximum number of reactants in a reaction
    max_no_prod = 0.  # log maximum number of products in a reaction

    # loop through gas-phase equations line by line and extract the required information
    for eqn_step in range(num_eqn[0]):

        line = eqn_list[eqn_step]  # extract this line

        # work out whether equation or reaction rate coefficient part comes first
        eqn_start = str('.*\\' + chem_scheme_markers[10])
        rrc_start = str('.*\\' + chem_scheme_markers[9])
        # get index of these markers, note span is the property of the match object that
        # gives the location of the marker
        eqn_start_indx = (re.match(eqn_start, line)).span()[1]
        rrc_start_indx = (re.match(rrc_start, line)).span()[1]

        if (eqn_start_indx > rrc_start_indx):
            eqn_sec = 1  # equation is second part
        else:
            eqn_sec = 0  # equation is first part

        # split the line into 2 parts: equation and rate coefficient
        # . means match with anything except a new line character., when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, so final part is stating the character(s) we
        # are specifically looking for, \\ ensures the marker is recognised
        if eqn_sec == 1:
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[11])
        else:  # end of equation part is start of reaction rate coefficient part
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[9])

        # extract the equation as a string ([0] extracts the equation section and
        # [1:-1] removes the bounding markers)
        eqn = re.findall(eqn_markers, line)[0][1:-1].strip()

        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # reactants with stoichiometry number and omit any photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        # products with stoichiometry number
        products = [t for t in eqn_split[eqmark_pos + 1:] if t != '+']

        # record maximum number of reactants across all equations
        max_no_reac = np.maximum(len(reactants), max_no_reac)
        # record maximum number of products across all equations
        max_no_prod = np.maximum(len(products), max_no_prod)

        # append columns if needed because maximum number of reactants increases
        while (max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1])):
            rindx = np.append(rindx, (np.zeros((num_eqn[0], 1))).astype(int),
                              axis=1)
            rstoi = np.append(rstoi, (np.zeros((num_eqn[0], 1))), axis=1)
            y_arr = np.append(y_arr, (np.ones(
                (num_eqn[0], 1)) * -9999).astype(int),
                              axis=1)
            y_arr_fixer = ((np.arange(0, num_eqn[0],
                                      dtype='int')).reshape(-1, 1))
            y_arr_fixer = np.tile(y_arr_fixer, (1, int(max_no_reac)))
            y_arr[y_arr !=
                  -9999] = y_arr[y_arr != -9999] + y_arr_fixer[y_arr != -9999]

        while (max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1])):
            pindx = np.append(pindx, (np.zeros((num_eqn[0], 1))).astype(int),
                              axis=1)
            pstoi = np.append(pstoi, (np.zeros((num_eqn[0], 1))), axis=1)
        while ((len(reactants)**2.0 + len(reactants) * len(products)) >
               jac_indx.shape[1]):
            jac_indx = np.append(jac_indx, (np.zeros((num_eqn[0], 1))), axis=1)
            jac_den_indx = np.append(jac_den_indx, (np.zeros((num_eqn[0], 1))),
                                     axis=1)
            jac_stoi = np.append(jac_stoi, (np.zeros((num_eqn[0], 1))), axis=1)

        # .* means occurs anywhere in line and, first \ means second \ can be interpreted
        # and second \ ensures recognition of marker
        rate_coeff_start_mark = str('\\' + chem_scheme_markers[9])
        # . means match with anything except a new line character, when followed by a *
        # means match zero or more times (so now we match with all characters in the line
        # except for new line characters, \\ ensures the marker
        # is recognised
        if eqn_sec == 1:  # end of reaction rate coefficient part is start of equation part
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10])
        else:  # end of reaction rate coefficient part is end of line
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11])

        # rate coefficient starts and end punctuation
        rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark)
        # rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()

        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        if (rate_ex.find('EXP') != -1):
            print('Error in reaction rate coefficient expression: ', rate_ex)
            sys.exit()

        # store the reaction rate coefficient for this equation
        # (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # extract the stoichiometric number of the component in current equation
        reactant_step = 0
        product_step = 0
        stoich_regex = r"^\d*\.\d*|^\d*"
        numr = len(reactants)  # number of reactants in this equation

        # left hand side of equations (losses)
        for reactant in reactants:

            if (re.findall(stoich_regex, reactant)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, reactant)[0])
                # name with no stoich number
                name_only = re.sub(stoich_regex, '', reactant)
            elif (re.findall(stoich_regex, reactant)[0] == ''):
                stoich_num = 1.
                name_only = reactant

            # store stoichiometry
            rstoi[eqn_step, reactant_step] = stoich_num
            jac_stoi[eqn_step, reactant_step] = -1 * stoich_num

            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(
                    name_only)  # add to chemical scheme name list

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                name_indx = comp_name.index(name_only)
                name_SMILE = comp_smil[name_indx]  # SMILES of component

                comp_list.append(name_SMILE)  # list SMILE names
                name_indx = comp_num  # allocate index to this species
                # generate pybel object
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it is a component already encountered it will be in comp_list
                # existing index
                name_indx = comp_namelist.index(name_only)

            # store reactant index
            # check if index already present - i.e. component appears more than once
            if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    rindx[eqn_step, 0:reactant_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step]
                jac_stoi[eqn_step,
                         exist_indx] += -1 * rstoi[eqn_step, reactant_step]
                # remove stoichiometry added above
                rstoi[eqn_step, reactant_step] = 0
                jac_stoi[eqn_step, reactant_step] = 0
                reactant_step -= 1  # ignore this duplicate
            else:
                rindx[eqn_step, reactant_step] = int(name_indx)
                y_arr[eqn_step, reactant_step] = int((eqn_step * max_no_reac) +
                                                     reactant_step)
                y_rind = np.append(y_rind, int(name_indx))
                rr_arr = np.append(rr_arr, int(eqn_step))

            reactant_step += 1

        # number of reactants in this equation
        nreac[eqn_step] = int(reactant_step)

        # record 1D array of stoichiometries per equation
        rstoi_flat = np.append(rstoi_flat, rstoi[eqn_step,
                                                 0:int(reactant_step)])

        # right hand side of equations (gains)
        for product in products:

            if (re.findall(stoich_regex, product)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, product)[0])
                name_only = re.sub(stoich_regex, '',
                                   product)  # name with no stoich number

            elif (re.findall(stoich_regex, product)[0] == ''):
                stoich_num = 1.
                name_only = product

            # store stoichiometry
            pstoi[eqn_step, product_step] = stoich_num
            jac_stoi[eqn_step, reactant_step + product_step] = 1 * stoich_num

            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(name_only)

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                name_indx = comp_name.index(name_only)
                name_SMILE = comp_smil[name_indx]

                comp_list.append(
                    name_SMILE)  # list SMILE string of parsed species
                name_indx = comp_num  # allocate index to this species
                # Generate pybel

                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered
                # index of component already listed
                name_indx = comp_namelist.index(name_only)

            # store product index
            # check if index already present - i.e. component appears more than once
            if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    pindx[eqn_step, 0:product_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step]
                jac_stoi[eqn_step, reactant_step +
                         exist_indx] += 1 * pstoi[eqn_step, product_step]
                # remove stoichiometry added above
                pstoi[eqn_step, product_step] = 0
                jac_stoi[eqn_step, reactant_step + product_step] = 0
                product_step -= 1  # ignore this duplicate
            else:
                pindx[eqn_step, product_step] = int(name_indx)
                rr_arr_p = np.append(rr_arr_p, int(eqn_step))
                y_pind = np.append(y_pind, int(name_indx))

            product_step += 1

        # number of products in this equation
        nprod[eqn_step] = int(product_step)
        # record 1D array of stoichiometries per equation
        pstoi_flat = np.append(pstoi_flat, pstoi[eqn_step,
                                                 0:int(product_step)])

        # now that total number of components (reactants and products)
        # in an equation is known, replicate the reactant indices over all
        # components
        tot_comp = nreac[eqn_step] + nprod[eqn_step]
        for i in range(nreac[eqn_step]):
            jac_den_indx[eqn_step,
                         i * tot_comp:(i + 1) * tot_comp] = rindx[eqn_step, i]
            # also replicate the stoichiometries for every reactant
            if (i > 0):
                jac_stoi[eqn_step, i * tot_comp:(i + 1) *
                         tot_comp] = jac_stoi[eqn_step, 0:tot_comp]
# number of Jacobian elements affected by this equation
        njac[eqn_step, 0] = tot_comp * nreac[eqn_step]

    # remove fillers and flatten index for arranging concentrations
    # ready for reaction rate coefficient calculation
    y_arr_g = y_arr[y_arr != -9999]
    y_rind_g = y_rind.astype(int)  # ensure integer type
    uni_y_rind_g = (np.unique(y_rind)).astype(int)  # unique index of reactants
    y_pind_g = y_pind.astype(int)  # ensure integer type
    uni_y_pind_g = (np.unique(y_pind)).astype(int)  # unique index of products
    rr_arr_g = rr_arr.astype(int)  # ensure integer type
    rr_arr_p_g = rr_arr_p.astype(int)  # ensure integer type
    # colptrs for sparse matrix of the change to reactants per equation
    reac_col_g = np.cumsum(nreac) - nreac
    # colptrs for sparse matrix of the change to products per equation
    prod_col_g = np.cumsum(nprod) - nprod
    if (len(reac_col_g) > 0):  # if gas-phase reaction present
        # include final columns
        reac_col_g = np.append(reac_col_g, reac_col_g[-1] + nreac[-1])
        prod_col_g = np.append(prod_col_g, prod_col_g[-1] + nprod[-1])

    # tag other gas-phase arrays
    rindx_g = rindx
    pindx_g = pindx
    rstoi_g = rstoi
    pstoi_g = pstoi
    jac_stoi_g = jac_stoi
    rstoi_flat_g = rstoi_flat
    pstoi_flat_g = pstoi_flat
    nreac_g = nreac
    nprod_g = nprod
    reac_coef_g = reac_coef
    jac_den_indx_g = jac_den_indx.astype(int)
    njac_g = njac.astype(int)
    jac_indx_g = jac_indx
    jac_indx_g = jac_indx_g.astype(int)

    # same for aqueous-phase reactions ----------------------------------
    # preparatory part ----------------------------------------------------
    # matrix to record indices of reactants (cols) in each equation (rows)
    rindx = (np.ones((num_eqn[1], 1)) * -2).astype(int)
    # matrix of indices to arrange reactant concentrations when
    # reaction rate coefficient calculated
    y_arr = (np.ones((num_eqn[1], 1)).astype(int)) * -9999
    # array to arrange reaction rates so they align with reactant stoichiometries
    rr_arr = np.empty((0))
    # same but for products
    rr_arr_p = np.empty((0))
    # index array for extracting required reactant concentrations for the
    # reaction rate coefficient calculation
    y_rind = np.empty((0))
    # index array for identifying products when assigning gains from reactions
    y_pind = np.empty((0))
    # matrix to record indices of products (cols) in each equation (rows)
    pindx = np.zeros((num_eqn[1], 1)).astype(int)
    # matrix to record stoichiometries of reactants (cols) in each equation (rows)
    rstoi = np.zeros((num_eqn[1], 1))
    jac_stoi = np.zeros((num_eqn[1], 1))
    # 1D array to record stoichiometries of reactants per equation
    rstoi_flat = np.empty((0))
    # 1D array to record stoichiometries of products per equation
    pstoi_flat = np.empty((0))
    # matrix to record stoichiometries of products (cols) in each equation (rows)
    pstoi = np.zeros((num_eqn[1], 1))
    # arrays to store number of reactants and products of equations
    nreac = np.empty(num_eqn[1], dtype=np.int8)
    nprod = np.empty(num_eqn[1], dtype=np.int8)
    # list for equation reaction rate coefficients
    reac_coef = []
    # matrix containing index of components who are denominators in the
    # calculation of equation derivatives in the Jacobian
    jac_den_indx = np.zeros((num_eqn[1], 1))
    # total number of Jacobian elements per equation
    njac = np.zeros((num_eqn[1], 1))
    # indices of Jacobian to affect per equation (rows)
    jac_indx = np.zeros((num_eqn[1], 1))
    # ---------------------------------------------------------------------

    max_no_reac = 0.  # log maximum number of reactants in a reaction
    max_no_prod = 0.  # log maximum number of products in a reaction

    # loop through aqueous-phase equations line by line and extract the required information
    for eqn_step in range(num_eqn[1]):

        line = aqeqn_list[eqn_step]  # extract this line

        # work out whether equation or reaction rate coefficient part comes first
        eqn_start = str('.*\\' + chem_scheme_markers[10])
        rrc_start = str('.*\\' + chem_scheme_markers[9])
        # get index of these markers, note span is the property of the match object that
        # gives the location of the marker
        eqn_start_indx = (re.match(eqn_start, line)).span()[1]
        rrc_start_indx = (re.match(rrc_start, line)).span()[1]

        if eqn_start_indx > rrc_start_indx:
            eqn_sec = 1  # equation is second part
        else:
            eqn_sec = 0  # equation is first part

        # split the line into 2 parts: equation and rate coefficient
        # "." matches any character except a newline; followed by "*" it matches
        # zero or more times, so ".*" matches every character on the line (newlines
        # excepted). The final part names the character(s) we are specifically
        # looking for; the "\\" prefix escapes the marker so it is matched literally
        if eqn_sec == 1:
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[11])
        else:  # end of equation part is start of reaction rate coefficient part
            eqn_markers = str('\\' + chem_scheme_markers[10] + '.*\\' +
                              chem_scheme_markers[9])

        # extract the equation as a string ([0] extracts the equation section and
        # [1:-1] removes the bounding markers)
        eqn = re.findall(eqn_markers, line)[0][1:-1].strip()

        eqn_split = eqn.split()
        eqmark_pos = eqn_split.index('=')
        # with stoich number; rule out the photon
        reactants = [
            i for i in eqn_split[:eqmark_pos] if i != '+' and i != 'hv'
        ]
        products = [t for t in eqn_split[eqmark_pos + 1:]
                    if t != '+']  # with stoich number

        # record maximum number of reactants across all equations
        max_no_reac = np.maximum(len(reactants), max_no_reac)
        # record maximum number of products across all equations
        max_no_prod = np.maximum(len(products), max_no_prod)

        # append columns if needed
        while max_no_reac > np.minimum(rindx.shape[1], rstoi.shape[1]):
            rindx = np.append(rindx, (np.ones(
                (num_eqn[1], 1)) * -2).astype(int),
                              axis=1)
            rstoi = np.append(rstoi, (np.zeros((num_eqn[1], 1))), axis=1)
            y_arr = np.append(y_arr, (np.ones(
                (num_eqn[1], 1)) * -9999).astype(int),
                              axis=1)
            y_arr_fixer = ((np.arange(0, num_eqn[1],
                                      dtype='int')).reshape(-1, 1))
            y_arr_fixer = np.tile(y_arr_fixer, (1, int(max_no_reac)))
            y_arr[y_arr !=
                  -9999] = y_arr[y_arr != -9999] + y_arr_fixer[y_arr != -9999]
        while max_no_prod > np.minimum(pindx.shape[1], pstoi.shape[1]):
            pindx = np.append(pindx, (np.zeros((num_eqn[1], 1))).astype(int),
                              axis=1)
            pstoi = np.append(pstoi, (np.zeros((num_eqn[1], 1))), axis=1)
        while ((len(reactants)**2.0 + len(reactants) * len(products)) >
               jac_indx.shape[1]):
            jac_indx = np.append(jac_indx, (np.zeros((num_eqn[1], 1))), axis=1)
            jac_den_indx = np.append(jac_den_indx, (np.zeros((num_eqn[1], 1))),
                                     axis=1)
            jac_stoi = np.append(jac_stoi, (np.zeros((num_eqn[1], 1))), axis=1)

        # .* means occurs anywhere in line and, first \ means second \ can be interpreted
        # and second \ ensures recognition of marker
        rate_coeff_start_mark = str('\\' + chem_scheme_markers[9])
        # "." matches any character except a newline; followed by "*" it matches
        # zero or more times, so ".*" matches every character on the line (newlines
        # excepted); the "\\" prefix escapes the marker so that it is matched
        # literally
        if eqn_sec == 1:  # end of reaction rate coefficient part is start of equation part
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[10])
        else:  # end of reaction rate coefficient part is end of line
            rate_coeff_end_mark = str('.*\\' + chem_scheme_markers[11])

        # rate coefficient starts and end punctuation
        rate_regex = str(rate_coeff_start_mark + rate_coeff_end_mark)
        # rate coefficient expression in a string
        rate_ex = re.findall(rate_regex, line)[0][1:-1].strip()

        # convert fortran-type scientific notation to python type
        rate_ex = formatting.SN_conversion(rate_ex)
        # convert the rate coefficient expressions into Python readable commands
        rate_ex = formatting.convert_rate_mcm(rate_ex)
        if (rate_ex.find('EXP') != -1):
            print('Error in reaction rate coefficient expression: ', rate_ex)
            sys.exit()

        # store the reaction rate coefficient for this equation
        # (/s once any inputs applied)
        reac_coef.append(rate_ex)

        # extract the stoichiometric number of the component in current equation
        reactant_step = 0
        product_step = 0
        stoich_regex = r"^\d*\.\d*|^\d*"
        numr = len(reactants)  # number of reactants in this equation

        # left hand side of equations (losses)
        for reactant in reactants:

            if (re.findall(stoich_regex, reactant)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, reactant)[0])
                # name with no stoich number
                name_only = re.sub(stoich_regex, '', reactant)
            elif (re.findall(stoich_regex, reactant)[0] == ''):
                stoich_num = 1.0
                name_only = reactant

            # store stoichiometry
            rstoi[eqn_step, reactant_step] = stoich_num
            jac_stoi[eqn_step, reactant_step] = -1 * stoich_num

            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(
                    name_only)  # add to chemical scheme name list

                # convert MCM chemical names to SMILES
                if name_only in comp_name:
                    # index where xml file name matches reaction component name
                    name_indx = comp_name.index(name_only)
                    name_SMILE = comp_smil[name_indx]  # SMILES of component
                else:
                    print(
                        str('Error: inside eqn_parser, chemical scheme name ' +
                            str(name_only) + ' not found in xml file'))
                    sys.exit()

                comp_list.append(name_SMILE)  # list SMILE names
                name_indx = comp_num  # allocate index to this species
                # Generate pybel
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered it will be in comp_list
                # existing index
                name_indx = comp_namelist.index(name_only)

            # store reactant index
            # check if index already present - i.e. component appears more than once
            # as a reactant in this reaction
            if sum(rindx[eqn_step, 0:reactant_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    rindx[eqn_step, 0:reactant_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                rstoi[eqn_step, exist_indx] += rstoi[eqn_step, reactant_step]
                jac_stoi[eqn_step,
                         exist_indx] += -1 * rstoi[eqn_step, reactant_step]
                # remove stoichiometry added above
                rstoi[eqn_step, reactant_step] = 0
                jac_stoi[eqn_step, reactant_step] = 0
                reactant_step -= 1  # ignore this duplicate
            else:
                rindx[eqn_step, reactant_step] = int(name_indx)
                y_arr[eqn_step, reactant_step] = int((eqn_step * max_no_reac) +
                                                     reactant_step)
                y_rind = np.append(y_rind, int(name_indx))
                rr_arr = np.append(rr_arr, int(eqn_step))

            reactant_step += 1

        # number of reactants in this equation
        nreac[eqn_step] = int(reactant_step)

        # record 1D array of stoichiometries per equation
        rstoi_flat = np.append(rstoi_flat, rstoi[eqn_step,
                                                 0:int(reactant_step)])

        # right hand side of equations (gains)
        for product in products:

            if (re.findall(stoich_regex, product)[0] != ''):
                stoich_num = float(re.findall(stoich_regex, product)[0])
                name_only = re.sub(stoich_regex, '',
                                   product)  # name with no stoich number

            elif (re.findall(stoich_regex, product)[0] == ''):
                stoich_num = 1.0
                name_only = product

            # store stoichiometry
            pstoi[eqn_step, product_step] = stoich_num
            jac_stoi[eqn_step, reactant_step + product_step] = 1 * stoich_num
            if name_only not in comp_namelist:  # if new component encountered
                comp_namelist.append(name_only)

                # convert MCM chemical names to SMILES
                # index where xml file name matches reaction component name
                if name_only in comp_name:
                    name_indx = comp_name.index(name_only)
                    name_SMILE = comp_smil[name_indx]
                else:
                    print('Error: inside eqn_interr, chemical scheme name ' +
                          str(name_only) + ' not found in xml file')
                    sys.exit()

                comp_list.append(
                    name_SMILE)  # list SMILE string of parsed species
                name_indx = comp_num  # allocate index to this species

                # generate pybel object
                Pybel_object = pybel.readstring('smi', name_SMILE)
                # append to Pybel object list
                Pybel_objects.append(Pybel_object)

                # check if alkoxy radical present in this component and that component is organic
                if ('[O]' in name_SMILE):
                    if ('C' in name_SMILE or 'C' in name_SMILE):
                        if (name_SMILE !=
                                'C[O]'):  # ensure it's not carbon monoxide
                            # if it is an organic alkoxy radical add its index to list
                            RO_indx.append(comp_num)

                comp_num += 1  # number of unique species

            else:  # if it's a species already encountered
                # index of component already listed
                name_indx = comp_namelist.index(name_only)

            # store product index
            # check if index already present - i.e. component appears more than once
            if sum(pindx[eqn_step, 0:product_step] == int(name_indx)) > 0:
                # get existing index of this component
                exist_indx = (np.where(
                    pindx[eqn_step, 0:product_step] == (int(name_indx))))[0]
                # add to existing stoichiometry
                pstoi[eqn_step, exist_indx] += pstoi[eqn_step, product_step]
                jac_stoi[eqn_step, reactant_step +
                         exist_indx] += 1 * pstoi[eqn_step, product_step]
                # remove stoichiometry added above
                pstoi[eqn_step, product_step] = 0
                jac_stoi[eqn_step, reactant_step + product_step] = 0
                product_step -= 1  # ignore this duplicate
            else:
                pindx[eqn_step, product_step] = int(name_indx)
                rr_arr_p = np.append(rr_arr_p, int(eqn_step))
                y_pind = np.append(y_pind, int(name_indx))

            product_step += 1

        # number of products in this equation
        nprod[eqn_step] = int(product_step)
        # record 1D array of stoichiometries per equation
        pstoi_flat = np.append(pstoi_flat, pstoi[eqn_step,
                                                 0:int(product_step)])

        # now that total number of components (reactants and products)
        # in an equation is known, replicate the reactant indices over all
        # components
        tot_comp = nreac[eqn_step] + nprod[eqn_step]
        for i in range(nreac[eqn_step]):
            jac_den_indx[eqn_step,
                         i * tot_comp:(i + 1) * tot_comp] = rindx[eqn_step, i]
            # also replicate the stoichiometries for every reactant
            if (i > 0):
                jac_stoi[eqn_step, i * tot_comp:(i + 1) *
                         tot_comp] = jac_stoi[eqn_step, 0:tot_comp]
# number of Jacobian elements affected by this equation
        njac[eqn_step, 0] = tot_comp * nreac[eqn_step]

    # account for gas-phase in Jacobian denominator index
    jac_den_indx += (comp_num + 2)

    # remove fillers and flatten index for arranging concentrations ready for reaction rate coefficient calculation
    y_arr_aq = y_arr[y_arr != -9999]  # remove fillers
    y_rind_aq = y_rind.astype(int)  # ensure integer type
    uni_y_rind_aq = (np.unique(y_rind)).astype(
        int)  # unique index of reactants
    y_pind_aq = y_pind.astype(int)  # ensure integer type
    uni_y_pind_aq = (np.unique(y_pind)).astype(int)  # unique index of products
    rr_arr_aq = rr_arr.astype(int)  # ensure integer type
    rr_arr_p_aq = rr_arr_p.astype(int)  # ensure integer type
    # colptrs for sparse matrix of the change to reactants per equation
    reac_col_aq = np.cumsum(nreac) - nreac
    # colptrs for sparse matrix of the change to products per equation
    prod_col_aq = np.cumsum(nprod) - nprod
    if (len(reac_col_aq) > 0):  # if aqueous-phase reaction present
        # include final columns
        reac_col_aq = np.append(reac_col_aq, reac_col_aq[-1] + nreac[-1])
        prod_col_aq = np.append(prod_col_aq, prod_col_aq[-1] + nprod[-1])

    # tag other aqueous-phase arrays
    rindx_aq = rindx
    pindx_aq = pindx
    rstoi_aq = rstoi
    pstoi_aq = pstoi
    jac_stoi_aq = jac_stoi
    rstoi_flat_aq = rstoi_flat
    pstoi_flat_aq = pstoi_flat
    nreac_aq = nreac
    nprod_aq = nprod
    reac_coef_aq = reac_coef
    jac_den_indx_aq = jac_den_indx.astype(int)
    njac_aq = njac.astype(int)
    jac_indx_aq = jac_indx
    jac_indx_aq = jac_indx_aq.astype(int)

    return (rindx_g, rstoi_g, pindx_g, pstoi_g, reac_coef_g, nreac_g, nprod_g,
            jac_stoi_g, jac_den_indx_g, njac_g, jac_indx_g, y_arr_g, y_rind_g,
            uni_y_rind_g, y_pind_g, uni_y_pind_g, reac_col_g, prod_col_g,
            rstoi_flat_g, pstoi_flat_g, rr_arr_g, rr_arr_p_g, rindx_aq,
            rstoi_aq, pindx_aq, pstoi_aq, reac_coef_aq, nreac_aq, nprod_aq,
            jac_stoi_aq, jac_den_indx_aq, njac_aq, jac_indx_aq, y_arr_aq,
            y_rind_aq, uni_y_rind_aq, y_pind_aq, uni_y_pind_aq, reac_col_aq,
            prod_col_aq, rstoi_flat_aq, pstoi_flat_aq, rr_arr_aq, rr_arr_p_aq,
            comp_namelist, comp_list, Pybel_objects, comp_num, RO_indx)
예제 #59
0
if __name__ == "__main__":

    # For every SDF input file: rebuild each molecule from its SMILES string,
    # generate and optimise a 3D structure with the GAFF force field, record
    # the measured logP together with the atom coordinates, and write the
    # optimised structure out as a PDB file.
    for idx, sdf_dataset in enumerate(DATA_SETS):
        # NOTE(review): this mapping is recreated for every input file and is
        # not stored anywhere within this block -- confirm that is intended.
        logp_dataset = {}

        # iterate over every record held in the current SDF file
        for sd_record in pybel.readfile('sdf', sdf_dataset):
            # sd_record.data exposes all properties of the record
            record_fields = sd_record.data
            mol_id = record_fields['MOLECULEID']

            # NOTE(review): the coordinates read from the mol2 file are
            # replaced further down by the optimised geometry; the call is
            # kept because it may also act as a check that the file exists.
            mol2_path = mol2_file_path[idx] + mol_id + '.mol2'
            molecule_coords = get_coords(mol2_path)

            # rebuild the molecule from SMILES and add explicit hydrogens
            molecule = pybel.readstring("smi", record_fields['SMILES'])
            molecule.OBMol.AddHydrogens()

            # generate 3D coordinates, then minimise the energy locally
            molecule.make3D(forcefield="gaff", steps=STEPS)
            molecule.localopt(forcefield="gaff", steps=STEPS)

            # collect the coordinates of every atom of the optimised structure
            molecule_coords = [atom.coords for atom in molecule.atoms]

            # store the measured logP and the coordinates under the molecule id
            measured_logp = float(record_fields['logPow {measured}'])
            coords_array = np.array(molecule_coords)
            logp_dataset[mol_id] = {'logp': measured_logp,
                                    'coords': coords_array}

            # NOTE(review): `databae_path` looks like a misspelling of
            # "database_path"; it is left as-is because the name is defined
            # outside this block.
            molecule.write("pdb", f"{databae_path}/pdbs/{sd_record.data['MOLECULEID']}.pdb")
예제 #60
0
	[comp_smil, comp_name] = xml_interr.xml_interr(str(cwd + '/example_xml.xml'))

# Convert chemical scheme component names into SMILES strings: each name is
# located in comp_name and the entry at the same position in comp_smil is taken.
comp_smiles = [] # holder for the SMILES strings
for name in comp_names[0:-2]: # omit H2O and core at end of comp_names
	comp_smiles.append(comp_smil[comp_name.index(name)])

# Accumulate secondary organic aerosol (SOA) mass over the column blocks
# 1..num_sb-1 of y. Block 0 is skipped -- presumably it holds the gas phase;
# TODO confirm against the layout of y.
SOA0 = 0.
for i in range(1, num_sb):
	# calculate SOA (*1.0e12 to convert from g/cc (air) to ug/m3 (air));
	# the last two components of each block (water and core) are excluded and
	# the result is summed over the remaining components (axis=1).
	# NOTE(review): dividing by si.N_A and multiplying by y_mw presumably turns
	# molecules/cc into g/cc -- confirm the units of y and y_mw.
	SOA0 += (((y[:, num_comp*i:num_comp*(i+1)-2]/si.N_A)*y_mw[0:-2]*1.0e12).sum(axis=1))

Pybel_objects = [] # holder for the Pybel objects of the components
for i in range(num_comp-2): # component loop (water and core excluded)
	# generate pybel object from the component's SMILES string
	Pybel_objects.append(pybel.readstring('smi', comp_smiles[i]))

# point to umansysprop folder; the path must be inserted before the imports
# below so that they are resolved from this folder
sys.path.insert(1, (cwd + '/umansysprop')) # address for updated version
	
from umansysprop import boiling_points
from umansysprop import vapour_pressures
from umansysprop import liquid_densities

NA = si.Avogadro # Avogadro's number (molecules/mol)
# vapour pressures of components, excluding water and core at end
# (initialised to zeros, one column per component)
Psat = np.zeros((1, num_comp-2))
TEMP = 298.15 # temperature (K) 


for i in range(num_comp-2): # component loop