def generate(self, target=None, ratios=np.linspace(0, 3, 500)):
    """Decode candidate molecules by linearly mixing pairs of latent vectors.

    Args:
        target: SMILES inputs to load into ``self.X_test`` (via
            ``preprocessing.process_smiles``) before encoding. ``None`` or
            ``[]`` leaves ``self.X_test`` untouched.
        ratios: Mixing coefficients ``r``; each produces the latent point
            ``(1 - r) * latent0 + r * latent1``.

    Returns:
        A list of ``molecule`` objects, one per distinct SMILES string that
        ``Chem.MolFromSmiles`` accepted, in order of first appearance.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean "no targets".
    if target is None:
        target = []
    if target != []:
        target = preprocessing.process_smiles(target)
        for i in range(0, len(target)):
            self.X_test[i] = target[i]
    x_latent = self.smiles_to_latent_model.predict(self.X_test)
    molecules = []
    seen = set()  # SMILES already emitted; O(1) duplicate check
    for i in range(len(target) - 1):
        latent1 = x_latent[i:i + 1]
        # NOTE(review): row i is paired with row i + 2 (not i + 1), exactly as
        # in the original code -- confirm this offset is intentional.
        latent0 = x_latent[i + 2:i + 3]
        for r in ratios:
            rlatent = (1.0 - r) * latent0 + r * latent1
            smiles = self.latent_to_smiles(rlatent)
            mol = Chem.MolFromSmiles(smiles)
            if mol and smiles not in seen:
                print(smiles)
                molecules.append(molecule(smiles))
                seen.add(smiles)
    return molecules
def gjfTodat(self, directory, fileName):
    """Convert a .gjf file into a .dat file and return the .dat file name.

    Reads ``directory/fileName``, locates the atom block (the lines after the
    first match of ``self.pattern_multi``) and the connectivity block, builds
    a ``chem.molecule`` from them and writes ``fileName[7:-4] + '.dat'`` into
    ``directory``. The output holds the name, the literal ``ANGS``, the atom
    count, the geometry text from
    ``geometryExtractor.mominertGeometryExtractor`` and one
    ``singleGroupInfo()`` entry per rotation from ``molecule1.getRotations()``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost -- confirm the ``connect_done`` branch against
    the original file.
    """
    atomsNum = 0
    lineNum = -1
    atom_start = 0
    connect_start = 0
    geom = ''
    # Parser state: each flag becomes 1 once its section has been consumed.
    multi_done = 0
    atom_done = 0
    connect_done = 0
    fr = file(directory + '/' + fileName, 'r')
    tmp_lines = fr.readlines()
    for tmp_line in tmp_lines:
        lineNum += 1
        if multi_done != 1:
            tmp_m = self.pattern_multi.match(tmp_line)
            if tmp_m:
                # Atoms start on the line right after the pattern_multi match.
                atom_start = lineNum + 1
                multi_done = 1
        elif atom_done != 1:
            tmp_m = self.pattern_atom.match(tmp_line)
            if tmp_m:
                atomsNum += 1
            else:
                # First non-atom line ends the atom block.
                atom_done = 1
        elif connect_done != 1:
            tmp_m = self.pattern_connnect.match(tmp_line)
            if tmp_m:
                tmp_num = int(tmp_m.group(1))
                # connect_done counts down (-1, -2, ...) while entries arrive
                # in order 1, 2, ...; the sum is 1 only for the expected entry.
                if (connect_done + tmp_num) == 1:
                    connect_done -= 1
                    if tmp_num == 1:
                        connect_start = lineNum
                    if tmp_num == atomsNum:
                        connect_done = 1
            else:
                # Restart the sequence check.
                connect_done = 0
    if connect_start != 0:
        # print 'connectivity info extracted successfully!'
        pass
    else:
        print 'connectivity info extracted not successfully!'
    geom = geometryExtractor.mominertGeometryExtractor(tmp_lines[atom_start: atom_start + atomsNum])
    molecule1 = chem.molecule(geom=tmp_lines[atom_start: atom_start + atomsNum], connect=tmp_lines[connect_start: connect_start + atomsNum], atomsNum=atomsNum)
    if __OOSYSTEM__ == True:
        molecule1.fulfillBonds()
    # molecule1.displayBonds()
    # NOTE(review): assumed to run regardless of __OOSYSTEM__ because
    # ``rotations`` is used unconditionally below -- confirm.
    rotations = molecule1.getRotations()
    # NOTE(review): fr is only closed here; an exception above leaks the handle.
    fr.close()
    # Output name drops the first 7 and the last 4 characters of fileName.
    fw = file(directory + '/' + fileName[7:-4] + '.dat', 'w')
    fw.write(fileName[7:-4] + '\n' + 'ANGS\n' + str(atomsNum) + '\n' + geom)
    fw.write(''.join(tmp_rot.singleGroupInfo() for tmp_rot in rotations))
    # for tmp_rot in rotations:
    #     fw.write(tmp_rot.group1Info())
    fw.write('0 0\n\n\n\n\n\n\n')
    fw.close()
    return fileName[7:-4] + '.dat'
def generate(self, target=None, hit_rate=100, preprocessing_instance=None):
    """Decode candidate molecules by linearly mixing pairs of latent vectors.

    Args:
        target: SMILES inputs to load into ``self.X_test`` (via
            ``preprocessing_instance.process_smiles``) before encoding.
            ``None`` or ``[]`` leaves ``self.X_test`` untouched.
        hit_rate: Number of mixing coefficients sampled with
            ``np.linspace(0, 3, hit_rate)``.
        preprocessing_instance: Object providing ``process_smiles``; it is
            also forwarded to ``self.latent_to_smiles``.

    Returns:
        A list of ``molecule`` objects, one per distinct SMILES string that
        ``Chem.MolFromSmiles`` accepted, in order of first appearance.
    """
    # ``None`` replaces the former mutable ``[]`` default; both mean "no targets".
    if target is None:
        target = []
    if target != []:
        target = preprocessing_instance.process_smiles(target)
        for i in range(0, len(target)):
            self.X_test[i] = target[i]
    x_latent = self.smiles_to_latent_model.predict(self.X_test)
    # The ratios do not depend on the pair being mixed, so compute them once.
    ratios = np.linspace(0, 3, hit_rate)
    molecules = []
    seen = set()  # SMILES already emitted; O(1) duplicate check
    for i in range(len(target) - 1):
        latent1 = x_latent[i:i + 1]
        # NOTE(review): row i is paired with row i + 2 (not i + 1), exactly as
        # in the original code -- confirm this offset is intentional.
        latent0 = x_latent[i + 2:i + 3]
        for r in ratios:
            rlatent = (1.0 - r) * latent0 + r * latent1
            smiles = self.latent_to_smiles(rlatent, preprocessing_instance)
            mol = Chem.MolFromSmiles(smiles)
            if mol and smiles not in seen:
                print(smiles, "adding to array!", "\n\n")
                molecules.append(molecule(smiles))
                seen.add(smiles)
    return molecules
def readConformers(self, fileName, path=''):
    """Read every conformer (geometry + energy) from an SDF-style file.

    The file is scanned with a small state machine driven by the ``Balloon``
    class patterns: atom-block start -> atom lines -> energy-tag start ->
    energy value. Each time an energy value is found a ``chem.molecule`` is
    built from the collected atom lines and the parser is reset for the next
    conformer.

    Args:
        fileName: Name of the file to read.
        path: Directory containing the file ('' means relative to the cwd).

    Returns:
        List of ``chem.molecule`` objects in file order; the energy is stored
        on each one through ``setZPE``.
    """
    molecules = []
    # Parser state: each flag becomes 1 once its section has been seen.
    atomStart_done = 0
    coordinate_done = 0
    energyStart_done = 0
    tmp_geom = []

    # Context manager guarantees the handle is released even if reading fails.
    with open(os.path.join(path, fileName), 'r') as fr:
        tmp_lines = fr.readlines()

    for tmp_line in tmp_lines:
        if atomStart_done != 1:
            if Balloon.pattern_sdfAtomStart.match(tmp_line):
                atomStart_done = 1
        elif coordinate_done != 1:
            tmp_m = Balloon.pattern_sdfAtom.match(tmp_line)
            if tmp_m:
                # Reorder to "<group 4> <group 1> <group 2> <group 3>".
                tmp_geom.append(''.join([
                    tmp_m.group(4), ' ', tmp_m.group(1), ' ',
                    tmp_m.group(2), ' ', tmp_m.group(3)
                ]))
            else:
                # First non-atom line ends the coordinate block.
                coordinate_done = 1
        elif energyStart_done != 1:
            if Balloon.pattern_sdfEnergyStart.match(tmp_line):
                energyStart_done = 1
        else:
            tmp_m = Balloon.pattern_sdfEnergy.match(tmp_line)
            if tmp_m:
                tmp_molecule = chem.molecule()
                tmp_molecule.getGjfGeom(tmp_geom)
                tmp_molecule.setZPE(float(tmp_m.group(1)))
                molecules.append(tmp_molecule)
                # Reset for the next conformer record.
                atomStart_done = 0
                coordinate_done = 0
                energyStart_done = 0
                tmp_geom = []
    return molecules
def readGjfFile(self, fileName, directory='', moleculeLabel=''):
    """Parse a .gjf file into ``self.mole`` and return it.

    The file is scanned for, in order: a line matching
    ``self.pattern_gjfCommand``, a line matching ``self.pattern_gjfMulti``
    (group 2 is the spin multiplicity) and the next line matching
    ``self.pattern_blankLine``. The lines between the last two form the
    geometry handed to ``chem.molecule.getGjfGeom``.

    Args:
        fileName: Name of the .gjf file.
        directory: Directory containing it ('' means relative to the cwd).
        moleculeLabel: Label for the molecule; '' uses the computed formula.

    Returns:
        The populated ``chem.molecule`` (also stored as ``self.mole``).
    """
    # Parser state: each flag becomes 1 once its section has been found.
    gjfCommand_done = -1
    gjfMulti_done = -1
    geomDone = -1
    tmp_multi = 1
    tmp_geom = ''
    lineStart = 0

    # Context manager guarantees the handle is released even if reading fails.
    with open(os.path.join(directory, fileName), 'r') as gjfFile:
        tmp_lines = gjfFile.readlines()

    for (lineNum, tmp_line) in enumerate(tmp_lines):
        if gjfCommand_done != 1:
            if self.pattern_gjfCommand.match(tmp_line):
                gjfCommand_done = 1
        elif gjfMulti_done != 1:
            tmp_m = self.pattern_gjfMulti.match(tmp_line)
            if tmp_m:
                lineStart = lineNum
                tmp_multi = int(tmp_m.group(2))
                geomDone = 0
                gjfMulti_done = 1
        elif geomDone != 1:
            if self.pattern_blankLine.match(tmp_line):
                tmp_geom = tmp_lines[lineStart + 1:lineNum]
                geomDone = 1
                # Nothing after the geometry block is inspected.
                break

    if geomDone != 1:
        # Best effort, as before: warn and continue with an empty geometry.
        print('Sorry! The input file is not a standard gjf file!')

    self.mole = chem.molecule()
    self.mole.getGjfGeom(tmp_geom)
    self.mole.setSpinMultiplicity(tmp_multi)
    self.mole.calcFormula()
    if moleculeLabel == '':
        self.mole.setLabel(self.mole.formula)
    else:
        self.mole.setLabel(moleculeLabel)
    self.mole.fulfillBonds()
    return self.mole
def readGjfFile(self, fileName, directory='', moleculeLabel=''):
    """Parse a .gjf file into ``self.mole`` and return it.

    The file is scanned for, in order: a line matching
    ``self.pattern_gjfCommand``, a line matching ``self.pattern_gjfMulti``
    (group 2 is the spin multiplicity) and the next line matching
    ``self.pattern_blankLine``. The lines between the last two form the
    geometry handed to ``chem.molecule.getGjfGeom``.

    Args:
        fileName: Name of the .gjf file.
        directory: Directory containing it ('' means relative to the cwd).
        moleculeLabel: Label for the molecule; '' uses the computed formula.

    Returns:
        The populated ``chem.molecule`` (also stored as ``self.mole``).
    """
    # Parser state: each flag becomes 1 once its section has been found.
    gjfCommand_done = -1
    gjfMulti_done = -1
    geomDone = -1
    tmp_multi = 1
    tmp_geom = ''
    lineStart = 0

    # Context manager guarantees the handle is released even if reading fails.
    with open(os.path.join(directory, fileName), 'r') as gjfFile:
        tmp_lines = gjfFile.readlines()

    for (lineNum, tmp_line) in enumerate(tmp_lines):
        if gjfCommand_done != 1:
            if self.pattern_gjfCommand.match(tmp_line):
                gjfCommand_done = 1
        elif gjfMulti_done != 1:
            tmp_m = self.pattern_gjfMulti.match(tmp_line)
            if tmp_m:
                lineStart = lineNum
                tmp_multi = int(tmp_m.group(2))
                geomDone = 0
                gjfMulti_done = 1
        elif geomDone != 1:
            if self.pattern_blankLine.match(tmp_line):
                tmp_geom = tmp_lines[lineStart + 1:lineNum]
                geomDone = 1
                # Nothing after the geometry block is inspected.
                break

    if geomDone != 1:
        # Best effort, as before: warn and continue with an empty geometry.
        print('Sorry! The input file is not a standard gjf file!')

    self.mole = chem.molecule()
    self.mole.getGjfGeom(tmp_geom)
    self.mole.setSpinMultiplicity(tmp_multi)
    self.mole.calcFormula()
    if moleculeLabel == '':
        self.mole.setLabel(self.mole.formula)
    else:
        self.mole.setLabel(moleculeLabel)
    self.mole.fulfillBonds()
    return self.mole
def readConformers(self, fileName, path=''):
    """Read every conformer (geometry + energy) from an SDF-style file.

    The file is scanned with a small state machine driven by the ``Balloon``
    class patterns: atom-block start -> atom lines -> energy-tag start ->
    energy value. Each time an energy value is found a ``chem.molecule`` is
    built from the collected atom lines and the parser is reset for the next
    conformer.

    Args:
        fileName: Name of the file to read.
        path: Directory containing the file ('' means relative to the cwd).

    Returns:
        List of ``chem.molecule`` objects in file order; the energy is stored
        on each one through ``setZPE``.
    """
    molecules = []
    # Parser state: each flag becomes 1 once its section has been seen.
    atomStart_done = 0
    coordinate_done = 0
    energyStart_done = 0
    tmp_geom = []

    # Context manager guarantees the handle is released even if reading fails.
    with open(os.path.join(path, fileName), 'r') as fr:
        tmp_lines = fr.readlines()

    for tmp_line in tmp_lines:
        if atomStart_done != 1:
            if Balloon.pattern_sdfAtomStart.match(tmp_line):
                atomStart_done = 1
        elif coordinate_done != 1:
            tmp_m = Balloon.pattern_sdfAtom.match(tmp_line)
            if tmp_m:
                # Reorder to "<group 4> <group 1> <group 2> <group 3>".
                tmp_geom.append(''.join([
                    tmp_m.group(4), ' ', tmp_m.group(1), ' ',
                    tmp_m.group(2), ' ', tmp_m.group(3)
                ]))
            else:
                # First non-atom line ends the coordinate block.
                coordinate_done = 1
        elif energyStart_done != 1:
            if Balloon.pattern_sdfEnergyStart.match(tmp_line):
                energyStart_done = 1
        else:
            tmp_m = Balloon.pattern_sdfEnergy.match(tmp_line)
            if tmp_m:
                tmp_molecule = chem.molecule()
                tmp_molecule.getGjfGeom(tmp_geom)
                tmp_molecule.setZPE(float(tmp_m.group(1)))
                molecules.append(tmp_molecule)
                # Reset for the next conformer record.
                atomStart_done = 0
                coordinate_done = 0
                energyStart_done = 0
                tmp_geom = []
    return molecules
def readGjfGeom(self, gjfGeom, moleculeLabel='', multiplicity=1):
    """Build ``self.mole`` from a gjf geometry string and return it.

    ``gjfGeom`` is stripped and split on newlines, then passed to
    ``chem.molecule.getGjfGeom``. The spin multiplicity is set from
    ``multiplicity``, the formula is computed, the label is set to
    ``moleculeLabel`` (or to the formula when that is ''), and bonds are
    filled in with ``fulfillBonds``.
    """
    geom_lines = gjfGeom.strip().split('\n')

    self.mole = chem.molecule()
    self.mole.getGjfGeom(geom_lines)
    self.mole.setSpinMultiplicity(multiplicity)
    self.mole.calcFormula()

    # An empty label falls back to the computed formula.
    label = moleculeLabel if moleculeLabel != '' else self.mole.formula
    self.mole.setLabel(label)

    self.mole.fulfillBonds()
    return self.mole
def readGjfGeom(self, gjfGeom, moleculeLabel='', multiplicity=1):
    """Build and validate ``self.mole`` from a gjf geometry string.

    Args:
        gjfGeom: Geometry text; it is stripped and split on newlines.
        moleculeLabel: Label for the molecule; '' uses the computed formula.
        multiplicity: Spin multiplicity to store on the molecule.

    Returns:
        The populated ``chem.molecule`` (also stored as ``self.mole``).

    Raises:
        common_api.readGjfGeomError: ``getGjfGeom`` or ``fulfillBonds`` failed.
        common_api.beyondSpeciesRangeError: the formula does not match
            ``self.pattern_formula``.
        common_api.carbonLessThan3Error: fewer than three carbon atoms.
        common_api.ringExistingError: the structure contains rings.
    """
    tmp_geom = gjfGeom.strip().split('\n')
    self.mole = chem.molecule()
    try:
        self.mole.getGjfGeom(tmp_geom)
    except Exception:
        # ``Exception`` (not a bare except) so KeyboardInterrupt/SystemExit
        # are not converted into a format error.
        print('Sorry! The input file is not in the expected format!')
        raise common_api.readGjfGeomError
    self.mole.setSpinMultiplicity(multiplicity)
    self.mole.calcFormula()

    tmp_m = self.pattern_formula.match(self.mole.formula)
    if not tmp_m:
        print('Sorry! Not an alkane or alkene or their radical! Not supported temporarily!')
        raise common_api.beyondSpeciesRangeError
    # An empty count group means a single carbon atom.
    if tmp_m.group(1) == '':
        carbonNumber = 1
    else:
        carbonNumber = int(tmp_m.group(1))
    if carbonNumber < 3:
        print('Please submit a species with carbon atoms >= 3!')
        raise common_api.carbonLessThan3Error

    if moleculeLabel == '':
        self.mole.setLabel(self.mole.formula)
    else:
        self.mole.setLabel(moleculeLabel)
    try:
        self.mole.fulfillBonds()
    except Exception:
        print('mole.fulfillBonds() failed!')
        raise common_api.readGjfGeomError
    if self.mole.existRings():
        print('Sorry! Structure with rings is not supported temporarily!')
        raise common_api.ringExistingError
    return self.mole
def genFrogInputFromGjf(self, fileList, path='', jobName=''):
    """Write conformer-search job files (Frog2) for each .gjf file in fileList.

    For every input file this: sets the TS flag from the file name, parses
    the command / multiplicity / geometry sections, recreates the job
    directory, writes ``<job>.gjf`` and ``<job>.xyz``, converts the .xyz to
    ``<job>.sdf`` with the OpenBabel executable, regenerates the .sdf through
    ``chem.molecule.generateSDFFile`` when the converted file is missing or
    lists fewer bonds than ``fulfillBonds`` found, and finally writes
    ``<job>.job``, a shell script that runs ``www_iMolecule.py``. dos2unix is
    run (via ``os.system``) on every file written.

    fileList: names of .gjf files; the last four characters are dropped to
        form the base name.
    path: directory holding the inputs ('' means relative to the cwd).
    jobName: job directory / base name; '' uses ``<base>_1_confSearch``.

    NOTE(review): the string templates are reproduced exactly as they appear
    in this file, including where they contain no line breaks -- confirm the
    intended layout of the written files.
    """
    for tmp_file in fileList:
        # Parser state: each flag becomes 1 once its section has been found.
        gjfCommand_done = -1
        gjfMulti_done = -1
        geomDone = -1
        lineStart = 0
        lineEnd = 0
        # A file name containing "ts" (either case per letter) marks a TS job.
        if re.search('[Tt][sS]', tmp_file):
            self.setTS(True)
        else:
            self.setTS(False)
        if jobName == '':
            tmp_dir = tmp_file[0:-4] + '_1_confSearch'
        else:
            tmp_dir = jobName
        # NOTE(review): fr is never closed for the input .gjf file.
        if path == '':
            tmp_dir_path = tmp_dir
            fr = file(tmp_file, 'r')
        else:
            tmp_dir_path = os.path.join(path, tmp_dir)
            fr = file(os.path.join(path, tmp_file), 'r')
        tmp_lines = fr.readlines()
        for (lineNum, tmp_line) in enumerate(tmp_lines):
            if gjfCommand_done != 1:
                tmp_m = pattern_gjfCommand.match(tmp_line)
                if tmp_m:
                    gjfCommand_done = 1
            elif gjfMulti_done != 1:
                tmp_m = pattern_gjfMulti.match(tmp_line)
                if tmp_m:
                    lineStart = lineNum
                    # NOTE(review): multi stays unbound if this never matches.
                    multi = int(tmp_m.group(2))
                    geomDone = 0
                    gjfMulti_done = 1
            elif geomDone != 1:
                tmp_m = pattern_blankLine.match(tmp_line)
                if tmp_m:
                    lineEnd = lineNum
                    geomDone = 1
        # NOTE(review): the directory is checked/created as tmp_dir (relative
        # to the cwd) while files are written under tmp_dir_path -- these
        # differ when path != ''.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        os.mkdir(tmp_dir)
        # --- <job>.gjf: header, route line, title, multiplicity + geometry ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir+'.gjf'), 'w')
        fw.write( '''%mem=28GB %nprocshared=12 ''')
        # Unrestricted (ub3lyp) route whenever the multiplicity is not 1.
        if self._TS == False:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt freq')
        else:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt=(TS, calcfc) freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt=(TS, calcfc) freq')
        if self._dispersionD3 == False:
            fw.write('\n')
        else:
            fw.write(' EmpiricalDispersion=GD3\n')
        fw.write(''' using ub3lyp/6-31G(d) to scan ''')
        fw.write(''.join(tmp_lines[lineStart: lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.xyz: atom count, title, geometry lines ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir+'.xyz'), 'w')
        fw.write(str(lineEnd - lineStart - 1) + '\n')
        fw.write(tmp_file[0:-4] + '\n')
        fw.write(''.join(tmp_lines[lineStart+1: lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.sdf: converted from the .xyz by the external babel.exe ---
        os.system('E:\\softwares\\OpenBabel-2.3.72\\babel.exe -ixyz ' + os.path.join(tmp_dir_path, tmp_dir+'.xyz') + ' -osdf ' + os.path.join(tmp_dir_path, tmp_dir+'.sdf') + ' > log_dos2unix.txt 2>&1')
        openBabelWrong = False
        tmp_molecule = chem.molecule(geom=tmp_lines[lineStart+1: lineEnd])
        tmp_molecule.fulfillBonds()
        if os.path.exists(os.path.join(tmp_dir_path, tmp_dir+'.sdf')):
            fr = file(os.path.join(tmp_dir_path, tmp_dir+'.sdf'), 'r')
            tmp2_lines = fr.readlines()
            fr.close()
            # First two integers on the fourth line of the .sdf; the second
            # one is compared with the bond count below.
            tmp_num = map(int, tmp2_lines[3].split()[0:2])
            if tmp_num[1] < len(tmp_molecule.bonds):
                openBabelWrong = True
            elif tmp_num[1] > len(tmp_molecule.bonds):
                print 'Error! Open babel bond number > len(tmp_molecule.bonds)', tmp_dir
        else:
            # No .sdf was produced at all.
            openBabelWrong = True
        if openBabelWrong:
            print 'Warning! Open babel transformation bug! Chem used to regenerate the bonds!', tmp_dir
            tmp_molecule.generateSDFFile(directory=tmp_dir_path, fileName=tmp_dir+'.sdf', moleculeLabel=tmp_file[0:-4])
            # fw = file(os.path.join(tmp_dir_path, tmp_dir+'.sdf'), 'w')
            # tmp2_lines[3] = ''.join([' ', '%2d'%tmp_num[0], ' ', '%2d'%(len(tmp_molecule.bonds)), tmp2_lines[3][6:]])
            # fw.writelines(tmp2_lines[0:3+tmp_num[0]+1])
            # for tmp_bond in tmp_molecule.bonds:
            #     fw.write(''.join([' ', '%2d'%tmp_bond.atom1.label, ' ', '%2d'%tmp_bond.atom2.label, ' ', '%2d'%tmp_bond.bondOrder, ' 0 0 0 0\n']))
            # fw.write(
            # '''M END
            # $$$$
            # ''')
            # fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + os.path.join(tmp_dir_path, tmp_dir+'.sdf') + ' > log_dos2unix.txt 2>&1')
        # --- <job>.job: shell script that launches the conformer search ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir+'.job'), 'w')
        fw.write(
            # cce cluster
'''#!/bin/sh cd ''' + self.jobLocation + '/' + tmp_dir + ''' python /home/apps/Frog2/www_iMolecule.py -osmi ''' + tmp_dir + '''.smiles -logFile ''' + tmp_dir + '''.log -ounsolved Unsolved.data -wrkPath . 
-eini 100.0 -mcsteps 100 -emax 50 -i3Dsdf ''' + tmp_dir + '''.sdf -osdf out_''' + tmp_dir + '''.sdf -unambiguate -mini -multi 250 &>> log_''' + tmp_dir + '''.txt ''')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
coordinate_done = 0 if coordinate_done != 1: tmp_m = pattern_endline.match(tmp_line) if tmp_m: if i > geom_start: geom_end = i coordinate_done = 1 tmp_m = pattern_energy.match(tmp_line) if tmp_m: tmp_energy = float(tmp_m.group(1)) tmp_m = pattern_optimized.match(tmp_line) if tmp_m: energy.append(tmp_energy) if (geom_end - geom_start) != atomsNum: print 'Error! The number of atoms is not correct!' tmp_mole = chem.molecule() tmp_mole.getLogGeom( tmp_lines[geom_start:geom_start + atomsNum]) tmp_mole.changeLabel(tmp_file2[0:-4]) geoms.append(tmp_mole) dihedral_done = 0 standard_done = 1 coordinate_done = 1 energy_done = 1 optimized_done = 1 elif dihedral_done != 1: tmp_m = pattern_dihedral.match(tmp_line) if tmp_m: tmp_dihedral = float(tmp_m.group(1)) if len(dihedral) > 0: if tmp_dihedral < dihedral[-1]:
def genBalloonInputFromGjf(self, fileList, path='', jobName=''):
    """Write conformer-search job files (balloon) for each .gjf in fileList.

    For every input file this: sets the TS flag from the file name, parses
    the command / multiplicity / geometry sections, recreates the job
    directory, writes ``<job>.gjf`` and ``<job>.xyz``, converts the .xyz to
    ``<job>.sdf`` with the OpenBabel executable, rewrites the .sdf bond table
    from ``chem.molecule`` bonds when the converted file lists fewer bonds
    than ``fulfillBonds`` found, and finally writes ``<job>.job``, a shell
    script that runs the balloon executable. dos2unix is run (via
    ``os.system``) on every file written.

    fileList: names of .gjf files; the last four characters are dropped to
        form the base name.
    path: directory holding the inputs ('' means relative to the cwd).
    jobName: job directory / base name; '' uses ``<base>_1_confSearch``.

    NOTE(review): the string templates are reproduced exactly as they appear
    in this file, including where they contain no line breaks -- confirm the
    intended layout of the written files.
    """
    for tmp_file in fileList:
        # Parser state: each flag becomes 1 once its section has been found.
        gjfCommand_done = -1
        gjfMulti_done = -1
        geomDone = -1
        lineStart = 0
        lineEnd = 0
        # A file name containing "ts" (either case per letter) marks a TS job.
        if re.search('[Tt][sS]', tmp_file):
            self.setTS(True)
        else:
            self.setTS(False)
        if jobName == '':
            tmp_dir = tmp_file[0:-4] + '_1_confSearch'
        else:
            tmp_dir = jobName
        # NOTE(review): fr is never closed for the input .gjf file.
        if path == '':
            tmp_dir_path = tmp_dir
            fr = file(tmp_file, 'r')
        else:
            tmp_dir_path = os.path.join(path, tmp_dir)
            fr = file(os.path.join(path, tmp_file), 'r')
        tmp_lines = fr.readlines()
        for (lineNum, tmp_line) in enumerate(tmp_lines):
            if gjfCommand_done != 1:
                tmp_m = pattern_gjfCommand.match(tmp_line)
                if tmp_m:
                    gjfCommand_done = 1
            elif gjfMulti_done != 1:
                tmp_m = pattern_gjfMulti.match(tmp_line)
                if tmp_m:
                    lineStart = lineNum
                    # NOTE(review): multi stays unbound if this never matches.
                    multi = int(tmp_m.group(2))
                    geomDone = 0
                    gjfMulti_done = 1
            elif geomDone != 1:
                tmp_m = pattern_blankLine.match(tmp_line)
                if tmp_m:
                    lineEnd = lineNum
                    geomDone = 1
        # NOTE(review): the directory is checked/created as tmp_dir (relative
        # to the cwd) while files are written under tmp_dir_path -- these
        # differ when path != ''.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        os.mkdir(tmp_dir)
        # --- <job>.gjf: header, route line, title, multiplicity + geometry ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir + '.gjf'), 'w')
        fw.write('''%mem=28GB %nprocshared=12 ''')
        # Unrestricted (ub3lyp) route whenever the multiplicity is not 1.
        if self._TS == False:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt freq')
        else:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt=(TS, calcfc) freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt=(TS, calcfc) freq')
        if self._dispersionD3 == False:
            fw.write('\n')
        else:
            fw.write(' EmpiricalDispersion=GD3\n')
        fw.write(''' using ub3lyp/6-31G(d) to scan ''')
        fw.write(''.join(tmp_lines[lineStart:lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.xyz: atom count, title, geometry lines ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir + '.xyz'), 'w')
        fw.write(str(lineEnd - lineStart - 1) + '\n')
        fw.write(tmp_file[0:-4] + '\n')
        fw.write(''.join(tmp_lines[lineStart + 1:lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.sdf: converted from the .xyz by the external babel.exe ---
        os.system('E:\\softwares\\OpenBabel-2.3.72\\babel.exe -ixyz ' + os.path.join(tmp_dir_path, tmp_dir + '.xyz') + ' -osdf ' + os.path.join(tmp_dir_path, tmp_dir + '.sdf') + ' > log_dos2unix.txt 2>&1')
        # NOTE(review): unlike the Frog variant of this routine, there is no
        # check that the .sdf exists before it is opened.
        fr = file(os.path.join(tmp_dir_path, tmp_dir + '.sdf'), 'r')
        tmp2_lines = fr.readlines()
        fr.close()
        # First two integers on the fourth line of the .sdf; the second one
        # is compared with the bond count below.
        tmp_num = map(int, tmp2_lines[3].split()[0:2])
        tmp_molecule = chem.molecule(geom=tmp_lines[lineStart + 1:lineEnd])
        tmp_molecule.fulfillBonds()
        if tmp_num[1] < len(tmp_molecule.bonds):
            print 'Warning! Open babel transformation bug! Chem used to regenerate the bonds!', tmp_dir
            # Rewrite the .sdf: patched counts line, the original header and
            # atom lines, then one bond line per chem bond.
            fw = file(os.path.join(tmp_dir_path, tmp_dir + '.sdf'), 'w')
            tmp2_lines[3] = ''.join([
                ' ', '%2d' % tmp_num[0], ' ', '%2d' % (len(tmp_molecule.bonds)), tmp2_lines[3][6:]
            ])
            fw.writelines(tmp2_lines[0:3 + tmp_num[0] + 1])
            for tmp_bond in tmp_molecule.bonds:
                fw.write(''.join([
                    ' ', '%2d' % tmp_bond.atom1.label, ' ', '%2d' % tmp_bond.atom2.label, ' ', '%2d' % tmp_bond.bondOrder, ' 0 0 0 0\n'
                ]))
            fw.write('''M END $$$$ ''')
            fw.close()
        elif tmp_num[1] > len(tmp_molecule.bonds):
            print 'Error! Open babel bond number > len(tmp_molecule.bonds)', tmp_dir
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + os.path.join(tmp_dir_path, tmp_dir + '.sdf') + ' > log_dos2unix.txt 2>&1')
        # --- <job>.job: shell script that launches the conformer search ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir + '.job'), 'w')
        fw.write(
            # linux bash
            '''#!/bin/sh cd ''' + self.jobLocation + '/' + tmp_dir + ''' /home/apps/balloon/balloon -f /home/hetanjin/apps/balloon/MMFF94.mff --nconfs 300 --stereo --addConformerNumberToName ''' + tmp_dir + '''.sdf out_''' + tmp_dir + '''.sdf &>> log_''' + tmp_dir + '''.txt ''')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
def genFrogInputFromGjf(self, fileList, path='', jobName=''):
    """Write conformer-search job files (Frog2) for each .gjf file in fileList.

    For every input file this: sets the TS flag from the file name, parses
    the command / multiplicity / geometry sections, recreates the job
    directory, writes ``<job>.gjf`` and ``<job>.xyz``, converts the .xyz to
    ``<job>.sdf`` with the OpenBabel executable, regenerates the .sdf through
    ``chem.molecule.generateSDFFile`` when the converted file is missing or
    lists fewer bonds than ``fulfillBonds`` found, and finally writes
    ``<job>.job``, a shell script that runs ``www_iMolecule.py``. dos2unix is
    run (via ``os.system``) on every file written.

    fileList: names of .gjf files; the last four characters are dropped to
        form the base name.
    path: directory holding the inputs ('' means relative to the cwd).
    jobName: job directory / base name; '' uses ``<base>_1_confSearch``.

    NOTE(review): the string templates are reproduced exactly as they appear
    in this file, including where they contain no line breaks -- confirm the
    intended layout of the written files.
    """
    for tmp_file in fileList:
        # Parser state: each flag becomes 1 once its section has been found.
        gjfCommand_done = -1
        gjfMulti_done = -1
        geomDone = -1
        lineStart = 0
        lineEnd = 0
        # A file name containing "ts" (either case per letter) marks a TS job.
        if re.search('[Tt][sS]', tmp_file):
            self.setTS(True)
        else:
            self.setTS(False)
        if jobName == '':
            tmp_dir = tmp_file[0:-4] + '_1_confSearch'
        else:
            tmp_dir = jobName
        # NOTE(review): fr is never closed for the input .gjf file.
        if path == '':
            tmp_dir_path = tmp_dir
            fr = file(tmp_file, 'r')
        else:
            tmp_dir_path = os.path.join(path, tmp_dir)
            fr = file(os.path.join(path, tmp_file), 'r')
        tmp_lines = fr.readlines()
        for (lineNum, tmp_line) in enumerate(tmp_lines):
            if gjfCommand_done != 1:
                tmp_m = pattern_gjfCommand.match(tmp_line)
                if tmp_m:
                    gjfCommand_done = 1
            elif gjfMulti_done != 1:
                tmp_m = pattern_gjfMulti.match(tmp_line)
                if tmp_m:
                    lineStart = lineNum
                    # NOTE(review): multi stays unbound if this never matches.
                    multi = int(tmp_m.group(2))
                    geomDone = 0
                    gjfMulti_done = 1
            elif geomDone != 1:
                tmp_m = pattern_blankLine.match(tmp_line)
                if tmp_m:
                    lineEnd = lineNum
                    geomDone = 1
        # NOTE(review): the directory is checked/created as tmp_dir (relative
        # to the cwd) while files are written under tmp_dir_path -- these
        # differ when path != ''.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        os.mkdir(tmp_dir)
        # --- <job>.gjf: header, route line, title, multiplicity + geometry ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir + '.gjf'), 'w')
        fw.write('''%mem=28GB %nprocshared=12 ''')
        # Unrestricted (ub3lyp) route whenever the multiplicity is not 1.
        if self._TS == False:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt freq')
        else:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt=(TS, calcfc) freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt=(TS, calcfc) freq')
        if self._dispersionD3 == False:
            fw.write('\n')
        else:
            fw.write(' EmpiricalDispersion=GD3\n')
        fw.write(''' using ub3lyp/6-31G(d) to scan ''')
        fw.write(''.join(tmp_lines[lineStart:lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.xyz: atom count, title, geometry lines ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir + '.xyz'), 'w')
        fw.write(str(lineEnd - lineStart - 1) + '\n')
        fw.write(tmp_file[0:-4] + '\n')
        fw.write(''.join(tmp_lines[lineStart + 1:lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.sdf: converted from the .xyz by the external babel.exe ---
        os.system('E:\\softwares\\OpenBabel-2.3.72\\babel.exe -ixyz ' + os.path.join(tmp_dir_path, tmp_dir + '.xyz') + ' -osdf ' + os.path.join(tmp_dir_path, tmp_dir + '.sdf') + ' > log_dos2unix.txt 2>&1')
        openBabelWrong = False
        tmp_molecule = chem.molecule(geom=tmp_lines[lineStart + 1:lineEnd])
        tmp_molecule.fulfillBonds()
        if os.path.exists(os.path.join(tmp_dir_path, tmp_dir + '.sdf')):
            fr = file(os.path.join(tmp_dir_path, tmp_dir + '.sdf'), 'r')
            tmp2_lines = fr.readlines()
            fr.close()
            # First two integers on the fourth line of the .sdf; the second
            # one is compared with the bond count below.
            tmp_num = map(int, tmp2_lines[3].split()[0:2])
            if tmp_num[1] < len(tmp_molecule.bonds):
                openBabelWrong = True
            elif tmp_num[1] > len(tmp_molecule.bonds):
                print 'Error! Open babel bond number > len(tmp_molecule.bonds)', tmp_dir
        else:
            # No .sdf was produced at all.
            openBabelWrong = True
        if openBabelWrong:
            print 'Warning! Open babel transformation bug! Chem used to regenerate the bonds!', tmp_dir
            tmp_molecule.generateSDFFile(directory=tmp_dir_path, fileName=tmp_dir + '.sdf', moleculeLabel=tmp_file[0:-4])
            # fw = file(os.path.join(tmp_dir_path, tmp_dir+'.sdf'), 'w')
            # tmp2_lines[3] = ''.join([' ', '%2d'%tmp_num[0], ' ', '%2d'%(len(tmp_molecule.bonds)), tmp2_lines[3][6:]])
            # fw.writelines(tmp2_lines[0:3+tmp_num[0]+1])
            # for tmp_bond in tmp_molecule.bonds:
            #     fw.write(''.join([' ', '%2d'%tmp_bond.atom1.label, ' ', '%2d'%tmp_bond.atom2.label, ' ', '%2d'%tmp_bond.bondOrder, ' 0 0 0 0\n']))
            # fw.write(
            # '''M END
            # $$$$
            # ''')
            # fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + os.path.join(tmp_dir_path, tmp_dir + '.sdf') + ' > log_dos2unix.txt 2>&1')
        # --- <job>.job: shell script that launches the conformer search ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir + '.job'), 'w')
        fw.write(
            # cce cluster
'''#!/bin/sh cd ''' + self.jobLocation + '/' + tmp_dir + ''' python /home/apps/Frog2/www_iMolecule.py -osmi ''' + tmp_dir + '''.smiles -logFile ''' + tmp_dir + '''.log -ounsolved Unsolved.data -wrkPath . 
-eini 100.0 -mcsteps 100 -emax 50 -i3Dsdf ''' + tmp_dir + '''.sdf -osdf out_''' + tmp_dir + '''.sdf -unambiguate -mini -multi 250 &>> log_''' + tmp_dir + '''.txt ''')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
def gjfTodat(self, directory, fileName):
    """Convert a .gjf file into a .dat file and return the .dat file name.

    Reads ``directory/fileName``, locates the atom block (the lines after the
    first match of ``self.pattern_multi``) and the connectivity block, builds
    a ``chem.molecule`` from them and writes ``fileName[7:-4] + '.dat'`` into
    ``directory``. The output holds the name, the literal ``ANGS``, the atom
    count, the geometry text from
    ``geometryExtractor.mominertGeometryExtractor`` and one
    ``singleGroupInfo()`` entry per rotation from ``molecule1.getRotations()``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost -- confirm the ``connect_done`` branch against
    the original file.
    """
    atomsNum = 0
    lineNum = -1
    atom_start = 0
    connect_start = 0
    geom = ''
    # Parser state: each flag becomes 1 once its section has been consumed.
    multi_done = 0
    atom_done = 0
    connect_done = 0
    fr = file(directory + '/' + fileName, 'r')
    tmp_lines = fr.readlines()
    for tmp_line in tmp_lines:
        lineNum += 1
        if multi_done != 1:
            tmp_m = self.pattern_multi.match(tmp_line)
            if tmp_m:
                # Atoms start on the line right after the pattern_multi match.
                atom_start = lineNum + 1
                multi_done = 1
        elif atom_done != 1:
            tmp_m = self.pattern_atom.match(tmp_line)
            if tmp_m:
                atomsNum += 1
            else:
                # First non-atom line ends the atom block.
                atom_done = 1
        elif connect_done != 1:
            tmp_m = self.pattern_connnect.match(tmp_line)
            if tmp_m:
                tmp_num = int(tmp_m.group(1))
                # connect_done counts down (-1, -2, ...) while entries arrive
                # in order 1, 2, ...; the sum is 1 only for the expected entry.
                if (connect_done + tmp_num) == 1:
                    connect_done -= 1
                    if tmp_num == 1:
                        connect_start = lineNum
                    if tmp_num == atomsNum:
                        connect_done = 1
            else:
                # Restart the sequence check.
                connect_done = 0
    if connect_start != 0:
        # print 'connectivity info extracted successfully!'
        pass
    else:
        print 'connectivity info extracted not successfully!'
    geom = geometryExtractor.mominertGeometryExtractor(
        tmp_lines[atom_start:atom_start + atomsNum])
    molecule1 = chem.molecule(
        geom=tmp_lines[atom_start:atom_start + atomsNum],
        connect=tmp_lines[connect_start:connect_start + atomsNum],
        atomsNum=atomsNum)
    if __OOSYSTEM__ == True:
        molecule1.fulfillBonds()
    # molecule1.displayBonds()
    # NOTE(review): assumed to run regardless of __OOSYSTEM__ because
    # ``rotations`` is used unconditionally below -- confirm.
    rotations = molecule1.getRotations()
    # NOTE(review): fr is only closed here; an exception above leaks the handle.
    fr.close()
    # Output name drops the first 7 and the last 4 characters of fileName.
    fw = file(directory + '/' + fileName[7:-4] + '.dat', 'w')
    fw.write(fileName[7:-4] + '\n' + 'ANGS\n' + str(atomsNum) + '\n' + geom)
    fw.write(''.join(tmp_rot.singleGroupInfo() for tmp_rot in rotations))
    # for tmp_rot in rotations:
    #     fw.write(tmp_rot.group1Info())
    fw.write('0 0\n\n\n\n\n\n\n')
    fw.close()
    return fileName[7:-4] + '.dat'
def genBalloonInputFromGjf(self, fileList, path='', jobName=''):
    """Write conformer-search job files (balloon) for each .gjf in fileList.

    For every input file this: sets the TS flag from the file name, parses
    the command / multiplicity / geometry sections, recreates the job
    directory, writes ``<job>.gjf`` and ``<job>.xyz``, converts the .xyz to
    ``<job>.sdf`` with the OpenBabel executable, rewrites the .sdf bond table
    from ``chem.molecule`` bonds when the converted file lists fewer bonds
    than ``fulfillBonds`` found, and finally writes ``<job>.job``, a shell
    script that runs the balloon executable. dos2unix is run (via
    ``os.system``) on every file written.

    fileList: names of .gjf files; the last four characters are dropped to
        form the base name.
    path: directory holding the inputs ('' means relative to the cwd).
    jobName: job directory / base name; '' uses ``<base>_1_confSearch``.

    NOTE(review): the string templates are reproduced exactly as they appear
    in this file, including where they contain no line breaks -- confirm the
    intended layout of the written files.
    """
    for tmp_file in fileList:
        # Parser state: each flag becomes 1 once its section has been found.
        gjfCommand_done = -1
        gjfMulti_done = -1
        geomDone = -1
        lineStart = 0
        lineEnd = 0
        # A file name containing "ts" (either case per letter) marks a TS job.
        if re.search('[Tt][sS]', tmp_file):
            self.setTS(True)
        else:
            self.setTS(False)
        if jobName == '':
            tmp_dir = tmp_file[0:-4] + '_1_confSearch'
        else:
            tmp_dir = jobName
        # NOTE(review): fr is never closed for the input .gjf file.
        if path == '':
            tmp_dir_path = tmp_dir
            fr = file(tmp_file, 'r')
        else:
            tmp_dir_path = os.path.join(path, tmp_dir)
            fr = file(os.path.join(path, tmp_file), 'r')
        tmp_lines = fr.readlines()
        for (lineNum, tmp_line) in enumerate(tmp_lines):
            if gjfCommand_done != 1:
                tmp_m = pattern_gjfCommand.match(tmp_line)
                if tmp_m:
                    gjfCommand_done = 1
            elif gjfMulti_done != 1:
                tmp_m = pattern_gjfMulti.match(tmp_line)
                if tmp_m:
                    lineStart = lineNum
                    # NOTE(review): multi stays unbound if this never matches.
                    multi = int(tmp_m.group(2))
                    geomDone = 0
                    gjfMulti_done = 1
            elif geomDone != 1:
                tmp_m = pattern_blankLine.match(tmp_line)
                if tmp_m:
                    lineEnd = lineNum
                    geomDone = 1
        # NOTE(review): the directory is checked/created as tmp_dir (relative
        # to the cwd) while files are written under tmp_dir_path -- these
        # differ when path != ''.
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        os.mkdir(tmp_dir)
        # --- <job>.gjf: header, route line, title, multiplicity + geometry ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir+'.gjf'), 'w')
        fw.write( '''%mem=28GB %nprocshared=12 ''')
        # Unrestricted (ub3lyp) route whenever the multiplicity is not 1.
        if self._TS == False:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt freq')
        else:
            if multi != 1:
                fw.write('#p ub3lyp/cbsb7 opt=(TS, calcfc) freq')
            else:
                fw.write('#p b3lyp/cbsb7 opt=(TS, calcfc) freq')
        if self._dispersionD3 == False:
            fw.write('\n')
        else:
            fw.write(' EmpiricalDispersion=GD3\n')
        fw.write(''' using ub3lyp/6-31G(d) to scan ''')
        fw.write(''.join(tmp_lines[lineStart: lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.xyz: atom count, title, geometry lines ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir+'.xyz'), 'w')
        fw.write(str(lineEnd - lineStart - 1) + '\n')
        fw.write(tmp_file[0:-4] + '\n')
        fw.write(''.join(tmp_lines[lineStart+1: lineEnd]) + '\n\n\n\n\n')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
        # --- <job>.sdf: converted from the .xyz by the external babel.exe ---
        os.system('E:\\softwares\\OpenBabel-2.3.72\\babel.exe -ixyz ' + os.path.join(tmp_dir_path, tmp_dir+'.xyz') + ' -osdf ' + os.path.join(tmp_dir_path, tmp_dir+'.sdf') + ' > log_dos2unix.txt 2>&1')
        # NOTE(review): unlike the Frog variant of this routine, there is no
        # check that the .sdf exists before it is opened.
        fr = file(os.path.join(tmp_dir_path, tmp_dir+'.sdf'), 'r')
        tmp2_lines = fr.readlines()
        fr.close()
        # First two integers on the fourth line of the .sdf; the second one
        # is compared with the bond count below.
        tmp_num = map(int, tmp2_lines[3].split()[0:2])
        tmp_molecule = chem.molecule(geom=tmp_lines[lineStart+1: lineEnd])
        tmp_molecule.fulfillBonds()
        if tmp_num[1] < len(tmp_molecule.bonds):
            print 'Warning! Open babel transformation bug! Chem used to regenerate the bonds!', tmp_dir
            # Rewrite the .sdf: patched counts line, the original header and
            # atom lines, then one bond line per chem bond.
            fw = file(os.path.join(tmp_dir_path, tmp_dir+'.sdf'), 'w')
            tmp2_lines[3] = ''.join([' ', '%2d'%tmp_num[0], ' ', '%2d'%(len(tmp_molecule.bonds)), tmp2_lines[3][6:]])
            fw.writelines(tmp2_lines[0:3+tmp_num[0]+1])
            for tmp_bond in tmp_molecule.bonds:
                fw.write(''.join([' ', '%2d'%tmp_bond.atom1.label, ' ', '%2d'%tmp_bond.atom2.label, ' ', '%2d'%tmp_bond.bondOrder, ' 0 0 0 0\n']))
            fw.write( '''M END $$$$ ''')
            fw.close()
        elif tmp_num[1] > len(tmp_molecule.bonds):
            print 'Error! Open babel bond number > len(tmp_molecule.bonds)', tmp_dir
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + os.path.join(tmp_dir_path, tmp_dir+'.sdf') + ' > log_dos2unix.txt 2>&1')
        # --- <job>.job: shell script that launches the conformer search ---
        fw = file(os.path.join(tmp_dir_path, tmp_dir+'.job'), 'w')
        fw.write(
            # linux bash
            '''#!/bin/sh cd ''' + self.jobLocation + '/' + tmp_dir + ''' /home/apps/balloon/balloon -f /home/hetanjin/apps/balloon/MMFF94.mff --nconfs 300 --stereo --addConformerNumberToName ''' + tmp_dir + '''.sdf out_''' + tmp_dir + '''.sdf &>> log_''' + tmp_dir + '''.txt ''')
        fw.close()
        os.system("..\\dos2unix-6.0.6-win64\\bin\\dos2unix.exe " + fw.name + ' > log_dos2unix.txt 2>&1')
# Script prologue: imports, module-level helper objects, the regular
# expressions used to classify file names / log lines, and the containers
# that the rest of the script fills in.
import xlrd
import re
import numpy as np
import os
import xlsxwriter
import phaseSpaceIntegral
import phys
import chem
import textExtractor

# constant
phys1 = phys.phys()
molecule1 = chem.molecule()

# File names containing "scan" / "opt" after a "<C/H/O/digits>_<digits>_"
# prefix; group 1 captures that prefix.
pattern_scanFileName = re.compile('^([CHO0-9]+_[0-9]+)_.*scan.*$')
pattern_geomFileName = re.compile('^([CHO0-9]+_[0-9]+)_.*opt.*$')
# Route line of a frequency job that re-reads geometry/guess/SCRF settings.
pattern_freqCom = re.compile('^.*#[PN]? Geom=AllCheck Guess=TCheck SCRF=Check.*Freq.*$')
# note that Input orientation should be used in MESMER rather than standard orientation when hessian used
pattern_input = re.compile('^.*Input orientation:.*$')
# pattern_standard = re.compile('^.*Standard orientation:.*$')
# A line containing a long run of dashes.
pattern_endline = re.compile('^.*---------------------------------------------------------------------.*$')

# variables
# 298.15 followed by 300, 400, ..., 2500. The list concatenation relies on
# range() returning a list (Python 2). Presumably kelvin -- confirm.
temperature = [298.15] + range(300,2600,100)

# target variables
HR_dict = {}
mole_dict = {}

# read data and get integral
# Opened relative to the current working directory.
wbr = xlrd.open_workbook('HR_fit.xls')
def readGjfFile(self, gjfFile, moleculeLabel=''):
    """Parse an open .gjf file object into a validated ``self.mole``.

    The file is scanned for, in order: a line matching
    ``self.pattern_gjfCommand``, a line matching ``self.pattern_gjfMulti``
    (group 2 is the spin multiplicity) and the next line matching
    ``self.pattern_blankLine``. The lines between the last two form the
    geometry.

    Args:
        gjfFile: Open file-like object; it is always closed by this method,
            including when parsing fails.
        moleculeLabel: Label for the molecule; '' uses the computed formula.

    Returns:
        The populated ``chem.molecule`` (also stored as ``self.mole``).

    Raises:
        common_api.readGjfFileError: no geometry block was found, or
            ``getGjfGeom`` / ``fulfillBonds`` failed.
        common_api.beyondSpeciesRangeError: the formula does not match
            ``self.pattern_formula``.
        common_api.carbonLessThan3Error: fewer than three carbon atoms.
        common_api.ringExistingError: the structure contains rings.
    """
    # Parser state: each flag becomes 1 once its section has been found.
    gjfCommand_done = -1
    gjfMulti_done = -1
    geomDone = -1
    tmp_multi = 1
    tmp_geom = ''
    lineStart = 0

    # Close the caller's handle on every path; previously it stayed open
    # whenever the input was rejected.
    try:
        tmp_lines = gjfFile.readlines()
    finally:
        gjfFile.close()

    for (lineNum, tmp_line) in enumerate(tmp_lines):
        if gjfCommand_done != 1:
            if self.pattern_gjfCommand.match(tmp_line):
                gjfCommand_done = 1
        elif gjfMulti_done != 1:
            tmp_m = self.pattern_gjfMulti.match(tmp_line)
            if tmp_m:
                lineStart = lineNum
                tmp_multi = int(tmp_m.group(2))
                geomDone = 0
                gjfMulti_done = 1
        elif geomDone != 1:
            if self.pattern_blankLine.match(tmp_line):
                tmp_geom = tmp_lines[lineStart + 1:lineNum]
                geomDone = 1
                # Nothing after the geometry block is inspected.
                break

    if geomDone != 1:
        print('Sorry! The input file is not a standard .gjf file!')
        raise common_api.readGjfFileError

    self.mole = chem.molecule()
    try:
        self.mole.getGjfGeom(tmp_geom)
    except Exception:
        # ``Exception`` (not a bare except) so KeyboardInterrupt/SystemExit
        # are not converted into a format error.
        print('Sorry! The input file is not a standard .gjf file!')
        raise common_api.readGjfFileError
    self.mole.setSpinMultiplicity(tmp_multi)
    self.mole.calcFormula()

    tmp_m = self.pattern_formula.match(self.mole.formula)
    if not tmp_m:
        print('Sorry! Not an alkane or alkene or their radical! Not supported temporarily!')
        raise common_api.beyondSpeciesRangeError
    # An empty count group means a single carbon atom.
    if tmp_m.group(1) == '':
        carbonNumber = 1
    else:
        carbonNumber = int(tmp_m.group(1))
    if carbonNumber < 3:
        print('Please submit a species with carbon atoms >= 3!')
        raise common_api.carbonLessThan3Error

    if moleculeLabel == '':
        self.mole.setLabel(self.mole.formula)
    else:
        self.mole.setLabel(moleculeLabel)
    try:
        self.mole.fulfillBonds()
    except Exception:
        print('mole.fulfillBonds() failed!')
        raise common_api.readGjfFileError
    if self.mole.existRings():
        print('Sorry! Structure with rings is not supported temporarily!')
        raise common_api.ringExistingError
    return self.mole
tmp_m = pattern_D3Energy.match(tmp_line) if tmp_m: tmp_D3energy = float(tmp_m.group(1)) SPEnergy_done = 0 tmp_m = pattern_SPEnergy.match(tmp_line) if tmp_m: tmp_energy = float(tmp_m.group(1)) SPEnergy.append(tmp_energy - tmp_D3energy) if np.abs(tmp_energy) > 1e-12 and SPEnergy_done == 0: SPEnergy_done = 1 if SPEnergy_done != 1: print 'Error! SP energy file error!' + tmp_file energyFile.close() tmp_mole = chem.molecule() tmp_mole.getLogGeom(geom[-1]) tmp_mole.calcFormula() formula.append(tmp_mole.formula) atomsNum.append(tmp_mole.getAtomsNum()) name.append(tmp_name) speciesNum += 1 ########################################### # write info to excel ########################################### wb = xlsxwriter.Workbook('database.xlsx') sh = wb.add_worksheet('speciesInfo') tmp_row = 0
def setUp(self):
    """Create the fixture: an assembly, a molecule in it and one sp3 carbon.

    The objects are stored on the test case as ``assy``, ``mol`` and ``a``.
    """
    assy = assembly.assembly(None)
    self.assy = assy

    mol = chem.molecule(assy)
    self.mol = mol

    # Single carbon atom at the origin, owned by ``mol``.
    origin = chem.V(0.0, 0.0, 0.0)
    carbon = chem.Atom("C", origin, mol)
    self.a = carbon
    self.a.set_atomtype("sp3", True)