def map(self, mol, source):
    """Import molecule into MESS.DB.

    Stores the molecule's InChIKey on ``self.inchikey``, makes sure the
    molecule directory exists and, when ``self.check()`` is falsy, writes
    the InChI file, a PNG depiction (only if none exists yet) and touches
    the ``.log``, ``.notes`` and ``.sources.tsv`` files. The source
    bookkeeping is updated on every call.

    Args:
        mol: Molecule object exposing ``write()`` and ``title``
            (presumably a pybel Molecule -- confirm against the caller).
        source: Source object exposing ``update_source_tsv()``,
            ``update_molecule_source_query()`` and ``dirname``.

    Yields:
        The value returned by ``source.update_molecule_source_query``,
        then the value returned by ``self.insert_molecule_query``, then
        every ``(query, values)`` pair from
        ``self.get_insert_moldata_queries`` and
        ``self.get_openbabel_property_queries``.
    """
    # setup local variables
    self.inchikey = mol.write('inchikey').rstrip()
    inchikey_dir = get_inchikey_dir(self.inchikey)
    inchikey_basename = os.path.join(inchikey_dir, self.inchikey)
    # the title is captured before it is blanked below
    identifier = unicode(mol.title, 'utf-8', 'replace')
    # setup directory
    setup_dir(inchikey_dir)
    if not self.check():
        mol.title = b''
        mol.write('inchi', inchikey_basename + '.inchi', overwrite=True)
        if not os.path.exists(inchikey_basename + '.png'):
            mol.write('_png2', inchikey_basename + '.png')
        touch(inchikey_basename + '.log')
        touch(inchikey_basename + '.notes')
        # inchikey_basename already contains inchikey_dir, so it is extended
        # directly rather than being joined onto inchikey_dir a second time
        # (which duplicated the directory whenever inchikey_dir was relative).
        touch(inchikey_basename + '.sources.tsv')
        self.log_all.info('%s molecule directory initialized', self.inchikey)
    source.update_source_tsv(self.inchikey, identifier)
    yield source.update_molecule_source_query(self.inchikey, identifier)
    yield self.insert_molecule_query(self.inchikey, mol)
    for query, values in self.get_insert_moldata_queries(
            self.inchikey, mol,
            description='molecule data from %s input' % source.dirname):
        yield query, values
    for query, values in self.get_openbabel_property_queries(self.inchikey,
                                                             mol):
        yield query, values
def map(self, mol, source):
    """Import molecule into MESS.DB.

    Stores the molecule's InChIKey on ``self.inchikey`` and, unless
    ``self.check()`` is truthy, writes the molecule as an xyz file into
    the method directory beneath the molecule's InChIKey directory.
    """
    self.inchikey = mol.write('inchikey').rstrip()
    if self.check():
        # nothing to do for this molecule
        return
    method_path = os.path.join(get_inchikey_dir(self.inchikey),
                               self.method_dir)
    setup_dir(method_path)
    xyz_path = os.path.join(method_path, '%s.xyz' % self.inchikey)
    mol.write('xyz', xyz_path, overwrite=True)
    self.log_all.info('%s 3D structure from %s added',
                      self.inchikey, source.dirname)
def map(self, inchikey, inchikey_dir):
    """Run a MOPAC calculation for one molecule and yield import queries.

    The parent method's xyz file is converted to a MOPAC input file with
    ``obabel``, ``MOPAC2012.exe`` is run inside the output directory and
    the MOPAC output is converted back to xyz with ``self.moo_to_xyz``.
    If ``self.check(out_file, xyz_out)`` already passes, the calculation
    is skipped and only the existing properties are yielded.

    Args:
        inchikey: InChIKey of the molecule; stored on ``self.inchikey``.
        inchikey_dir: Directory of the molecule. The parent xyz file is
            read from, and the output directory created, beneath it.

    Yields:
        After a fresh calculation that passes the check: the value of
        ``self.get_timing_query`` followed by every ``(query, values)``
        pair from ``self.import_properties``. When the calculation is
        skipped: only the ``self.import_properties`` pairs. Nothing is
        yielded if the parent path or parent xyz file is missing, or if
        the calculation fails the check.

    Raises:
        SystemExit: If ``self.parent_method_dir`` is None.
    """
    start = time.time()
    self.inchikey = inchikey
    if self.parent_method_dir is None:
        sys.exit(('This method requires a parent path with a valid '
                  'xyz file (i.e., it cannot accept an InChI).'))
    if not self.has_parent_path(self.inchikey):
        self.log_console.info(('parent path %i has not been calculated '
                               'for %s, skipping current calculation'),
                              self._parent_path_id, self.inchikey)
        return
    out_dir = os.path.realpath(os.path.join(inchikey_dir, self.method_dir))
    setup_dir(out_dir)
    mop_file = os.path.join(out_dir, '%s.mop' % self.inchikey)
    out_file = os.path.join(out_dir, '%s.out' % self.inchikey)
    xyz_in = os.path.abspath(os.path.join(inchikey_dir,
                                          self.parent_method_dir,
                                          '%s.xyz' % self.inchikey))
    if not os.path.isfile(xyz_in):
        self.log_console.warning('xyz file expected but not found: %s.',
                                 xyz_in)
        return
    xyz_out = os.path.abspath(os.path.join(out_dir,
                                           '%s.xyz' % self.inchikey))
    if self.check(out_file, xyz_out):
        self.log_console.info('%s calculation skipped', self.inchikey)
        for query, values in self.import_properties(out_file):
            yield query, values
        return
    # Build the MOPAC keyword line: primary parameters first, then the
    # secondary ones; a falsy value means the keyword is a bare flag.
    keyword_parts = []
    for params in (self.parameters, self.secondary_parameters):
        for key, value in params.items():
            if value:
                keyword_parts.append('%s=%s ' % (key, value))
            else:
                keyword_parts.append('%s ' % key)
    query = ('SELECT result AS charge '
             'FROM molecule_method_property mpp '
             'JOIN property p ON mpp.property_id = p.property_id '
             "WHERE p.name='charge' AND mpp.inchikey=?")
    charge = self.db.execute(query, (self.inchikey,)).fetchone()[0]
    keyword_parts.append('CHARGE=%i ' % charge)
    keywords = ''.join(keyword_parts)
    # obabel writes straight to the file descriptor, so a plain binary
    # handle is enough; the with-block guarantees it is closed, and
    # communicate() both drains stderr and reaps the child process.
    with open(mop_file, 'wb') as mop_handle:
        babel = subprocess.Popen(['obabel', '-ixyz', xyz_in, '-omop',
                                  '-xk' + keywords],
                                 stdout=mop_handle,
                                 stderr=subprocess.PIPE)
        babel_stderr = babel.communicate()[1]
    # mopac unhappy if not run in same dir as input; cwd= confines the
    # directory change to the child instead of the whole process.
    subprocess.Popen(['MOPAC2012.exe', '%s.mop' % self.inchikey],
                     cwd=out_dir).wait()
    self.moo_to_xyz(os.path.abspath(out_file), xyz_out)
    if self.check(out_file, xyz_out):
        self.log_all.info('%s calculation successful', self.inchikey)
        yield self.get_timing_query(self.inchikey, start)
        for query, values in self.import_properties(out_file):
            yield query, values
    else:
        # surface the conversion errors, the likeliest cause of failure
        print(babel_stderr, file=sys.stderr)
def test_setup_dir(self):
    # The path handed to utils.setup_dir must exist once the call returns.
    nested_path = os.path.join(self.tmp_dir, 'test_dir/test_subdir')
    utils.setup_dir(nested_path)
    path_was_created = os.path.exists(nested_path)
    self.assertTrue(path_was_created)
def map(self, inchikey, inchikey_dir):
    """Generate 3D structures with Balloon.

    Runs ``balloon`` on the molecule's SMILES (read from the ``molecule``
    table) and writes the result as an xyz file in the method directory.
    If the regular Balloon output cannot be read, the ``*_bad.sdf`` file
    is tried and locally optimized. If no valid xyz results, or the
    molecular weight differs from that of the SMILES by more than 0.001,
    coordinates are regenerated with Open Babel's ``make3D`` instead.

    Args:
        inchikey: InChIKey of the molecule; stored on ``self.inchikey``.
        inchikey_dir: Directory of the molecule; the method directory is
            created beneath it.

    Yields:
        Every ``(query, values)`` pair collected from
        ``self.get_insert_moldata_queries`` for the Balloon structure
        (none when the Open Babel fallback was used), followed by the
        value of ``self.get_timing_query``. Nothing is yielded when
        ``self.check(xyz_out)`` already passes.
    """
    self.inchikey = inchikey
    start = time.time()
    out_dir = os.path.realpath(os.path.join(inchikey_dir, self.method_dir))
    setup_dir(out_dir)
    sdf_out = os.path.realpath(os.path.join(out_dir,
                                            '%s.sdf' % self.inchikey))
    xyz_out = os.path.join(out_dir, '%s.xyz' % self.inchikey)
    if self.check(xyz_out):
        self.log_console.info('%s skipped', self.inchikey)
        return
    query = 'SELECT smiles FROM molecule WHERE inchikey=?'
    r = next(self.db.execute(query, (self.inchikey,)))
    # get positive 32-bit integer
    seed = binascii.crc32(inchikey) & 0xffffffff
    try:
        os.remove(sdf_out)
    except OSError:
        pass  # no stale output to remove
    balloon_cmd = ['balloon']
    for option, value in self.parameters.items():
        # entries whose key or value starts with '#' are commented out
        if option.startswith('#') or value.startswith('#'):
            continue
        balloon_cmd.append(option)
        if value:
            balloon_cmd.append(value)
    balloon_cmd.extend(['--randomSeed', str(seed), r.smiles, sdf_out])
    balloon = subprocess.Popen(balloon_cmd, cwd=out_dir,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # 'Y' answers a possible overwrite prompt. communicate() closes stdin,
    # drains both pipes concurrently (sequential reads can deadlock on a
    # full pipe) and waits for the process; the output is discarded.
    balloon.communicate(b'Y')
    forcefield = b'mmff94s'
    steps = 512
    moldata = {}
    mol = None
    try:
        # StopIteration (an SDF holding no molecule) is caught as well:
        # left alone it would silently end this generator.
        mol = next(pybel.readfile('sdf', sdf_out))
        mol.write(b'xyz', str(xyz_out))
        for query, values in self.get_insert_moldata_queries(
                self.inchikey, mol,
                description='balloon molecule data'):
            moldata.setdefault(query, []).append(values)
    except (IOError, StopIteration):
        sdf_bad = os.path.join(out_dir, '%s_bad.sdf' % inchikey)
        try:
            mol = next(pybel.readfile('sdf', sdf_bad))
            mol.localopt(forcefield=forcefield, steps=steps)
            self.log_all.info(('"bad" %s sdf cleaned up '
                               'with %s forcefield '
                               'and %i steps'),
                              self.inchikey, forcefield, steps)
            mol.write(b'xyz', str(xyz_out))
        except (IOError, StopIteration):
            pass  # handled by the Open Babel fallback below
    # Decide whether Balloon's result is usable; if not, remember why.
    fallback_reason = None
    if mol is None or not self.check(xyz_out):
        fallback_reason = 'unexpected failure'
    elif abs(mol.molwt - pybel.readstring('smi', r.smiles).molwt) > 0.001:
        fallback_reason = 'hydrogen atom mismatch'
    if fallback_reason is not None:
        # moldata gathered for the Balloon structure no longer applies
        moldata = {}
        mol = pybel.readstring(b'smi', str(r.smiles))
        mol.make3D(forcefield, steps)
        mol.write(b'xyz', str(xyz_out), overwrite=True)
        self.log_all.info(('%s 3D coordinates generation attempted by '
                           'Open Babel rule-based algorithm '
                           '(forcefields=%s steps=%i) instead of '
                           'balloon due to %s'),
                          self.inchikey, forcefield, steps, fallback_reason)
    if self.check(xyz_out):
        self.log_all.info('%s 3D coordinates generated successfully',
                          self.inchikey)
    else:
        self.log_all.warning('%s coordinate generation failed',
                             self.inchikey)
    for query, values in moldata.items():
        for v in values:
            yield query, v
    yield self.get_timing_query(self.inchikey, start)