Example #1
0
 def map(self, mol, source):
     """Import molecule into MESS.DB.

     Sets self.inchikey from the molecule, makes sure the molecule
     directory exists and, when self.check() is falsy, writes the
     initial per-molecule files (.inchi, .png, .log, .notes,
     .sources.tsv) next to each other in that directory.

     Args:
         mol: Molecule object; must provide write() and a title attribute.
         source: Source object; must provide update_source_tsv(),
             update_molecule_source_query() and a dirname attribute.

     Yields:
         The values returned by source.update_molecule_source_query()
         and self.insert_molecule_query(), followed by every
         (query, values) pair from self.get_insert_moldata_queries()
         and self.get_openbabel_property_queries().
     """
     self.inchikey = mol.write('inchikey').rstrip()
     inchikey_dir = get_inchikey_dir(self.inchikey)
     inchikey_basename = os.path.join(inchikey_dir, self.inchikey)
     # the identifier is captured before the title is blanked below
     identifier = unicode(mol.title, 'utf-8', 'replace')
     setup_dir(inchikey_dir)
     if not self.check():
         mol.title = b''
         mol.write('inchi', inchikey_basename + '.inchi', overwrite=True)
         if not os.path.exists(inchikey_basename + '.png'):
             mol.write('_png2', inchikey_basename + '.png')
         # inchikey_basename already contains inchikey_dir, so it must not
         # be joined onto the directory a second time (doing so duplicated
         # the directory prefix whenever inchikey_dir was a relative path)
         for extension in ('.log', '.notes', '.sources.tsv'):
             touch(inchikey_basename + extension)
         self.log_all.info('%s molecule directory initialized',
                           self.inchikey)
     source.update_source_tsv(self.inchikey, identifier)
     yield source.update_molecule_source_query(self.inchikey, identifier)
     yield self.insert_molecule_query(self.inchikey, mol)
     moldata_queries = self.get_insert_moldata_queries(
         self.inchikey,
         mol,
         description='molecule data from %s input' % source.dirname)
     for query, values in moldata_queries:
         yield query, values
     for query, values in self.get_openbabel_property_queries(self.inchikey,
                                                              mol):
         yield query, values
Example #2
0
 def map(self, mol, source):
     """Write the molecule's xyz file into its MESS.DB method directory.

     Sets self.inchikey from the molecule. Nothing is written or logged
     when self.check() is truthy.

     Args:
         mol: Molecule object; must provide write().
         source: Source object; only its dirname is used, for logging.
     """
     self.inchikey = mol.write('inchikey').rstrip()
     if self.check():
         return
     method_path = os.path.join(get_inchikey_dir(self.inchikey),
                                self.method_dir)
     setup_dir(method_path)
     xyz_path = os.path.join(method_path, '%s.xyz' % self.inchikey)
     mol.write('xyz', xyz_path, overwrite=True)
     self.log_all.info('%s 3D structure from %s added',
                       self.inchikey, source.dirname)
Example #3
0
 def map(self, inchikey, inchikey_dir):
     """Run MOPAC for one molecule and yield the resulting queries.

     The parent method's xyz file is converted to a .mop input with
     obabel (keywords come from self.parameters,
     self.secondary_parameters and the charge stored in the database),
     MOPAC2012.exe is run inside the method output directory, and
     self.moo_to_xyz() is called on the MOPAC output. If the outputs
     already pass self.check() the calculation is skipped and only the
     property queries are yielded.

     Args:
         inchikey: InChIKey of the molecule; stored on self.inchikey.
         inchikey_dir: Directory of the molecule; method directories are
             resolved relative to it.

     Yields:
         On a successful new calculation, the value of
         self.get_timing_query() followed by every (query, values) pair
         from self.import_properties(); on a skipped calculation only
         the latter. Nothing is yielded when the parent path or its xyz
         file is missing, or when the calculation fails the check.

     Raises:
         SystemExit: if self.parent_method_dir is None.
     """
     start = time.time()
     self.inchikey = inchikey
     if self.parent_method_dir is None:
         sys.exit(('This method requires a parent path with a valid '
                   'xyz file (i.e., it cannot accept an InChI).'))
     if not self.has_parent_path(self.inchikey):
         self.log_console.info(('parent path %i has not been calculated '
                                'for %s, skipping current calculation'),
                               self._parent_path_id, self.inchikey)
         return
     out_dir = os.path.realpath(os.path.join(inchikey_dir, self.method_dir))
     setup_dir(out_dir)
     mop_file = os.path.join(out_dir, '%s.mop' % self.inchikey)
     out_file = os.path.join(out_dir, '%s.out' % self.inchikey)
     xyz_in = os.path.abspath(os.path.join(inchikey_dir,
                                           self.parent_method_dir,
                                           '%s.xyz' % self.inchikey))
     if not os.path.isfile(xyz_in):
         self.log_console.warning('xyz file expected but not found: %s.',
                                  xyz_in)
         return
     xyz_out = os.path.abspath(os.path.join(out_dir,
                                            '%s.xyz' % self.inchikey))
     if self.check(out_file, xyz_out):
         self.log_console.info('%s calculation skipped', self.inchikey)
         for query, values in self.import_properties(out_file):
             yield query, values
         return
     # keyword line: 'KEY=VALUE ' for valued parameters, 'KEY ' for flags
     keyword_parts = []
     for params in (self.parameters, self.secondary_parameters):
         for key, value in params.items():
             if value:
                 keyword_parts.append('%s=%s ' % (key, value))
             else:
                 keyword_parts.append('%s ' % key)
     charge_query = ('SELECT result AS charge '
                     'FROM molecule_method_property mpp '
                     'JOIN property p ON mpp.property_id = p.property_id '
                     "WHERE p.name='charge' AND mpp.inchikey=?")
     charge = self.db.execute(charge_query,
                              (self.inchikey,)).fetchone()[0]
     keyword_parts.append('CHARGE=%i ' % charge)
     keywords = ''.join(keyword_parts)
     # the with-block closes the .mop handle before MOPAC reads the file;
     # communicate() drains stderr and reaps the obabel process
     with codecs.open(mop_file, 'w', 'utf-8') as mop_handle:
         babel = subprocess.Popen(['obabel', '-ixyz', xyz_in, '-omop',
                                   '-xk' + keywords],
                                  stdout=mop_handle,
                                  stderr=subprocess.PIPE)
         babel_stderr = babel.communicate()[1]
     # mopac unhappy if not run in same dir as input; cwd= only affects
     # the child, so this process's working directory is never changed
     subprocess.Popen(['MOPAC2012.exe', '%s.mop' % self.inchikey],
                      cwd=out_dir).wait()
     self.moo_to_xyz(os.path.abspath(out_file), xyz_out)
     if self.check(out_file, xyz_out):
         self.log_all.info('%s calculation successful', self.inchikey)
         yield self.get_timing_query(self.inchikey, start)
         for query, values in self.import_properties(out_file):
             yield query, values
     else:
         # surface the obabel diagnostics when the calculation failed
         print(babel_stderr, file=sys.stderr)
Example #4
0
 def test_setup_dir(self):
     """utils.setup_dir() creates a nested directory under self.tmp_dir."""
     nested_path = os.path.join(self.tmp_dir, 'test_dir/test_subdir')
     utils.setup_dir(nested_path)
     created = os.path.exists(nested_path)
     self.assertTrue(created)
Example #5
0
 def map(self, inchikey, inchikey_dir):
     """Generate 3D structures with Balloon.

     Runs balloon on the molecule's SMILES (read from the database) with
     a random seed derived from the InChIKey, then writes an xyz file
     from the resulting sdf. If only a '<inchikey>_bad.sdf' can be read,
     it is optimized with the mmff94s forcefield first. When the xyz
     output fails self.check(), or the molecular weight differs from
     that of the SMILES by more than 0.001, coordinates are regenerated
     with Open Babel's make3D instead and the collected molecule data is
     discarded.

     Args:
         inchikey: InChIKey of the molecule; stored on self.inchikey.
         inchikey_dir: Directory of the molecule; the method directory
             is resolved relative to it.

     Yields:
         Each collected (query, values) pair from
         self.get_insert_moldata_queries(), then the value of
         self.get_timing_query(). Nothing is yielded when the xyz
         output already passes self.check().
     """
     self.inchikey = inchikey
     start = time.time()
     out_dir = os.path.realpath(os.path.join(inchikey_dir, self.method_dir))
     setup_dir(out_dir)
     sdf_out = os.path.realpath(os.path.join(out_dir,
                                             '%s.sdf' % self.inchikey))
     xyz_out = os.path.join(out_dir, '%s.xyz' % self.inchikey)
     if self.check(xyz_out):
         self.log_console.info('%s skipped', self.inchikey)
         return
     smiles_query = 'SELECT smiles FROM molecule WHERE inchikey=?'
     r = next(self.db.execute(smiles_query, (self.inchikey,)))
     # get positive 32-bit integer
     seed = binascii.crc32(inchikey) & 0xffffffff
     try:
         os.remove(sdf_out)
     except OSError:
         # best effort: there may be no previous sdf to remove
         pass
     balloon_cmd = ['balloon']
     for k, v in self.parameters.items():
         if k.startswith('#') or v.startswith('#'):
             continue
         balloon_cmd.append(k)
         if v:
             balloon_cmd.append(v)
     balloon_cmd.extend(['--randomSeed', str(seed), r.smiles, sdf_out])
     balloon = subprocess.Popen(balloon_cmd, cwd=out_dir,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
     # send 'Y' in case balloon asks about overwrite; communicate() closes
     # stdin, drains both output pipes concurrently (sequential read()
     # calls can deadlock on a full pipe) and waits for the process.
     # The captured output is not used further in this method.
     balloon.communicate(b'Y')
     forcefield = b'mmff94s'
     steps = 512
     moldata = {}
     try:
         # StopIteration (sdf without any molecule) is caught as well so
         # it cannot silently terminate this generator
         mol = next(pybel.readfile('sdf', sdf_out))
         mol.write(b'xyz', str(xyz_out))
         for query, values in self.get_insert_moldata_queries(
                 self.inchikey,
                 mol,
                 description='balloon molecule data'):
             moldata.setdefault(query, []).append(values)
     except (IOError, StopIteration):
         sdf_bad = os.path.join(out_dir, '%s_bad.sdf' % inchikey)
         try:
             mol = next(pybel.readfile('sdf', sdf_bad))
             mol.localopt(forcefield=forcefield, steps=steps)
             self.log_all.info(('"bad" %s sdf cleaned up '
                                'with %s forcefield '
                                'and %i steps'),
                               self.inchikey,
                               forcefield,
                               steps)
             mol.write(b'xyz', str(xyz_out))
         except (IOError, StopIteration):
             # no usable sdf at all; the check below triggers the fallback
             pass
     # decide whether the balloon result must be replaced
     fallback_reason = None
     if self.check(xyz_out):
         if abs(mol.molwt - pybel.readstring('smi',
                                             r.smiles).molwt) > 0.001:
             fallback_reason = 'hydrogen atom mismatch'
     else:
         fallback_reason = 'unexpected failure'
     if fallback_reason is not None:
         # molecule data gathered from the balloon structure is dropped
         moldata = {}
         mol = pybel.readstring(b'smi', str(r.smiles))
         mol.make3D(forcefield, steps)
         mol.write(b'xyz', str(xyz_out), overwrite=True)
         self.log_all.info(('%s 3D coordinates generation attempted by '
                            'Open Babel rule-based algorithm '
                            '(forcefields=%s steps=%i) instead of '
                            'balloon due to ') + fallback_reason,
                           self.inchikey, forcefield, steps)
     if self.check(xyz_out):
         self.log_all.info('%s 3D coordinates generated successfully',
                           self.inchikey)
     else:
         self.log_all.warning('%s coordinate generation failed',
                              self.inchikey)
     for query, values in moldata.items():
         for v in values:
             yield query, v
     yield self.get_timing_query(self.inchikey, start)