class TestMethodPath(unittest.TestCase):
    """Unit tests for MethodPath, run against a throwaway db in ./tmp."""

    def setUp(self):
        """Create ./tmp and a MethodPath wired to a fresh db and graph."""
        self.tmp_dir = './tmp'
        if not os.path.exists(self.tmp_dir):
            os.mkdir(self.tmp_dir)
        self.path = MethodPath()
        with suppress_stderr():
            # silence 'MESS.DB created' message
            self.path._db = MessDB(database='%s/test.db' % self.tmp_dir)
        self.path._graph = DirectedGraph()

    def tearDown(self):
        """Remove the temporary directory and the test db inside it."""
        shutil.rmtree(self.tmp_dir)

    def assert_path_consistency(self):
        """Check that the cached _path_id agrees with get_path_id()."""
        self.assertEqual(self.path._path_id, self.path.get_path_id())

    def test_init(self):
        """A fresh MethodPath has an empty graph, empty path and no id."""
        self.assertEqual(self.path._db.tries, 0)
        self.assertEqual(self.path._graph._node_count, 0)
        self.assertEqual(self.path._path, [])
        self.assertIsNone(self.path._path_id)
        self.assert_path_consistency()

    def test_load_graph(self):
        """_load_graph picks up the nodes of rows stored in method_edge."""
        insert_query = 'INSERT INTO method_edge VALUES (?, ?, ?)'
        self.path._db.executemany(insert_query,
                                  ((1, 1, 1), (2, 1, 2), (3, 2, 3)))
        self.path._load_graph()
        self.assertEqual(sorted(self.path._graph.get_node_ids()),
                         [1, 2, 3])
        self.assert_path_consistency()

    def test_setup_path(self):
        """setup_path creates a single-node path of length zero."""
        self.path.setup_path(1)
        self.assertEqual(self.path._graph._node_count, 1)
        self.assertEqual(self.path._path, [1])
        self.assertEqual(self.path.get_length(), 0)
        self.assert_path_consistency()
        # check that new path isn't added for same method
        new_path = MethodPath()
        new_path._db = self.path._db
        new_path._graph = DirectedGraph()
        new_path._load_graph()
        new_path.setup_path(1)
        self.assertEqual(new_path._graph._node_count, 1)
        self.assertEqual(new_path._path, [1])
        self.assertEqual(new_path.get_length(), 0)

    def test_extend_path(self):
        """extend_path appends to the path and grows its length by one."""
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.assertEqual(self.path._graph._node_count, 2)
        self.assertEqual(self.path._path, [1, 2])
        self.assertEqual(self.path.get_length(), 1)
        self.path.extend_path(44)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2, 5])
        self.assertEqual(self.path.get_length(), 2)
        # extending with an already-used method must not add a graph node
        self.path.extend_path(42)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2, 5, 8])
        self.assertEqual(self.path.get_length(), 3)
        self.assert_path_consistency()

    def test_set_path(self):
        """set_path selects an existing path id without altering the graph."""
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.path.set_path(2)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2])
        self.assertEqual(self.path.get_length(), 1)
        self.assert_path_consistency()

    def test_get_path_id(self):
        """get_path_id is None before setup, then the current path's id."""
        self.assertIsNone(self.path.get_path_id())
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_path_id(), 3)

    def test_get_parent_path_id(self):
        """get_parent_path_id is None before setup, then the previous id."""
        self.assertIsNone(self.path.get_parent_path_id())
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_parent_path_id(), 2)

    def test_get_method_id(self):
        """get_method_id tracks the method most recently added."""
        self.assertIsNone(self.path.get_method_id())
        self.path.setup_path(42)
        self.assertEqual(self.path.get_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_method_id(), 43)

    def test_get_parent_method_id(self):
        """get_parent_method_id trails get_method_id by one step."""
        self.assertIsNone(self.path.get_parent_method_id())
        self.path.setup_path(42)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_parent_method_id(), 43)

    def test_get_superparent_method_id(self):
        """get_superparent_method_id trails get_method_id by two steps."""
        self.assertIsNone(self.path.get_superparent_method_id())
        # NOTE(review): the next two checks call get_parent_method_id, not
        # get_superparent_method_id -- possibly a copy-paste from the test
        # above; confirm the intended expectations before changing them.
        self.path.setup_path(42)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_superparent_method_id(), 42)
        self.path.extend_path(45)
        self.assertEqual(self.path.get_superparent_method_id(), 43)

    def test_get_directory(self):
        """_get_directory returns None when no path has been set up."""
        self.assertIsNone(self.path._get_directory(1, 2, 3))

    def test_get_path_directory(self):
        """get_path_directory returns None when no path has been set up."""
        self.assertIsNone(self.path.get_path_directory())

    def test_get_parent_path_directory(self):
        """get_parent_path_directory returns None without a path."""
        self.assertIsNone(self.path.get_parent_path_directory())
def execute(self, args):
    """Match input molecules by SMARTS, fingerprint, or Spectrophore.

    Reads one InChIKey per line from ``args.inchikeys`` (the first
    whitespace-delimited token of each line; blank lines are skipped) and
    writes delimited rows to stdout via ``csv.writer``.

    Exactly one of ``args.smarts``, ``args.fingerprint`` and
    ``args.spectrophore`` must be truthy:

    * SMARTS: one row per pattern that matches, followed by its matches.
    * fingerprint / Spectrophore: if ``args.target`` is given, a row with
      the similarity to the target is written when it exceeds
      ``args.cutoff``; otherwise the fingerprint itself is written.

    Args:
        args: parsed command-line namespace. Attributes read here:
            inchikeys, smarts, fingerprint, spectrophore, target, path,
            delimiter, cutoff, and the four spectrophore_* settings.

    Raises:
        SystemExit: if stdin is a tty, if no operation or more than one
            operation was selected, or if --spectrophore is used without
            --path.
    """
    if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
        sys.exit('No input specified.')
    # parse args
    if not (args.smarts or args.fingerprint or args.spectrophore):
        sys.exit('No operations were selected, nothing to match.')
    selected = sum(bool(arg) for arg in (args.smarts,
                                         args.fingerprint,
                                         args.spectrophore))
    if selected > 1:
        sys.exit(('One thing at a time, please. The arguments --smarts, '
                  '--fingerprint, and --spectrophore are mutually '
                  'exclusive.'))
    if args.smarts and args.target:
        self.log_console.warning(('--target ignored, proceeding with '
                                  'SMARTS matching'))
    if args.spectrophore:
        if args.path is None:
            sys.exit(('Spectrophore calculation requires 3D geometry. '
                      'You must specify a 3D geometry with --path.'))
        path = MethodPath()
        path.set_path(args.path)
        method_dir = path.get_path_directory()
        sp_args = {'normalization': args.spectrophore_normalization,
                   'accuracy': args.spectrophore_accuracy,
                   'stereo': args.spectrophore_stereospecificity,
                   'resolution': args.spectrophore_resolution}
        # the settings key is the same for every molecule; serialize once
        sp_settings = json.dumps(sp_args, sort_keys=True)
    # load target and target fingerprints
    target_mol = None
    target_fp = None
    target_sp = None
    if args.target:
        if os.path.exists(args.target):
            # the file extension doubles as the format name for pybel
            target_mol = next(pybel.readfile(args.target.split('.')[-1],
                                             args.target))
        else:
            target_mol = pybel.readstring('smi', args.target)
    if target_mol is not None:
        if args.fingerprint:
            target_fp = self.calculate_fingerprint(target_mol,
                                                   args.fingerprint)
        if args.spectrophore:
            target_sp = self.calculate_spectrophore(target_mol, sp_args)
    # match every input
    db = MessDB()
    inchi_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
    fp_query = ('SELECT fingerprint FROM molecule_fingerprint '
                'WHERE inchikey = ? AND name = ? '
                'AND settings = ? AND method_path_id = ?')
    writer = csv.writer(sys.stdout, delimiter=args.delimiter)
    for row in args.inchikeys:
        fields = row.split()
        if not fields:
            # blank line: nothing to match (previously an IndexError)
            continue
        inchikey = fields[0]
        if args.smarts or args.fingerprint:
            # NOTE(review): an InChIKey with no row in `molecule` makes
            # fetchone() return None and this line raise TypeError.
            inchi = db.execute(inchi_query, (inchikey,)).fetchone()[0]
            mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
        if args.smarts:
            canonical = pybel.ob.OBOp.FindType(b"canonical")
            canonical.Do(mol.OBMol)
            for (smarts_obj, smarts_str) in self.smarts_generator(
                    args.smarts):
                matches = list(smarts_obj.findall(mol))
                if matches:
                    writer.writerow([inchikey, smarts_str] + matches)
        if args.fingerprint:
            try:
                # subscripting None (no stored row) raises TypeError
                fp = db.execute(fp_query, (inchikey, args.fingerprint,
                                           '', '')).fetchone()[0]
            except TypeError:
                fp = self.calculate_fingerprint(mol, args.fingerprint)
            if target_fp is not None:
                similarity = self.calculate_similarity(target_fp, fp,
                                                       'tanimoto')
                if similarity > args.cutoff:
                    writer.writerow([inchikey, args.fingerprint,
                                     args.target, similarity])
            else:
                writer.writerow([inchikey, args.fingerprint] + fp)
        if args.spectrophore:
            try:
                # subscripting None (no stored row) raises TypeError
                sp = db.execute(fp_query, (inchikey, 'Spectrophore',
                                           sp_settings,
                                           args.path)).fetchone()[0]
            except TypeError:
                # not stored: compute from the xyz geometry of this path
                xyz_file = os.path.join(get_inchikey_dir(inchikey),
                                        method_dir,
                                        '%s.xyz' % inchikey)
                mol = next(pybel.readfile('xyz', xyz_file))
                sp = Match.calculate_spectrophore(mol, sp_args)
            if target_sp is not None:
                try:
                    similarity = self.calculate_similarity(target_sp, sp,
                                                           'cos')
                except ValueError:
                    # treat an incomparable pair as "no similarity"
                    similarity = 0
                if similarity > args.cutoff:
                    writer.writerow([inchikey, 'Spectrophore',
                                     args.target, similarity])
            else:
                writer.writerow([inchikey, 'Spectrophore'] + sp)
def execute(self, args):
    """Annotate input molecules and store new fingerprints in the db.

    Reads one InChIKey per line from ``args.inchikeys`` (the first
    whitespace-delimited token of each line; blank lines are skipped).
    For each key, depending on the selected options:

    * ``args.cir``: calls ``update_iupac`` and ``update_synonyms``.
    * ``args.fingerprint``: computes the named fingerprint and inserts it
      into ``molecule_fingerprint`` if no row exists; if a row exists and
      differs, a warning is logged and the db is left untouched.
    * ``args.spectrophore``: same as above for the Spectrophore computed
      from the xyz geometry under the directory of ``args.path``.

    Sets ``self.db`` and ``self.inchikey`` as side effects.

    Args:
        args: parsed command-line namespace. Attributes read here:
            inchikeys, cir, fingerprint, spectrophore, path, and the
            four spectrophore_* settings.

    Raises:
        SystemExit: if stdin is a tty, if no annotation was requested,
            or if --spectrophore is used without --path.
    """
    if args.inchikeys.name == '<stdin>' and args.inchikeys.isatty():
        sys.exit('No input specified.')
    if not (args.cir or args.fingerprint or args.spectrophore):
        sys.exit('You did not request any annotations.')
    if args.spectrophore:
        if args.path is None:
            sys.exit(('Spectrophore calculation requires 3D geometry. '
                      'You must specify a 3D geometry with --path.'))
        path = MethodPath()
        path.set_path(args.path)
        method_dir = path.get_path_directory()
        sp_args = {'normalization': args.spectrophore_normalization,
                   'accuracy': args.spectrophore_accuracy,
                   'stereo': args.spectrophore_stereospecificity,
                   'resolution': args.spectrophore_resolution}
        # the settings key is the same for every molecule; serialize once
        json_sp_args = json.dumps(sp_args, sort_keys=True)
    self.db = MessDB()
    inchi_select_query = 'SELECT inchi FROM molecule WHERE inchikey = ?'
    fp_select_query = ('SELECT fingerprint FROM molecule_fingerprint '
                       'WHERE inchikey = ? '
                       'AND name = ? '
                       'AND settings = ? '
                       'AND method_path_id = ?')
    fp_insert_query = ('INSERT INTO molecule_fingerprint '
                       '(inchikey, name, settings, '
                       'fingerprint, method_path_id) '
                       'VALUES (?, ?, ?, ?, ?)')
    for row in args.inchikeys:
        fields = row.split()
        if not fields:
            # blank line: nothing to annotate (previously an IndexError)
            continue
        self.inchikey = fields[0]
        if args.cir:
            self.update_iupac(self.inchikey)
            self.update_synonyms(self.inchikey)
        if args.fingerprint:
            # NOTE(review): an InChIKey with no row in `molecule` makes
            # fetchone() return None and this line raise TypeError.
            inchi = self.db.execute(inchi_select_query,
                                    (self.inchikey,)).fetchone()[0]
            mol = pybel.readstring('inchi', 'InChI=%s' % inchi)
            canonical = pybel.ob.OBOp.FindType(b'canonical')
            canonical.Do(mol.OBMol)
            fp = Match.calculate_fingerprint(mol, args.fingerprint)
            try:
                # subscripting None (no stored row) raises TypeError
                db_fp = self.db.execute(fp_select_query,
                                        (self.inchikey,
                                         args.fingerprint,
                                         '', '')).fetchone()[0]
            except TypeError:
                self.db.execute(fp_insert_query,
                                (self.inchikey, args.fingerprint,
                                 '', str(fp), ''))
                self.log_all.info('%s fingerprint for %s added to db',
                                  args.fingerprint, self.inchikey)
            else:
                # kept outside the try so a TypeError raised here is not
                # mistaken for a missing row
                if str(fp) != db_fp:
                    self.log_console.warning(('new %s fingerprint '
                                              'for %s did not match '
                                              'fingerprint in db, '
                                              'db not updated'),
                                             args.fingerprint,
                                             self.inchikey)
        if args.spectrophore:
            xyz_file = os.path.join(get_inchikey_dir(self.inchikey),
                                    method_dir,
                                    '%s.xyz' % self.inchikey)
            mol = next(pybel.readfile('xyz', xyz_file))
            sp = Match.calculate_spectrophore(mol, sp_args)
            try:
                # subscripting None (no stored row) raises TypeError
                db_sp = self.db.execute(fp_select_query,
                                        (self.inchikey,
                                         'Spectrophore',
                                         json_sp_args,
                                         args.path)).fetchone()[0]
            except TypeError:
                self.db.execute(fp_insert_query,
                                (self.inchikey, 'Spectrophore',
                                 json_sp_args, str(sp), args.path))
                self.log_all.info(('Spectrophore fingerprint for %s '
                                   'with parameters %s and '
                                   'geometry from path %i '
                                   'added to db'),
                                  self.inchikey, json_sp_args, args.path)
            else:
                if str(sp) != db_sp:
                    self.log_console.warning(('new Spectrophore '
                                              'fingerprint for '
                                              '%s did not match '
                                              'fingerprint in db, '
                                              'db not updated'),
                                             self.inchikey)