Example #1
0
class AbstractMethod(object):
    """Base class that every method must inherit from.

    Subclasses are expected to define ``description``, ``geop``,
    ``prog_name``, ``prog_version`` and ``prog_url`` as attributes
    (``parameters`` has a class-level default) and to override
    ``check_dependencies``, ``check`` and ``map``.

    Attributes:
        db (obj): A MessDB object
        path (obj): A MethodPath object used to resolve path ids/directories
        log_console (obj): Log object created with the name 'console'
        log_all (obj): Log object created with the name 'all'
        method_name (str): The name of the method
        description (str): Description of method
        geop (bool): Whether the method generates a new geometry
        prog_name (str): Program name
        prog_version (str): Program version
        prog_url (str): Program url
        parameters (dict): Parameters that affect program execution
    """
    # NOTE: this dict lives on the class and is therefore shared by every
    # subclass that does not assign its own ``parameters``.
    parameters = dict()
    shortdesc = None
    method_citation = None
    prog_citation = None
    _inchikey = None
    _path_id = None
    _parent_path_id = None
    _method_dir = None
    _parent_method_dir = None

    def __init__(self):
        """Set up db, path and loggers, then verify required attributes.

        Exits the interpreter (``sys.exit``) when one of the required
        attributes is missing, and finishes by calling
        ``check_dependencies``.
        """
        self.db = MessDB()
        self.path = MethodPath()
        self.log_console = Log('console')
        self.log_all = Log('all')
        self.method_name = self.get_method_name()
        try:
            # Touch each required attribute; the first missing one raises.
            # (geop flags that the method results in new xyz coordinates.)
            for required in ('parameters', 'description', 'geop',
                             'prog_name', 'prog_version', 'prog_url'):
                getattr(self, required)
        except AttributeError as err:
            print(''.join([str(err), '\n']), file=sys.stderr)
            sys.exit(('Each method class needs to define description, geop, '
                      'prog_name, prog_version, prog_url, '
                      'parameters as attributes.'))
        self.check_dependencies()

    def __hash__(self):
        """Hash based on method name and parameters.

        Parameter keys and values are stringified and lower-cased, then
        JSON-serialized with sorted keys so the result does not depend on
        dict ordering.

        Returns:
            A hex string of the sha1 hash of self.method_name plus
            JSON-serialized self.parameters.
        """
        # NOTE: this returns a str, not an int, so the built-in hash() cannot
        # be used on instances. It is kept as-is because the ``hash``
        # property exposes this value and it is used in the SQL queries of
        # ``method_id`` and ``_insert_method``.
        normalized = dict((str(key).lower(), str(value).lower())
                          for key, value in self.parameters.items())
        payload = self.method_name + json.dumps(normalized, sort_keys=True)
        # sha1 needs bytes on Python 3; the digest is unchanged for the
        # ASCII payloads Python 2 produced.
        return hashlib.sha1(payload.encode('utf-8')).hexdigest()

    @property
    def hash(self):
        """Get the hex digest computed by ``__hash__``."""
        return self.__hash__()

    @property
    def method_id(self):
        """Get the method_id of the row in ``method`` matching this hash.

        Raises:
            AttributeError: presumably when no matching row exists and
                ``fetchone`` yields None (e.g. before ``setup`` has run).
        """
        query = ('SELECT method_id FROM method '
                 'WHERE hash = ?;')
        row = self.db.execute(query, (self.hash,)).fetchone()
        return row.method_id

    @property
    def path_id(self):
        """Get the path id of the method, refreshing the path if stale."""
        if not self.path.get_method_id() == self.method_id:
            self._setup_path()
        return self._path_id

    @property
    def method_dir(self):
        """Get the directory name of the method, refreshing if stale."""
        if not self.path.get_method_id() == self.method_id:
            self._setup_path()
        return self._method_dir

    @property
    def parent_method_dir(self):
        """Get the parent directory name of the method, refreshing if stale."""
        if not self.path.get_method_id() == self.method_id:
            self._setup_path()
        return self._parent_method_dir

    @property
    def inchikey(self):
        """Get inchikey."""
        return self._inchikey

    @inchikey.setter
    def inchikey(self, inchikey):
        """Set inchikey, and update inchikey of logger.

        Raises:
            RuntimeError: if inchikey is not None and fails ``is_inchikey``.
        """
        if inchikey is not None and not is_inchikey(inchikey):
            raise RuntimeError('invalid inchikey: %s' % inchikey)
        self._inchikey = inchikey
        self.log_all.inchikey = inchikey

    @classmethod
    def get_method_name(cls):
        """Return the class name lower-cased with underscores removed."""
        return cls.__name__.replace('_', '').lower()

    def _setup_path(self):
        """Setup path given current method id and parent path."""
        self.path.setup_path(self.method_id, self._parent_path_id)
        self._path_id = self.path.get_path_id()
        self._method_dir = self.path.get_path_directory()
        self._parent_method_dir = self.path.get_parent_path_directory()

    def _insert_method(self):
        """Insert this method into the method table, logging if it is new.

        The program_id is looked up from the program table by name and
        version, so no row is inserted unless that program row exists.
        """
        total_changes = self.db.total_changes
        query = ('INSERT OR IGNORE INTO method '
                 '(program_id, geop, name, shortdesc, citation, hash) '
                 'SELECT program.program_id, ?, ?, ?, ?, ? '
                 'FROM program '
                 'WHERE program.name=? AND program.version=?')
        self.db.execute(query, (self.geop, self.method_name, self.shortdesc,
                                self.method_citation, self.hash,
                                self.prog_name, self.prog_version))
        if self.db.total_changes - total_changes > 0:
            self.log_all.info('new %s method added to MESS.DB',
                              self.method_name)

    def _insert_program(self):
        """Add a row to the program table, logging if it is new."""
        total_changes = self.db.total_changes
        query = ('INSERT OR IGNORE INTO program '
                 '(name, version, url, citation) '
                 'VALUES (?, ?, ?, ?)')
        self.db.execute(query,
                        (self.prog_name, self.prog_version, self.prog_url,
                         self.prog_citation))
        if self.db.total_changes - total_changes > 0:
            self.log_all.info('program %s %s added to MESS.DB',
                              self.prog_name, self.prog_version)

    def _insert_parameters(self):
        """Import the parameters dict into the db.

        For each name/setting pair in self.parameters, the parameter name
        is inserted (if new) and then linked to this method with its
        setting. Only the method_parameter insertions are counted for the
        log message.
        """
        added_parameters = 0
        for name, setting in self.parameters.items():
            query = ('INSERT OR IGNORE INTO parameter (name) VALUES (?)')
            self.db.execute(query, (name, ))
            # Snapshot after the parameter insert so it is not counted.
            total_changes = self.db.total_changes
            query = ('INSERT OR IGNORE INTO method_parameter '
                     '(method_id, parameter_id, setting) '
                     'SELECT ?, parameter.parameter_id, ? '
                     'FROM program, parameter '
                     'WHERE parameter.name=?')
            self.db.execute(query, (self.method_id, setting, name))
            added_parameters += (self.db.total_changes - total_changes)
        if added_parameters > 0:
            self.log_all.info('%i method parameters added to MESS.DB',
                              added_parameters)

    def get_insert_property_query(self, inchikey, name, description,
                                  format_, value, units=''):
        """Return a query/values pair that inserts a property value.

        The path id stored with the property is taken from self.path_id.

        Args:
            inchikey: The inchikey of a molecule in MESS.DB.
            name: The property name.
            description: A description of the property.
            format_: A description of the format the property is in.
            value: The calculated property.
            units: Units for the property value.

        Returns:
            A tuple of (query string, tuple of values to bind).
        """
        query = ('INSERT OR IGNORE INTO molecule_method_property_denorm '
                 'VALUES (?, ?, ?, ?, ?, ?, ?);')
        return (query, (inchikey, self.path_id, name, description,
                        format_, units, value))

    def get_insert_moldata_queries(self, inchikey, mol,
                                   description='', units=''):
        """Yield one insert query/values pair per entry in mol.data.

        The format recorded for each entry is the type name of its value.
        """
        for name, value in mol.data.items():
            yield self.get_insert_property_query(inchikey,
                                                 name,
                                                 description,
                                                 type(value).__name__,
                                                 value,
                                                 units)

    def get_timing_query(self, inchikey, start):
        """Get a query to insert execution time property into db.

        Args:
            inchikey: The inchikey of the molecule that was processed.
            start: Start time, compared against ``time.time()``.
        """
        return self.get_insert_property_query(inchikey, 'runtime',
                                              'execution time',
                                              type(start).__name__,
                                              time.time() - start, 's')

    def set_parent_path(self, parent_path):
        """Set the parent path (e.g., path to method containing input
        geometry.)

        Values that are None or not greater than zero are ignored.
        """
        # The explicit None check keeps None a silent no-op on Python 3,
        # where ``None > 0`` raises TypeError.
        if parent_path is not None and parent_path > 0:
            self._parent_path_id = parent_path

    def has_parent_path(self, inchikey):
        """Returns True if molecule has had entire parent path calculated,
        False otherwise."""
        query = ('SELECT inchikey FROM molecule_method_property WHERE '
                 'inchikey = ? AND method_path_id = ?')
        try:
            # Subscripting a missing row (None) raises TypeError.
            self.db.execute(query,
                            (inchikey, self._parent_path_id)).fetchone()[0]
            return True
        except TypeError:
            return False

    def check_dependencies(self):
        """If check_dependencies is not implemented, raise error."""
        raise NotImplementedError(("every method needs a 'check_dependencies' "
                                   'method'))

    def check(self):
        """If check is not implemented, raise error."""
        # the check method should be called before a calculation (so
        # calculations are not repeated) and after (to verify success)
        raise NotImplementedError("every method needs a 'check' method")

    def map(self, inchikey, inchikey_dir):
        """Generally, maps molecule to calculation via method, emits
        query/value pairs.
        """
        raise NotImplementedError(("every method needs a 'map' method"))

    def reduce(self, query, values):
        """Run a query against the db for each entry in values.

        Nothing is executed when query is falsy and values is empty or
        starts with a falsy entry.

        Args:
            query: The SQL statement to execute.
            values: Sequence of parameter tuples, one per execution.
        """
        total_changes = self.db.total_changes
        # Guard the index so an empty batch is a no-op, not an IndexError.
        if query or (values and values[0]):
            self.db.executemany(query, values)
            self.log_all.info('%i properties added to MESS.DB',
                              self.db.total_changes - total_changes)

    def setup(self):
        """Set up method: insert program, then method, then parameters."""
        self._insert_program()
        self._insert_method()
        self._insert_parameters()
Example #2
0
class TestMethodPath(unittest.TestCase):
    # Unit tests for MethodPath, run against a throwaway database created
    # under ./tmp for every test.

    def setUp(self):
        # Fresh scratch directory, database and empty graph per test.
        self.tmp_dir = './tmp'
        if not os.path.exists(self.tmp_dir):
            os.mkdir(self.tmp_dir)
        self.path = MethodPath()
        with suppress_stderr():  # silence 'MESS.DB created' message
            self.path._db = MessDB(database='%s/test.db' % self.tmp_dir)
        self.path._graph = DirectedGraph()

    def tearDown(self):
        # Remove the scratch directory together with the test database.
        shutil.rmtree(self.tmp_dir)

    def assert_path_consistency(self):
        # The cached _path_id must agree with what get_path_id() reports.
        self.assertEqual(self.path._path_id, self.path.get_path_id())

    def test_init(self):
        self.assertEqual(self.path._db.tries, 0)
        self.assertEqual(self.path._graph._node_count, 0)
        self.assertEqual(self.path._path, [])
        self.assertIsNone(self.path._path_id)
        self.assert_path_consistency()

    def test_load_graph(self):
        insert_query = 'INSERT INTO method_edge VALUES (?, ?, ?)'
        self.path._db.executemany(insert_query, ((1, 1, 1),
                                                 (2, 1, 2),
                                                 (3, 2, 3)))
        self.path._load_graph()
        self.assertEqual(sorted(self.path._graph.get_node_ids()), [1, 2, 3])
        self.assert_path_consistency()

    def test_setup_path(self):
        self.path.setup_path(1)
        self.assertEqual(self.path._graph._node_count, 1)
        self.assertEqual(self.path._path, [1])
        self.assertEqual(self.path.get_length(), 0)
        self.assert_path_consistency()
        # check that new path isn't added for same method
        new_path = MethodPath()
        new_path._db = self.path._db
        new_path._graph = DirectedGraph()
        new_path._load_graph()
        new_path.setup_path(1)
        self.assertEqual(new_path._graph._node_count, 1)
        self.assertEqual(new_path._path, [1])
        self.assertEqual(new_path.get_length(), 0)

    def test_extend_path(self):
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.assertEqual(self.path._graph._node_count, 2)
        self.assertEqual(self.path._path, [1, 2])
        self.assertEqual(self.path.get_length(), 1)
        self.path.extend_path(44)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2, 5])
        self.assertEqual(self.path.get_length(), 2)
        # Extending with an already-seen method must not add a graph node.
        self.path.extend_path(42)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2, 5, 8])
        self.assertEqual(self.path.get_length(), 3)
        self.assert_path_consistency()

    def test_set_path(self):
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.path.set_path(2)
        self.assertEqual(self.path._graph._node_count, 3)
        self.assertEqual(self.path._path, [1, 2])
        self.assertEqual(self.path.get_length(), 1)
        self.assert_path_consistency()

    def test_get_path_id(self):
        self.assertIsNone(self.path.get_path_id())
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_path_id(), 3)

    def test_get_parent_path_id(self):
        self.assertIsNone(self.path.get_parent_path_id())
        self.path.setup_path(42)
        self.path.extend_path(43)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_parent_path_id(), 2)

    def test_get_method_id(self):
        self.assertIsNone(self.path.get_method_id())
        self.path.setup_path(42)
        self.assertEqual(self.path.get_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_method_id(), 43)

    def test_get_parent_method_id(self):
        self.assertIsNone(self.path.get_parent_method_id())
        self.path.setup_path(42)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_parent_method_id(), 43)

    def test_get_superparent_method_id(self):
        self.assertIsNone(self.path.get_superparent_method_id())
        # NOTE(review): the next two assertions call get_parent_method_id,
        # not get_superparent_method_id -- possibly a copy/paste leftover;
        # confirm the intended expectations for paths of length 0 and 1.
        self.path.setup_path(42)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(43)
        self.assertEqual(self.path.get_parent_method_id(), 42)
        self.path.extend_path(44)
        self.assertEqual(self.path.get_superparent_method_id(), 42)
        self.path.extend_path(45)
        self.assertEqual(self.path.get_superparent_method_id(), 43)

    def test_get_directory(self):
        self.assertIsNone(self.path._get_directory(1, 2, 3))

    def test_get_path_directory(self):
        self.assertIsNone(self.path.get_path_directory())

    def test_get_parent_path_directory(self):
        self.assertIsNone(self.path.get_parent_path_directory())