Ejemplo n.º 1
0
    def fit_with_mapper(self, mapper):
        """
        Run the shared MoleculeMatcher.fit checks using the given atom mapper.

        A CH4 molecule is first compared against a rotated copy of itself,
        then a series of molecule pairs read from ``test_dir`` is compared
        with the default tolerance, and finally the t3/t4 pair is compared
        again with a tolerance of 0.001 and expected not to match.

        Args:
            mapper: the atom mapper handed to MoleculeMatcher
        """
        elements = ["C", "H", "H", "H", "H"]
        coords = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]

        # rotated copy of the same five atoms
        rotation = SymmOp.from_origin_axis_angle(
            [0, 0, 0], [0.1, 0.2, 0.3], 60)
        rotcoords = [rotation.operate(xyz) for xyz in coords]

        reference = Molecule(list(elements), coords)
        rotated = Molecule(list(elements), rotcoords)
        mm = MoleculeMatcher(mapper=mapper)
        self.assertTrue(mm.fit(reference, rotated))

        def load(filename):
            # read one molecule fixture from the test directory
            return Molecule.from_file(os.path.join(test_dir, filename))

        # (first file, second file, whether the pair is expected to match)
        expectations = [
            ("benzene1.xyz", "benzene2.xyz", True),
            ("benzene1.xyz", "t2.xyz", False),
            ("c1.xyz", "c2.xyz", True),
            ("t3.xyz", "t4.xyz", True),
            ("j1.xyz", "j2.xyz", True),
            ("ethene1.xyz", "ethene2.xyz", True),
            ("toluene1.xyz", "toluene2.xyz", True),
            ("cyclohexane1.xyz", "cyclohexane2.xyz", True),
            ("oxygen1.xyz", "oxygen2.xyz", True),
        ]
        for first_name, second_name, should_match in expectations:
            first = load(first_name)
            second = load(second_name)
            if should_match:
                self.assertTrue(mm.fit(first, second))
            else:
                self.assertFalse(mm.fit(first, second))

        # the t3/t4 pair must no longer match under a much tighter tolerance
        strict_mm = MoleculeMatcher(tolerance=0.001, mapper=mapper)
        self.assertFalse(strict_mm.fit(load("t3.xyz"), load("t4.xyz")))
Ejemplo n.º 2
0
 def test_thiane_ethynyl(self):
     """
     The two thiane_ethynyl SDF fixtures must not match at tolerance 0.05
     when atoms are mapped with InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     first_path = os.path.join(test_dir, "thiane_ethynyl1.sdf")
     second_path = os.path.join(test_dir, "thiane_ethynyl2.sdf")
     first = Molecule.from_file(first_path)
     second = Molecule.from_file(second_path)
     is_match = matcher.fit(first, second)
     self.assertFalse(is_match)
Ejemplo n.º 3
0
 def test_cdi_23(self):
     """
     The two cdi_23 XYZ fixtures must not match at tolerance 0.05 when
     atoms are mapped with InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     first_path = os.path.join(test_dir, "cdi_23_1.xyz")
     second_path = os.path.join(test_dir, "cdi_23_2.xyz")
     first = Molecule.from_file(first_path)
     second = Molecule.from_file(second_path)
     is_match = matcher.fit(first, second)
     self.assertFalse(is_match)
Ejemplo n.º 4
0
 def test_thiane(self):
     """
     The two thiane SDF fixtures, loaded with read_mol, must not match at
     tolerance 0.05 when atoms are mapped with InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     first_path = os.path.join(test_dir, "thiane1.sdf")
     second_path = os.path.join(test_dir, "thiane2.sdf")
     first = read_mol(first_path)
     second = read_mol(second_path)
     is_match = matcher.fit(first, second)
     self.assertFalse(is_match)
Ejemplo n.º 5
0
 def test_strange_inchi(self):
     """
     The k1 and k2 SDF fixtures must match at tolerance 0.05 when atoms are
     mapped with InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     first_path = os.path.join(test_dir, "k1.sdf")
     second_path = os.path.join(test_dir, "k2.sdf")
     first = Molecule.from_file(first_path)
     second = Molecule.from_file(second_path)
     is_match = matcher.fit(first, second)
     self.assertTrue(is_match)
Ejemplo n.º 6
0
 def test_strange_inchi(self):
     """
     Check that the k1/k2 SDF pair is reported as matching by a
     MoleculeMatcher built with tolerance 0.05 and InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     molecules = []
     for filename in ("k1.sdf", "k2.sdf"):
         molecules.append(
             Molecule.from_file(os.path.join(test_dir, filename)))
     self.assertTrue(matcher.fit(molecules[0], molecules[1]))
Ejemplo n.º 7
0
    def fit_with_mapper(self, mapper):
        """
        Shared body for the MoleculeMatcher.fit tests.

        Compares a CH4 molecule with a rotated copy of itself, then compares
        several pairs of molecule files from ``test_dir`` with a matcher
        using the default tolerance, and finally re-checks the t3/t4 pair
        with a matcher whose tolerance is 0.001.

        Args:
            mapper: the atom mapper handed to MoleculeMatcher
        """

        def fits(matcher, name_a, name_b):
            # load both fixtures from the test directory and compare them
            mol_a = Molecule.from_file(os.path.join(test_dir, name_a))
            mol_b = Molecule.from_file(os.path.join(test_dir, name_b))
            return matcher.fit(mol_a, mol_b)

        coords = [[0.000000, 0.000000, 0.000000],
                  [0.000000, 0.000000, 1.089000],
                  [1.026719, 0.000000, -0.363000],
                  [-0.513360, -0.889165, -0.363000],
                  [-0.513360, 0.889165, -0.363000]]
        original = Molecule(["C", "H", "H", "H", "H"], coords)

        # same atoms after applying a rotation about an axis through the
        # origin
        op = SymmOp.from_origin_axis_angle([0, 0, 0], [0.1, 0.2, 0.3], 60)
        rotated = Molecule(["C", "H", "H", "H", "H"],
                           [op.operate(point) for point in coords])

        mm = MoleculeMatcher(mapper=mapper)
        self.assertTrue(mm.fit(original, rotated))

        # file pairs compared with the default tolerance
        self.assertTrue(fits(mm, "benzene1.xyz", "benzene2.xyz"))
        self.assertFalse(fits(mm, "benzene1.xyz", "t2.xyz"))
        self.assertTrue(fits(mm, "c1.xyz", "c2.xyz"))
        self.assertTrue(fits(mm, "t3.xyz", "t4.xyz"))
        self.assertTrue(fits(mm, "j1.xyz", "j2.xyz"))
        self.assertTrue(fits(mm, "ethene1.xyz", "ethene2.xyz"))
        self.assertTrue(fits(mm, "toluene1.xyz", "toluene2.xyz"))
        self.assertTrue(fits(mm, "cyclohexane1.xyz", "cyclohexane2.xyz"))
        self.assertTrue(fits(mm, "oxygen1.xyz", "oxygen2.xyz"))

        # t3/t4 matched above; with tolerance 0.001 they are expected not to
        tight = MoleculeMatcher(tolerance=0.001, mapper=mapper)
        self.assertFalse(fits(tight, "t3.xyz", "t4.xyz"))
Ejemplo n.º 8
0
 def test_cdi_23(self):
     """
     Check that the cdi_23_1/cdi_23_2 XYZ pair is reported as not matching
     by a MoleculeMatcher built with tolerance 0.05 and InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     molecules = []
     for filename in ("cdi_23_1.xyz", "cdi_23_2.xyz"):
         molecules.append(
             Molecule.from_file(os.path.join(test_dir, filename)))
     self.assertFalse(matcher.fit(molecules[0], molecules[1]))
Ejemplo n.º 9
0
 def test_thiane(self):
     """
     Check that the thiane1/thiane2 SDF pair is reported as not matching by
     a MoleculeMatcher built with tolerance 0.05 and InchiMolAtomMapper.
     """
     matcher = MoleculeMatcher(tolerance=0.05, mapper=InchiMolAtomMapper())
     molecules = []
     for filename in ("thiane1.sdf", "thiane2.sdf"):
         molecules.append(
             Molecule.from_file(os.path.join(test_dir, filename)))
     self.assertFalse(matcher.fit(molecules[0], molecules[1]))
Ejemplo n.º 10
0
class RedundancyGuard(object):
    '''
    A RedundancyGuard object is used to check if an Organism is redundant with
    other organisms already seen by the algorithm.
    '''

    # Names of the tunable redundancy parameters. Each is stored on the
    # instance under this name, is looked up in the parameters dictionary
    # under this key, and has its fallback stored under 'default_' + name.
    _PARAMETER_NAMES = (
        'lattice_length_tol',
        'lattice_angle_tol',
        'site_tol',
        'use_primitive_cell',
        'attempt_supercell',
        'rmsd_tol',
        'epa_diff',
    )

    def __init__(self, redundancy_parameters, geometry):
        '''
        Makes a RedundancyGuard, and sets default parameter values if
        necessary.

        TODO: currently using pymatgen's structure matcher for comparing bulk
            and sheet structures, both pymatgen's structure matcher and
            molecule matcher for comparing wires, and only the molecule matcher
            for clusters. The sheet and wire cases aren't ideal, since the
            structure matcher assumes periodicity in all three dimensions, and
            the molecule matcher assumes no periodicity.

        Args:
            redundancy parameters: a dictionary of parameters, or None or
                'default' to use the default value for every parameter. A
                key that is missing, or whose value is None or 'default',
                falls back to that parameter's default.

            geometry: the Geometry object. The molecule matcher is only
                created when geometry.shape is 'cluster' or 'wire'.
        '''

        # defaults
        #
        # lattice length tolerance, in fractional coordinates
        self.default_lattice_length_tol = 0.05
        # lattice angle tolerance, in degrees
        self.default_lattice_angle_tol = 2
        # site tolerance, in fraction of average free length per atom
        self.default_site_tol = 0.1
        # whether to transform to primitive cells before comparing
        self.default_use_primitive_cell = True
        # whether to check if structures are equal to supercells of each other
        self.default_attempt_supercell = True
        # RMSD tolerance for comparing clusters
        self.default_rmsd_tol = 0.1
        # the epa difference interval
        self.default_epa_diff = 0.0

        if redundancy_parameters in (None, 'default'):
            # nothing was specified, so everything takes its default
            self.set_all_to_defaults()
        else:
            # parse the parameters, and set to defaults if necessary
            for name in self._PARAMETER_NAMES:
                setattr(self, name,
                        self._resolve_parameter(redundancy_parameters, name))

        # make the StructureMatcher object
        #
        # the first False is to prevent the matcher from scaling the volumes,
        # and the second False is to prevent subset matching
        self.structure_matcher = StructureMatcher(
            self.lattice_length_tol, self.site_tol, self.lattice_angle_tol,
            self.use_primitive_cell, False, self.attempt_supercell, False,
            ElementComparator())

        # make the MoleculeMatcher object
        if geometry.shape in ('cluster', 'wire'):
            iso_mol_atom_mapper = IsomorphismMolAtomMapper()
            self.molecule_matcher = MoleculeMatcher(self.rmsd_tol,
                                                    iso_mol_atom_mapper)
            ob.obErrorLog.SetOutputLevel(0)  # to suppress openbabel warnings

    def _resolve_parameter(self, redundancy_parameters, name):
        '''
        Returns the value to use for one redundancy parameter: the value
        given in redundancy_parameters, or the default if the key is missing
        or its value is None or 'default'.

        Args:
            redundancy_parameters: the dictionary of parameters

            name: the name of the parameter, one of _PARAMETER_NAMES
        '''

        default_value = getattr(self, 'default_' + name)
        if name not in redundancy_parameters:
            return default_value
        value = redundancy_parameters[name]
        if value in (None, 'default'):
            return default_value
        return value

    def set_all_to_defaults(self):
        '''
        Sets all the redundancy parameters to default values.
        '''

        for name in self._PARAMETER_NAMES:
            setattr(self, name, getattr(self, 'default_' + name))

    def check_redundancy(self, new_organism, orgs_list, geometry):
        '''
        Checks for redundancy, both structural and if specified, epa (d-value).

        Returns the organism with which new_organism is redundant, or None if
        no redundancy.

        If new_organism has no epa (it isn't relaxed), only its structure is
        compared, against every other organism. If it has an epa, it is only
        compared against organisms that also have an epa, first by structure
        and then by epa difference.

        Args:
            new_organism: the Organism to check for redundancy

            orgs_list: the list containing all Organisms to check against

            geometry: the Geometry of the search
        '''

        is_relaxed = new_organism.epa is not None
        for organism in orgs_list:
            # never compare an organism with itself
            if new_organism.id == organism.id:
                continue
            # a relaxed organism is only checked against relaxed organisms
            if is_relaxed and organism.epa is None:
                continue

            # check if their structures match
            if self.check_structures(new_organism, organism, geometry):
                print('Organism {} failed structural redundancy - '
                      'looks like organism {} '.format(
                          new_organism.id, organism.id))
                return organism

            # check how close their epa's are (relaxed organisms only)
            if is_relaxed and (abs(new_organism.epa - organism.epa) <
                               self.epa_diff):
                print('Organism {} failed energy per atom redundancy '
                      '- looks like organism {} '.format(
                          new_organism.id, organism.id))
                return organism
        return None

    def check_structures(self, org1, org2, geometry):
        '''
        Compares the structures of two organisms to determine if they are
        redundant.

        Returns a boolean indicating whether the structures of the two
        organisms are redundant.

        Args:
            org1: the first Organism

            org2: the second Organism

            geometry: the Geometry of the search
        '''

        # use the molecule matcher for cluster searches
        if geometry.shape == 'cluster':
            return self.match_molecules(org1.cell, org2.cell)
        # wires are redundant if either matcher says so; the structure
        # matcher is only consulted when the molecule matcher found no match
        if geometry.shape == 'wire':
            return (self.match_molecules(org1.cell, org2.cell) or
                    self.structure_matcher.fit(org1.cell, org2.cell))
        # every other shape uses the structure matcher alone
        return self.structure_matcher.fit(org1.cell, org2.cell)

    def match_molecules(self, cell1, cell2):
        '''
        Compares two cells to determine if they are redundant using pymatgen's
        comparison algorithm that assumes no periodicity in any direction.

        Returns a boolean indicating whether the cells are redundant.

        Args:
            cell1: the first Cell

            cell2: the second Cell
        '''

        # build non-periodic molecules from the species and Cartesian
        # coordinates of each cell
        mol1 = Molecule(cell1.species, cell1.cart_coords)
        mol2 = Molecule(cell2.species, cell2.cart_coords)
        return self.molecule_matcher.fit(mol1, mol2)