def __init__(self, min_shell: int = 1, max_shell: int = 7,
             nauty: Optional[Nauty] = None,
             traceable: Optional[bool] = False,
             versioning: Optional[bool] = False) -> None:
    """Set up empty charge tables for the given shell range.

    Args:
        min_shell: Smallest shell size; negative values are clamped to 0.
        max_shell: Largest shell size, stored as given.
        nauty: An external Nauty instance to use. A new instance is
                created when None is passed.
        traceable: If truthy, the iso_iacm and iso_elem tables are
                created as well; otherwise those attributes are not set.
        versioning: If truthy, the innermost charge containers are
                _VersioningList objects instead of plain lists.
    """
    # Compare against None explicitly, so that a caller-supplied instance
    # is never replaced merely because it evaluates as falsy.
    self.__nauty = nauty if nauty is not None else Nauty()
    self.__min_shell = max(min_shell, 0)
    self.__max_shell = max_shell
    self.__versioning = versioning
    self.__traceable = traceable

    # Both charge tables are two-level defaultdicts; only the type of the
    # innermost container depends on the versioning flag.
    charge_container = _VersioningList if versioning else list
    self.charges_iacm = defaultdict(
            lambda: defaultdict(charge_container))  # type: EitherChargeSet
    self.charges_elem = defaultdict(
            lambda: defaultdict(charge_container))  # type: EitherChargeSet

    if traceable:
        self.iso_iacm = defaultdict(list)
        self.iso_elem = defaultdict(list)
def test_create():
    """Check that a new Nauty instance refers to a runnable executable."""
    nauty = Nauty()
    assert nauty.exe != ''

    nauty_path = Path(nauty.exe)
    assert nauty_path.exists()
    assert nauty_path.is_file()
    # The owner-executable permission bit must be set.
    assert nauty_path.stat().st_mode & stat.S_IXUSR

    # poll() is expected to return None while the child process is alive
    # (subprocess.Popen semantics; this assumes __process is a Popen).
    # The earlier `not poll()` form also passed for an exit status of 0,
    # i.e. for a process that had already terminated.
    assert nauty._Nauty__process.poll() is None
class _CanonicalizationWorker:
    """Computes a canonical hash (key) for a graph.

    Graphs that are isomorphic to each other get the same key. Each
    worker creates and uses its own Nauty instance.
    """

    def __init__(self, color_key: str):
        """Create a worker.

        Args:
            color_key: Passed on as the color_key argument of every
                    canonize call.
        """
        self.__nauty = Nauty()
        self.__color_key = color_key

    def process(self, molid: int, graph: nx.Graph) -> Tuple[int, str]:
        """Canonize a single graph.

        Args:
            molid: Identifier that is handed back unchanged.
            graph: The graph to canonize.

        Returns:
            A tuple of the given molid and the result of canonizing
            the graph.
        """
        canonical_key = self.__nauty.canonize(
                graph, color_key=self.__color_key)
        return molid, canonical_key
def __init__(self, repository: Repository, rounding_digits: int,
             nauty: Optional[Nauty] = None) -> None:
    """Create a SimpleCollector.

    Args:
        repository: The repository to collect charges from
        rounding_digits: Number of digits to round charges to
        nauty: An external Nauty instance to use; when None, a new
                instance is created.
    """
    self._repository = repository
    self._rounding_digits = rounding_digits

    # Fall back to a private instance only if the caller supplied none.
    if nauty is None:
        nauty = Nauty()
    self._nauty = nauty
def test_cross_validate_molecule(mock_traceable_methane_repository,
                                 ref_graph_charged, charger_iacm):
    """Cross-validate molecule 1 and check atom counts and zero errors."""
    charger, iacm = charger_iacm
    canonizer = Nauty()

    report = cross_validate_molecule(
            mock_traceable_methane_repository, 1, ref_graph_charged,
            charger, [1], iacm, canonizer)

    # Expected number of atoms counted per element category.
    expected_atom_counts = (('C', 1), ('H', 4), ('O', 0))
    for category_name, atom_count in expected_atom_counts:
        assert report.category(category_name).total_atoms == atom_count
    assert report.molecule.total_mols == 1

    # The accumulated absolute errors are expected to be exactly zero.
    for category_name in ('C', 'H'):
        assert report.category(category_name).sum_abs_atom_err == 0.0
    assert report.molecule.sum_abs_total_err == 0.0
def atoms_neighborhoods_charges(
        graph: nx.Graph,
        nauty: Nauty,
        shell: int,
        atom_type_key: str
        ) -> Generator[Tuple[Atom, str, float], None, None]:
    """Yields neighborhood hash and partial charge for each atom.

    Args:
        graph: The graph whose nodes (atoms) are visited. Every node
                must carry a 'partial_charge' attribute.
        nauty: The Nauty instance to use to canonize the neighborhoods.
        shell: The shell size to use to make the neighborhood
        atom_type_key: The name of the atom type attribute to use

    Yields:
        Tuples containing an atom, the neighborhood hash, and the
        partial charge of the atom.

    Raises:
        KeyError: When iteration reaches an atom that has no
                'partial_charge' attribute.
    """
    # NOTE(review): `Graph.node` was removed in networkx 2.4; if the
    # project targets networkx >= 2.0, `graph.nodes[atom]` is the
    # replacement. Confirm the supported networkx version.
    for atom in graph.nodes():
        attributes = graph.node[atom]
        if 'partial_charge' not in attributes:
            raise KeyError(
                    'Missing property "partial_charge" for atom {}'.format(atom))

        charge = attributes['partial_charge']
        neighborhood_key = nauty.canonize_neighborhood(
                graph, atom, shell, atom_type_key)
        yield atom, neighborhood_key, charge
def __init__(self, repository: Repository,
             rounding_digits: Optional[int] = ROUNDING_DIGITS,
             nauty: Optional[Nauty] = None,
             **kwargs) -> None:
    """Create a ChargerBase.

    Args:
        repository: The repository to obtain charges from
        rounding_digits: Number of digits to round charges to. The
                value is clamped to the range 0..MAX_ROUNDING_DIGITS;
                None selects the default ROUNDING_DIGITS.
        nauty: An external Nauty instance to use for canonization; a
                new instance is created when None is passed.
        **kwargs: Accepted but not used by this constructor.
    """
    # The annotation allows None, but max(None, 0) raises a TypeError,
    # so map None onto the documented default first.
    if rounding_digits is None:
        rounding_digits = ROUNDING_DIGITS

    # These are all protected, not private
    self._nauty = nauty if nauty is not None else Nauty()
    self._repo = repository
    self._rounding_digits = min(max(rounding_digits, 0),
                                MAX_ROUNDING_DIGITS)

    # To be assigned in derived classes
    self._collector = None  # type: Collector
    self._solver = None     # type: Solver
def __init__(self, shell: int, color_key: str):
    """Store the settings and create a private Nauty instance.

    Args:
        shell: The shell size, stored for later use.
        color_key: The color key, stored for later use.
    """
    self.__shell, self.__color_key = shell, color_key
    self.__nauty = Nauty()
def __init__(self, color_key: str):
    """Store the color key and create a private Nauty instance.

    Args:
        color_key: The color key, stored for later use.
    """
    self.__color_key = color_key
    self.__nauty = Nauty()
def nauty():
    """Yield a new Nauty instance and finalize it afterwards.

    Presumably used as a pytest fixture; the decorator is not visible
    in this excerpt.
    """
    instance = Nauty()
    yield instance
    # Runs once the consumer resumes the generator: the finalizer is
    # invoked explicitly rather than left to garbage collection.
    instance.__del__()
def cross_validate_molecules(
        charger_type: str,
        iacm: bool,
        data_location: str,
        data_type: IOType = IOType.LGF,
        shell: Union[None, int, Iterable[int]] = None,
        repo: Optional[Repository] = None,
        bucket: int = 0,
        num_buckets: int = 1
        ) -> ValidationReport:
    """Cross-validates a particular method on the given molecule data.

    Runs through all the molecules in the repository, and for each,
    predicts charges using the given charger type and from the rest of
    the molecules in the repository.

    If iacm is False, the test molecule is stripped of its charge data
    and its IACM atom types, leaving only plain elements. It is then
    matched first against the IACM side of the repository, and if that
    yields no charges, against the plain element side of the repository.

    If iacm is True, the test molecule is stripped of charges but keeps
    its IACM atom types. It is then matched against the IACM side of the
    repository, and if no matches are found, its plain elements are
    matched against the plain element side.

    If bucket and num_buckets are specified, then this will only run
    the cross-validation if (molid % num_buckets) == bucket.

    The molecule files are always read from data_location, also when a
    repository is passed in. File names are expected to consist of an
    integer molecule id followed by the extension of data_type.

    Args:
        charger_type: Name of a Charger-derived class implementing an
                assignment method.
        iacm: Whether to use IACM or plain element atoms.
        data_location: Path to the directory with the molecule data.
        data_type: Format of the molecule data to expect.
        shell: (List of) shell size(s) to use. When None, all shells
                present in the repository are used.
        repo: A Repository with traceable charges. When None, one is
                created from data_location.
        bucket: Cross-validate for this bucket.
        num_buckets: Total number of buckets that will run.

    Returns:
        A ValidationReport combining the results for all molecules
        that were cross-validated.
    """
    if shell is None:
        min_shell, max_shell = None, None
        wanted_shells = None
    else:
        # Materialise once: shell may be a one-shot iterable, and it is
        # consumed by min(), max() and sorted() below.
        shell_sizes = [shell] if isinstance(shell, int) else list(shell)
        min_shell, max_shell = min(shell_sizes), max(shell_sizes)
        wanted_shells = sorted(shell_sizes, reverse=True)

    if repo is None:
        if min_shell is not None:
            repo = Repository.create_from(data_location, data_type,
                                          min_shell, max_shell,
                                          traceable=True)
        else:
            repo = Repository.create_from(data_location, data_type,
                                          traceable=True)

    # Shells are used from large to small; requested shells that the
    # repository does not contain are skipped with a warning.
    if wanted_shells is None:
        shells = sorted(repo.charges_iacm.keys(), reverse=True)
    else:
        shells = []
        for s in wanted_shells:
            if s in repo.charges_iacm:
                shells.append(s)
            else:
                msg = 'Shell {} will not be used, as it is not in the repository'
                warn(msg.format(s))

    nauty = Nauty()

    extension = data_type.get_extension()
    # Keep the file name next to the parsed id, so that the file that
    # was listed is the one opened, even if its name is not the
    # canonical decimal form of the id (e.g. zero-padded).
    mol_files = [(int(fn.replace(extension, '')), fn)
                 for fn in os.listdir(data_location)
                 if fn.endswith(extension)]

    report = ValidationReport()

    for molid, file_name in mol_files:
        if (molid % num_buckets) != bucket:
            continue

        mol_path = os.path.join(data_location, file_name)
        with open(mol_path, 'r') as f:
            graph = convert_from(f.read(), data_type)

        report += cross_validate_molecule(
                repo, molid, graph, charger_type, shells, iacm, nauty)

    return report