Esempio n. 1
0
    def __init__(self,
                 min_shell: int = 1,
                 max_shell: int = 7,
                 nauty: Optional[Nauty] = None,
                 traceable: Optional[bool] = False,
                 versioning: Optional[bool] = False) -> None:
        """Set up the (initially empty) charge tables.

        Args:
            min_shell: Smallest shell size; negative values are
                    clamped to 0.
            max_shell: Largest shell size, stored as given.
            nauty: An external Nauty instance to use. A new instance
                    is created when this is None.
            traceable: If true, the iso_iacm and iso_elem tables are
                    created as well.
            versioning: If true, the charge tables hold
                    _VersioningList objects instead of plain lists.
        """
        # Compare against None explicitly, so that a supplied instance
        # is never replaced merely because it evaluates as false.
        self.__nauty = nauty if nauty is not None else Nauty()
        self.__min_shell = max(min_shell, 0)
        self.__max_shell = max_shell
        self.__versioning = versioning
        self.__traceable = traceable

        # Both tables are two-level defaultdicts; only the innermost
        # container type depends on whether versioning is enabled.
        leaf_factory = _VersioningList if versioning else list
        self.charges_iacm = defaultdict(
            lambda: defaultdict(leaf_factory))  # type: EitherChargeSet
        self.charges_elem = defaultdict(
            lambda: defaultdict(leaf_factory))  # type: EitherChargeSet

        if traceable:
            self.iso_iacm = defaultdict(list)
            self.iso_elem = defaultdict(list)
Esempio n. 2
0
def test_create():
    """Check that a new Nauty instance refers to a runnable executable."""
    nauty = Nauty()
    assert nauty.exe != ''
    nauty_path = Path(nauty.exe)
    assert nauty_path.exists()
    assert nauty_path.is_file()
    # The owner must have execute permission on the binary.
    assert nauty_path.stat().st_mode & stat.S_IXUSR
    # poll() gives None only while the child is still alive; a plain
    # `not poll()` would also accept a process that already exited
    # with status 0.
    # NOTE(review): assumes __process is a subprocess.Popen -- confirm.
    assert nauty._Nauty__process.poll() is None
Esempio n. 3
0
class _CanonicalizationWorker:
    """Computes a canonical hash (key) for a graph.

    Graphs that are isomorphic to each other get the same key.
    """
    def __init__(self, color_key: str):
        """Create a worker that owns its own Nauty instance.

        Args:
            color_key: Passed on to Nauty.canonize() as color_key.
        """
        self.__color_key = color_key
        self.__nauty = Nauty()

    def process(self, molid: int, graph: nx.Graph) -> Tuple[int, str]:
        """Canonize a single graph.

        Args:
            molid: Identifier that is handed back unchanged.
            graph: The graph to canonize.

        Returns:
            The molid together with the canonical key of the graph.
        """
        canonical_key = self.__nauty.canonize(
            graph, color_key=self.__color_key)
        return molid, canonical_key
Esempio n. 4
0
    def __init__(self,
                 repository: Repository,
                 rounding_digits: int,
                 nauty: Optional[Nauty] = None) -> None:
        """Create a SimpleCollector.

        Args:
            repository: The repository from which charges are collected
            rounding_digits: How many digits charges are rounded to
            nauty: A Nauty instance supplied by the caller; when None,
                    a new instance is created
        """
        if nauty is None:
            nauty = Nauty()
        self._nauty = nauty
        self._rounding_digits = rounding_digits
        self._repository = repository
Esempio n. 5
0
def test_cross_validate_molecule(mock_traceable_methane_repository,
                                 ref_graph_charged, charger_iacm):
    """Check the report cross_validate_molecule() gives for molecule 1."""
    charger_type, use_iacm = charger_iacm
    canonizer = Nauty()
    molid, shells = 1, [1]

    result = cross_validate_molecule(
        mock_traceable_methane_repository, molid, ref_graph_charged,
        charger_type, shells, use_iacm, canonizer)

    # Atom and molecule counts
    assert result.category('C').total_atoms == 1
    assert result.category('H').total_atoms == 4
    assert result.category('O').total_atoms == 0
    assert result.molecule.total_mols == 1
    # Accumulated errors
    assert result.category('C').sum_abs_atom_err == 0.0
    assert result.category('H').sum_abs_atom_err == 0.0
    assert result.molecule.sum_abs_total_err == 0.0
Esempio n. 6
0
def atoms_neighborhoods_charges(
        graph: nx.Graph, nauty: Nauty, shell: int,
        atom_type_key: str) -> Generator[Tuple[Atom, str, float], None, None]:
    """Yields neighborhood hash and partial charge for each atom.

    Args:
        graph: The molecule graph; every node must carry a
                'partial_charge' attribute.
        nauty: The Nauty instance to use to canonize the neighborhoods.
        shell: The shell size to use to make the neighborhood
        atom_type_key: The name of the atom type attribute to use

    Yields:
        Tuples containing an atom, the neighborhood hash, and the
                partial charge of the atom.

    Raises:
        KeyError: If an atom has no 'partial_charge' attribute. Since
                this is a generator, the error surfaces when that
                atom is reached during iteration.
    """
    # Iterate (node, attributes) pairs via nodes(data=True) instead of
    # indexing the graph.node mapping: graph.node was removed in
    # networkx 2.4, whereas nodes(data=True) exists in old and new
    # releases alike.
    for atom, attributes in graph.nodes(data=True):
        if 'partial_charge' not in attributes:
            raise KeyError(
                'Missing property "partial_charge" for atom {}'.format(atom))
        partial_charge = attributes['partial_charge']
        key = nauty.canonize_neighborhood(graph, atom, shell, atom_type_key)
        yield atom, key, partial_charge
Esempio n. 7
0
    def __init__(self,
                 repository: Repository,
                 rounding_digits: Optional[int] = ROUNDING_DIGITS,
                 nauty: Optional[Nauty] = None,
                 **kwargs) -> None:
        """Create a ChargerBase.

        Args:
            repository: The repository to obtain charges from
            rounding_digits: Number of significant digits to round
                    charges to. Clamped to the range
                    0..MAX_ROUNDING_DIGITS; None selects
                    ROUNDING_DIGITS.
            nauty: An external Nauty instance to use for canonization
            **kwargs: Accepted but not used by this constructor.
        """
        # The annotation admits None, but max(None, 0) would raise a
        # TypeError, so map None onto the documented default first.
        if rounding_digits is None:
            rounding_digits = ROUNDING_DIGITS

        # These are all protected, not private
        self._nauty = nauty if nauty is not None else Nauty()
        self._repo = repository
        self._rounding_digits = min(max(rounding_digits, 0),
                                    MAX_ROUNDING_DIGITS)

        # To be assigned in derived classes
        self._collector = None  # type: Collector
        self._solver = None  # type: Solver
Esempio n. 8
0
 def __init__(self, shell: int, color_key: str):
     """Store the shell size and color key, and create a Nauty instance.

     Args:
         shell: The shell size to remember.
         color_key: The color key to remember.
     """
     self.__nauty = Nauty()
     self.__color_key = color_key
     self.__shell = shell
Esempio n. 9
0
 def __init__(self, color_key: str):
     """Store the color key and create a Nauty instance.

     Args:
         color_key: The color key to remember.
     """
     self.__color_key = color_key
     self.__nauty = Nauty()
Esempio n. 10
0
def nauty():
    """Yield a fresh Nauty instance and finalize it afterwards."""
    instance = Nauty()
    yield instance
    # Call the finalizer explicitly once the consumer resumes this
    # generator, rather than waiting for garbage collection.
    instance.__del__()
Esempio n. 11
0
def cross_validate_molecules(
        charger_type: str,
        iacm: bool,
        data_location: str,
        data_type: IOType = IOType.LGF,
        shell: Union[None, int, Iterable[int]] = None,
        repo: Optional[Repository] = None,
        bucket: int = 0,
        num_buckets: int = 1
        ) -> ValidationReport:
    """Cross-validates a particular method on the given molecule data.

    Runs through all the molecules in the repository, and for each, \
    predicts charges using the given charger type and from the rest of \
    the molecules in the repository.

    If iacm is False, the test molecule is stripped of its charge data \
    and its IACM atom types, leaving only plain elements. It is then \
    matched first against the IACM side of the repository, and if that \
    yields no charges, against the plain element side of the repository.

    If iacm is True, the test molecule is stripped of charges but keeps \
    its IACM atom types. It is then matched against the IACM side of the \
    repository, and if no matches are found, its plain elements are \
    matched against the plain element side.

    If bucket and num_buckets are specified, then this will only run \
    the cross-validation if (molid % num_buckets) == bucket.

    Args:
        charger_type: Name of a Charger-derived class implementing an \
                assignment method.
        iacm: Whether to use IACM or plain element atoms.
        data_location: Path to the directory with the molecule data.
        data_type: Format of the molecule data to expect.
        shell: (List of) shell size(s) to use. Any iterable is \
                accepted, including one-shot iterators. If None, all \
                shells present in the repository are used.
        repo: A Repository with traceable charges. If None, one is \
                created from the data at data_location.
        bucket: Cross-validate for this bucket.
        num_buckets: Total number of buckets that will run.

    Returns:
        A ValidationReport that accumulates the result of \
        cross_validate_molecule() for every selected molecule.
    """
    if shell is None:
        min_shell, max_shell = None, None
        wanted_shells = None
    else:
        # Materialize the sizes once: min(), max() and sorted() each
        # traverse their argument, which would exhaust a one-shot
        # iterator after the first pass.
        shell_sizes = [shell] if isinstance(shell, int) else list(shell)
        min_shell, max_shell = min(shell_sizes), max(shell_sizes)
        wanted_shells = sorted(shell_sizes, reverse=True)

    if repo is None:
        if min_shell is not None:
            repo = Repository.create_from(data_location, data_type, min_shell,
                                          max_shell, traceable=True)
        else:
            repo = Repository.create_from(data_location, data_type,
                                          traceable=True)

    # Shells are tried from largest to smallest.
    if wanted_shells is None:
        shells = sorted(repo.charges_iacm.keys(), reverse=True)
    else:
        shells = []
        for s in wanted_shells:
            if s not in repo.charges_iacm:
                msg = 'Shell {} will not be used, as it is not in the repository'
                warn(msg.format(s))
            else:
                shells.append(s)

    # A single Nauty instance is shared by all molecules.
    nauty = Nauty()

    # Molecule ids are taken from the names of the data files.
    extension = data_type.get_extension()
    molids = [int(fn.replace(extension, ''))
              for fn in os.listdir(data_location)
              if fn.endswith(extension)]

    report = ValidationReport()

    for molid in molids:
        if (molid % num_buckets) != bucket:
            # This molecule belongs to another bucket.
            continue

        mol_path = os.path.join(data_location,
                                '{}{}'.format(molid, extension))
        with open(mol_path, 'r') as f:
            graph = convert_from(f.read(), data_type)

        report += cross_validate_molecule(
            repo, molid, graph, charger_type, shells, iacm, nauty)

    return report