Esempio n. 1
0
def charge_molecule(
        molecule: bytes,
        total_charge: int
        # Quoted so the annotation is never evaluated at definition time
        # and therefore needs no additional names in scope.
        ) -> 'Union[bytes, Tuple[str, int]]':
    """Submit a molecule for charging.

    Accepts input and produces output in Lemon Graph Format, for which
    there is no MIME type, so this specifies text/plain (which it is).
    See http://lemon.cs.elte.hu/pub/doc/1.2.3/a00002.html.

    Args:
        molecule: Description of the input molecule, as UTF-8 encoded
                LGF text.
        total_charge: Desired total charge of the molecule.

    Returns:
        On success, the graph converted back to LGF and encoded as
        UTF-8 bytes, after charge_server.charge() has processed it.
        On failure, a (message, status) tuple: status 400 if the input
        could not be decoded or converted, status 404 if charging
        raised AssignmentError.
    """
    try:
        graph = convert_from(molecule.decode('utf-8'), IOType.LGF)
    except (ValueError, AttributeError):
        # UnicodeDecodeError is a ValueError, so undecodable bytes end up
        # here too; AttributeError covers a body without .decode().
        return ('Error decoding input, is it valid LGF, and sent as'
                ' "Content-Type: text/plain" ?'), 400

    try:
        charge_server.charge(graph, total_charge)
    except AssignmentError:
        return ('Charges could not be assigned due to lack of data or because'
                ' the total charge was too far off from our reference charges.'
                ), 404

    lgf_output = convert_to(graph, IOType.LGF)
    # Explicit codec, mirroring the decode of the request body above.
    return lgf_output.encode('utf-8')
Esempio n. 2
0
 def process(self, molid: int) -> Tuple[int, nx.Graph]:
     """Read and convert the data file belonging to one molecule.

     The file is looked up in this object's data location under the
     name '<molid><extension>' and converted with convert_from() using
     this object's data type.

     Args:
         molid: Numeric id of the molecule; also the file's base name.

     Returns:
         The molid together with the graph produced by convert_from().

     Raises:
         RuntimeError: If the converted graph has no nodes.
     """
     directory = self.__data_location
     basename = '%d%s' % (molid, self.__extension)
     filename = os.path.join(directory, basename)

     with open(filename, 'r') as f:
         contents = f.read()

     graph = convert_from(contents, self.__data_type)
     if graph.number_of_nodes() != 0:
         return molid, graph

     # An empty graph most likely means the file could not be parsed
     # into a molecule at all, so refuse to hand it on.
     message = ('Molecule with molid {} read from file {}'
                ' has no atoms! Is this file valid?')
     raise RuntimeError(message.format(molid, filename))
Esempio n. 3
0
def test_from_rdkit(ref_graph_rdkit, ref_graph_shifted):
    """Converting ref_graph_rdkit as RDKIT must match ref_graph_shifted."""
    converted = convert_from(ref_graph_rdkit, IOType.RDKIT)
    assert_same_graph(converted, ref_graph_shifted)
Esempio n. 4
0
def test_from_itp(ref_graph_itp, ref_graph):
    """Converting ref_graph_itp as ITP must match ref_graph."""
    converted = convert_from(ref_graph_itp, IOType.ITP)
    assert_same_graph(converted, ref_graph)
Esempio n. 5
0
def test_from_gml(ref_graph_gml, ref_graph):
    """Converting ref_graph_gml as GML must match ref_graph."""
    converted = convert_from(ref_graph_gml, IOType.GML)
    assert_same_graph(converted, ref_graph)
Esempio n. 6
0
def test_invalid_lgf():
    """Converting text that is not LGF must raise ValueError."""
    invalid_lgf = 'This is not valid LGF'
    with pytest.raises(ValueError):
        # Only the exception matters here, so the result is not bound.
        convert_from(invalid_lgf, IOType.LGF)
Esempio n. 7
0
def test_from_lgf(ref_graph_lgf, ref_graph):
    """Converting ref_graph_lgf as LGF must match ref_graph."""
    converted = convert_from(ref_graph_lgf, IOType.LGF)
    assert_same_graph(converted, ref_graph)
Esempio n. 8
0
def cross_validate_molecules(
        charger_type: str,
        iacm: bool,
        data_location: str,
        data_type: IOType = IOType.LGF,
        shell: Union[None, int, Iterable[int]] = None,
        repo: Optional[Repository] = None,
        bucket: int = 0,
        num_buckets: int = 1
        ) -> ValidationReport:
    """Cross-validates a particular method on the given molecule data.

    Runs through all the molecule files in data_location, and for each, \
    predicts charges using the given charger type and from the rest of \
    the molecules in the repository.

    If iacm is False, the test molecule is stripped of its charge data \
    and its IACM atom types, leaving only plain elements. It is then \
    matched first against the IACM side of the repository, and if that \
    yields no charges, against the plain element side of the repository.

    If iacm is True, the test molecule is stripped of charges but keeps \
    its IACM atom types. It is then matched against the IACM side of the \
    repository, and if no matches are found, its plain elements are \
    matched against the plain element side.

    If bucket and num_buckets are specified, then this will only run \
    the cross-validation for molecules with \
    (molid % num_buckets) == bucket.

    Args:
        charger_type: Name of a Charger-derived class implementing an \
                assignment method.
        iacm: Whether to use IACM or plain element atoms.
        data_location: Path to the directory with the molecule data.
        data_type: Format of the molecule data to expect.
        shell: (List of) shell size(s) to use. Any iterable of ints is \
                accepted, including one-shot iterators. If None, all \
                shell sizes present in the repository are used.
        repo: A Repository with traceable charges. If None, one is \
                created from data_location.
        bucket: Cross-validate for this bucket.
        num_buckets: Total number of buckets that will run.

    Returns:
        A ValidationReport to which the result of
        cross_validate_molecule() has been added for every molecule in
        this bucket.

    Raises:
        ValueError: If shell is an empty iterable.
    """
    if shell is None:
        min_shell, max_shell = None, None
        wanted_shells = None
    else:
        # Materialise once: shell may be a one-shot iterator, which
        # would be exhausted after the first pass over it.
        shell_sizes = [shell] if isinstance(shell, int) else list(shell)
        if not shell_sizes:
            raise ValueError('shell must contain at least one shell size')
        # Largest shell first.
        wanted_shells = sorted(shell_sizes, reverse=True)
        min_shell, max_shell = wanted_shells[-1], wanted_shells[0]

    if repo is None:
        if min_shell is not None:
            repo = Repository.create_from(data_location, data_type, min_shell,
                    max_shell, traceable=True)
        else:
            repo = Repository.create_from(data_location, data_type,
                    traceable=True)

    available_shells = repo.charges_iacm.keys()
    if wanted_shells is None:
        shells = sorted(available_shells, reverse=True)
    else:
        shells = []
        for s in wanted_shells:
            if s in available_shells:
                shells.append(s)
            else:
                msg = 'Shell {} will not be used, as it is not in the repository'
                warn(msg.format(s))

    nauty = Nauty()

    # Molecule ids are the file names in data_location with the data
    # type's extension removed.
    extension = data_type.get_extension()
    molids = [int(fn.replace(extension, ''))
              for fn in os.listdir(data_location)
              if fn.endswith(extension)]

    report = ValidationReport()

    for molid in molids:
        if (molid % num_buckets) != bucket:
            # Belongs to another bucket.
            continue

        mol_path = os.path.join(
                data_location, '{}{}'.format(molid, extension))
        with open(mol_path, 'r') as f:
            graph = convert_from(f.read(), data_type)

        report += cross_validate_molecule(
                repo, molid, graph, charger_type, shells, iacm, nauty)

    return report