def charge_molecule(molecule: bytes, total_charge: int) -> str:
    """Assign charges to a molecule submitted in Lemon Graph Format.

    Both the input and the output are in Lemon Graph Format (LGF). LGF
    has no MIME type of its own, so it is exchanged as text/plain
    (which it is). See
    http://lemon.cs.elte.hu/pub/doc/1.2.3/a00002.html.

    Args:
        molecule: UTF-8 encoded LGF description of the input molecule.
        total_charge: Desired total charge of the molecule.

    Returns:
        The charged molecule as encoded LGF on success. On failure a
        (message, status) tuple is returned instead: status 400 when
        the input cannot be decoded or parsed, and status 404 when no
        charges could be assigned.
    """
    decode_failure = (
            'Error decoding input, is it valid LGF, and sent as'
            ' "Content-Type: text/plain" ?')
    assignment_failure = (
            'Charges could not be assigned due to lack of data or because'
            ' the total charge was too far off from our reference charges.')

    # A failing decode (ValueError, or AttributeError for an object
    # without .decode) is reported the same way as unparseable LGF.
    try:
        lgf_text = molecule.decode('utf-8')
        graph = convert_from(lgf_text, IOType.LGF)
    except (ValueError, AttributeError):
        return decode_failure, 400

    # The graph object passed in here is the one serialised below.
    try:
        charge_server.charge(graph, total_charge)
    except AssignmentError:
        return assignment_failure, 404

    return convert_to(graph, IOType.LGF).encode()
def process(self, molid: int) -> Tuple[int, nx.Graph]:
    """Read the molecule with the given id from the data directory.

    The file name is the molid followed by the configured extension,
    looked up inside the configured data location.

    Args:
        molid: Id of the molecule to load.

    Returns:
        A tuple of the molid and the graph read from its file.

    Raises:
        RuntimeError: If the graph read from the file has no nodes.
    """
    basename = '%d%s' % (molid, self.__extension)
    filename = os.path.join(self.__data_location, basename)

    with open(filename, 'r') as molecule_file:
        contents = molecule_file.read()
    graph = convert_from(contents, self.__data_type)

    # An empty graph is treated as an invalid file.
    if not graph.number_of_nodes():
        message = ('Molecule with molid {} read from file {}'
                   ' has no atoms! Is this file valid?')
        raise RuntimeError(message.format(molid, filename))

    return molid, graph
def test_from_rdkit(ref_graph_rdkit, ref_graph_shifted):
    """The RDKit reference input converts to the shifted reference graph."""
    converted = convert_from(ref_graph_rdkit, IOType.RDKIT)
    assert_same_graph(converted, ref_graph_shifted)
def test_from_itp(ref_graph_itp, ref_graph):
    """The ITP reference input converts to the reference graph."""
    converted = convert_from(ref_graph_itp, IOType.ITP)
    assert_same_graph(converted, ref_graph)
def test_from_gml(ref_graph_gml, ref_graph):
    """The GML reference input converts to the reference graph."""
    converted = convert_from(ref_graph_gml, IOType.GML)
    assert_same_graph(converted, ref_graph)
def test_invalid_lgf():
    """Converting text that is not LGF must raise ValueError."""
    invalid_lgf = 'This is not valid LGF'
    with pytest.raises(ValueError):
        # The call is expected to raise, so its result is not bound.
        convert_from(invalid_lgf, IOType.LGF)
def test_from_lgf(ref_graph_lgf, ref_graph):
    """The LGF reference input converts to the reference graph."""
    converted = convert_from(ref_graph_lgf, IOType.LGF)
    assert_same_graph(converted, ref_graph)
def cross_validate_molecules(
        charger_type: str,
        iacm: bool,
        data_location: str,
        data_type: IOType = IOType.LGF,
        shell: Union[None, int, Iterable[int]] = None,
        repo: Optional[Repository] = None,
        bucket: int = 0,
        num_buckets: int = 1
        ) -> ValidationReport:
    """Cross-validates a particular method on the given molecule data.

    Runs through all the molecules in the repository, and for each,
    predicts charges using the given charger type and from the rest of
    the molecules in the repository.

    If iacm is False, the test molecule is stripped of its charge data
    and its IACM atom types, leaving only plain elements. It is then
    matched first against the IACM side of the repository, and if that
    yields no charges, against the plain element side of the
    repository.

    If iacm is True, the test molecule is stripped of charges but keeps
    its IACM atom types. It is then matched against the IACM side of
    the repository, and if no matches are found, its plain elements are
    matched against the plain element side.

    If bucket and num_buckets are specified, then only molecules for
    which (molid % num_buckets) == bucket are cross-validated.

    Args:
        charger_type: Name of a Charger-derived class implementing an
                assignment method.
        iacm: Whether to use IACM or plain element atoms.
        data_location: Path to the directory with the molecule data.
        data_type: Format of the molecule data to expect.
        shell: (Iterable of) shell size(s) to use. If None, all shells
                present in the repository are used.
        repo: A Repository with traceable charges. If None, one is
                created from the data in data_location.
        bucket: Cross-validate for this bucket.
        num_buckets: Total number of buckets that will run.

    Returns:
        A ValidationReport accumulating the results of every molecule
        that was cross-validated in this bucket.

    Raises:
        ValueError: If shell is an empty iterable, or if a file with
                the expected extension does not have an integer name.
    """
    if shell is None:
        min_shell, max_shell = None, None
        wanted_shells = None
    else:
        # Materialise the shell sizes before using them: a one-shot
        # iterable (e.g. a generator) would be exhausted by min() and
        # leave nothing for max() and sorted().
        shell_sizes = [shell] if isinstance(shell, int) else list(shell)
        min_shell, max_shell = min(shell_sizes), max(shell_sizes)
        wanted_shells = sorted(shell_sizes, reverse=True)

    if repo is None:
        if min_shell is not None:
            repo = Repository.create_from(
                    data_location, data_type, min_shell, max_shell,
                    traceable=True)
        else:
            repo = Repository.create_from(
                    data_location, data_type, traceable=True)

    # Shells are always processed from largest to smallest.
    if wanted_shells is None:
        shells = sorted(repo.charges_iacm.keys(), reverse=True)
    else:
        shells = []
        for s in wanted_shells:
            if s not in repo.charges_iacm.keys():
                msg = 'Shell {} will not be used, as it is not in the repository'
                warn(msg.format(s))
            else:
                shells.append(s)

    nauty = Nauty()

    extension = data_type.get_extension()
    # Strip only the trailing extension; str.replace() would also
    # remove any occurrence of it elsewhere in the file name.
    molids = [
            int(fn[:len(fn) - len(extension)])
            for fn in os.listdir(data_location)
            if fn.endswith(extension)]

    report = ValidationReport()
    for molid in molids:
        if (molid % num_buckets) != bucket:
            continue

        mol_path = os.path.join(
                data_location, '{}{}'.format(molid, extension))
        with open(mol_path, 'r') as f:
            graph = convert_from(f.read(), data_type)

        report += cross_validate_molecule(
                repo, molid, graph, charger_type, shells, iacm, nauty)

    return report