def determine_unique_configurations(configurations):
    r"""
    Group equivalent atomic configurations together.

    Every configuration is passed through a SchNet representation
    built inside this function (no weights are loaded here), and the
    resulting tensors are compared with ``compare_reps``. The first
    configuration of each group is kept as its representative.

    Parameters
    ----------
    configurations : list
        Configurations to sort. Each one is converted with
        ``posinp_to_ase_atoms``; the cell of the first one is used
        to choose the cutoff radius.

    Returns
    -------
    unique_config : list
        First occurrence of each distinct configuration, in input order.
    count_configs : list of int
        Number of input configurations matched to each entry
        of `unique_config`.
    """
    # Nothing to compare, and no first cell to derive a cutoff from.
    if len(configurations) == 0:
        return [], []

    # Half of the largest cell component of the first configuration, plus one.
    cutoff = float(np.max(configurations[0].cell.array) / 2 + 1)

    schnet = SchNet(n_atom_basis=32,
                    n_filters=32,
                    n_interactions=1,
                    cutoff=cutoff,
                    cutoff_network=CosineCutoff)
    env = AseEnvironmentProvider(cutoff=cutoff)

    data = [posinp_to_ase_atoms(pos) for pos in configurations]
    data = SchnetPackData(data=data,
                          environment_provider=env,
                          collect_triples=False)
    # One configuration per batch, so the i-th representation belongs
    # to the i-th configuration.
    data_loader = AtomsLoader(data, batch_size=1)

    # The representations are only compared, never differentiated, so
    # there is no need to record an autograd graph for each of them.
    with torch.no_grad():
        reps = [torch.squeeze(schnet(batch)) for batch in data_loader]

    unique_reps, unique_config, count_configs = [], [], []
    for i, rep in enumerate(reps):
        for j, uni in enumerate(unique_reps):
            if compare_reps(rep, uni):
                count_configs[j] += 1
                break
        else:
            # No known representation matched: this is a new group.
            unique_reps.append(rep)
            unique_config.append(configurations[i])
            count_configs.append(1)
    return unique_config, count_configs
Example #2
0
        json.dumps(run_params).encode()).hexdigest()[:6]

    # Determine the output directory
    test_dir = os.path.join(
        'networks',
        f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
    )
    os.makedirs(test_dir)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Making the data loaders
    train_data = AtomsData('datasets/train.db')
    train_loader = AtomsLoader(train_data,
                               args.batch_size,
                               shuffle=True,
                               pin_memory=True,
                               num_workers=2)
    test_data = AtomsData('datasets/test.db')
    test_loader = AtomsLoader(test_data, args.batch_size)
    valid_data = AtomsData('datasets/valid.db')
    valid_loader = AtomsLoader(valid_data,
                               args.batch_size,
                               pin_memory=True,
                               num_workers=2)

    # Make the model
    mean, std = train_loader.get_statistics('ip',
                                            divide_by_atoms=args.atomwise)
    model = build_fn(atom_features=args.atom_features,
                     message_steps=args.num_messages,
Example #3
0
    run_params = args.__dict__
    params_hash = hashlib.sha256(
        json.dumps(run_params).encode()).hexdigest()[:6]

    # Determine the output directory
    test_dir = os.path.join(
        'networks',
        f'T{args.num_messages}_b{args.batch_size}_n{args.num_epochs}_{params_hash}'
    )
    os.makedirs(test_dir)
    with open(os.path.join(test_dir, 'config.json'), 'w') as fp:
        json.dump(run_params, fp)

    # Making the data loaders
    train_data = AtomsData('datasets/train.db')
    train_loader = AtomsLoader(train_data, args.batch_size, shuffle=True)
    test_data = AtomsData('datasets/test.db')
    test_loader = AtomsLoader(test_data, args.batch_size)
    valid_data = AtomsData('datasets/valid.db')
    valid_loader = AtomsLoader(valid_data, args.batch_size)

    # Make the model
    mean, std = train_loader.get_statistics('delta',
                                            divide_by_atoms=args.atomwise)
    model = build_fn(atom_features=args.atom_features,
                     message_steps=args.num_messages,
                     output_layers=args.output_layers,
                     reduce_fn=args.readout_fn,
                     atomwise=args.atomwise,
                     mean=mean['delta'],
                     std=std['delta'])
Example #4
0
    def run(
        self,
        property,
        posinp=None,
        batch_size=128,
    ):
        r"""
        Central method to use when making a calculation with
        the calculator.

        Parameters
        ----------
        property : str
            Property to be predicted by the calculator
        posinp : list
            Atomic configurations to pass to the model; each entry is
            converted with ``posinp_to_ase_atoms``. Required, the
            ``None`` default is only kept for signature compatibility.
        batch_size : int
            Batch sizes. Default is 128. Forced to 1 when a derivative
            is requested for more than one configuration.

        Returns
        -------
        predictions : dict
            Maps `property` to the concatenated predictions as a
            :class:`numpy.ndarray`. When ``self.md`` is truthy the keys
            are ``"energy"`` and ``"forces"`` instead.

        Raises
        ------
        TypeError
            If `posinp` is not provided.
        """
        # Fail early with a clear message, before any model state is touched.
        if posinp is None:
            raise TypeError(
                "`posinp` must be a list of atomic configurations, got None.")

        init_property, out_name, derivative, wrt = get_derivative_names(
            property, self.available_properties)
        if abs(derivative) >= 1:
            self.model.output_modules[0].create_graph = True

        if len(posinp) > 1 and derivative:
            batch_size = 1

        data = [posinp_to_ase_atoms(pos) for pos in posinp]
        # A single periodic configuration is enough to require the ASE provider.
        pbc = any(pos.pbc.any() for pos in data)
        environment_provider = (AseEnvironmentProvider(
            cutoff=self.cutoff) if pbc else SimpleEnvironmentProvider())
        data = SchnetPackData(
            data=data,
            environment_provider=environment_provider,
            collect_triples=self.model_type == "wacsf",
        )
        data_loader = AtomsLoader(data, batch_size=batch_size)

        def to_device(batch):
            # Move every entry of the batch onto the calculator's device.
            return {k: v.to(self.device) for k, v in batch.items()}

        pred = []
        if derivative == 0:
            if self.model.output_modules[0].derivative is not None:
                # The output module computes a derivative itself, so
                # gradients must stay enabled during the forward pass.
                for batch in data_loader:
                    pred.append(self.model(to_device(batch)))
            else:
                with torch.no_grad():
                    for batch in data_loader:
                        pred.append(self.model(to_device(batch)))
        elif abs(derivative) == 1:
            for batch in data_loader:
                batch = to_device(batch)
                batch[wrt[0]].requires_grad_()
                results = self.model(batch)
                deriv1 = torch.unsqueeze(
                    torch_derivative(results[init_property], batch[wrt[0]]), 0)
                if derivative < 0:
                    deriv1 = -1.0 * deriv1
                pred.append({out_name: deriv1})
        elif abs(derivative) == 2:
            for batch in data_loader:
                batch = to_device(batch)
                for inp in set(wrt):
                    batch[inp].requires_grad_()
                results = self.model(batch)
                # NOTE(review): both differentiations use wrt[0] although
                # gradients are enabled for every entry of wrt -- confirm
                # this is intended for mixed second derivatives.
                deriv2 = torch.unsqueeze(
                    torch_derivative(
                        torch_derivative(
                            results[init_property],
                            batch[wrt[0]],
                            create_graph=True,
                        ),
                        batch[wrt[0]],
                    ),
                    0,
                )
                if derivative < 0:
                    deriv2 = -1.0 * deriv2
                pred.append({out_name: deriv2})

        predictions = {}
        if self.md:
            for p in ["energy", "forces"]:
                predictions[p] = np.concatenate(
                    [batch[p].cpu().detach().numpy() for batch in pred])
        else:
            # Derivatives are stored under out_name, direct outputs
            # under init_property.
            key = out_name if derivative else init_property
            predictions[property] = np.concatenate(
                [batch[key].cpu().detach().numpy() for batch in pred])
        return predictions
Example #5
0
    def run(
        self,
        property,
        posinp=None,
        batch_size=1,
    ):
        r"""
        Central method to use when making a calculation with
        the calculator.

        The configuration is split into subcells, each subcell is
        passed to the model on its own, and the per-subcell results are
        assembled back into a prediction for the whole configuration.

        Parameters
        ----------
        property : str
            Property to be predicted by the calculator. Only
            ``"energy"``, ``"forces"`` and ``"hessian"`` are assembled.
        posinp : list
            List holding exactly one atomic configuration. Required,
            the ``None`` default is only kept for signature
            compatibility.
        batch_size : int
            Not used by this method; every subcell is evaluated with a
            batch size of 1.

        Returns
        -------
        predictions : dict
            Maps `property` to the corresponding prediction by the
            model.

        Raises
        ------
        TypeError
            If `posinp` is not provided.
        NotImplementedError
            If a second derivative has to be computed, or if `property`
            is not one of the supported names.
        """
        # Fail early with a clear message, before any model state is touched.
        if posinp is None:
            raise TypeError(
                "`posinp` must be a list with one atomic configuration, got None."
            )

        # Initial setup
        assert (
            len(posinp) == 1
        ), "Use the PatchSPCalculator for one configuration at a time."
        atoms = posinp_to_ase_atoms(posinp[0])

        if property == "hessian" and any(self.subgrid == 2):
            # warnings.warn() returns None: raising its result would
            # throw a TypeError instead of issuing the warning.
            warnings.warn(
                """
            The hessian matrix can have some bad values with a grid of
            size 2 because the same atom can be copied multiple times
            in the buffers of the same subcell. Use a larger grid.
            """
            )

        init_property, out_name, derivative, wrt = get_derivative_names(
            property, self.available_properties
        )
        if abs(derivative) >= 1:
            self.model.output_modules[0].create_graph = True

        environment_provider = (
            AseEnvironmentProvider(cutoff=self.cutoff)
            if atoms.pbc.any()
            else SimpleEnvironmentProvider()
        )

        # Split the configuration according to the subgrid
        at_to_patches = AtomsToPatches(
            cutoff=self.cutoff, n_interaction=self.n_interaction, grid=self.subgrid
        )
        (
            subcells,
            subcells_main_idx,
            original_cell_idx,
            complete_subcell_copy_idx,
        ) = at_to_patches.split_atoms(atoms)

        def to_device(batch):
            # Move every entry of the batch onto the calculator's device.
            return {k: v.to(self.device) for k, v in batch.items()}

        # Pass each subcell independently
        results = []
        for subcell in subcells:
            data = SchnetPackData(
                data=[subcell],
                environment_provider=environment_provider,
                collect_triples=self.model_type == "wacsf",
            )
            data_loader = AtomsLoader(data, batch_size=1)

            if derivative == 0:
                if self.model.output_modules[0].derivative is not None:
                    # The output module computes a derivative itself, so
                    # gradients must stay enabled during the forward pass.
                    for batch in data_loader:
                        results.append(self.model(to_device(batch)))
                else:
                    with torch.no_grad():
                        for batch in data_loader:
                            results.append(self.model(to_device(batch)))
            elif abs(derivative) == 1:
                for batch in data_loader:
                    batch = to_device(batch)
                    batch[wrt[0]].requires_grad_()
                    forward_results = self.model(batch)
                    deriv1 = torch_derivative(
                        forward_results[init_property], batch[wrt[0]]
                    )
                    if derivative < 0:
                        deriv1 = -1.0 * deriv1
                    results.append({out_name: deriv1})
            elif abs(derivative) == 2:
                raise NotImplementedError()

        predictions = {}
        if property == "energy":
            # Reduce each patch on its own, so patches holding different
            # numbers of main atoms never have to be stacked in one array.
            predictions["energy"] = np.sum(
                [
                    np.sum(
                        patch["individual_energy"][subcells_main_idx[i]]
                        .detach()
                        .cpu()
                        .numpy()
                    )
                    for i, patch in enumerate(results)
                ]
            )

        elif property == "forces":
            forces = np.zeros((len(atoms), 3))
            for i, patch in enumerate(results):
                # Keep only the rows selected by subcells_main_idx and write
                # them at the matching indices of the full configuration.
                forces[original_cell_idx[i]] = (
                    patch["forces"]
                    .detach()
                    .squeeze()
                    .cpu()
                    .numpy()[subcells_main_idx[i]]
                )
            predictions["forces"] = forces

        elif property == "hessian":
            hessian = np.zeros((3 * len(atoms), 3 * len(atoms)))

            for i, patch in enumerate(results):
                # Index pairs locating this patch's block in the full matrix.
                (
                    hessian_original_cell_idx_0,
                    hessian_original_cell_idx_1,
                ) = prepare_hessian_indices(
                    original_cell_idx[i], complete_subcell_copy_idx[i]
                )

                # Index pairs locating the same block in the patch's own matrix.
                (
                    hessian_subcells_main_idx_0,
                    hessian_subcells_main_idx_1,
                ) = prepare_hessian_indices(
                    subcells_main_idx[i],
                    np.arange(0, len(complete_subcell_copy_idx[i])),
                )

                hessian[hessian_original_cell_idx_0, hessian_original_cell_idx_1] = (
                    patch["hessian"]
                    .detach()
                    .squeeze()
                    .cpu()
                    .numpy()[hessian_subcells_main_idx_0, hessian_subcells_main_idx_1]
                )
            predictions["hessian"] = hessian

        else:
            raise NotImplementedError()

        return predictions
Example #6
0
def predict(
    modelpath,
    posinp,
    name=None,
    device="cpu",
    disk_out=True,
    batch_size=128,
    overwrite=False,
    return_values=False,
):
    r"""
    Run a trained model on a set of structures and collect its outputs.

    Parameters
    ----------
    modelpath : str
        Location of the model, passed to ``load_model``.
    posinp : str or list
        Either the path to a ``.xyz`` file or a ``.db`` database, or a
        list of ``mybigdft.Posinp`` instances.
    name : str
        Base name of the output file. Only used for list inputs
        (default ``"structures"``); for file inputs it is always
        replaced by the file name without its extension.
    device : str
        Device on which the model and the batches are placed.
    disk_out : bool
        If `True`, the predictions are written as CSV to ``<name>.out``.
    batch_size : int
        Batch size used by the data loader.
    overwrite : bool
        If `True`, every ``.db`` file of the current working directory
        is deleted before anything else is done.
    return_values : bool
        If `True`, the predictions are returned.

    Returns
    -------
    predictions : dict or None
        Maps ``"idx"`` and every output name of the model to a
        :class:`numpy.ndarray`. `None` unless `return_values` is `True`.

    Raises
    ------
    NotImplementedError
        If the model type or the file format is not recognized.
    TypeError
        If `posinp` is neither a path nor a list of
        ``mybigdft.Posinp`` instances.
    """
    if overwrite:
        # WARNING: this removes *every* .db file of the working directory,
        # whoever created it (including a database given as `posinp`).
        to_remove = [dat for dat in os.listdir() if dat.endswith(".db")]
        for f in to_remove:
            os.remove(f)

    model = load_model(modelpath, device=device)

    # Build the state dict once instead of once per lookup.
    state = model.state_dict()
    if "representation.cutoff.cutoff" in state:
        model_type = "wacsf"
        cutoff = float(state["representation.cutoff.cutoff"])
    elif any(key in state for key in [
            "module.representation.embedding.weight",
            "representation.embedding.weight",
    ]):
        model_type = "schnet"
        # The parameter names carry a "module." prefix in some checkpoints.
        try:
            cutoff = float(state[
                "module.representation.interactions.0.cutoff_network.cutoff"])
        except KeyError:
            cutoff = float(
                state["representation.interactions.0.cutoff_network.cutoff"])
    else:
        raise NotImplementedError("Model type is not recognized.")

    if isinstance(posinp, str):
        basename = os.path.basename(posinp)
        if posinp.endswith(".xyz"):
            # Slice the suffix off: str.strip(".xyz") would remove any of
            # the characters '.', 'x', 'y', 'z' from both ends of the name.
            name = basename[:-len(".xyz")]
            pos = mybigdft.Posinp.from_file(posinp)
            pbc = pos.boundary_conditions != "free"
            # NOTE(review): unlike the list branch, the Posinp is not
            # converted to ASE atoms here -- confirm BigdftAtomsData
            # accepts it.
            data = [pos]
        elif posinp.endswith(".db"):
            name = basename[:-len(".db")]
            data = connect(posinp)
            pbc = any(row["pbc"].any() for row in data.select())
        else:
            raise NotImplementedError("File format not supported.")

    elif isinstance(posinp, list):
        if name is None or name == "":
            name = "structures"
        if all(isinstance(pos, mybigdft.Posinp) for pos in posinp):
            data = [sim.mb_posinp_to_ase_atoms(pos) for pos in posinp]
        else:
            raise TypeError(
                "Posinp should be a list of exclusively mybigdft.Posinp instances."
            )

        pbc = any(pos.pbc.any() for pos in data)
    else:
        raise TypeError("""
            Positions should be given either as a path to a file or
            database, or as a list of mybigdft.Posinp instances.
            """)

    environment_provider = (AseEnvironmentProvider(
        cutoff=cutoff) if pbc else SimpleEnvironmentProvider())

    data = BigdftAtomsData(
        data=data,
        environment_provider=environment_provider,
        collect_triples=model_type == "wacsf",
    )
    data_loader = AtomsLoader(data, batch_size=batch_size)

    with torch.no_grad():
        pred = []
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            pred.append(model(batch))

    # Structures are numbered from 1 in the output.
    predictions = {"idx": np.arange(1, len(data) + 1)}
    for prop in pred[0]:
        predictions[prop] = np.concatenate(
            [p[prop].cpu().numpy() for p in pred])

    if disk_out:
        outfile = name + ".out"
        # newline="" lets the csv writer control line endings itself.
        with open(outfile, "w", newline="") as fp:
            wr = csv.writer(fp)
            wr.writerow(list(predictions.keys()))
            # One row per structure, one column per predicted quantity.
            wr.writerows(zip(*predictions.values()))

    if return_values:
        return predictions
Example #7
0
def qm9_test_loader(qm9_splits, batch_size, shuffle):
    """Return an ``AtomsLoader`` over ``qm9_splits[2]``.

    Judging by the function name this entry is the test split.
    `batch_size` and `shuffle` are forwarded to the loader unchanged.
    """
    test_split = qm9_splits[2]
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return AtomsLoader(test_split, **loader_options)
Example #8
0
def qm9_val_loader(qm9_splits, batch_size, shuffle):
    """Return an ``AtomsLoader`` over ``qm9_splits[1]``.

    Judging by the function name this entry is the validation split.
    `batch_size` and `shuffle` are forwarded to the loader unchanged.
    """
    val_split = qm9_splits[1]
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return AtomsLoader(val_split, **loader_options)
Example #9
0
def qm9_train_loader(qm9_splits, batch_size, shuffle):
    """Return an ``AtomsLoader`` over ``qm9_splits[0]``.

    Judging by the function name this entry is the training split.
    `batch_size` and `shuffle` are forwarded to the loader unchanged.
    """
    train_split = qm9_splits[0]
    loader_options = {"batch_size": batch_size, "shuffle": shuffle}
    return AtomsLoader(train_split, **loader_options)