Example #1
0
 def test_get_input(self):
     """get_input on a structure yields 7 arrays with the expected shapes."""
     converter = CrystalGraph(
         cutoff=4,
         bond_converter=GaussianDistance(np.linspace(0, 5, 100), 0.5))
     model_input = converter.get_input(self.structures[0])
     self.assertEqual(len(model_input), 7)
     expected_shapes = [(1, 28), (1, 704, 100), (1, 1, 2), (1, 704),
                        (1, 704), (1, 28), (1, 704)]
     for actual, wanted in zip([a.shape for a in model_input], expected_shapes):
         self.assertListEqual(list(actual), list(wanted))
Example #2
0
 def test_check_dimension(self):
     """A graph whose bond feature width mismatches the model must raise.

     Bug fix: the message assertion used to sit *inside* the
     ``assertRaises`` context manager, so control left the block as soon
     as the exception was raised and the assertion never executed. It is
     now checked after the context manager exits.
     """
     gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 20), 0.5))
     s = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
     graph = gc.convert(s)
     model = MEGNetModel(10, 2, nblocks=1, lr=1e-2,
                         n1=4, n2=4, n3=4, npass=1, ntarget=1,
                         graph_converter=CrystalGraph(bond_converter=gc),
                         )
     with self.assertRaises(Exception) as context:
         model.check_dimension(graph)
     self.assertTrue('The data dimension for bond' in str(context.exception))
    def __init__(self,
                 nfeat_edge=None,
                 nfeat_global=None,
                 nfeat_node=None,
                 nblocks=3,
                 lr=1e-3,
                 n1=64,
                 n2=32,
                 n3=16,
                 nvocal=95,
                 embedding_dim=16,
                 nbvocal=None,
                 bond_embedding_dim=None,
                 ngvocal=None,
                 global_embedding_dim=None,
                 npass=3,
                 ntarget=1,
                 act=softplus2,
                 is_classification=False,
                 loss="mse",
                 l2_coef=None,
                 dropout=None,
                 graph_converter=None,
                 optimizer_kwargs=None):
        """Assemble, compile and register a MEGNet model.

        Args:
            nfeat_edge: number of bond (edge) features; None enables the
                bond-embedding path in ``make_megnet_model``.
            nfeat_global: number of state (global) features.
            nfeat_node: number of atom (node) features; None enables an
                element embedding of width ``embedding_dim`` over ``nvocal``.
            nblocks: number of MEGNet blocks.
            lr: learning rate passed to the Adam optimizer.
            n1, n2, n3: hidden-layer widths forwarded to the model builder.
            nbvocal / bond_embedding_dim: bond-type embedding settings.
            ngvocal / global_embedding_dim: global-type embedding settings.
            npass: number of recurrent steps forwarded to the model builder.
            ntarget: number of output targets.
            act: activation function used throughout the network.
            is_classification: if True, the loss is forced to binary
                cross-entropy regardless of ``loss``.
            loss: loss name used for regression.
            l2_coef: optional L2 regularization coefficient.
            dropout: optional dropout rate.
            graph_converter: structure-to-graph converter; defaults to a
                CrystalGraph with cutoff 4 and a 100-center Gaussian
                distance expansion on [0, 5].
            optimizer_kwargs: extra keyword arguments merged into the Adam
                constructor (e.g. clipnorm).
        """
        # Build the MEGNet model graph (layers only; compiled just below)
        model = make_megnet_model(nfeat_edge=nfeat_edge,
                                  nfeat_global=nfeat_global,
                                  nfeat_node=nfeat_node,
                                  nblocks=nblocks,
                                  n1=n1,
                                  n2=n2,
                                  n3=n3,
                                  nvocal=nvocal,
                                  embedding_dim=embedding_dim,
                                  nbvocal=nbvocal,
                                  bond_embedding_dim=bond_embedding_dim,
                                  ngvocal=ngvocal,
                                  global_embedding_dim=global_embedding_dim,
                                  npass=npass,
                                  ntarget=ntarget,
                                  act=act,
                                  is_classification=is_classification,
                                  l2_coef=l2_coef,
                                  dropout=dropout)

        # Compile the model with the optimizer; classification overrides
        # any user-supplied loss with binary cross-entropy.
        loss = 'binary_crossentropy' if is_classification else loss

        opt_params = {'lr': lr}
        if optimizer_kwargs is not None:
            opt_params.update(optimizer_kwargs)
        model.compile(Adam(**opt_params), loss)

        if graph_converter is None:
            graph_converter = CrystalGraph(cutoff=4,
                                           bond_converter=GaussianDistance(
                                               np.linspace(0, 5, 100), 0.5))

        super().__init__(model=model, graph_converter=graph_converter)
Example #4
0
    def setUpClass(cls):
        # Build tiny fixed-size graph fixtures and two small MEGNet models
        # shared by every test in this case.
        # NOTE(review): setUpClass is conventionally decorated with
        # @classmethod — confirm the decorator was not lost when this
        # snippet was extracted.
        cls.n_feature = 3  # atom feature width (molecule-style inputs)
        cls.n_bond_features = 10  # bond feature width
        cls.n_global_features = 2  # global/state feature width

        class Generator(Sequence):
            # Minimal keras Sequence that replays one fixed (x, y) batch.
            def __init__(self, x, y):
                self.x = x
                self.y = y
            def __len__(self):
                # Advertise 10 batches so fit() iterates a few steps.
                return 10
            def __getitem__(self, index):
                # Same batch regardless of the requested index.
                return  self.x, self.y

        # Seven inputs — presumably atomic numbers, bond features, global
        # state, then the index arrays of the MEGNet input convention;
        # verify against CrystalGraph.get_input.
        x_crystal = [np.array([1, 2, 3, 4]).reshape((1, -1)),
                     np.random.normal(size=(1, 6, cls.n_bond_features)),
                     np.random.normal(size=(1, 2, cls.n_global_features)),
                     np.array([[0, 0, 1, 1, 2, 3]]),
                     np.array([[1, 1, 0, 0, 3, 2]]),
                     np.array([[0, 0, 1, 1]]),
                     np.array([[0, 0, 0, 0, 1, 1]]),
                     ]

        y = np.random.normal(size=(1, 2, 1))
        cls.train_gen_crystal = Generator(x_crystal, y)
        # Molecule-style variant: explicit atom features instead of Z only.
        x_mol = [np.random.normal(size=(1, 4, cls.n_feature)),
                 np.random.normal(size=(1, 6, cls.n_bond_features)),
                 np.random.normal(size=(1, 2, cls.n_global_features)),
                 np.array([[0, 0, 1, 1, 2, 3]]),
                 np.array([[1, 1, 0, 0, 3, 2]]),
                 np.array([[0, 0, 1, 1]]),
                 np.array([[0, 0, 0, 0, 1, 1]]),
                 ]
        y = np.random.normal(size=(1, 2, 1))
        cls.train_gen_mol = Generator(x_mol, y)

        # Two tiny models: single-target and two-target variants.
        cls.model = MEGNetModel(10, 2, nblocks=1, lr=1e-2,
                                n1=4, n2=4, n3=4, npass=1, ntarget=1,
                                graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
                                )
        cls.model2 = MEGNetModel(10, 2, nblocks=1, lr=1e-2,
                                 n1=4, n2=4, n3=4, npass=1, ntarget=2,
                                 graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
                                 )
    def setUpClass(cls):
        # Variant of the fixture above that feeds the model from a plain
        # infinite generator instead of a keras Sequence.
        # NOTE(review): conventionally decorated with @classmethod —
        # confirm the decorator was not dropped during extraction.
        cls.n_feature = 3  # atom feature width (molecule-style inputs)
        cls.n_bond_features = 10  # bond feature width
        cls.n_global_features = 2  # global/state feature width

        # Infinite generator replaying one fixed (x, y) batch.
        def generator(x, y):
            while True:
                yield x, y

        # Seven inputs — presumably atomic numbers, bond features, global
        # state, then the index arrays of the MEGNet input convention.
        x_crystal = [
            np.array([1, 2, 3, 4]).reshape((1, -1)),
            np.random.normal(size=(1, 6, cls.n_bond_features)),
            np.random.normal(size=(1, 2, cls.n_global_features)),
            np.array([[0, 0, 1, 1, 2, 3]]),
            np.array([[1, 1, 0, 0, 3, 2]]),
            np.array([[0, 0, 1, 1]]),
            np.array([[0, 0, 0, 0, 1, 1]]),
        ]

        y = np.random.normal(size=(1, 2, 1))
        cls.train_gen_crystal = generator(x_crystal, y)
        x_mol = [
            np.random.normal(size=(1, 4, cls.n_feature)),
            np.random.normal(size=(1, 6, cls.n_bond_features)),
            np.random.normal(size=(1, 2, cls.n_global_features)),
            np.array([[0, 0, 1, 1, 2, 3]]),
            np.array([[1, 1, 0, 0, 3, 2]]),
            np.array([[0, 0, 1, 1]]),
            np.array([[0, 0, 0, 0, 1, 1]]),
        ]
        y = np.random.normal(size=(1, 2, 1))
        cls.train_gen_mol = generator(x_mol, y)

        cls.model = MEGNetModel(
            10,
            2,
            nblocks=1,
            lr=1e-2,
            n1=4,
            n2=4,
            n3=4,
            npass=1,
            ntarget=1,
            # NOTE(review): "graph_convertor"/"bond_convertor" is the old
            # MEGNet keyword spelling; newer releases use *_converter.
            graph_convertor=CrystalGraph(
                bond_convertor=GaussianDistance(np.linspace(0, 5, 10), 0.5)),
        )
Example #6
0
 def test_crystalgraph(self):
     """Graph conversion exposes the expected keys and respects cutoff."""
     converter = CrystalGraph(cutoff=4)
     self.assertEqual(converter.cutoff, 4)
     graph = converter.convert(self.structures[0])
     self.assertSetEqual(set(graph.keys()),
                         {"bond", "atom", "index1", "index2", "state"})
     wider = CrystalGraph(cutoff=6)
     self.assertEqual(wider.cutoff, 6)
     wider_graph = wider.convert(self.structures[0])
     self.assertListEqual(to_list(wider_graph["state"][0]), [0, 0])
     via_call = converter(self.structures[0])
     np.testing.assert_almost_equal(graph["atom"], via_call["atom"])
 def test_crystalgraph(self):
     """Conversion yields the expected keys; __call__ matches convert."""
     converter = CrystalGraph(cutoff=4)
     self.assertEqual(converter.cutoff, 4)
     graph = converter.convert(self.structures[0])
     self.assertSetEqual(set(graph.keys()),
                         {"bond", "atom", "index1", "index2", "state"})
     wider = CrystalGraph(cutoff=6)
     self.assertEqual(wider.cutoff, 6)
     wider_graph = wider.convert(self.structures[0])
     self.assertListEqual(wider_graph['state'][0], [0, 0])
     via_call = converter(self.structures[0])
     self.assertListEqual(graph['atom'], via_call['atom'])
Example #8
0
 def test_crystalgraph(self):
     """Default radius is 4; converted graphs carry the expected keys."""
     converter = CrystalGraph()
     self.assertEqual(converter.r, 4)
     graph = converter.convert(self.structures[0])
     self.assertSetEqual(set(graph.keys()),
                         {"distance", "node", "index1", "index2", "state"})
     wider = CrystalGraph(r=6)
     self.assertEqual(wider.r, 6)
     wider_graph = wider.convert(self.structures[0])
     self.assertListEqual(wider_graph['state'][0], [0, 0])
     via_call = converter(self.structures[0])
     self.assertListEqual(graph['node'], via_call['node'])
Example #9
0
def prepare_model_megnet(individuals, epochs, outfile, excl=None):
    """Train a MEGNet model on total-energy-per-atom data and save it.

    Args:
        individuals: iterable of objects exposing ``get_init_structure()``
            (an ASE Atoms object) and an ``e_tot`` attribute.
        epochs: number of training epochs.
        outfile: path the trained model is saved to.
        excl: optional list of chemical symbols; any structure whose symbol
            list equals it is excluded — useful to withhold a particular
            stoichiometry from network learning.

    Bug fix: the previous signature used a mutable default (``excl=[]``),
    which is shared across calls; a ``None`` sentinel avoids that, and the
    truthiness check below preserves the old "empty means no exclusion"
    behaviour for callers that still pass ``[]``.
    """
    structures = []
    energies = []
    adapt = AseAtomsAdaptor()

    for ind in individuals:
        struct_ase = ind.get_init_structure()
        # Skip excluded stoichiometries (exact symbol-list match).
        if excl and struct_ase.get_chemical_symbols() == excl:
            continue
        structures.append(adapt.get_structure(struct_ase))
        energies.append(ind.e_tot)

    print("read data of " + str(len(structures)) + " structures total")

    # standard values as taken from the MEGNet manual
    nfeat_bond = 100
    nfeat_global = 2
    r_cutoff = 5
    gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
    gaussian_width = 0.5
    distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
    graph_converter = CrystalGraph(bond_converter=distance_converter, cutoff=r_cutoff)
    model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

    # model training
    model.train(structures, energies, epochs=epochs)

    model.save_model(outfile)
Example #10
0
 def test_crystal_model_v2(self):
     """Train a tiny model on a cubic Si cell and predict a scalar."""
     converter = CrystalGraph()
     structure = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
     with ScratchDir('.'):
         model = MEGNetModel(nfeat_edge=None,
                             nfeat_global=2,
                             nblocks=1,
                             lr=1e-2,
                             n1=4, n2=4, n3=4,
                             npass=1,
                             ntarget=1,
                             graph_converter=converter,
                             centers=np.linspace(0, 4, 10),
                             width=0.5)
         trained = model.train([structure, structure], [0.1, 0.1], epochs=2)
         prediction = trained.predict_structure(structure)
         self.assertTrue(prediction.shape == (1, ))
Example #11
0
 def test_train_pred(self):
     """End-to-end train/predict round trip on a BaTiO3 structure."""
     model = megnet_model(10, 2, n_blocks=1, lr=1e-2,
                          n1=4, n2=4, n3=4, n_pass=1, n_target=1,
                          graph_convertor=CrystalGraph(),
                          distance_convertor=GaussianDistance(np.linspace(0, 5, 10), 0.5))
     cif_path = os.path.join(cwd, '../data/tests/cifs/BaTiO3_mp-2998_computed.cif')
     structures = [Structure.from_file(cif_path)] * 4
     targets = [0.1] * 4
     model.train(structures,
                 targets,
                 validation_structures=structures[:2],
                 validation_targets=targets[:2],
                 batch_size=2,
                 epochs=1,
                 verbose=2)
     predictions = model.predict_structure(structures[0])
     # Training leaves a callback directory behind; clean it up.
     if os.path.isdir('callback'):
         shutil.rmtree('callback')
     self.assertTrue(np.size(predictions) == 1)
Example #12
0
def default_megnet_config(
    nfeat_bond: int = 100, r_cutoff: float = 5.0, gaussian_width: float = 0.5
) -> dict:
    """Return sensible default keyword arguments for ``MEGNetModel``.

    The values mirror the `MEGNet README file
    <https://github.com/materialsvirtuallab/megnet#training-a-new-megnetmodel-from-structures>`_.

    Examples:
        Create a MEGNetModel using these defaults:

        >>> model = MEGNetModel(**default_megnet_config())

    """
    # Gaussian centres span slightly past the cutoff radius.
    centres = np.linspace(0, r_cutoff + 1, nfeat_bond)
    config = {
        "graph_converter": CrystalGraph(cutoff=r_cutoff),
        "centers": centres,
        "width": gaussian_width,
    }
    return config
Example #13
0
tf.compat.v1.disable_eager_execution()  # run in TF1-style graph mode

## Import megnet related modules
from megnet.callbacks import ManualStop, ReduceLRUponNan
from megnet.data.crystal import CrystalGraph
from megnet.data.graph import GaussianDistance, GraphBatchDistanceConvert
from megnet.models import MEGNetModel

## Set GPU
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # pin execution to the first GPU

#  2. Model construction
##  Graph converter: Gaussian-expanded bond distances
##  (100 centres on [0, 6], width 0.5, 5 A neighbour cutoff)
crystal_graph = CrystalGraph(bond_converter=GaussianDistance(
    centers=np.linspace(0, 6, 100), width=0.5),
                             cutoff=5.0)
## model setup
# NOTE(review): TRAIN_FIDELITIES is defined elsewhere in this script;
# ngvocal presumably equals the number of training fidelities — confirm.
model = MEGNetModel(
    nfeat_edge=100,
    nfeat_global=None,
    ngvocal=len(TRAIN_FIDELITIES),
    global_embedding_dim=16,
    nblocks=3,
    nvocal=95,
    npass=2,
    graph_converter=crystal_graph,
    lr=1e-3,
)

#  3. Data loading and processing
 def test_get_flat_data(self):
     """Flattening two graphs plus targets yields six length-2 lists."""
     converter = CrystalGraph(cutoff=4)
     graph_list = [converter.convert(s) for s in self.structures]
     flat = converter.get_flat_data(graph_list, [0.1, 0.2])
     self.assertListEqual([len(item) for item in flat], [2] * 6)
 def test_convert(self):
     """Atom features of a converted graph are the atomic numbers."""
     converter = CrystalGraph(cutoff=4)
     structure = self.structures[0]
     graph = converter.convert(structure)
     expected_z = [site.specie.Z for site in structure]
     self.assertListEqual(graph['atom'], expected_z)
Example #16
0
def megnet_input(prop, ZeroVals, bond, nfeat_global, cutoff, width, *fraction):
    """
    megnet_input(prop, ZeroVals, bond, nfeat_global, cutoff, width, *fraction)

    Extracts valid structures and targets and splits them into user specified
    datasets.

    Inputs:
    prop-                   Optical property of interest.
    ZeroVals-               Exclude/Include zero optical property values.
    bond-                   MEGNet feature bond.
    nfeat_global-           MEGNet feature global.
    cutoff-                 MEGNet radial cutoff.
    width-                  MEGNet gaussian width.
    *fraction-              Fraction of data to split into training and
                            validation sets. Passing an extra argument to
                            split data based on quantity is permissible.

    Outputs:
    1-                      Featurised structures for training with
                            MEGNet.
    2-                      Valid structures and targets.
    3-                      Inputs for extraction of activations.
    4-                      Pool, test, training and validation sets.
    """
    logging.info("Get graph inputs to MEGNet ...")
    print("Bond features = ", bond)
    print("Global features = ", nfeat_global)
    print("Radial cutoff = ", cutoff)
    print("Gaussian width = ", width)
    gaussian_centers = np.linspace(0, cutoff, bond)
    distance_converter = GaussianDistance(gaussian_centers, width)
    graph_converter = CrystalGraph(bond_converter=distance_converter)
    model = MEGNetModel(bond, nfeat_global, graph_converter=graph_converter)

    datafile = "%s_data.pkl" % prop
    inputs = pd.read_pickle(datafile)
    print("\nNumber of input entries found for %s data = %s" %
          (prop, len(inputs)))
    if not ZeroVals:
        logging.info(
            "Excluding zero optical property values from the dataset ...")
        # dtype=int keeps np.delete valid when no zero-valued entries exist
        # (an empty default-float index array is rejected by newer NumPy).
        mask = np.array(
            [i for i, val in enumerate(inputs[prop]) if abs(val) == 0.],
            dtype=int)
        structures = np.delete(inputs["structure"].to_numpy(), mask)
        targets = np.delete(inputs[prop].to_numpy(), mask)
        print("Remaining number of entries = %s" % len(targets))
    else:
        logging.info("Zero optical property values will be included ...")
        structures = inputs["structure"].to_numpy()
        targets = inputs[prop].to_numpy()

    # Get the valid structures and targets i.e exclude isolated atoms
    logging.info("Obtaining valid structures and targets ...")
    valid_structures = []
    valid_targets = []
    activations_input_full = []
    for s, t in zip(structures, targets):
        try:
            activations_input_full.append(
                StructureGraph.get_input(graph_converter, s))
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            print("Skipping structure with isolated atom ...")
            continue
        valid_structures.append(s)
        valid_targets.append(t)
    print("Number of invalid structures = %s" %
          (len(targets) - len(valid_targets)))
    print("\nTotal number of entries available for analysis = %s" %
          len(valid_targets))

    pool_frac = fraction[0][0]
    if len(fraction) == 1:
        if (fraction[0][0] + fraction[0][1]) == 1.:
            # For train-test split and k-fold cross-validation
            test_frac = fraction[0][1]

            logging.info("The pool is the same as the training set ...")
            print("Requested pool: %s%%" % (pool_frac * 100))
            print("Requested test set: %s%%" % (test_frac * 100))

            # Data split is based on percentages
            pool_boundary = int(len(valid_targets) * pool_frac)
            Xpool = np.array(valid_structures[0:pool_boundary])
            ypool = np.array(valid_targets[0:pool_boundary])
            Xtest = np.array(valid_structures[pool_boundary:])
            ytest = np.array(valid_targets[pool_boundary:])

            logging.info("The pool is the same as the training set ...")
            print("Pool:", ypool.shape)
            print("Test set:", ytest.shape)
            return (model, activations_input_full, valid_structures,
                    valid_targets, Xpool, ypool, Xtest, ytest)

        elif (fraction[0][0] + fraction[0][1]) < 1.:
            #  For repeat active learning
            val_frac = fraction[0][1]
            test_frac = np.round(1 - pool_frac, decimals=2)

            pool_boundary = int(len(valid_targets) * pool_frac)
            Xpool = np.array(valid_structures[0:pool_boundary])
            ypool = np.array(valid_targets[0:pool_boundary])
            Xtest = np.array(valid_structures[pool_boundary:])
            ytest = np.array(valid_targets[pool_boundary:])

            val_boundary = int(pool_boundary * val_frac)
            Xtrain = Xpool[:-val_boundary]
            ytrain = ypool[:-val_boundary]

            Xval = Xpool[-val_boundary:]
            yval = ypool[-val_boundary:]
            print("Requested validation set: %s%% of pool" % (val_frac * 100))
            print("Training set:", ytrain.shape)
            print("Validation set:", yval.shape)
            print("Test set:", ytest.shape)
            return (model, activations_input_full, valid_structures,
                    valid_targets, Xpool, ypool, Xtest, ytest, Xtrain, ytrain,
                    Xval, yval)
        # NOTE: fractions summing to more than 1 fall through and the
        # function implicitly returns None (pre-existing behaviour).

    else:
        return (model, activations_input_full, np.array(valid_structures),
                np.array(valid_targets))
Example #17
0
    def __init__(self,
                 nfeat_edge: int = None,
                 nfeat_global: int = None,
                 nfeat_node: int = None,
                 nblocks: int = 3,
                 lr: float = 1e-3,
                 n1: int = 64,
                 n2: int = 32,
                 n3: int = 16,
                 nvocal: int = 95,
                 embedding_dim: int = 16,
                 nbvocal: int = None,
                 bond_embedding_dim: int = None,
                 ngvocal: int = None,
                 global_embedding_dim: int = None,
                 npass: int = 3,
                 ntarget: int = 1,
                 act: Callable = softplus2,
                 is_classification: bool = False,
                 loss: str = "mse",
                 metrics: List[str] = None,
                 l2_coef: float = None,
                 dropout: float = None,
                 graph_converter: StructureGraph = None,
                 target_scaler: Scaler = DummyScaler(),
                 optimizer_kwargs: Dict = None,
                 dropout_on_predict: bool = False):
        """
        Args:
            nfeat_edge: (int) number of bond features
            nfeat_global: (int) number of state features
            nfeat_node: (int) number of atom features
            nblocks: (int) number of MEGNetLayer blocks
            lr: (float) learning rate
            n1: (int) number of hidden units in layer 1 in MEGNetLayer
            n2: (int) number of hidden units in layer 2 in MEGNetLayer
            n3: (int) number of hidden units in layer 3 in MEGNetLayer
            nvocal: (int) number of total element
            embedding_dim: (int) number of embedding dimension
            nbvocal: (int) number of bond types if bond attributes are types
            bond_embedding_dim: (int) number of bond embedding dimension
            ngvocal: (int) number of global types if global attributes are types
            global_embedding_dim: (int) number of global embedding dimension
            npass: (int) number of recurrent steps in Set2Set layer
            ntarget: (int) number of output targets
            act: (object) activation function
            l2_coef: (float or None) l2 regularization parameter
            is_classification: (bool) whether it is a classification task
            loss: (object or str) loss function
            metrics: (list or dict) List or dictionary of Keras metrics to be evaluated by the model during training
                and testing
            dropout: (float) dropout rate
            graph_converter: (object) object that exposes a "convert" method for structure to graph conversion
            target_scaler: (object) object that exposes a "transform" and "inverse_transform" methods for transforming
                the target values
            optimizer_kwargs (dict): extra keywords for optimizer, for example clipnorm and clipvalue
            dropout_on_predict (bool): forwarded to make_megnet_model — presumably keeps
                dropout active at prediction time (Monte-Carlo dropout); confirm there
        """

        # Build the MEG Model (layers only; compiled just below)
        model = make_megnet_model(nfeat_edge=nfeat_edge,
                                  nfeat_global=nfeat_global,
                                  nfeat_node=nfeat_node,
                                  nblocks=nblocks,
                                  n1=n1,
                                  n2=n2,
                                  n3=n3,
                                  nvocal=nvocal,
                                  embedding_dim=embedding_dim,
                                  nbvocal=nbvocal,
                                  bond_embedding_dim=bond_embedding_dim,
                                  ngvocal=ngvocal,
                                  global_embedding_dim=global_embedding_dim,
                                  npass=npass,
                                  ntarget=ntarget,
                                  act=act,
                                  is_classification=is_classification,
                                  l2_coef=l2_coef,
                                  dropout=dropout,
                                  dropout_on_predict=dropout_on_predict)

        # Compile the model with the optimizer; classification overrides
        # any user-supplied loss with binary cross-entropy.
        loss = 'binary_crossentropy' if is_classification else loss

        opt_params = {'lr': lr}
        if optimizer_kwargs is not None:
            opt_params.update(optimizer_kwargs)
        model.compile(Adam(**opt_params), loss, metrics=metrics)

        # Default converter: cutoff 4 with a 100-center Gaussian expansion.
        if graph_converter is None:
            graph_converter = CrystalGraph(cutoff=4,
                                           bond_converter=GaussianDistance(
                                               np.linspace(0, 5, 100), 0.5))

        super().__init__(model=model,
                         target_scaler=target_scaler,
                         graph_converter=graph_converter)
Example #18
0
    def __init__(self,
                 nfeat_edge,
                 nfeat_global,
                 nfeat_node=None,
                 nblocks=3,
                 lr=1e-3,
                 n1=64,
                 n2=32,
                 n3=16,
                 nvocal=95,
                 embedding_dim=16,
                 npass=3,
                 ntarget=1,
                 act=softplus2,
                 is_classification=False,
                 loss="mse",
                 l2_coef=None,
                 dropout=None,
                 graph_convertor=None,
                 optimizer_kwargs=None
                 ):
        """Build the full MEGNet Keras graph inline, compile it, and hand
        the resulting model to the base class.

        Inputs x1..x7: x1 atoms, x2 bonds, x3 global state; x4/x5 are the
        bond index arrays and x6/x7 the set-assignment indices consumed by
        MEGNetLayer and Set2Set below — presumably matching the index1/
        index2 graph keys used elsewhere in this file; confirm.
        """
        int32 = 'int32'
        if nfeat_node is None:
            # Only the atomic number is supplied; learn an element embedding.
            x1 = Input(shape=(None,), dtype=int32)  # only z as feature
            x1_ = Embedding(nvocal, embedding_dim)(x1)
        else:
            x1 = Input(shape=(None, nfeat_node))
            x1_ = x1
        x2 = Input(shape=(None, nfeat_edge))
        x3 = Input(shape=(None, nfeat_global))
        x4 = Input(shape=(None,), dtype=int32)
        x5 = Input(shape=(None,), dtype=int32)
        x6 = Input(shape=(None,), dtype=int32)
        x7 = Input(shape=(None,), dtype=int32)

        # Optional L2 weight regularisation shared by all dense layers.
        if l2_coef is not None:
            reg = l2(l2_coef)
        else:
            reg = None

        # two feedforward layers
        def ff(x, n_hiddens=[n1, n2]):
            out = x
            for i in n_hiddens:
                out = Dense(i, activation=act, kernel_regularizer=reg)(out)
            return out

        # a block corresponds to two feedforward layers + one MEGNetLayer layer
        # Note the first block does not contain the feedforward layer since
        # it will be explicitly added before the block
        def one_block(a, b, c, has_ff=True):
            if has_ff:
                x1_ = ff(a)
                x2_ = ff(b)
                x3_ = ff(c)
            else:
                x1_ = a
                x2_ = b
                x3_ = c
            out = MEGNetLayer(
                [n1, n1, n2], [n1, n1, n2], [n1, n1, n2],
                pool_method='mean', activation=act, kernel_regularizer=reg)(
                [x1_, x2_, x3_, x4, x5, x6, x7])

            x1_temp = out[0]
            x2_temp = out[1]
            x3_temp = out[2]
            if dropout:
                x1_temp = Dropout(dropout)(x1_temp)
                x2_temp = Dropout(dropout)(x2_temp)
                x3_temp = Dropout(dropout)(x3_temp)
            return x1_temp, x2_temp, x3_temp

        x1_ = ff(x1_)
        x2_ = ff(x2)
        x3_ = ff(x3)
        for i in range(nblocks):
            if i == 0:
                # The pre-block feedforward above already transformed the
                # inputs, so the first block skips its own ff stage.
                has_ff = False
            else:
                has_ff = True
            x1_1 = x1_
            x2_1 = x2_
            x3_1 = x3_
            x1_1, x2_1, x3_1 = one_block(x1_1, x2_1, x3_1, has_ff)
            # skip connection
            x1_ = Add()([x1_, x1_1])
            x2_ = Add()([x2_, x2_1])
            x3_ = Add()([x3_, x3_1])

        # set2set for both the atom and bond
        node_vec = Set2Set(T=npass, n_hidden=n3, kernel_regularizer=reg)([x1_, x6])
        edge_vec = Set2Set(T=npass, n_hidden=n3, kernel_regularizer=reg)([x2_, x7])
        # concatenate atom, bond, and global
        final_vec = Concatenate(axis=-1)([node_vec, edge_vec, x3_])
        if dropout:
            final_vec = Dropout(dropout)(final_vec)
        # final dense layers
        final_vec = Dense(n2, activation=act, kernel_regularizer=reg)(final_vec)
        final_vec = Dense(n3, activation=act, kernel_regularizer=reg)(final_vec)

        if is_classification:
            # Classification forces sigmoid output + binary cross-entropy,
            # overriding any user-supplied loss.
            final_act = 'sigmoid'
            loss = 'binary_crossentropy'
        else:
            final_act = None
            loss = loss

        out = Dense(ntarget, activation=final_act)(final_vec)
        model = Model(inputs=[x1, x2, x3, x4, x5, x6, x7], outputs=out)

        opt_params = {'lr': lr}
        if optimizer_kwargs is not None:
            opt_params.update(optimizer_kwargs)
        model.compile(Adam(**opt_params), loss)

        # Default converter: cutoff 4 with a 100-centre Gaussian expansion
        # (old "*_convertor" keyword spelling of this MEGNet version).
        if graph_convertor is None:
            graph_convertor = CrystalGraph(cutoff=4, bond_convertor=GaussianDistance(np.linspace(0, 5, 100), 0.5))

        super().__init__(model=model, graph_convertor=graph_convertor)
Example #19
0
def train():
    """Run k-fold training of a MEGNet model on a structure/property dataset.

    Reads CLI args, builds (or loads) crystal graphs, splits the data with a
    fixed-seed KFold, optionally warm-starts from a previous run's checkpoints,
    and trains one model per requested fold.

    NOTE(review): relies on module-level names not visible here (``parse_args``,
    ``device_lib``, ``K``, ``load_model``, ``MEGNetModel``, ``CrystalGraph``,
    ``GaussianDistance``, ``KFold``, ``train_test_split``, ``StandardScaler``,
    sklearn/keras imports, ...) — verify they are imported at file top.
    """
    # Parse args
    args = parse_args()
    radius = args.radius
    n_works = args.n_works
    warm_start = args.warm_start
    output_path = args.output_path
    graph_file = args.graph_file
    prop_col = args.property
    learning_rate = args.learning_rate
    embedding_file = args.embedding_file
    # Folds to actually run, e.g. "0,1,2" -> [0, 1, 2]; other folds are skipped.
    k_folds = list(map(int, args.k_folds.split(",")))
    print("args is : {}".format(args))

    print("Local devices are : {}, \n\n Available gpus are : {}".format(
        device_lib.list_local_devices(),
        K.tensorflow_backend._get_available_gpus()))

    # prepare output path
    if not os.path.exists(output_path):
        os.makedirs(output_path, exist_ok=True)

    # Get a crystal graph with cutoff radius A
    # (bond distances expanded onto 100 Gaussian centers up to radius + 1).
    cg = CrystalGraph(
        bond_convertor=GaussianDistance(np.linspace(0, radius + 1, 100), 0.5),
        cutoff=radius,
    )

    if graph_file is not None:
        # load graph data
        # Precomputed graphs: dict idx -> {"graph": ..., "target": ...} or None
        # for structures that failed conversion.
        with gzip.open(graph_file, "rb") as f:
            valid_graph_dict = pickle.load(f)
        idx_list = list(range(len(valid_graph_dict)))
        valid_idx_list = [
            idx for idx, graph in valid_graph_dict.items() if graph is not None
        ]
    else:
        # load structure data
        with gzip.open(args.input_file, "rb") as f:
            df = pd.DataFrame(pickle.load(f))[["structure", prop_col]]
        idx_list = list(range(len(df)))

        # load embedding data for transfer learning
        if embedding_file is not None:
            with open(embedding_file) as json_file:
                embedding_data = json.load(json_file)

        # Calculate and save valid graphs
        valid_idx_list = list()
        valid_graph_dict = dict()
        for idx in idx_list:
            try:
                graph = cg.convert(df["structure"].iloc[idx])
                if embedding_file is not None:
                    # Replace integer atom features with pretrained embeddings
                    # keyed by atomic number (transfer learning).
                    graph["atom"] = [embedding_data[i] for i in graph["atom"]]
                valid_graph_dict[idx] = {
                    "graph": graph,
                    "target": df[prop_col].iloc[idx],
                }
                valid_idx_list.append(idx)
            except RuntimeError:
                # Structure could not be converted to a graph; keep a None
                # placeholder so indices stay aligned with the dataframe.
                valid_graph_dict[idx] = None

        # Save graphs
        with gzip.open(os.path.join(output_path, "graphs.pkl.gzip"),
                       "wb") as f:
            pickle.dump(valid_graph_dict, f)

    # Split data
    # Fixed seed so the same folds are reproduced across (warm-start) runs.
    kf = KFold(n_splits=args.cv, random_state=18012019, shuffle=True)
    for fold, (train_val_idx, test_idx) in enumerate(kf.split(idx_list)):
        print(fold)
        if fold not in k_folds:
            continue
        fold_output_path = os.path.join(output_path, "kfold_{}".format(fold))
        fold_model_path = os.path.join(fold_output_path, "model")
        if not os.path.exists(fold_model_path):
            os.makedirs(fold_model_path, exist_ok=True)

        # 75/25 train/validation split of the non-test portion, fixed seed.
        train_idx, val_idx = train_test_split(train_val_idx,
                                              test_size=0.25,
                                              random_state=18012019,
                                              shuffle=True)

        # Calculate valid train validation test ids and save it
        # (intersect each split with the indices that converted successfully).
        valid_train_idx = sorted(list(set(train_idx) & (set(valid_idx_list))))
        valid_val_idx = sorted(list(set(val_idx) & (set(valid_idx_list))))
        valid_test_idx = sorted(list(set(test_idx) & (set(valid_idx_list))))
        np.save(os.path.join(fold_output_path, "train_idx.npy"),
                valid_train_idx)
        np.save(os.path.join(fold_output_path, "val_idx.npy"), valid_val_idx)
        np.save(os.path.join(fold_output_path, "test_idx.npy"), valid_test_idx)

        # Prepare training graphs
        train_graphs = [valid_graph_dict[i]["graph"] for i in valid_train_idx]
        train_targets = [
            valid_graph_dict[i]["target"] for i in valid_train_idx
        ]

        # Prepare validation graphs
        val_graphs = [valid_graph_dict[i]["graph"] for i in valid_val_idx]
        val_targets = [valid_graph_dict[i]["target"] for i in valid_val_idx]

        # Normalize targets or not
        if args.normalize:
            # Scaler is fit on train targets only; passed to the trainer below
            # so predictions can be inverse-transformed.
            y_scaler = StandardScaler()
            train_targets = y_scaler.fit_transform(
                np.array(train_targets).reshape(-1, 1)).ravel()
            val_targets = y_scaler.transform(
                np.array(val_targets).reshape((-1, 1))).ravel()
        else:
            y_scaler = None

        # Initialize model
        if warm_start is None:
            #  Set up model
            if learning_rate is None:
                learning_rate = 1e-3
            # 100 edge features (Gaussian expansion above), 2 global features.
            # nfeat_node=16 when pretrained embeddings replaced atom indices.
            model = MEGNetModel(
                100,
                2,
                nblocks=args.n_blocks,
                nvocal=95,
                npass=args.n_pass,
                lr=learning_rate,
                loss=args.loss,
                graph_convertor=cg,
                is_classification=True
                if args.type == "classification" else False,
                nfeat_node=None if embedding_file is None else 16,
            )

            initial_epoch = 0
        else:
            # Model file
            # Checkpoint filenames are assumed to be "_"-separated with the
            # epoch at field 2 and the metric at field 3 — TODO confirm the
            # naming convention used by the checkpoint callback.
            model_list = [
                m_file for m_file in os.listdir(
                    os.path.join(warm_start, "kfold_{}".format(fold), "model"))
                if m_file.endswith(".hdf5")
            ]
            if args.type == "classification":
                model_list.sort(
                    key=lambda m_file: float(
                        m_file.split("_")[3].replace(".hdf5", "")),
                    reverse=False,
                )
            else:
                model_list.sort(
                    key=lambda m_file: float(
                        m_file.split("_")[3].replace(".hdf5", "")),
                    reverse=True,
                )

            # NOTE(review): model_list[-1] after the sort — ascending for
            # classification picks the LARGEST metric, descending for
            # regression picks the SMALLEST. Verify this matches whether the
            # metric is higher- or lower-is-better in each mode.
            model_file = os.path.join(warm_start, "kfold_{}".format(fold),
                                      "model", model_list[-1])

            #  Load model from file
            if learning_rate is None:
                # Full model load only to recover the optimizer's learning
                # rate from the checkpoint; weights are re-loaded below.
                full_model = load_model(
                    model_file,
                    custom_objects={
                        "softplus2": softplus2,
                        "Set2Set": Set2Set,
                        "mean_squared_error_with_scale":
                        mean_squared_error_with_scale,
                        "MEGNetLayer": MEGNetLayer,
                    },
                )

                learning_rate = K.get_value(full_model.optimizer.lr)
            # Set up model
            model = MEGNetModel(
                100,
                2,
                nblocks=args.n_blocks,
                nvocal=95,
                npass=args.n_pass,
                lr=learning_rate,
                loss=args.loss,
                graph_convertor=cg,
                is_classification=True
                if args.type == "classification" else False,
                nfeat_node=None if embedding_file is None else 16,
            )
            model.load_weights(model_file)
            # Resume epoch counting from the checkpoint's epoch field.
            initial_epoch = int(model_list[-1].split("_")[2])
            print("warm start from : {}, \nlearning_rate is {}.".format(
                model_file, learning_rate))

        # Train
        model.train_from_graphs(
            train_graphs,
            train_targets,
            val_graphs,
            val_targets,
            batch_size=args.batch_size,
            epochs=args.max_epochs,
            verbose=2,
            initial_epoch=initial_epoch,
            use_multiprocessing=False if n_works <= 1 else True,
            workers=n_works,
            dirname=fold_model_path,
            y_scaler=y_scaler,
            save_best_only=args.save_best_only,
        )
db = connect(db_name)  # NOTE(review): `connect`/`db_name` defined elsewhere (ASE database?) — confirm

###### megnet example hyper-parameters
from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
import numpy as np

# Each bond length is expanded onto 100 Gaussian basis functions
# (centers from 0 to r_cutoff + 1 angstrom, width 0.5).
nfeat_bond = 100
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, r_cutoff + 1, nfeat_bond)
gaussian_width = 0.5
distance_converter = GaussianDistance(gaussian_centers, gaussian_width)
# Converts pymatgen structures to MEGNet graphs; bonds within r_cutoff.
graph_converter = CrystalGraph(bond_converter=distance_converter,
                               cutoff=r_cutoff)
# Default MEGNet architecture; inputs: nfeat_bond edge features, 2 global state features.
model = MEGNetModel(nfeat_bond, nfeat_global, graph_converter=graph_converter)

#########################################


def cvt_fmt_graph(rows):
    """Convert ASE database rows to pymatgen structures and scaled targets.

    NOTE(review): this body is corrupted/truncated — the final three lines
    (`result = keras.losses...` / `return result`) reference ``y_true`` and
    ``y_pred`` at a different indentation and clearly belong to a separate
    loss function. Recover the original implementation before use.
    """
    structures = []
    props = []
    for row in rows:
        # ASE Atoms -> pymatgen Structure for the graph converter.
        structures.append(
            pymatgen_io_ase.AseAtomsAdaptor.get_structure(row.toatoms()))
        # Target scaled by 1/100 — presumably a unit conversion; verify.
        props.append(row.data[predict_item] / 100)
        # props.append(abs(row.data[predict_item]/10))
    graphs_valid = []
    targets_valid = []
     result = keras.losses.mean_squared_error(y_true, y_pred)
     # result = K.print_tensor(result, message='losses')
     return result

# === megnet start === #

from megnet.models import MEGNetModel
from megnet.data.graph import GaussianDistance
from megnet.data.crystal import CrystalGraph
from megnet.utils.preprocessing import StandardScaler

from megnet.callbacks import ReduceLRUponNan, ManualStop, XiaotongCB

import numpy as np

# Graph converter: bonds within 4 A, distances expanded onto 100 Gaussians in [0, 5].
gc = CrystalGraph(bond_converter=GaussianDistance(
        np.linspace(0, 5, 100), 0.5), cutoff=4)
# NOTE(review): `examine_loss` must be defined earlier in the file — confirm.
model = MEGNetModel(100, 2, graph_converter=gc, lr=1e-4, loss=examine_loss) # , metrics=[examine_loss])
INTENSIVE = False # U0 is an extensive quantity
# Scaler fit on the full training set; attached so predictions are de-scaled.
# NOTE(review): `structures`/`targets` come from elsewhere in the file.
scaler = StandardScaler.from_training_data(structures, targets, is_intensive=INTENSIVE)
model.target_scaler = scaler

# callbacks = [ReduceLRUponNan(patience=500), ManualStop(), XiaotongCB()]

# change structures to megnet predictable structures
mp_strs = []

# Pre-convert structures to graphs and scale the targets per-structure
# (extensive scaling divides by atom count, hence train_nb_atoms).
train_graphs, train_targets = model.get_all_graphs_targets(structures, targets)
train_nb_atoms = [len(i['atom']) for i in train_graphs]
train_targets = [model.target_scaler.transform(i, j) for i, j in zip(train_targets, train_nb_atoms)]


# Example #22
# 0
            ME += e
            error_lst.append(e)
            if abs(e) > 0.5:
                targets[i] = model.predict_structure(structures[i]).ravel()
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i])/2
        ME /= sz
        f = open(str(sz) + 'txt', 'wb')
        pickle.dump(error_lst, f)
        f.close()
        # for i in range(idx, idx + sz):
        #     targets[i] += ME
        idx += sz

# Small MEGNet: 1 block, 4-unit layers, 10 edge features (Gaussian centers in [0, 5]).
model = MEGNetModel(10, 2, nblocks=1, lr=1e-4,
        n1=4, n2=4, n3=4, npass=1, ntarget=1,
        graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))


# Epoch budget; early stopping below cuts training when val_loss stalls for 50 epochs.
ep = 5000
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True)

# Pre-convert held-out structures to model inputs once, for repeated prediction.
# NOTE(review): `test_structures` and `test_input` are defined elsewhere in the file.
for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

if training_mode == 0: # PBE -> HSE ... -> part EXP, one by one
    idx = 0
    for i in range(len(data_size)):
        model.train(structures[idx:idx+data_size[i]], targets[idx:idx+data_size[i]], epochs=ep)
        idx += data_size[i]
        prediction(model)
elif training_mode == 1: # all training set together
# Example #23
# 0
# Fine-tune a previously fitted MEGNet band-gap model and save it back out.
epochs = 5
batch_size = 56

# Train/test split at a precomputed row boundary of the `inputs` dataframe.
Xtrain = inputs.iloc[0:boundary]['structure']
ytrain = inputs.iloc[0:boundary]['band_gap']

Xtest = inputs.iloc[boundary:]['structure']
ytest = inputs.iloc[boundary:]['band_gap']

# Bond distances expanded onto 10 Gaussian centers in [0, 5] angstrom.
nfeat_bond = 10
nfeat_global = 2
r_cutoff = 5
gaussian_centers = np.linspace(0, 5, 10)
gaussian_width = 0.5
distance_convertor = GaussianDistance(gaussian_centers, gaussian_width)
# Single converter, used directly by the model. (The original built a second,
# misnamed `bond_convertor = CrystalGraph(...)` that was never used — removed.)
graph_convertor = CrystalGraph(bond_convertor=distance_convertor,
                              cutoff=r_cutoff)
model = MEGNetModel(nfeat_bond, nfeat_global, graph_convertor=graph_convertor)

# BUG FIX: `from_file` is a classmethod that RETURNS the loaded model; the
# original called `model.from_file(...)` and discarded the result, so the
# pretrained weights were never used and a randomly initialized model was
# fine-tuned instead. Assign the return value.
model = MEGNetModel.from_file('fitted_gap_model.hdf5')

model.train(Xtrain,
            ytrain,
            epochs=epochs,
            batch_size=batch_size,
            validation_structures=Xtest,
            validation_targets=ytest,
            scrub_failed_structures=True)  # skip structures that fail graph conversion

model.save_model('fitted_gap_model.hdf5')
# Example #24
# 0
# Active-learning loop: each query trains on the current pool, extracts
# latent activations for pool + unlabelled data, and t-SNE-embeds them.
# NOTE(review): `queries`, `training`, `latent`, `StructureGraph`, and the
# X*/y* arrays are all defined elsewhere in the file — verify before reuse.
for i in range(queries):
# Train MegNet
    print('Query ', i)
    print('Sample from y test for consistency', len(ytest), ytest[0])
    # Active set = labelled pool plus currently unlabelled candidates.
    Xactive = np.concatenate((Xpool, Xunlab))
    yactive = np.concatenate((ypool, yunlab))
    training.active(i, prop, model, 'entropy', 
                    batch, epochs, Xpool, ypool, 
                    Xtest, ytest)

# Get the activations for the active set
    activations = []
    gaussian_centers = np.linspace(0, cutoff, bond)
    distance_converter = GaussianDistance(gaussian_centers, width)
    graph_converter = CrystalGraph(bond_converter=distance_converter)
    for s in Xactive:
        # Unbound-method call: converter passed explicitly as `self`.
        activations.append(StructureGraph.get_input(graph_converter, s))
# Obtain latent points
    tsne_active = latent.active(
       i, prop, perp, layer, 'entropy', activations, Xactive, Xpool, ypool,
       Xtest, val_frac, ndims, niters)
# Split the data
    # First len(ypool) embeddings correspond to the labelled pool.
    tsne_pool = tsne_active[:len(ypool)]
    tsne_unlab = tsne_active[len(ypool):]
    # Further split the pool embedding into train/validation by split_pool fraction.
    cut = int(len(tsne_pool)*split_pool)
    tsne_train = tsne_pool[:cut]
    ytrain = ypool[:cut]
    tsne_val = tsne_pool[cut:]
    yval = ypool[cut:]
            if abs(e) > cut_value:
                targets[it][i] = prdc
            # targets[i] = (model.predict_structure(structures[i]).ravel() + targets[i])/2
        logging.info('Data count: {dc}, std orig dft value: {std_orig}, std of model output: {std_model}'.format(
            dc=len(targets_lst), std_orig=np.std(targets_lst), std_model=np.std(prediction_lst)))
        logging.info('Data count: {dc}, Mean orig: {mean_orig}, Mean_model: {mean_model}'.format(
            dc=len(targets_lst), mean_orig=np.mean(targets_lst), mean_model=np.mean(prediction_lst)))
        f = open(dump_model_name + '_'+ it + '.txt', 'wb') # to store and analyze the error
        pickle.dump(error_lst, f)
        f.close()

# model = MEGNetModel(10, 2, nblocks=3, lr=1e-3,
#         n1=4, n2=4, n3=4, npass=1, ntarget=1,
#         graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))

# Default-architecture MEGNet: 10 edge features (Gaussians in [0, 5]), 2 global features.
model = MEGNetModel(nfeat_edge=10, nfeat_global=2, graph_converter=CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 10), 0.5)))
# Snapshot the untrained weights so runs can restart from the same random init.
# NOTE(review): `dump_model_name` is defined elsewhere in the file.
model.save_model(dump_model_name+'_1by1_init_randomly' + '.hdf5')
# Tag encoding the fidelity training order (see db_short_full_dict below).
init_model_tag = 'EGPHS'

# Epoch budget; early stopping halts after 10 stagnant val_loss epochs.
ep = 5000
callback = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# Pre-convert held-out structures to model inputs once, for repeated prediction.
# NOTE(review): `test_structures` and `test_input` are defined elsewhere in the file.
for s in test_structures:
    test_input.append(model.graph_converter.graph_to_input(model.graph_converter.convert(s)))

# One-letter tags -> dataset/functional names used by the training order string.
db_short_full_dict = {'G': 'gllb-sc', 'H': 'hse', 'S': 'scan', 'P': 'pbe', 'E': 'E1'}

def construct_dataset_from_str(db_short_str):
    s = []
    t = []
    for i in range(len(db_short_str)):