Code Example #1
 def test_crystalgraph(self):
     cg = CrystalGraph(cutoff=4)
     graph = cg.convert(self.structures[0])
     self.assertEqual(cg.cutoff, 4)
     keys = set(graph.keys())
     self.assertSetEqual({"bond", "atom", "index1", "index2", "state"}, keys)
     cg2 = CrystalGraph(cutoff=6)
     self.assertEqual(cg2.cutoff, 6)
     graph2 = cg2.convert(self.structures[0])
     self.assertListEqual(to_list(graph2["state"][0]), [0, 0])
     graph3 = cg(self.structures[0])
     np.testing.assert_almost_equal(graph["atom"], graph3["atom"])
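For orientation, a minimal sketch (not taken from the test suite; the cubic Si structure is borrowed from example #4 below, and the import paths are assumptions) of the conversion API these tests exercise:

from pymatgen.core import Structure, Lattice
from megnet.data.crystal import CrystalGraph

cg = CrystalGraph(cutoff=4)
graph = cg.convert(Structure(Lattice.cubic(3), ["Si"], [[0, 0, 0]]))
# Expected keys, per the assertions above: atom, bond, index1, index2, state
print(sorted(graph.keys()))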
Code Example #2
 def test_crystalgraph(self):
     cg = CrystalGraph(cutoff=4)
     graph = cg.convert(self.structures[0])
     self.assertEqual(cg.cutoff, 4)
     keys = set(graph.keys())
     self.assertSetEqual({"bond", "atom", "index1", "index2", "state"},
                         keys)
     cg2 = CrystalGraph(cutoff=6)
     self.assertEqual(cg2.cutoff, 6)
     graph2 = cg2.convert(self.structures[0])
     self.assertListEqual(graph2['state'][0], [0, 0])
     graph3 = cg(self.structures[0])
     self.assertListEqual(graph['atom'], graph3['atom'])
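Examples #1 and #2 are otherwise the same test: #1 goes through to_list and np.testing.assert_almost_equal, which also works when the graph values are numpy arrays, while #2 assumes the graph values are plain Python lists throughout.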
Code Example #3
 def test_crystalgraph(self):
     cg = CrystalGraph()
     graph = cg.convert(self.structures[0])
     self.assertEqual(cg.r, 4)
     keys = set(graph.keys())
     self.assertSetEqual({"distance", "node", "index1", "index2", "state"},
                         keys)
     cg2 = CrystalGraph(r=6)
     self.assertEqual(cg2.r, 6)
     graph2 = cg2.convert(self.structures[0])
     self.assertListEqual(graph2['state'][0], [0, 0])
     graph3 = cg(self.structures[0])
     self.assertListEqual(graph['node'], graph3['node'])
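This variant appears to come from an older megnet release in which the cutoff argument was named r and the graph keys were node and distance rather than atom and bond; the assertions follow the same pattern as in the examples above.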
Code Example #4
File: test_models.py Project: idocx/megnet
 def test_check_dimension(self):
     gc = CrystalGraph(bond_converter=GaussianDistance(np.linspace(0, 5, 20), 0.5))
     s = Structure(Lattice.cubic(3), ['Si'], [[0, 0, 0]])
     graph = gc.convert(s)
     # The model is declared with 10-dimensional bond features (the first
     # positional argument) while the graph carries 20, one per Gaussian
     # center, so check_dimension should raise.
     model = MEGNetModel(10, 2, nblocks=1, lr=1e-2,
                         n1=4, n2=4, n3=4, npass=1, ntarget=1,
                         graph_converter=gc,
                         )
     with self.assertRaises(Exception) as context:
         model.check_dimension(graph)
     # Assert outside the with-block; the raised exception would otherwise
     # skip this line.
     self.assertTrue('The data dimension for bond' in str(context.exception))
Code Example #5
with open("mp.2019.04.01.json") as f:
    structure_data = {i["material_id"]: i["structure"] for i in json.load(f)}
print("All structures in mp.2019.04.01.json contain %d structures" %
      len(structure_data))

##  Band gap data
with gzip.open("data_no_structs.json.gz", "rb") as f:
    bandgap_data = json.loads(f.read())

# mp ids that are used in training
useful_ids = set.union(*[set(bandgap_data[i].keys()) for i in ALL_FIDELITIES])
print("Only %d structures are used" % len(useful_ids))
print("Calculating the graphs for all structures... this may take minutes.")
structure_data = {i: structure_data[i] for i in useful_ids}
# crystal_graph is the structure-to-graph converter (e.g. a CrystalGraph)
# constructed earlier in the script, outside this excerpt.
structure_data = {
    i: crystal_graph.convert(Structure.from_str(j, fmt="cif"))
    for i, j in structure_data.items()
}

##  Generate graphs with fidelity information
graphs = []
targets = []
material_ids = []

for fidelity_id, fidelity in enumerate(ALL_FIDELITIES):
    for mp_id in bandgap_data[fidelity]:
        graph = deepcopy(structure_data[mp_id])

        # The fidelity information is included here by changing the state attributes
        # PBE: 0, GLLB-SC: 1, HSE: 2, SCAN: 3
        graph["state"] = [fidelity_id]
Code Example #6
 def test_get_flat_data(self):
     cg = CrystalGraph(cutoff=4)
     graphs = [cg.convert(i) for i in self.structures]
     targets = [0.1, 0.2]
     inp = cg.get_flat_data(graphs, targets)
     self.assertListEqual([len(i) for i in inp], [2] * 6)
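get_flat_data flattens the list of graph dictionaries into parallel lists, presumably one per graph key (atom, bond, state, index1, index2) plus one for the targets; with two input structures that gives six lists of length 2, which is exactly what the assertion checks.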
Code Example #7
 def test_convert(self):
     cg = CrystalGraph(cutoff=4)
     graph = cg.convert(self.structures[0])
     self.assertListEqual(graph['atom'],
                          [i.specie.Z for i in self.structures[0]])
Code Example #8
File: megnet.py Project: zhigangmei/automatminer
def train():
    # Parse args
    args = parse_args()
    radius = args.radius
    n_works = args.n_works
    warm_start = args.warm_start
    output_path = args.output_path
    graph_file = args.graph_file
    prop_col = args.property
    learning_rate = args.learning_rate
    embedding_file = args.embedding_file
    k_folds = list(map(int, args.k_folds.split(",")))
    print("args is : {}".format(args))

    print("Local devices are : {}, \n\n Available gpus are : {}".format(
        device_lib.list_local_devices(),
        K.tensorflow_backend._get_available_gpus()))

    # prepare output path
    if not os.path.exists(output_path):
        os.makedirs(output_path, exist_ok=True)

    # Build a crystal graph converter with the given cutoff radius (Å)
    cg = CrystalGraph(
        bond_convertor=GaussianDistance(np.linspace(0, radius + 1, 100), 0.5),
        cutoff=radius,
    )

    if graph_file is not None:
        # load graph data
        with gzip.open(graph_file, "rb") as f:
            valid_graph_dict = pickle.load(f)
        idx_list = list(range(len(valid_graph_dict)))
        valid_idx_list = [
            idx for idx, graph in valid_graph_dict.items() if graph is not None
        ]
    else:
        # load structure data
        with gzip.open(args.input_file, "rb") as f:
            df = pd.DataFrame(pickle.load(f))[["structure", prop_col]]
        idx_list = list(range(len(df)))

        # load embedding data for transfer learning
        if embedding_file is not None:
            with open(embedding_file) as json_file:
                embedding_data = json.load(json_file)

        # Calculate and save valid graphs
        valid_idx_list = list()
        valid_graph_dict = dict()
        for idx in idx_list:
            try:
                graph = cg.convert(df["structure"].iloc[idx])
                if embedding_file is not None:
                    graph["atom"] = [embedding_data[i] for i in graph["atom"]]
                valid_graph_dict[idx] = {
                    "graph": graph,
                    "target": df[prop_col].iloc[idx],
                }
                valid_idx_list.append(idx)
            except RuntimeError:
                valid_graph_dict[idx] = None

        # Save graphs
        with gzip.open(os.path.join(output_path, "graphs.pkl.gzip"),
                       "wb") as f:
            pickle.dump(valid_graph_dict, f)

    # Split data
    kf = KFold(n_splits=args.cv, random_state=18012019, shuffle=True)
    for fold, (train_val_idx, test_idx) in enumerate(kf.split(idx_list)):
        print(fold)
        if fold not in k_folds:
            continue
        fold_output_path = os.path.join(output_path, "kfold_{}".format(fold))
        fold_model_path = os.path.join(fold_output_path, "model")
        if not os.path.exists(fold_model_path):
            os.makedirs(fold_model_path, exist_ok=True)

        train_idx, val_idx = train_test_split(train_val_idx,
                                              test_size=0.25,
                                              random_state=18012019,
                                              shuffle=True)

        # Compute the valid train/validation/test ids and save them
        valid_train_idx = sorted(set(train_idx) & set(valid_idx_list))
        valid_val_idx = sorted(set(val_idx) & set(valid_idx_list))
        valid_test_idx = sorted(set(test_idx) & set(valid_idx_list))
        np.save(os.path.join(fold_output_path, "train_idx.npy"),
                valid_train_idx)
        np.save(os.path.join(fold_output_path, "val_idx.npy"), valid_val_idx)
        np.save(os.path.join(fold_output_path, "test_idx.npy"), valid_test_idx)

        # Prepare training graphs
        train_graphs = [valid_graph_dict[i]["graph"] for i in valid_train_idx]
        train_targets = [
            valid_graph_dict[i]["target"] for i in valid_train_idx
        ]

        # Prepare validation graphs
        val_graphs = [valid_graph_dict[i]["graph"] for i in valid_val_idx]
        val_targets = [valid_graph_dict[i]["target"] for i in valid_val_idx]

        # Optionally normalize the targets
        if args.normalize:
            y_scaler = StandardScaler()
            train_targets = y_scaler.fit_transform(
                np.array(train_targets).reshape(-1, 1)).ravel()
            val_targets = y_scaler.transform(
                np.array(val_targets).reshape((-1, 1))).ravel()
        else:
            y_scaler = None

        # Initialize model
        if warm_start is None:
            #  Set up model
            if learning_rate is None:
                learning_rate = 1e-3
            model = MEGNetModel(
                100,
                2,
                nblocks=args.n_blocks,
                nvocal=95,
                npass=args.n_pass,
                lr=learning_rate,
                loss=args.loss,
                graph_convertor=cg,
                is_classification=args.type == "classification",
                nfeat_node=None if embedding_file is None else 16,
            )

            initial_epoch = 0
        else:
            # Model file
            model_list = [
                m_file for m_file in os.listdir(
                    os.path.join(warm_start, "kfold_{}".format(fold), "model"))
                if m_file.endswith(".hdf5")
            ]
            # Checkpoint files embed the validation metric in their name;
            # sort so that model_list[-1] is the best checkpoint (largest
            # metric for classification, smallest error for regression).
            if args.type == "classification":
                model_list.sort(
                    key=lambda m_file: float(
                        m_file.split("_")[3].replace(".hdf5", "")),
                    reverse=False,
                )
            else:
                model_list.sort(
                    key=lambda m_file: float(
                        m_file.split("_")[3].replace(".hdf5", "")),
                    reverse=True,
                )

            model_file = os.path.join(warm_start, "kfold_{}".format(fold),
                                      "model", model_list[-1])

            #  Load model from file
            if learning_rate is None:
                full_model = load_model(
                    model_file,
                    custom_objects={
                        "softplus2": softplus2,
                        "Set2Set": Set2Set,
                        "mean_squared_error_with_scale":
                        mean_squared_error_with_scale,
                        "MEGNetLayer": MEGNetLayer,
                    },
                )

                learning_rate = K.get_value(full_model.optimizer.lr)
            # Set up model
            model = MEGNetModel(
                100,
                2,
                nblocks=args.n_blocks,
                nvocal=95,
                npass=args.n_pass,
                lr=learning_rate,
                loss=args.loss,
                graph_convertor=cg,
                is_classification=args.type == "classification",
                nfeat_node=None if embedding_file is None else 16,
            )
            model.load_weights(model_file)
            initial_epoch = int(model_list[-1].split("_")[2])
            print("warm start from : {}, \nlearning_rate is {}.".format(
                model_file, learning_rate))

        # Train
        model.train_from_graphs(
            train_graphs,
            train_targets,
            val_graphs,
            val_targets,
            batch_size=args.batch_size,
            epochs=args.max_epochs,
            verbose=2,
            initial_epoch=initial_epoch,
            use_multiprocessing=n_works > 1,
            workers=n_works,
            dirname=fold_model_path,
            y_scaler=y_scaler,
            save_best_only=args.save_best_only,
        )
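
parse_args() lies outside this excerpt. The sketch below is a hypothetical reconstruction based solely on the attributes train() reads above; the flag names mirror those attributes, but the defaults and help strings are assumptions, not the project's actual CLI:

import argparse

def parse_args():
    # Hypothetical reconstruction; defaults are illustrative assumptions.
    parser = argparse.ArgumentParser(description="Train MEGNet with k-fold CV")
    parser.add_argument("--input_file", help="gzipped pickle with structures")
    parser.add_argument("--graph_file", default=None,
                        help="precomputed graphs; skips structure conversion")
    parser.add_argument("--embedding_file", default=None,
                        help="atom embeddings for transfer learning")
    parser.add_argument("--output_path", default=".")
    parser.add_argument("--warm_start", default=None,
                        help="previous output_path to resume from")
    parser.add_argument("--property", help="target column name")
    parser.add_argument("--type", default="regression",
                        choices=["regression", "classification"])
    parser.add_argument("--loss", default="mse")
    parser.add_argument("--radius", type=float, default=4.0)
    parser.add_argument("--learning_rate", type=float, default=None)
    parser.add_argument("--n_blocks", type=int, default=3)
    parser.add_argument("--n_pass", type=int, default=3)
    parser.add_argument("--n_works", type=int, default=1)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--max_epochs", type=int, default=1000)
    parser.add_argument("--cv", type=int, default=5, help="number of folds")
    parser.add_argument("--k_folds", default="0,1,2,3,4",
                        help="comma-separated folds to run")
    parser.add_argument("--normalize", action="store_true")
    parser.add_argument("--save_best_only", action="store_true")
    return parser.parse_args()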