def test_cgcnn_clf(df_matbench_phonons):
    """End-to-end smoke test for a CGCNN classifier.

    Trains a small 2-model ensemble on the matbench phonons classification
    target and asserts that ensemble accuracy and ROC-AUC clear minimum
    thresholds.
    """
    # -- fixed hyper-parameters for a small, reproducible run ---------------
    elem_emb = "cgcnn92"
    targets = ["phdos_clf"]
    tasks = ["classification"]
    losses = ["CSE"]
    robust = True
    model_name = "cgcnn-clf-test"
    elem_fea_len = 32
    h_fea_len = 128
    n_graph = 3
    n_hidden = 1
    ensemble = 2
    run_id = 1
    data_seed = 42
    epochs = 10
    log = False
    sample = 1
    test_size = 0.2
    resume = False
    fine_tune = None
    transfer = None
    optim = "AdamW"
    learning_rate = 3e-4
    momentum = 0.9
    weight_decay = 1e-6
    batch_size = 128
    workers = 0
    device = "cuda" if torch.cuda.is_available() else "cpu"

    task_dict = dict(zip(targets, tasks))
    loss_dict = dict(zip(targets, losses))

    # -- build dataset and derive model input sizes from it -----------------
    dataset = CrystalGraphData(
        df=df_matbench_phonons, elem_emb=elem_emb, task_dict=task_dict
    )
    n_targets = dataset.n_targets
    elem_emb_len = dataset.elem_emb_len
    nbr_fea_len = dataset.nbr_fea_dim

    # -- hold out a test split; reuse it as the validation set --------------
    remaining_idx = list(range(len(dataset)))
    print(f"using {test_size} of training set as test set")
    remaining_idx, test_idx = split(
        remaining_idx, random_state=data_seed, test_size=test_size
    )
    test_set = torch.utils.data.Subset(dataset, test_idx)

    print("No validation set used, using test set for evaluation purposes")
    # NOTE when reusing the test set as validation data, care must be taken
    # not to peek at it: the only valid model is the one obtained after the
    # final epoch, with the epoch count fixed in advance of the experiment.
    val_set = test_set

    train_set = torch.utils.data.Subset(dataset, remaining_idx[0::sample])

    data_params = {
        "batch_size": batch_size,
        "num_workers": workers,
        "pin_memory": False,
        "shuffle": True,
        "collate_fn": collate_batch,
    }
    setup_params = {
        "optim": optim,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "momentum": momentum,
        "device": device,
    }
    restart_params = {
        "resume": resume,
        "fine_tune": fine_tune,
        "transfer": transfer,
    }
    model_params = {
        "task_dict": task_dict,
        "robust": robust,
        "n_targets": n_targets,
        "elem_emb_len": elem_emb_len,
        "nbr_fea_len": nbr_fea_len,
        "elem_fea_len": elem_fea_len,
        "n_graph": n_graph,
        "h_fea_len": h_fea_len,
        "n_hidden": n_hidden,
    }

    train_ensemble(
        model_class=CrystalGraphConvNet,
        model_name=model_name,
        run_id=run_id,
        ensemble_folds=ensemble,
        epochs=epochs,
        train_set=train_set,
        val_set=val_set,
        log=log,
        data_params=data_params,
        setup_params=setup_params,
        restart_params=restart_params,
        model_params=model_params,
        loss_dict=loss_dict,
    )

    # -- evaluate the trained ensemble on the held-out test set -------------
    data_params["batch_size"] = 64 * batch_size  # faster model inference
    data_params["shuffle"] = False  # need fixed data order due to ensembling

    results_dict = results_multitask(
        model_class=CrystalGraphConvNet,
        model_name=model_name,
        run_id=run_id,
        ensemble_folds=ensemble,
        test_set=test_set,
        data_params=data_params,
        robust=robust,
        task_dict=task_dict,
        device=device,
        eval_type="checkpoint",
        save_results=False,
    )

    logits = results_dict["phdos_clf"]["logits"]
    target = results_dict["phdos_clf"]["target"]

    # average logits across the ensemble, then score against one-hot targets
    ens_logits = np.mean(logits, axis=0)
    target_ohe = np.zeros_like(ens_logits)
    target_ohe[np.arange(target.size), target] = 1

    ens_acc = accuracy_score(target, np.argmax(ens_logits, axis=1))
    ens_roc_auc = roc_auc_score(target_ohe, ens_logits)

    assert ens_acc > 0.85
    assert ens_roc_auc > 0.9
def main(  # noqa: C901
    data_path,
    targets,
    tasks,
    losses,
    robust,
    elem_emb="cgcnn92",
    model_name="cgcnn",
    n_graph=4,
    elem_fea_len=64,
    n_hidden=1,
    h_fea_len=128,
    radius=5,
    max_num_nbr=12,
    dmin=0,
    step=0.2,
    ensemble=1,
    run_id=1,
    data_seed=42,
    epochs=100,
    patience=None,
    log=True,
    sample=1,
    test_size=0.2,
    test_path=None,
    val_size=0.0,
    val_path=None,
    resume=None,
    fine_tune=None,
    transfer=None,
    train=True,
    evaluate=True,
    optim="AdamW",
    learning_rate=3e-4,
    momentum=0.9,
    weight_decay=1e-6,
    batch_size=128,
    workers=0,
    device="cuda" if torch.cuda.is_available() else "cpu",
    **kwargs,
):
    """Train and/or evaluate a CGCNN model (or ensemble of models).

    Builds the crystal-graph dataset from ``data_path``, carves out the
    requested train/validation/test splits (or loads independent split
    files), then dispatches to ``train_ensemble`` and/or
    ``results_multitask`` depending on the ``train``/``evaluate`` flags.
    Returns the results dict from ``results_multitask`` when evaluating,
    otherwise ``None``.
    """
    # ---- validate the requested configuration -----------------------------
    if not len(targets) == len(tasks) == len(losses):
        raise AssertionError
    if not (evaluate or train):
        raise AssertionError(
            "No action given - At least one of 'train' or 'evaluate' cli flags required"
        )

    if test_path:
        # an explicit test file replaces the hold-out split
        test_size = 0.0
    if not (test_path and val_path):
        if test_size + val_size >= 1.0:
            raise AssertionError(
                f"'test_size'({test_size}) "
                f"plus 'val_size'({val_size}) must be less than 1"
            )

    if ensemble > 1 and (fine_tune or transfer):
        raise NotImplementedError(
            "If training an ensemble with fine tuning or transferring"
            " options the models must be trained one by one using the"
            " run-id flag."
        )
    if fine_tune and transfer:
        raise AssertionError(
            "Cannot fine-tune and transfer checkpoint(s) at the same time."
        )

    task_dict = dict(zip(targets, tasks))
    loss_dict = dict(zip(targets, losses))
    dist_dict = {
        "radius": radius,
        "max_num_nbr": max_num_nbr,
        "dmin": dmin,
        "step": step,
    }

    # ---- load the primary dataset -----------------------------------------
    if not os.path.exists(data_path):
        raise AssertionError(f"{data_path} does not exist!")
    # NOTE make sure to use dense datasets; here do not use the default na
    # values as they can clash with "NaN" which is a valid material
    df = pd.read_csv(data_path, keep_default_na=False, na_values=[], comment="#")

    dataset = CrystalGraphData(
        df=df, elem_emb=elem_emb, task_dict=task_dict, **dist_dict
    )
    n_targets = dataset.n_targets
    elem_emb_len = dataset.elem_emb_len
    nbr_fea_len = dataset.nbr_fea_dim

    train_idx = list(range(len(dataset)))

    # ---- test split (independent file or hold-out fraction) ---------------
    if evaluate:
        if test_path:
            if not os.path.exists(test_path):
                raise AssertionError(f"{test_path} does not exist!")
            # NOTE make sure to use dense datasets,
            # NOTE do not use default_na as "NaN" is a valid material
            df = pd.read_csv(test_path, keep_default_na=False, na_values=[])
            print(f"using independent test set: {test_path}")
            test_set = CrystalGraphData(
                df=df, elem_emb=elem_emb, task_dict=task_dict, **dist_dict
            )
            test_set = torch.utils.data.Subset(test_set, range(len(test_set)))
        elif test_size == 0.0:
            raise ValueError("test-size must be non-zero to evaluate model")
        else:
            print(f"using {test_size} of training set as test set")
            train_idx, test_idx = split(
                train_idx, random_state=data_seed, test_size=test_size
            )
            test_set = torch.utils.data.Subset(dataset, test_idx)

    # ---- validation split and training subset -----------------------------
    if train:
        if val_path:
            if not os.path.exists(val_path):
                raise AssertionError(f"{val_path} does not exist!")
            # NOTE make sure to use dense datasets,
            # NOTE do not use default_na as "NaN" is a valid material
            df = pd.read_csv(val_path, keep_default_na=False, na_values=[])
            print(f"using independent validation set: {val_path}")
            val_set = CrystalGraphData(
                df=df, elem_emb=elem_emb, task_dict=task_dict, **dist_dict
            )
            val_set = torch.utils.data.Subset(val_set, range(len(val_set)))
        else:
            if val_size == 0.0 and evaluate:
                print(
                    "No validation set used, using test set for evaluation purposes"
                )
                # NOTE when using this option care must be taken not to
                # peek at the test set. The only valid model to use is the
                # one obtained after the final epoch where the epoch count
                # is decided in advance of the experiment.
                val_set = test_set
            elif val_size == 0.0:
                val_set = None
            else:
                print(f"using {val_size} of training set as validation set")
                # rescale val_size since the test fraction is already removed
                train_idx, val_idx = split(
                    train_idx,
                    random_state=data_seed,
                    test_size=val_size / (1 - test_size),
                )
                val_set = torch.utils.data.Subset(dataset, val_idx)

        # subsample the remaining training indices by the given stride
        train_set = torch.utils.data.Subset(dataset, train_idx[0::sample])

    data_params = {
        "batch_size": batch_size,
        "num_workers": workers,
        "pin_memory": False,
        "shuffle": True,
        "collate_fn": collate_batch,
    }
    setup_params = {
        "optim": optim,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "momentum": momentum,
        "device": device,
    }

    if resume:
        resume = f"models/{model_name}/checkpoint-r{run_id}.pth.tar"
    restart_params = {
        "resume": resume,
        "fine_tune": fine_tune,
        "transfer": transfer,
    }

    model_params = {
        "task_dict": task_dict,
        "robust": robust,
        "n_targets": n_targets,
        "elem_emb_len": elem_emb_len,
        "nbr_fea_len": nbr_fea_len,
        "elem_fea_len": elem_fea_len,
        "n_graph": n_graph,
        "h_fea_len": h_fea_len,
        "n_hidden": n_hidden,
    }

    if train:
        train_ensemble(
            model_class=CrystalGraphConvNet,
            model_name=model_name,
            run_id=run_id,
            ensemble_folds=ensemble,
            epochs=epochs,
            patience=patience,
            train_set=train_set,
            val_set=val_set,
            log=log,
            data_params=data_params,
            setup_params=setup_params,
            restart_params=restart_params,
            model_params=model_params,
            loss_dict=loss_dict,
        )

    if evaluate:
        data_params["batch_size"] = 16 * batch_size  # faster model inference
        data_params["shuffle"] = False  # need fixed data order due to ensembling

        return results_multitask(
            model_class=CrystalGraphConvNet,
            model_name=model_name,
            run_id=run_id,
            ensemble_folds=ensemble,
            test_set=test_set,
            data_params=data_params,
            robust=robust,
            task_dict=task_dict,
            device=device,
            eval_type="checkpoint",
        )
def test_roost_regression(df_matbench_phonons):
    """End-to-end smoke test for a Roost regressor.

    Trains a small 2-model ensemble on the matbench "last phdos peak"
    regression target and asserts that ensemble R², MAE and RMSE clear
    minimum quality thresholds.
    """
    # -- fixed hyper-parameters for a small, reproducible run ---------------
    elem_emb = "matscholar200"
    targets = ["last phdos peak"]
    tasks = ["regression"]
    losses = ["L1"]
    robust = True
    model_name = "roost-reg-test"
    elem_fea_len = 64
    n_graph = 3
    ensemble = 2
    run_id = 1
    data_seed = 42
    epochs = 25
    log = False
    sample = 1
    test_size = 0.2
    resume = False
    fine_tune = None
    transfer = None
    optim = "AdamW"
    learning_rate = 3e-4
    momentum = 0.9
    weight_decay = 1e-6
    batch_size = 128
    workers = 0
    device = "cuda" if torch.cuda.is_available() else "cpu"

    task_dict = dict(zip(targets, tasks))
    loss_dict = dict(zip(targets, losses))

    # -- build dataset and derive model input sizes from it -----------------
    dataset = CompositionData(
        df=df_matbench_phonons, elem_emb=elem_emb, task_dict=task_dict
    )
    n_targets = dataset.n_targets
    elem_emb_len = dataset.elem_emb_len

    # -- hold out a test split; reuse it as the validation set --------------
    remaining_idx = list(range(len(dataset)))
    print(f"using {test_size} of training set as test set")
    remaining_idx, test_idx = split(
        remaining_idx, random_state=data_seed, test_size=test_size
    )
    test_set = torch.utils.data.Subset(dataset, test_idx)

    print("No validation set used, using test set for evaluation purposes")
    # NOTE when reusing the test set as validation data, care must be taken
    # not to peek at it: the only valid model is the one obtained after the
    # final epoch, with the epoch count fixed in advance of the experiment.
    val_set = test_set

    train_set = torch.utils.data.Subset(dataset, remaining_idx[0::sample])

    data_params = {
        "batch_size": batch_size,
        "num_workers": workers,
        "pin_memory": False,
        "shuffle": True,
        "collate_fn": collate_batch,
    }
    setup_params = {
        "optim": optim,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "momentum": momentum,
        "device": device,
    }
    restart_params = {
        "resume": resume,
        "fine_tune": fine_tune,
        "transfer": transfer,
    }
    model_params = {
        "task_dict": task_dict,
        "robust": robust,
        "n_targets": n_targets,
        "elem_emb_len": elem_emb_len,
        "elem_fea_len": elem_fea_len,
        "n_graph": n_graph,
        "elem_heads": 2,
        "elem_gate": [256],
        "elem_msg": [256],
        "cry_heads": 2,
        "cry_gate": [256],
        "cry_msg": [256],
        "trunk_hidden": [256, 256],
        "out_hidden": [128, 64],
    }

    train_ensemble(
        model_class=Roost,
        model_name=model_name,
        run_id=run_id,
        ensemble_folds=ensemble,
        epochs=epochs,
        train_set=train_set,
        val_set=val_set,
        log=log,
        data_params=data_params,
        setup_params=setup_params,
        restart_params=restart_params,
        model_params=model_params,
        loss_dict=loss_dict,
    )

    # -- evaluate the trained ensemble on the held-out test set -------------
    data_params["batch_size"] = 64 * batch_size  # faster model inference
    data_params["shuffle"] = False  # need fixed data order due to ensembling

    results_dict = results_multitask(
        model_class=Roost,
        model_name=model_name,
        run_id=run_id,
        ensemble_folds=ensemble,
        test_set=test_set,
        data_params=data_params,
        robust=robust,
        task_dict=task_dict,
        device=device,
        eval_type="checkpoint",
        save_results=False,
    )

    pred = results_dict["last phdos peak"]["pred"]
    target = results_dict["last phdos peak"]["target"]

    # average predictions across the ensemble before scoring
    y_ens = np.mean(pred, axis=0)

    mae = np.abs(target - y_ens).mean()
    mse = np.square(target - y_ens).mean()
    rmse = np.sqrt(mse)
    r2 = r2_score(target, y_ens)

    assert r2 > 0.7
    assert mae < 150
    assert rmse < 300