def test_train_small_bootstrap_presets(small_moddata, tf_session):
    """Check the shape of what `fit_preset()` returns for a bootstrapped ensemble.

    A two-member ``EnsembleMODNetModel`` with ``bootstrap=True`` is fitted on
    the ``small_moddata`` fixture using the first two generated presets, each
    cut down to 2 epochs.  For every ``nested`` setting tried, the first element of
    the ``fit_preset()`` result must hold one entry per preset, and the first
    of those entries must have the expected length.

    Args:
        small_moddata: fixture providing the featurized data; its
            ``optimal_features`` attribute is overwritten by this test.
        tf_session: fixture requested for its setup only; it is not
            referenced in the body.
    """
    from modnet.model_presets import gen_presets
    from modnet.models import EnsembleMODNetModel

    # Keep only the first two presets and shorten each one to 2 epochs.
    presets = gen_presets(100, 100)[:2]
    for preset in presets:
        preset["epochs"] = 2

    # Skip feature selection: set the 'optimal' features by hand to every
    # featurized column whose name starts with "ElementProperty".
    small_moddata.optimal_features = [
        column
        for column in small_moddata.df_featurized.columns
        if column.startswith("ElementProperty")
    ]

    target_names = ["eform", "egap"]
    ensemble = EnsembleMODNetModel(
        [[target_names]],
        weights={name: 1 for name in target_names},
        num_neurons=[[4], [2], [2], [2]],
        n_feat=3,
        n_models=2,
        bootstrap=True,
    )

    # (nested option, expected length of the first preset's model list):
    # nested=2 is expected to yield 2 entries, nested=0 only 1.
    expectations = ((2, 2), (0, 1))
    for nested_option, expected_count in expectations:
        fitted_models = ensemble.fit_preset(
            small_moddata,
            presets=presets,
            nested=nested_option,
            val_fraction=0.2,
            n_jobs=2,
        )[0]
        assert len(fitted_models) == len(presets)
        assert len(fitted_models[0]) == expected_count
num_generations=10, n_jobs=16, early_stopping=True, refit=True, ) else: # ... a list of presets (kind of dynamic grid search) ( models, val_losses, best_learning_curve, learning_curves, best_presets, ) = model.fit_preset( train_data, classification=classification, nested=5, n_jobs=16, ) # Load and featurize test dataset test_df = task.get_test_data(fold, include_target=False, as_type="df") try: materials = test_df[ "structure"] if "structure" in test_df.columns else train_df[ "composition"].map(Composition) except KeyError: raise RuntimeError( f"Could not find any materials data dataset for task {task!r}!" )