def test_small_moddata_feature_selection_classif(small_moddata):
    """Build a classification MODData from synthetic features and verify
    that feature selection picks the expected three descriptors.

    Four features are derived from a three-class label vector: the labels
    themselves (f1), random binary noise (f2), their product (f3), and a
    noise-shifted copy of the labels (f4). With the supplied cross-NMI
    matrix, selection should rank f1, f4, f3 ahead of the pure-noise f2.
    """
    n_samples = 1500

    # Synthetic feature construction: 500 samples per class.
    class_labels = np.array([0] * 500 + [1] * 500 + [2] * 500, dtype='float')
    noise = np.random.choice(2, 1500)
    interaction = class_labels * noise
    shifted = class_labels + (noise * 0.5)

    target_matrix = np.array(class_labels, dtype='int').reshape(-1, 1)
    feature_matrix = np.array([class_labels, noise, interaction, shifted]).T

    feat_cols = ['f1', 'f2', 'f3', 'f4']
    # Precomputed cross-NMI passed directly to feature_selection, so the
    # test does not depend on the NMI estimation step.
    cross_nmi = pd.DataFrame(
        [
            [1, 0, 0.5, 0.5],
            [0, 1, 0.5, 0.5],
            [0.5, 0.5, 1, 0.5],
            [0.5, 0.5, 0.5, 1],
        ],
        columns=feat_cols,
        index=feat_cols,
    )

    data = MODData(
        ['dummy'] * 1500,
        target_matrix,
        target_names=['my_classes'],
        num_classes={"my_classes": 3},
    )
    data.df_featurized = pd.DataFrame(feature_matrix, columns=feat_cols)

    data.feature_selection(n=3, cross_nmi=cross_nmi)

    assert len(data.get_optimal_descriptors()) == 3
    assert data.get_optimal_descriptors() == ['f1', 'f4', 'f3']
def test_precomputed_cross_nmi(small_moddata):
    """Check that feature selection runs when relying on the
    precomputed cross-NMI table instead of recomputing it.

    A fresh MODData is rebuilt from the fixture's materials, targets and
    featurized frame, then feature selection is invoked with
    ``use_precomputed_cross_nmi=True``; the test passes if no error is
    raised.
    """
    rebuilt = MODData(
        materials=small_moddata.structures,
        targets=small_moddata.targets,
        target_names=small_moddata.names,
        df_featurized=small_moddata.df_featurized,
    )
    rebuilt.feature_selection(5, use_precomputed_cross_nmi=True)
"structure"] if "structure" in train_df.columns else train_df[ "composition"].map(Composition) except KeyError: raise RuntimeError( f"Could not find any materials data dataset for task {task!r}!" ) fast_oxid_featurizer = DeBreuck2020Featurizer(fast_oxid=True) train_data = MODData( materials=materials.tolist(), targets=train_df[targets].values, target_names=targets, featurizer=fast_oxid_featurizer, ) train_data.featurize(n_jobs=32) train_data.feature_selection(n=-1, use_precomputed_cross_nmi=True) # create model targets_hierarchy = [[[field for field in targets]]] weights = {field: 1 for field in targets} model = EnsembleMODNetModel(targets_hierarchy, weights) # fit model if USE_GA: # you can either use a GA for hyper-parameter optimization or... from modnet.hyper_opt import FitGenetic ga = FitGenetic(train_data) model = ga.run( size_pop=20, num_generations=10,