Esempio n. 1
0
def test_small_moddata_feature_selection_classif(small_moddata):
    """Check feature selection on a synthetic three-class classification MODData.

    Four features are derived from a class-label column (500 samples for each
    of the labels 0, 1 and 2) and a random binary column. A hand-written
    cross-NMI matrix is passed to ``feature_selection`` and the three selected
    descriptors are compared against the expected ordering.

    The ``small_moddata`` argument is accepted but not used in this body.
    """
    feature_names = ['f1', 'f2', 'f3', 'f4']

    # f1 is the class label itself, f2 is a random 0/1 column, and f3/f4 are
    # arithmetic combinations of those two.
    labels = np.array([0] * 500 + [1] * 500 + [2] * 500, dtype='float')
    coin = np.random.choice(2, 1500)
    product = labels * coin
    shifted = labels + (coin * 0.5)

    # The targets are the integer class labels as a single column.
    targets = labels.astype('int').reshape(-1, 1)
    features = np.array([labels, coin, product, shifted]).T
    names = ['my_classes']

    # Cross-NMI between features, supplied explicitly to feature_selection.
    cross_nmi_rows = [
        [1, 0, 0.5, 0.5],
        [0, 1, 0.5, 0.5],
        [0.5, 0.5, 1, 0.5],
        [0.5, 0.5, 0.5, 1],
    ]
    c_nmi = pd.DataFrame(cross_nmi_rows,
                         columns=feature_names,
                         index=feature_names)

    classif_md = MODData(['dummy'] * 1500,
                         targets,
                         target_names=names,
                         num_classes={"my_classes": 3})
    # Bypass featurization by assigning the synthetic feature table directly.
    classif_md.df_featurized = pd.DataFrame(features, columns=feature_names)
    classif_md.feature_selection(n=3, cross_nmi=c_nmi)

    assert len(classif_md.get_optimal_descriptors()) == 3
    assert classif_md.get_optimal_descriptors() == ['f1', 'f4', 'f3']
Esempio n. 2
0
def test_precomputed_cross_nmi(small_moddata):
    """Run feature selection with the precomputed cross-NMI option enabled.

    A new MODData is built from the structures, targets, target names and
    featurized dataframe of ``small_moddata``; the test then calls
    ``feature_selection`` for 5 features with
    ``use_precomputed_cross_nmi=True``. There are no explicit assertions:
    the test passes if the call completes without raising.
    """
    init_kwargs = {
        "materials": small_moddata.structures,
        "targets": small_moddata.targets,
        "target_names": small_moddata.names,
        "df_featurized": small_moddata.df_featurized,
    }
    new = MODData(**init_kwargs)

    new.feature_selection(5, use_precomputed_cross_nmi=True)
Esempio n. 3
0
                "structure"] if "structure" in train_df.columns else train_df[
                    "composition"].map(Composition)
        except KeyError:
            raise RuntimeError(
                f"Could not find any materials data dataset for task {task!r}!"
            )

        fast_oxid_featurizer = DeBreuck2020Featurizer(fast_oxid=True)
        train_data = MODData(
            materials=materials.tolist(),
            targets=train_df[targets].values,
            target_names=targets,
            featurizer=fast_oxid_featurizer,
        )
        train_data.featurize(n_jobs=32)
        train_data.feature_selection(n=-1, use_precomputed_cross_nmi=True)

        # create model
        targets_hierarchy = [[[field for field in targets]]]
        weights = {field: 1 for field in targets}
        model = EnsembleMODNetModel(targets_hierarchy, weights)

        # fit model

        if USE_GA:
            # you can either use a GA for hyper-parameter optimization or...
            from modnet.hyper_opt import FitGenetic
            ga = FitGenetic(train_data)
            model = ga.run(
                size_pop=20,
                num_generations=10,