Code example #1
0
File: test_torch.py  Project: jambit/sensAI
def test_MLPClassifier(irisDataSet, irisClassificationTestCase, testResources):
    """Check that a torch MLP classifier reaches at least 80% accuracy on the iris classification task."""
    columns = irisDataSet.getInputOutputData().inputs.columns
    # one normalisation rule per input column, each backed by a StandardScaler
    rules = [DFTNormalisation.Rule(re.escape(col)) for col in columns]
    normalisation = DFTNormalisation(rules, defaultTransformerFactory=sklearn.preprocessing.StandardScaler)
    mlp = sensai.torch.models.MultiLayerPerceptronVectorClassificationModel(
        hiddenDims=(50, 25, 8), cuda=False, epochs=100, optimiser="adam",
        batchSize=200, normalisationMode=NormalisationMode.NONE, hidActivationFunction=torch.tanh)
    model = mlp.withName("torchMLPClassifier") \
        .withInputTransformers([normalisation]) \
        .withFeatureGenerator(FeatureGeneratorTakeColumns())
    irisClassificationTestCase.testMinAccuracy(model, 0.8)
Code example #2
0
 def test_multiColumnSingleRule(self):
     """A single rule with independentColumns=False must scale all matched columns by their joint max abs value."""
     values = np.array([1, 5, 10])
     frame = pd.DataFrame({"foo": values, "bar": values * 100})
     rule = DFTNormalisation.Rule(
         r"foo|bar",
         transformer=sklearn.preprocessing.MaxAbsScaler(),
         independentColumns=False)
     transformed = DFTNormalisation([rule]).fitApply(frame)
     # joint max abs is 1000 (from "bar"), so both columns are divided by 1000
     assert np.all(transformed.foo == values / 1000)
     assert np.all(transformed.bar == values / 10)
Code example #3
0
 def test_arrayValued(self):
     """A rule with arrayValued=True must normalise array-valued cells using statistics across all arrays in the column."""
     values = np.array([1, 5, 10])
     frame = pd.DataFrame({"foo": [values, 2 * values, 10 * values]})
     rule = DFTNormalisation.Rule(
         r"foo|bar",
         transformer=sklearn.preprocessing.MaxAbsScaler(),
         arrayValued=True)
     transformed = DFTNormalisation([rule]).fitApply(frame)
     # global max abs is 100 (from the last row's 10 * values), so every array is divided by 100
     assert np.all(transformed.foo.iloc[0] == values / 100)
     assert np.all(transformed.foo.iloc[-1] == values / 10)
Code example #4
0
File: test_featuregen.py  Project: jambit/sensAI
def test_FeatureGeneratorNAMarker(irisClassificationTestCase):
    """
    Integration test for handling of N/A values via marker features (using FeatureGeneratorNAMarker) in the context of models
    that do not support N/A values, replacing them with a different value (using FillNA)
    """
    iodata = irisClassificationTestCase.data

    # create some random N/A values in the data set (fixed seed for reproducibility)
    inputs = iodata.inputs.copy()
    rand = random.Random(42)
    fullIndices = list(range(len(inputs)))
    for col in inputs.columns:
        indices = rand.sample(fullIndices, 20)
        # FIX: use a single-step .iloc write instead of chained indexing (inputs[col].iloc[indices] = ...),
        # which triggers SettingWithCopyWarning and silently fails to modify `inputs` under
        # pandas copy-on-write semantics (the default in pandas 3.0)
        inputs.iloc[indices, inputs.columns.get_loc(col)] = np.nan
    iodata = InputOutputData(inputs, iodata.outputs)

    # with the N/A marker features the model should be able to exploit missingness;
    # without them, accuracy is expected to degrade below the threshold
    for useFGNA in (True, False):
        fgs = [
            FeatureGeneratorTakeColumns(
                normalisationRuleTemplate=DFTNormalisation.RuleTemplate(
                    independentColumns=True))
        ]
        if useFGNA:
            fgs.append(FeatureGeneratorNAMarker(inputs.columns))
        fCollector = FeatureCollector(*fgs)
        model = SkLearnMLPVectorClassificationModel() \
            .withFeatureCollector(fCollector) \
            .withInputTransformers(
                DFTNormalisation(fCollector.getNormalisationRules(), defaultTransformerFactory=SkLearnTransformerFactoryFactory.StandardScaler()),
                DFTFillNA(-3))
        # NOTE: using -3 instead of 0 to fill N/A values in order to force the model to learn the purpose of the N/A markers,
        # because 0 values are actually a reasonable fallback (which happens to work) when using StandardScaler
        # NOTE: it is important to apply DFTNormalisation before DFTFillNA, because DFTNormalisation would learn using the filled values otherwise

        ev = VectorClassificationModelEvaluator(iodata, testFraction=0.2)
        ev.fitModel(model)
        result = ev.evalModel(model)
        accuracy = result.getEvalStats().getAccuracy()
        log.info(f"Accuracy (for useFGNA={useFGNA}) = {accuracy}")
        if useFGNA:
            assert accuracy > 0.85
        else:
            assert accuracy < 0.85