def test_prepare_data_exception_mismatch_columns_numpy(self):
    """Expect ``_prepare_data`` to raise for a numpy array with a mismatched column count.

    The PMML declares the target ``Class`` and one further field, ``test1``,
    while the array handed to ``_prepare_data`` has two columns. The test pins
    the exact error message that is raised.
    """
    pmml_source = StringIO("""
    <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
      <DataDictionary>
        <DataField name="Class" optype="categorical" dataType="string">
          <Value value="setosa"/>
          <Value value="versicolor"/>
          <Value value="virginica"/>
        </DataField>
        <DataField name="test1" optype="continuous" dataType="double"/>
      </DataDictionary>
      <MiningSchema>
        <MiningField name="Class" usageType="target"/>
      </MiningSchema>
    </PMML>
    """)
    estimator = PMMLBaseEstimator(pmml=pmml_source)

    frame = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["test1", "test2"])
    # Converting to an array discards the column labels of the frame.
    unlabeled = np.asanyarray(frame)

    expected_message = (
      "The number of features in provided data does not match expected number of features "
      "in the PMML. Provide pandas.Dataframe, or provide data matching the DataFields in "
      "the PMML document."
    )

    with self.assertRaises(Exception) as raised:
      estimator._prepare_data(unlabeled)

    assert str(raised.exception) == expected_message
  def test_prepare_data_exception_mismatch_columns_pandas(self):
    """Expect ``_prepare_data`` to raise for a DataFrame whose column names differ from the PMML.

    The PMML declares the target ``Class`` and the fields ``test1`` and
    ``test2``; the DataFrame instead carries columns ``Test_1`` and ``Test_2``.
    The test pins the exact error message that is raised.
    """
    pmml_source = StringIO("""
    <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
      <DataDictionary>
        <DataField name="Class" optype="categorical" dataType="string">
          <Value value="setosa"/>
          <Value value="versicolor"/>
          <Value value="virginica"/>
        </DataField>
        <DataField name="test1" optype="continuous" dataType="double"/>
        <DataField name="test2" optype="continuous" dataType="double"/>
      </DataDictionary>
      <MiningSchema>
        <MiningField name="Class" usageType="target"/>
      </MiningSchema>
    </PMML>
    """)
    estimator = PMMLBaseEstimator(pmml=pmml_source)

    # Same number of columns as the PMML fields, but none of the names match.
    misnamed_frame = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["Test_1", "Test_2"])

    expected_message = "The features in the input data do not match features expected by the PMML model."

    with self.assertRaises(Exception) as raised:
      estimator._prepare_data(misnamed_frame)

    assert str(raised.exception) == expected_message
  def test_prepare_data_removes_unused_columns(self):
    """Check that ``_prepare_data`` returns only the columns declared in the PMML.

    The PMML declares the target ``Class`` and one further field, ``test1``;
    the DataFrame has ``test1`` and ``test2``. The returned frame must contain
    only ``test1``, and the caller's frame must still have both columns.
    """
    pmml_source = StringIO("""
      <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
        <DataDictionary>
          <DataField name="Class" optype="categorical" dataType="string">
            <Value value="setosa"/>
            <Value value="versicolor"/>
            <Value value="virginica"/>
          </DataField>
          <DataField name="test1" optype="continuous" dataType="double"/>
        </DataDictionary>
        <MiningSchema>
          <MiningField name="Class" usageType="target"/>
        </MiningSchema>
      </PMML>
      """)
    estimator = PMMLBaseEstimator(pmml=pmml_source)

    original_columns = ["test1", "test2"]
    frame = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=original_columns)
    prepared = estimator._prepare_data(frame)

    # The input frame must keep its columns; only the returned frame is trimmed.
    assert list(frame.columns) == ["test1", "test2"]
    assert list(prepared.columns) == ["test1"]