def randFor(rData, lData):
    """Train a random forest on TF-IDF text features and return its accuracy.

    ``rData`` and ``lData`` are split with ``train_test_split`` (fixed
    ``random_state=1``). The training part of ``rData`` is used to fit a
    ``TfidfVectorizer``; a 100-tree ``RandomForestClassifier`` is then fitted
    on the vectorized training data and evaluated on the test part.

    As a side effect the eli5 weight explanation of the fitted forest
    (converted to DataFrames) and the first test document are printed.

    Returns:
        The value of ``metrics.accuracy_score`` for the test split.
    """
    randClass = RandomForestClassifier(n_estimators=100)

    respTrain, respTest, labTrain, labTest = train_test_split(
        rData, lData, random_state=1)

    vect = TfidfVectorizer(min_df=1, max_df=1.0, stop_words='english')
    respTrainVec = vect.fit_transform(respTrain)

    # To be commented for Pickle Building of Vectorizer
    respTestVec = vect.transform(respTest)

    randClass.fit(respTrainVec, labTrain)
    # To be commented for Pickle Building of Rand Class Model
    labPredClass = randClass.predict(respTestVec)

    # Explain the weights of this estimator.
    # format_as_dataframes() needs an eli5 Explanation object, which is what
    # explain_weights() returns; show_weights() returns a rendered HTML
    # display object and cannot be converted to DataFrames.
    print(eli5.format_as_dataframes(eli5.explain_weights(randClass)))
    # NOTE(review): respTest[0] is a label lookup if rData is a pandas
    # Series (the split shuffles the index) -- confirm the input type.
    print(respTest[0])

    # Modify to return specific class types
    return metrics.accuracy_score(labTest, labPredClass)
Ejemplo n.º 2
0
def test_feature_importances(with_std, with_value):
    """Check DataFrame export of an explanation with feature importances.

    ``with_std`` / ``with_value`` switch on the ``std`` / ``value`` fields of
    the feature weights, and the matching columns of the expected frame.
    """
    def _optional(enabled, payload):
        # Field is only populated when the corresponding flag is set.
        return payload if enabled else None

    importances = FeatureImportances(
        importances=[
            FeatureWeight('a', 1,
                          std=_optional(with_std, 0.1),
                          value=_optional(with_value, 1)),
            FeatureWeight('b', 2,
                          std=_optional(with_std, 0.2),
                          value=_optional(with_value, 3)),
        ],
        remaining=10,
    )
    expl = Explanation(estimator='some estimator',
                       feature_importances=importances)

    frames = format_as_dataframes(expl)
    assert isinstance(frames, dict)
    assert list(frames) == ['feature_importances']
    actual = frames['feature_importances']

    expected = pd.DataFrame({'weight': [1, 2]}, index=['a', 'b'])
    if with_std:
        expected['std'] = [0.1, 0.2]
    if with_value:
        expected['value'] = [1, 3]
    print(actual, expected, sep='\n')
    assert expected.equals(actual)

    # The single-frame helper must yield the very same table.
    assert expected.equals(format_as_dataframe(expl))
Ejemplo n.º 3
0
def test_transition_features():
    """Check export of targets plus transition features to DataFrames.

    The transition table is compared through its string rendering, and
    ``format_as_dataframe`` is expected to emit a ``UserWarning`` while
    returning the transition table.
    """
    def _target(name):
        # Both targets share the same single positive feature weight.
        return TargetExplanation(
            name,
            feature_weights=FeatureWeights(
                pos=[FeatureWeight('pos', 13, value=1)],
                neg=[],
            ))

    transitions = TransitionFeatureWeights(
        class_names=['class2', 'class1'],  # reverse on purpose
        coef=np.array([[1.5, 2.5], [3.5, 4.5]]),
    )
    expl = Explanation(
        estimator='some estimator',
        targets=[_target('class1'), _target('class2')],
        transition_features=transitions,
    )

    frames = format_as_dataframes(expl)
    assert isinstance(frames, dict)
    assert set(frames) == {'targets', 'transition_features'}
    assert frames['targets'].equals(format_as_dataframe(expl.targets))

    transition_df = frames['transition_features']
    print(transition_df)
    print(format_as_text(expl))
    expected_rendering = ('to      class2  class1\n'
                          'from                  \n'
                          'class2     1.5     2.5\n'
                          'class1     3.5     4.5')
    assert str(transition_df) == expected_rendering

    with pytest.warns(UserWarning):
        combined = format_as_dataframe(expl)
    assert combined.equals(transition_df)
Ejemplo n.º 4
0
 def transition_features(self):
     """Return the transition weights of ``self.crf`` as a list of tuples.

     Each tuple is ``(from, coef, to)``, taken row by row from the
     ``'transition_features'`` table that eli5 produces for the CRF.
     """
     explanation = eli5.explain_weights_sklearn_crfsuite(self.crf)
     tables = eli5.format_as_dataframes(explanation)
     rows = zip(tables['transition_features']['from'],
                tables['transition_features']['coef'],
                tables['transition_features']['to'])
     return [row for row in rows]
Ejemplo n.º 5
0
def test_sklearn_crfsuite(xseq, yseq):
    """Fit a small CRF and check its eli5 weight explanation.

    Verifies the text and HTML renderings and, when the DataFrame formatters
    can be imported from eli5, the exported targets and transition tables.
    """
    crf = CRF(c1=0.0, c2=0.1, max_iterations=50)
    crf.fit([xseq], [yseq])

    expl = explain_weights(crf)
    text, html = format_as_all(expl, crf)

    expected_in_text = (
        "y='sunny' top features",
        "y='rainy' top features",
        "Transition features",
        "sunny   -0.130    0.696",
        u'+0.124  солнце:не светит',
    )
    for fragment in expected_in_text:
        assert fragment in text

    assert u'солнце:не светит' in html
    compact_html = html.replace(' ', '').replace("\n", '')
    assert '<th>rainy</th><th>sunny</th>' in compact_html

    try:
        from eli5 import format_as_dataframe, format_as_dataframes
    except ImportError:
        # DataFrame formatters are unavailable; nothing more to check.
        return

    from .test_formatters_as_dataframe import check_targets_dataframe
    frames = format_as_dataframes(expl)
    check_targets_dataframe(frames['targets'], expl)
    df_transition = frames['transition_features']
    transition = expl.transition_features
    print(df_transition)
    assert list(transition.class_names) == ['rainy', 'sunny']
    # (column, row) cell of the exported table vs. (i, j) entry of the matrix.
    checked_cells = (
        ('rainy', 'rainy', (0, 0)),
        ('sunny', 'rainy', (0, 1)),
        ('rainy', 'sunny', (1, 0)),
    )
    for column, row, (i, j) in checked_cells:
        assert np.isclose(df_transition[column][row], transition.coef[i, j])
Ejemplo n.º 6
0
    def weight_targets(self):
        """Return the state-feature weights of ``self.crf`` as tuples.

        Each tuple is ``(target, feature, weight)``, taken row by row from
        the ``'targets'`` table that eli5 produces for the CRF.
        """
        # The enormous ``top`` is presumably meant to lift eli5's default
        # limit on the number of reported features -- confirm.
        explanation = eli5.explain_weights_sklearn_crfsuite(
            self.crf, top=2**10000)
        tables = eli5.format_as_dataframes(explanation)

        rows = zip(tables['targets']['target'],
                   tables['targets']['feature'],
                   tables['targets']['weight'])
        return [row for row in rows]
Ejemplo n.º 7
0
def test_targets(with_std, with_value):
    """Check DataFrame export of an explanation with per-target weights.

    ``with_std`` / ``with_value`` switch on the ``std`` / ``value`` fields of
    the first target's feature weights, and the matching expected columns.
    """
    def _weight(name, weight, std, value):
        # std / value are only attached when the corresponding flag is set.
        return FeatureWeight(name,
                             weight,
                             std=std if with_std else None,
                             value=value if with_value else None)

    first_target = TargetExplanation(
        'y',
        feature_weights=FeatureWeights(
            pos=[
                _weight('a', 13, 0.13, 2),
                _weight('b', 5, 0.5, 1),
            ],
            neg=[
                _weight('neg1', -10, 0.2, 5),
                _weight('neg2', -1, 0.3, 4),
            ],
        ))
    second_target = TargetExplanation(
        'y2',
        feature_weights=FeatureWeights(
            pos=[FeatureWeight('f', 1)],
            neg=[],
        ))
    expl = Explanation(
        estimator='some estimator',
        targets=[first_target, second_target],
    )

    frames = format_as_dataframes(expl)
    assert isinstance(frames, dict)
    assert list(frames) == ['targets']
    actual = frames['targets']

    column_order = ['target', 'feature', 'weight']
    expected = pd.DataFrame(
        {
            'target': ['y', 'y', 'y', 'y', 'y2'],
            'feature': ['a', 'b', 'neg2', 'neg1', 'f'],
            'weight': [13, 5, -1, -10, 1],
        },
        columns=column_order)
    if with_std:
        expected['std'] = [0.13, 0.5, 0.3, 0.2, None]
    if with_value:
        expected['value'] = [2, 1, 4, 5, None]
    print(actual, expected, sep='\n')
    assert expected.equals(actual)

    # The single-frame helper must yield the very same table.
    assert expected.equals(format_as_dataframe(expl))
Ejemplo n.º 8
0
def test_transition_features():
    """Check export of targets plus transition features to DataFrames.

    The transition table is expected in long form with ``from`` / ``to`` /
    ``coef`` columns, and ``format_as_dataframe`` is expected to emit a
    ``UserWarning`` while returning that same table.
    """
    def _target(name):
        # Both targets share the same single positive feature weight.
        return TargetExplanation(
            name,
            feature_weights=FeatureWeights(
                pos=[FeatureWeight('pos', 13, value=1)],
                neg=[],
            ))

    transitions = TransitionFeatureWeights(
        class_names=['class2', 'class1'],  # reverse on purpose
        coef=np.array([[1.5, 2.5], [3.5, 4.5]]),
    )
    expl = Explanation(
        estimator='some estimator',
        targets=[_target('class1'), _target('class2')],
        transition_features=transitions,
    )

    frames = format_as_dataframes(expl)
    assert isinstance(frames, dict)
    assert set(frames) == {'targets', 'transition_features'}
    assert frames['targets'].equals(format_as_dataframe(expl.targets))

    transition_df = frames['transition_features']
    print(transition_df)
    print(format_as_text(expl))

    columns = ['from', 'to', 'coef']
    rows = [
        ('class2', 'class2', 1.5),
        ('class2', 'class1', 2.5),
        ('class1', 'class2', 3.5),
        ('class1', 'class1', 4.5),
    ]
    expected = pd.DataFrame([dict(zip(columns, row)) for row in rows],
                            columns=columns)
    assert transition_df.equals(expected)

    with pytest.warns(UserWarning):
        combined = format_as_dataframe(expl)
    assert combined.equals(transition_df)
Ejemplo n.º 9
0
 def transition_features_to_csv(self):
     """Write the transition-feature table of ``self.crf`` to a CSV file.

     The file is ``server/nlp/data/data_transition_features.csv``, resolved
     to an absolute path by ``os.path.abspath``. Returns the string ``'ok'``.
     """
     explanation = eli5.explain_weights_sklearn_crfsuite(self.crf)
     tables = eli5.format_as_dataframes(explanation)
     transition_table = tables['transition_features']
     target_path = os.path.abspath(
         'server/nlp/data/data_transition_features.csv')
     transition_table.to_csv(target_path)
     return 'ok'
Ejemplo n.º 10
0
 def state_features_to_csv(self):
     """Write the per-target feature table of ``self.crf`` to a CSV file.

     The file is ``server/nlp/data/data_state_features.csv``, resolved to an
     absolute path by ``os.path.abspath``. Returns the string ``'ok'``.
     """
     # The enormous ``top`` is presumably meant to lift eli5's default limit
     # on the number of reported features -- confirm.
     explanation = eli5.explain_weights_sklearn_crfsuite(
         self.crf, top=2**10000)
     tables = eli5.format_as_dataframes(explanation)
     state_table = tables['targets']
     target_path = os.path.abspath(
         'server/nlp/data/data_state_features.csv')
     state_table.to_csv(target_path)
     return 'ok'