def randFor(rData, lData):
    """Train a TF-IDF + random-forest classifier and return its test accuracy.

    The inputs are split with ``train_test_split`` (``random_state=1``), a
    ``TfidfVectorizer`` is fitted on the training part, and a 100-tree
    ``RandomForestClassifier`` is fitted on the resulting vectors. The eli5
    weight explanation (as dataframes) and the first test response are
    printed for inspection.

    Args:
        rData: Documents fed to the TF-IDF vectorizer.
        lData: Labels aligned with ``rData``.

    Returns:
        The value of ``metrics.accuracy_score`` for the predictions on the
        held-out test split.
    """
    randClass = RandomForestClassifier(n_estimators=100)
    respTrain, respTest, labTrain, labTest = train_test_split(
        rData, lData, random_state=1)

    vect = TfidfVectorizer(min_df=1, max_df=1.0, stop_words='english')
    # Original note: to be commented out for pickle building of the vectorizer.
    respTrainVec = vect.fit_transform(respTrain)
    respTestVec = vect.transform(respTest)

    # Original note: to be commented out for pickle building of the model.
    randClass.fit(respTrainVec, labTrain)
    labPredClass = randClass.predict(respTestVec)

    # format_as_dataframes() needs an Explanation object. show_weights()
    # returns rendered HTML for notebooks, so explain_weights() is used;
    # passing the vectorizer lets eli5 report real feature names.
    weights_expl = eli5.explain_weights(randClass, vec=vect)
    print(eli5.format_as_dataframes(weights_expl))

    # Positional access: when the split yields a pandas object, ``[0]`` would
    # be a label lookup and can raise KeyError on the shuffled index.
    if hasattr(respTest, 'iloc'):
        first_response = respTest.iloc[0]
    else:
        first_response = respTest[0]
    print(first_response)

    # TODO: modify to return specific class types.
    return metrics.accuracy_score(labTest, labPredClass)
def test_feature_importances(with_std, with_value):
    """Check the dataframe export of a feature-importances explanation.

    Builds an explanation with two importances ('a' and 'b'); ``std`` and
    ``value`` are filled in only when the matching flag is truthy. The
    exported frame must be indexed by feature name, hold a ``weight`` column
    and carry ``std`` / ``value`` columns only when requested.
    """
    specs = [('a', 1, 0.1, 1), ('b', 2, 0.2, 3)]
    importances = [
        FeatureWeight(
            name, weight,
            std=std if with_std else None,
            value=value if with_value else None)
        for name, weight, std, value in specs
    ]
    explanation = Explanation(
        estimator='some estimator',
        feature_importances=FeatureImportances(
            importances=importances,
            remaining=10,
        ))

    frames = format_as_dataframes(explanation)
    assert isinstance(frames, dict)
    assert list(frames) == ['feature_importances']
    actual = frames['feature_importances']

    expected = pd.DataFrame({'weight': [1, 2]}, index=['a', 'b'])
    if with_std:
        expected['std'] = [0.1, 0.2]
    if with_value:
        expected['value'] = [1, 3]

    print(actual, expected, sep='\n')
    assert expected.equals(actual)

    # The single-frame helper must agree with the dict-based export.
    assert expected.equals(format_as_dataframe(explanation))
def test_transition_features():
    """Check the dataframe export of an explanation with transition features.

    The explanation has two targets plus a 2x2 transition matrix whose class
    names are deliberately listed in reverse order. The export must contain
    both a 'targets' and a 'transition_features' frame; the latter is
    expected to be a from/to matrix labelled with the class names in the
    given order. Converting the whole explanation to a single frame must emit
    a ``UserWarning`` and yield the transition-features frame.
    """
    expl = Explanation(
        estimator='some estimator',
        targets=[
            TargetExplanation('class1', feature_weights=FeatureWeights(
                pos=[FeatureWeight('pos', 13, value=1)],
                neg=[],
            )),
            TargetExplanation('class2', feature_weights=FeatureWeights(
                pos=[FeatureWeight('pos', 13, value=1)],
                neg=[],
            )),
        ],
        transition_features=TransitionFeatureWeights(
            class_names=['class2', 'class1'],  # reverse on purpose
            coef=np.array([[1.5, 2.5], [3.5, 4.5]]),
        ))
    df_dict = format_as_dataframes(expl)
    assert isinstance(df_dict, dict)
    assert set(df_dict) == {'targets', 'transition_features'}
    assert df_dict['targets'].equals(format_as_dataframe(expl.targets))

    df = df_dict['transition_features']
    print(df)
    print(format_as_text(expl))

    # Check the frame structurally instead of comparing str(df) with a
    # literal: the rendered text depends on pandas' column alignment and
    # display options, which makes an exact string comparison brittle.
    assert df.index.name == 'from'
    assert df.columns.name == 'to'
    assert list(df.index) == ['class2', 'class1']
    assert list(df.columns) == ['class2', 'class1']
    assert np.array_equal(df.values, np.array([[1.5, 2.5], [3.5, 4.5]]))

    with pytest.warns(UserWarning):
        single_df = format_as_dataframe(expl)
    assert single_df.equals(df)
def transition_features(self):
    """Return the CRF transition weights as ``(from, coef, to)`` tuples.

    The weights of ``self.crf`` are explained with eli5, exported to
    dataframes, and the 'from', 'coef' and 'to' columns of the
    'transition_features' frame are zipped row by row into a list.
    """
    explanation = eli5.explain_weights_sklearn_crfsuite(self.crf)
    transitions = eli5.format_as_dataframes(explanation)['transition_features']
    rows = zip(transitions['from'], transitions['coef'], transitions['to'])
    return list(rows)
def test_sklearn_crfsuite(xseq, yseq):
    """Check text, HTML and dataframe explanations of a fitted CRF model.

    A CRF is fitted on the single sequence ``xseq`` / ``yseq``, its weights
    are explained, and the text and HTML renderings are checked for the
    expected headers, transition table and a non-ASCII feature name. When the
    dataframe formatters can be imported, the exported frames are checked
    too, including every entry of the transition matrix.
    """
    crf = CRF(c1=0.0, c2=0.1, max_iterations=50)
    crf.fit([xseq], [yseq])

    expl = explain_weights(crf)
    text, html = format_as_all(expl, crf)

    assert "y='sunny' top features" in text
    assert "y='rainy' top features" in text
    assert "Transition features" in text
    assert "sunny -0.130 0.696" in text
    assert u'+0.124 солнце:не светит' in text

    html_nospaces = html.replace(' ', '').replace("\n", '')
    assert u'солнце:не светит' in html
    assert '<th>rainy</th><th>sunny</th>' in html_nospaces

    try:
        from eli5 import format_as_dataframe, format_as_dataframes
    except ImportError:
        # Deliberate best-effort: the dataframe checks are skipped when the
        # formatters are unavailable (presumably pandas is not installed).
        pass
    else:
        from .test_formatters_as_dataframe import check_targets_dataframe
        df_dict = format_as_dataframes(expl)
        check_targets_dataframe(df_dict['targets'], expl)

        df_transition = df_dict['transition_features']
        transition = expl.transition_features
        print(df_transition)

        class_names = list(transition.class_names)
        assert class_names == ['rainy', 'sunny']
        # Rows are the "from" class and columns the "to" class; verify all
        # entries, including sunny -> sunny, against the coefficient matrix.
        for from_idx, from_name in enumerate(class_names):
            for to_idx, to_name in enumerate(class_names):
                assert np.isclose(
                    df_transition.loc[from_name, to_name],
                    transition.coef[from_idx, to_idx])
def weight_targets(self):
    """Return the CRF state-feature weights as ``(target, feature, weight)``.

    The weights of ``self.crf`` are explained with eli5 without limiting the
    number of features, exported to dataframes, and the 'target', 'feature'
    and 'weight' columns of the 'targets' frame are zipped row by row.

    Returns:
        A list of ``(target, feature, weight)`` tuples.
    """
    # top=None is eli5's "no limit" value; it replaces the previous
    # 2**10000 placeholder that was used to request every feature.
    explanation = eli5.explain_weights_sklearn_crfsuite(self.crf, top=None)
    targets = eli5.format_as_dataframes(explanation)['targets']
    return list(zip(targets['target'], targets['feature'], targets['weight']))
def test_targets(with_std, with_value):
    """Check the dataframe export of per-target feature weights.

    Target 'y' has two positive and two negative features whose ``std`` and
    ``value`` are set only when the matching flag is truthy; target 'y2' has
    a single positive feature without either. The export must be one
    'targets' frame with target/feature/weight columns (plus std/value when
    requested), with the rows of 'y' in descending weight order.
    """
    def make_weight(name, weight, std, value):
        # std / value are dropped (None) unless the fixture flag enables them.
        return FeatureWeight(
            name, weight,
            std=std if with_std else None,
            value=value if with_value else None)

    target_y = TargetExplanation(
        'y',
        feature_weights=FeatureWeights(
            pos=[
                make_weight('a', 13, 0.13, 2),
                make_weight('b', 5, 0.5, 1),
            ],
            neg=[
                make_weight('neg1', -10, 0.2, 5),
                make_weight('neg2', -1, 0.3, 4),
            ],
        ))
    target_y2 = TargetExplanation(
        'y2',
        feature_weights=FeatureWeights(
            pos=[FeatureWeight('f', 1)],
            neg=[],
        ))
    explanation = Explanation(
        estimator='some estimator',
        targets=[target_y, target_y2],
    )

    frames = format_as_dataframes(explanation)
    assert isinstance(frames, dict)
    assert list(frames) == ['targets']
    actual = frames['targets']

    expected = pd.DataFrame(
        {
            'target': ['y', 'y', 'y', 'y', 'y2'],
            'feature': ['a', 'b', 'neg2', 'neg1', 'f'],
            'weight': [13, 5, -1, -10, 1],
        },
        columns=['target', 'feature', 'weight'])
    if with_std:
        expected['std'] = [0.13, 0.5, 0.3, 0.2, None]
    if with_value:
        expected['value'] = [2, 1, 4, 5, None]

    print(actual, expected, sep='\n')
    assert expected.equals(actual)

    # The single-frame helper must agree with the dict-based export.
    assert expected.equals(format_as_dataframe(explanation))
def test_transition_features():
    """Check the long-format dataframe export of transition features.

    The explanation has two targets plus a 2x2 transition matrix whose class
    names are deliberately listed in reverse order. The exported
    'transition_features' frame must have one row per (from, to) pair with
    the matching coefficient. Converting the whole explanation to a single
    frame must emit a ``UserWarning`` and yield that same frame.
    """
    def positive_only_target(name):
        return TargetExplanation(name, feature_weights=FeatureWeights(
            pos=[FeatureWeight('pos', 13, value=1)],
            neg=[],
        ))

    explanation = Explanation(
        estimator='some estimator',
        targets=[
            positive_only_target('class1'),
            positive_only_target('class2'),
        ],
        transition_features=TransitionFeatureWeights(
            class_names=['class2', 'class1'],  # reverse on purpose
            coef=np.array([[1.5, 2.5], [3.5, 4.5]]),
        ))

    frames = format_as_dataframes(explanation)
    assert isinstance(frames, dict)
    assert set(frames) == {'targets', 'transition_features'}
    assert frames['targets'].equals(format_as_dataframe(explanation.targets))

    transitions = frames['transition_features']
    print(transitions)
    print(format_as_text(explanation))

    expected_rows = [
        ('class2', 'class2', 1.5),
        ('class2', 'class1', 2.5),
        ('class1', 'class2', 3.5),
        ('class1', 'class1', 4.5),
    ]
    expected = pd.DataFrame(
        [{'from': src, 'to': dst, 'coef': coef}
         for src, dst, coef in expected_rows],
        columns=['from', 'to', 'coef'])
    assert transitions.equals(expected)

    with pytest.warns(UserWarning):
        single_frame = format_as_dataframe(explanation)
    assert single_frame.equals(transitions)
def transition_features_to_csv(self):
    """Write the CRF transition-feature weights to a CSV file.

    The weights of ``self.crf`` are explained with eli5, exported to
    dataframes, and the 'transition_features' frame is saved to
    'server/nlp/data/data_transition_features.csv', resolved to an absolute
    path with ``os.path.abspath``.

    Returns:
        The string 'ok'.
    """
    explanation = eli5.explain_weights_sklearn_crfsuite(self.crf)
    frames = eli5.format_as_dataframes(explanation)
    csv_path = os.path.abspath('server/nlp/data/data_transition_features.csv')
    frames['transition_features'].to_csv(csv_path)
    return 'ok'
def state_features_to_csv(self):
    """Write the CRF state-feature weights to a CSV file.

    The weights of ``self.crf`` are explained with eli5 without limiting the
    number of features, exported to dataframes, and the 'targets' frame is
    saved to 'server/nlp/data/data_state_features.csv', resolved to an
    absolute path with ``os.path.abspath``.

    Returns:
        The string 'ok'.
    """
    # top=None is eli5's "no limit" value; it replaces the previous
    # 2**10000 placeholder that was used to request every feature.
    explanation = eli5.explain_weights_sklearn_crfsuite(self.crf, top=None)
    frames = eli5.format_as_dataframes(explanation)
    csv_path = os.path.abspath('server/nlp/data/data_state_features.csv')
    frames['targets'].to_csv(csv_path)
    return 'ok'