def test_lime_text_tabular_not_equal_random_state(self):
    """Explainers seeded with different random states must not agree."""
    categories = ['alt.atheism', 'soc.religion.christian']
    newsgroups_train = fetch_20newsgroups(subset='train',
                                          categories=categories)
    newsgroups_test = fetch_20newsgroups(subset='test',
                                         categories=categories)
    class_names = ['atheism', 'christian']
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    # explain the same document under two seeds and compare the maps
    maps = []
    for seed in (10, 20):
        seeded = LimeTextExplainer(class_names=class_names, random_state=seed)
        explanation = seeded.explain_instance(newsgroups_test.data[83],
                                              pipeline.predict_proba,
                                              num_features=6)
        maps.append(explanation.as_map())
    self.assertFalse(maps[0] == maps[1])
def test_lime_text_tabular_not_equal_random_state(self):
    """Different random_state values should yield different explanations."""
    categories = ['alt.atheism', 'soc.religion.christian']
    newsgroups_train = fetch_20newsgroups(subset='train',
                                          categories=categories)
    newsgroups_test = fetch_20newsgroups(subset='test',
                                         categories=categories)
    class_names = ['atheism', 'christian']
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    document = newsgroups_test.data[83]
    exp_a = LimeTextExplainer(class_names=class_names,
                              random_state=10).explain_instance(
                                  document,
                                  pipeline.predict_proba,
                                  num_features=6)
    exp_b = LimeTextExplainer(class_names=class_names,
                              random_state=20).explain_instance(
                                  document,
                                  pipeline.predict_proba,
                                  num_features=6)
    self.assertFalse(exp_a.as_map() == exp_b.as_map())
def explainer(method: str, path_to_file: str, text: str, lowercase: bool,
              num_samples: int):
    """Run the LIME explainer on the classifier selected by *method*.

    Fix: the original return annotation declared ``-> LimeTextExplainer``,
    but the function actually returns the explanation object produced by
    ``explain_instance`` — the wrong annotation has been removed.

    Args:
        method: classifier identifier forwarded to ``explainer_class``.
        path_to_file: path to the serialized classifier.
        text: document to explain.
        lowercase: lower-case the text first (needed by some classifiers).
        num_samples: size of the LIME perturbation neighborhood.

    Returns:
        The LIME ``Explanation`` for the single top label.
    """
    model = explainer_class(method, path_to_file)
    predictor = model.predict
    # Lower case the input text if requested (for certain classifiers)
    if lowercase:
        text = text.lower()
    explainer = LimeTextExplainer(
        # Split on whitespace only
        split_expression=lambda x: x.split(),
        # Our classifier uses trigrams or contextual ordering to classify
        # text. Hence, order matters, and we cannot use bag of words.
        bow=False,
        # Specify class names for this case
        class_names=[1, 2, 3, 4, 5])
    # Make a prediction and explain it:
    exp = explainer.explain_instance(
        text,
        classifier_fn=predictor,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
    return exp
def explain(self, docs):
    """Generate LIME Explanations for a list of documents.

    Parameters
    ----------
    docs : list of strings
        List of input documents.

    Returns
    -------
    exps : list of classes
        One Explanation object per input document (supports .to_list,
        to_notebook, etc.).
    """
    lime = LimeTextExplainer()
    # NOTE: this might have messed up in a generator
    return [
        lime.explain_instance(doc,
                              self.pipeline.predict_proba,
                              top_labels=self.n_classes) for doc in docs
    ]
def test_lime_text_explainer_bad_regressor(self):
    """Passing Lasso as model_regressor should raise TypeError."""
    newsgroups_train = fetch_20newsgroups(subset='train')
    newsgroups_test = fetch_20newsgroups(subset='test')
    # shorten target names, keeping two components for the 'misc' groups
    class_names = []
    for name in newsgroups_train.target_names:
        if 'misc' not in name:
            class_names.append(name.split('.')[-1])
        else:
            class_names.append('.'.join(name.split('.')[-2:]))
    class_names[3] = 'pc.hardware'
    class_names[4] = 'mac.hardware'
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    explainer = LimeTextExplainer(class_names=class_names)
    idx = 1340
    with self.assertRaises(TypeError):
        explainer.explain_instance(newsgroups_test.data[idx],
                                   pipeline.predict_proba,
                                   num_features=6,
                                   labels=[0, 17],
                                   model_regressor=Lasso())
class LSTMExplainer:
    """Wraps a saved LSTM text classifier with a LIME explainer.

    Loads the serialized torchtext field and model weights from
    ``app/models/binaries`` and exposes :meth:`get_lime_exp`, which renders
    a LIME explanation as HTML.
    """

    def __init__(self, tokenizer, num_features):
        # tokenizer: callable str -> list of tokens, used to pre-tokenize
        # input text; num_features: how many words LIME keeps per report.
        super().__init__()
        # saved torchtext Field: supplies vocabulary, vectors and pad token
        self.TEXT = torch.load(os.getcwd() + '/app/models/binaries' +
                               '/text_field.ptz')
        # hyper-parameters must match the values used when the model was
        # trained, otherwise load_state_dict below will fail
        embedding_size = 300
        lstm_hidden = 200
        fc_hidden = [100, 50]
        self.model = NNet(self.TEXT.vocab.vectors, embedding_size, 2,
                          self.TEXT.vocab.stoi[self.TEXT.pad_token],
                          lstm_hidden, fc_hidden).to(device)
        self.model.load_state_dict(
            torch.load(os.getcwd() + '/app/models/binaries' + '/rnn_model.pt',
                       map_location=device))  # TODO change to Path
        self.predict = Predict(self.model, self.TEXT)
        self.explainer = LimeTextExplainer(
            class_names=['Negative', 'Positive'])
        self.tokenizer = tokenizer
        self.num_features = num_features

    def get_lime_exp(self, text):
        """Return a LIME explanation for *text* as an HTML fragment.

        The text is re-joined from the tokenizer output so LIME's default
        whitespace splitting lines up with the model's tokenization.
        """
        text = ' '.join(self.tokenizer(text))
        exp = self.explainer.explain_instance(text,
                                              self.predict,
                                              num_features=self.num_features,
                                              top_labels=2,
                                              num_samples=500)
        # only the 'Positive' label (index 1) is rendered
        return exp.as_html(text=True, labels=(1, ))
def classify_lime(model, dataset, train_dataset, config_dict):
    """Attach per-word LIME weights to every example in *dataset*.

    For each example, explains label 1 with LIME and stores the weights
    (sorted by word position) on ``example.predictions``.
    Returns the mutated dataset.
    """
    # choose the replacement token used when LIME masks a word
    if config_dict.get("lime_mask_string_use_pad", False):
        mask_token = tokenizer.pad_token
    else:
        mask_token = tokenizer.mask_token
    explainer = LimeTextExplainer(
        class_names=(0, 1),
        # False causes masking to be done, True means removing words
        bow=False,
        mask_string=mask_token,
        feature_selection="none",  # use all features
        split_expression=r"\s",
    )
    classify_sentence_partial = partial(
        batch_predict,
        model=model,
        dataset=train_dataset,
        batch_size=config_dict["per_device_eval_batch_size"],
        method="lime",
    )
    for idx in range(len(dataset)):
        if idx % 50 == 0:
            logger.info("lime_sample_idx:" + str(idx) + "/" +
                        str(len(dataset)))
        exp = explainer.explain_instance(
            " ".join(dataset.examples[idx].words),
            classify_sentence_partial,
            labels=(1, ),
            num_samples=config_dict["lime_num_samples"],
        )
        pairs = exp.as_map()[1]
        pairs.sort(key=lambda pair: pair[0])  # order by word position
        dataset.examples[idx].predictions = [weight for _, weight in pairs]
    return dataset
def final_yok_classifing(sentence):
    """Explain the profanity classification of the first sentence with LIME."""
    labels = ['욕설이 아님', '욕설']
    lime = LimeTextExplainer(class_names=labels)
    explanation = lime.explain_instance(sentence[0],
                                        yok_classifier_lime,
                                        num_features=100)
    return explanation.as_list()
def go():
    """Classify the user's query and render go.html with a LIME report."""
    # save user input in query
    query = request.args.get('query', '')
    print(
        "generating classification prediction for message {}...".format(query))
    # use model to predict classification for query
    classification_labels = model.predict([query])[0]
    classification_results = dict(zip(df.columns[4:], classification_labels))
    # set-up Lime over the label columns
    classes = df.columns[4:].to_list()
    print("classes = {}".format(classes))
    explanation = LimeTextExplainer(class_names=classes).explain_instance(
        query, model.predict_proba, num_features=10, top_labels=3)
    # This will render the go.html Please see that file.
    return render_template('go.html',
                           query=query,
                           exp=explanation.as_html(),
                           model=model[-1],
                           classification_result=classification_results)
def explain(self, docs):
    """Generate LIME Explanations for a list of documents.

    Parameters
    ----------
    docs : list of strings
        List of input documents.

    Returns
    -------
    exps : list of classes
        One Explanation object per input document (supports .to_list,
        to_notebook, etc.).
    """
    text_explainer = LimeTextExplainer()
    explanations = []
    # NOTE: this might have messed up in a generator
    for document in docs:
        explanations.append(
            text_explainer.explain_instance(document,
                                            self.pipeline.predict_proba,
                                            top_labels=self.n_classes))
    return explanations
def classifier(request, format=None):
    """Classify a text description and return LIME-backed probabilities."""
    clf = Pickle.objects.get(name='clf').pickled_model
    vect = Pickle.objects.get(name='tfidf').pickled_model
    input_text = request.data.get('description', 'ERROR')
    if not input_text:
        return Response({'error': 'Input is an empty string'},
                        status=status.HTTP_404_NOT_FOUND)
    standardized_text = standardize_text(input_text)
    exp = LimeTextExplainer(class_names=GRADE_CATEGORIES).explain_instance(
        standardized_text,
        make_pipeline(vect, clf).predict_proba,
        num_features=6,
        labels=[0, 1, 2, 3])
    predict_probas = dict(zip(exp.class_names, exp.predict_proba))
    # class with the highest predicted probability
    prediction = max(predict_probas.items(), key=itemgetter(1))[0]
    payload = {
        'final_prediction': prediction,
        'ordered_class_names': exp.class_names,
        'predict_probas': predict_probas,
        'as_list': {
            exp.class_names[lbl]: exp.as_list(label=lbl)
            for lbl in exp.available_labels()
        },
        'standardized_text': standardized_text,
    }
    return Response(payload, status=status.HTTP_201_CREATED)
def limevisual(pData, pDesc, Idx, pClassNames, pAccountName, pVec,
               nNumFeatures, nTopLabels, tLabels, pRootDir):
    """Visualize a LIME explanation for one row of the training data.

    Loads the model for the row's intent, explains the row's text column
    with LIME, shows the result in the notebook and saves it as HTML.

    Args:
        pData: DataFrame with an 'Intent' column and the text column
            named by ``pDesc``.
        pDesc: name of the text column to explain.
        Idx: row index into ``pData`` (coerced with ``int``).
        pClassNames: class names forwarded to ``LimeTextExplainer``.
        pAccountName: account identifier used to locate the saved model.
        pVec: fitted vectorizer, combined with the model into a pipeline.
        nNumFeatures: number of features in the explanation.
        nTopLabels: number of top labels to explain.
        tLabels: labels passed to ``show_in_notebook``.
        pRootDir: root directory handed to ``loadmodel``.

    Returns:
        0 on success, -1 on any exception (details are printed).
    """
    try:
        pIntent = pData['Intent'][int(Idx)]
        _, pModels = loadmodel(pRootDir, pAccountName, pIntent)
        pPipeModel = make_pipeline(pVec, pModels)
        # word tokenizer mirroring sklearn's default token pattern
        tokenizer = lambda doc: re.compile(r"(?u)\b\w\w+\b").findall(doc)
        pExplainer = LimeTextExplainer(class_names=pClassNames,
                                       split_expression=tokenizer)
        pExplainText = pExplainer.explain_instance(
            pData[pDesc][int(Idx)],
            classifier_fn=pPipeModel.predict_proba,
            num_features=int(nNumFeatures),
            top_labels=int(nTopLabels))
        pExplainText.show_in_notebook(text=pData[pDesc][int(Idx)],
                                      labels=tLabels)
        # NOTE(review): hard-coded absolute Windows path — should be
        # parameterized before this runs anywhere else.
        pExplainText.save_to_file(
            'C:\\Users\\tamohant\\Desktop\\Auto_synthesis_Training_data\\AutoSynthesisLite\\demo.html',
            labels=None,
            predict_proba=True,
            show_predicted_value=True)
    except Exception as e:
        print(
            '*** ERROR[001]: Error in visualiation file of Limevisual function: ',
            sys.exc_info()[0], str(e))
        print(traceback.format_exc())
        return (-1)
    return (0)
def get_lime(model, test_tokens, model_name):
    """Run LIME over each test document and collect per-word weights.

    Fixes: the split expression used a non-raw string literal (``'\\s+'``),
    which relies on Python leaving the unknown ``\\s`` escape intact and
    emits a DeprecationWarning on modern interpreters; the per-document
    ``' '.join(features)`` was also recomputed once per word instead of
    once per document.

    Args:
        model: fitted classifier passed through ``wrapper_clf_predict``.
        test_tokens: iterable of whitespace-tokenized document strings.
        model_name: identifier forwarded to ``wrapper_clf_predict``.

    Returns:
        (features_l, scores_l): per-document space-joined feature strings
        and the matching LIME weights (abs value to be taken downstream).
    """
    explainer = LimeTextExplainer(class_names=["genuine", "deceptive"],
                                  split_expression=r'\s+')
    W = []
    for idx, text in enumerate(test_tokens):
        tmp_d = {}
        for token in text.split():
            tmp_d[token] = 1
        exp = explainer.explain_instance(text,
                                         partial(wrapper_clf_predict,
                                                 model=model,
                                                 model_name=model_name),
                                         num_features=len(text.split()),
                                         num_samples=1000)
        # warn when LIME merged duplicate tokens (counts won't line up)
        if len(tmp_d) != len(exp.as_list()):
            print(idx, len(tmp_d), len(dict(exp.as_list())))
        W.append(dict(exp.as_list()))
        if (idx + 1) % 10 == 0:
            print('{} instances have been processed..'.format(idx + 1))
    features_l, scores_l = [], []
    for d in W:
        features, scores = [], []
        for key, score in d.items():
            features.append(key)
            scores.append(score)  # abs value should be taken subsequently
        # join once per document (previously re-joined for every word)
        features_l.append(' '.join(features))
        scores_l.append(scores)
    return features_l, scores_l
def run(self, input_text, print_results=True):
    """Classify *input_text* and explain the top intents with LIME.

    Args:
        input_text: raw text to classify.
        print_results: when True, print the intent/score table and the
            explanation for the first class.

    Returns:
        The LIME explanation object.
    """
    # classify_text returns class names followed by scores in one flat list
    output = self.classify_text(input_text, True)
    outputlen = len(output)
    # NOTE(review): these slices drop the final element of each half
    # (the '- 1' bounds) — confirm that is intentional and not off-by-one.
    classes = output[0:int((outputlen / 2) - 1)]
    scores = output[int((outputlen / 2)):outputlen - 1]
    if print_results:
        print('Intents: ' + '\t'.join(map(str, classes)))
        print('Scores: ' + '\t'.join(map(str, scores)))
    # explain class
    explainer = LimeTextExplainer(class_names=classes)
    exp = explainer.explain_instance(input_text,
                                     self.classify_text,
                                     num_features=7,
                                     top_labels=3,
                                     num_samples=1000)
    # print explanation for the first class only
    if print_results:
        print("")
        print('Explanation for class %s' % classes[0])
        print('\n'.join(map(str, exp.as_list(label=0))))
    return exp
def explain_one_example(self, idx=None, num_features=5, print_out=True):
    """Explain the prediction for one validation datapoint with LIME.

    # Arguments:
        idx: int, index into the validation data; a random index is drawn
            when omitted (default=None)
        num_features: int, number of explanatory features (default=5)
        print_out: boolean, print the tweet, its probabilities and a
            pyplot figure (default=True)

    # Returns:
        exp: lime.explanation.Explanation object
    """
    if idx is None:
        idx = np.random.choice(self.for_explanation.index)
    tweet = self.for_explanation[idx]
    lime = LimeTextExplainer(class_names=self.class_names)
    explanation = lime.explain_instance(tweet,
                                        self.predict_proba,
                                        num_features=num_features)
    if print_out:
        print('Tweet {}: {}'.format(idx, tweet))
        print(self.predict_proba([tweet]))
        print(explanation.as_pyplot_figure())
        plt.show()
    return explanation
class ExplainerText(object):
    """LIME explainer wrapped around a bag-of-words baseline classifier.

    Example::

        X = df.review.astype(str).map(lambda x: ' '.join(jieba.cut(x)))
        y = df.label
        enlp = ExplainNLP()
        enlp.fit(X, y)
        enlp.explain(X[0])
    """

    # NOTE(review): the default estimator instance is created once at class
    # definition time and shared across all ExplainerText() calls that rely
    # on the default — confirm that sharing is intended.
    def __init__(self, estimator=LogisticRegression(), class_names=None):
        self._baseline = BaselineBow(estimator)()
        self._explainer = LimeTextExplainer(verbose=True,
                                            class_names=class_names)

    def fit(self, X, y):
        """Fit the underlying baseline model and return it."""
        self._baseline.fit(X, y)
        return self._baseline

    def explain(self, sentence, num_features=6):
        """Explain one sentence ('分词 空格 拼接' — tokens joined by spaces).

        :param sentence: space-joined pre-tokenized text
        :param num_features: number of features in the explanation
        :return: the LIME explanation object
        """
        explanation = self._explainer.explain_instance(
            sentence, self._baseline.predict_proba, num_features=num_features)
        # only render inline text for short sentences
        explanation.show_in_notebook(text=1 if len(sentence) < 256 else 0)
        return explanation
def finalExplain_n(codes):
    """Generate comments for code snippets and LIME-explain each keyword.

    Fix: ``tmpExp['lime']`` previously stored the bound method
    ``explanation.as_list`` itself instead of its result; it now stores the
    actual (word, weight) list.

    Args:
        codes: iterable of code snippets to translate and explain.

    Returns:
        List of dicts with the code, its generated comment, the comment's
        RAKE keywords, and one LIME explanation per keyword.
    """
    resData = []
    r = Rake()
    classNames = ['negative', 'positive']
    exp = LimeTextExplainer(class_names=classNames)
    for j, code in enumerate(codes):
        tmpResult = {}
        c = translate(code)
        # rebuild the generated comment, stopping at the end-of-sequence tag
        com = ''
        for i in range(1, len(c)):
            if c[i] == '</s>':
                break
            com += c[i] + ' '
        tmpResult['code'] = code
        tmpResult['comment'] = com
        r.extract_keywords_from_text(com)
        comKeys = r.get_ranked_phrases()
        tmpResult['commentKeywords'] = comKeys
        tmpList = []
        for _key in comKeys:
            # predictorLime reads the module-level ``key`` variable
            global key
            key = _key
            tmpExp = {
                'commentKeyword': key,
            }
            explanation = exp.explain_instance(code,
                                               predictorLime,
                                               num_features=6)
            print(explanation.as_list())
            # bug fix: call as_list() — previously the bound method object
            # itself was stored, which is not JSON-serializable data
            tmpExp['lime'] = explanation.as_list()
            tmpList.append(tmpExp)
        tmpResult['explanations'] = tmpList
        resData.append(tmpResult)
    return resData
def explainer(args, text, num_samples: int = 20):
    """Run LIME explainer on provided classifier.

    Args:
        args: configuration forwarded to ``WrapedSenti``.
        text: document to explain.
        num_samples: size of the LIME perturbation neighborhood.

    Returns:
        The LIME explanation for the single top label.
    """
    model = WrapedSenti(args)
    lime = LimeTextExplainer(
        # split on whitespace; our classifier uses bigrams / contextual
        # ordering, so order matters and bag-of-words must be disabled
        split_expression=lambda s: s.split(),
        bow=False,
        class_names=["neutral", "positive", "negative"],
    )
    return lime.explain_instance(
        text,
        classifier_fn=model.predict,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
def prediction(txt, sentiment, logistic, num_features):
    """Save a LIME report and a logistic-weight bar chart for *txt*.

    Fixes: the word-stripping regex used a non-raw string (``"[^\\w]"``
    triggers an invalid-escape DeprecationWarning); the vectorizer feature
    names and model coefficients are now fetched once instead of on every
    loop iteration.

    Args:
        txt: text to explain.
        sentiment: object exposing a fitted ``tfidf_vect`` vectorizer.
        logistic: fitted logistic-regression model.
        num_features: number of features for the LIME explanation.
    """
    # --- LIME ---
    c = make_pipeline(sentiment.tfidf_vect, logistic)
    class_names = ['NEGATIVE', 'POSITIVE']
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(txt,
                                     c.predict_proba,
                                     num_features=num_features)
    output = "static/outputs/output.html"
    exp.save_to_file(output)
    exp.as_pyplot_figure(label=1)
    plt.savefig('static/outputs/lime_explanation_graph.png')
    # --- LOGISTIC REGRESSION weights for words present in the text ---
    list_of_words = re.sub(r"[^\w]", " ", txt).split()
    words_with_weights = defaultdict()
    # hoisted: both lookups are loop-invariant
    feats = sentiment.tfidf_vect.get_feature_names()
    coefs = logistic.coef_[0]
    for word in list_of_words:
        if word in feats:
            ind = feats.index(word)
            words_with_weights[word] = coefs[ind]
    data = pd.DataFrame.from_dict(words_with_weights, orient='index')
    # green bars for positive weights, red for negative
    data[0].plot(kind='barh',
                 color=(data[0] > 0).map({
                     True: 'g',
                     False: 'r'
                 }))
    plt.savefig('static/outputs/log_explanation_graph.png')
def get_result_per_word(self, text, num_samples):
    """Map each word of *text* to its per-intent LIME relevance scores.

    Returns a dict {word: [{"intent": ..., "relevance": ...}, ...]} with
    each word's entries sorted by descending relevance; empty when there
    are no intents or LIME fails with a ValueError.
    """
    if not self.intention_names:
        return {}
    explainer = LimeTextExplainer(class_names=self.intention_names)
    labels = list(range(len(self.intention_names)))
    try:
        exp = explainer.explain_instance(text,
                                         self.parse,
                                         num_features=6,
                                         labels=labels,
                                         num_samples=num_samples)
    except ValueError:
        labels = []  # nothing to iterate below
    result_per_word = {}
    for label in labels:
        for word, weight in exp.as_list(label=label):
            result_per_word.setdefault(word, []).append({
                "intent": self.intention_names[label],
                "relevance": weight * 100
            })
    for word in result_per_word:
        result_per_word[word] = sorted(result_per_word[word],
                                       key=lambda k: k.get("relevance"),
                                       reverse=True)
    return result_per_word
def explain_prediction(sent, file_name):
    """Explain the spaCy prediction for *sent* and write the report to HTML."""
    # vect=transform_inp_sent_to_vect(sent)
    labels = get_categories(sent, file_name)
    lime = LimeTextExplainer(class_names=labels)
    explanation = lime.explain_instance(sent, spacy_prediction, labels=[0, 1])
    return explanation.save_to_file(
        r'{}explanation.html'.format(DIRECTORY_PATH))
def get_result_per_intent(self, text, num_samples):
    """Map each intent to its LIME word relevances plus a summary entry.

    Every intent gets one {"word", "relevance"} entry per explained word
    (relevance scaled to percent) and a trailing {"sum", "relevance": -1}
    entry with the unscaled weight total; lists are sorted by descending
    relevance.
    """
    explainer = LimeTextExplainer(class_names=self.intention_names)
    labels = list(range(len(self.intention_names)))
    exp = explainer.explain_instance(text,
                                     self.parse,
                                     num_features=6,
                                     labels=labels,
                                     num_samples=num_samples)
    result_per_intent = {name: [] for name in self.intention_names}
    for label in labels:
        name = self.intention_names[label]
        total = 0
        for word, weight in exp.as_list(label=label):
            result_per_intent[name].append({
                "word": word,
                "relevance": weight * 100
            })
            total += weight
        result_per_intent[name].append({"sum": total, "relevance": -1})
    for name in result_per_intent:
        result_per_intent[name] = sorted(
            result_per_intent[name],
            key=lambda k: k.get("relevance"),
            reverse=True,
        )
    return result_per_intent
def limer(example):
    """Show a LIME explanation for *example* inside the notebook."""
    # TODO: return HTML instead of the notebook widget; find an HTML API
    explanation = LimeTextExplainer().explain_instance(
        spacing_example(example),
        lambda s: do_inference(s, True).detach().numpy(),
        top_labels=1)
    explanation.show_in_notebook()
def explain_instance(headline, body):
    """Explain the stance prediction for a headline/body pair with LIME."""
    merged = combine_sentence(headline, body)
    lime = LimeTextExplainer(class_names=CLASS_NAMES)
    return lime.explain_instance(merged,
                                 classifier_fn,
                                 labels=[0, 1, 2, 3],
                                 top_labels=4,
                                 num_samples=4)
class Lime:
    """Thin wrapper around LimeTextExplainer with preset knobs.

    Attribute defaults (num_features, num_samples, top_labels, ...) can be
    adjusted on the instance before calling :meth:`explain_text`.
    """

    def __init__(self, class_names):
        self.class_names = class_names
        self.explainer = LimeTextExplainer(class_names=class_names)
        # explanation configuration
        self.num_features = 20
        self.num_samples = 20
        self.use_top_labels = True
        self.top_labels = 2
        self.investigate_labels = list(range(len(class_names)))

    def explain_text(self, text_str, predict_fn):
        """Explain the outcome from a text.

        :param text_str: text to investigate
        :param predict_fn: callable mapping a list of raw strings to class
            probabilities, e.g.::

                def predict(raw_str_arr):
                    encoded = tokenizer(raw_str_arr, truncation=True,
                                        padding=True)
                    tf_slice = tf.data.Dataset.from_tensor_slices(
                        (dict(encoded),
                         [0 for i in range(len(raw_str_arr))]))
                    return model.predict(tf_slice.batch(1))[0]

        :return: explanation object usable as::

                print(exp.as_list())
                exp.show_in_notebook()
                exp.as_pyplot_figure()
        """
        options = dict(num_features=self.num_features,
                       num_samples=self.num_samples)
        if self.use_top_labels:
            options['top_labels'] = self.top_labels
        else:
            options['labels'] = self.investigate_labels
        return self.explainer.explain_instance(text_str, predict_fn,
                                               **options)
def lime_explanation(classifier, data, features=MAX_FEATURES):
    """Explain one document using the classifier's predicted probabilities."""
    return LimeTextExplainer(class_names=CLASS_NAMES).explain_instance(
        text_instance=data,
        classifier_fn=classifier.predict_proba,
        num_features=features,
    )
def explain(self, text, nwords, return_weights=False):
    '''
    Use `LimeTextExplainer` to obtain the top `nwords` most important/polar
    words in the `text` as an explanation.

    Parameters
    --------------
    text: str
        The text to explain.
    nwords: int
        The number of most important words to return (i.e. explanation
        size).
    return_weights: bool
        Set to True to return the weights assigned by LIME also.

    Returns
    ---------------
    word_ranking : list
        Indexes of the `nwords` top-ranked words in the text.
    ranked_words: list
        List of `nwords` top-ranked words in the text.
    weights: dict, optional
        The dictionary of weights (wordposition -> weight) assigned by
        LIME to the words in the text.
    explanation: optional
        The explanation object returned by `LimeTextExplainer`.
    '''
    text = preprocess_text(text)
    text_words = get_tokens(text)
    class_names = ['negative', 'positive']
    # bow is set to False because word order is important
    explainer = LimeTextExplainer(class_names=class_names,
                                  feature_selection='auto',
                                  bow=False,
                                  split_expression=' ',
                                  verbose=False)
    explanation = explainer.explain_instance(
        text_instance=text,
        labels=[0, 1],
        classifier_fn=self.predict_texts,
        num_features=nwords,
        num_samples=self.nsamples)
    # sort weights by decreasing absolute value; as_map()[1] holds
    # (word position, weight) pairs for the positive label
    weights = OrderedDict(
        sorted(explanation.as_map()[1], key=lambda weight: -abs(weight[1])))
    # positions (keys) of the sorted weights give the word ranking
    word_ranking = np.array(list(weights.keys()))
    ranked_words = [text_words[i] for i in word_ranking]
    if return_weights:
        return word_ranking, ranked_words, weights, explanation
    return word_ranking, ranked_words
def model_load_and_explain(x_text_input):
    """Explain a CNN text prediction with LIME and save the HTML report.

    Explains ``x_text_input`` (or a built-in sample review when the literal
    string 'default' is passed), replaces the two most influential words
    with '<unk>' to show how the prediction shifts, then writes the LIME
    report under static/oi_lime and returns the generated file name.
    """
    from lime import lime_text
    print(max_document_length)
    if x_text_input == 'default':
        x_text_instance = '" extreme ops " exceeds expectations . good fun , good action , good acting , good dialogue , good pace , good cinematography .'
    else:
        x_text_instance = x_text_input
    # print(x_text_instance)
    output = cnn.predict_text_instance([x_text_instance])  #batch -> instnace
    from lime.lime_text import LimeTextExplainer
    class_names = ['Negative', 'Positive']
    explainer = LimeTextExplainer(class_names=class_names)
    # print(x_text_instance)
    # print(type(x_text_instance))
    exp = explainer.explain_instance(x_text_instance,
                                     cnn.predict_text_instance,
                                     num_features=6)
    exp.as_list()
    print("")
    print("output prob (Negative, Positive)")
    print('Original prediction:',
          cnn.predict_text_instance([x_text_instance])[0])
    print("")
    # mask the two highest-weighted words and re-predict
    x_text_removed = x_text_instance
    x_text_removed = x_text_removed.replace(exp.as_list()[0][0], '<unk>')
    x_text_removed = x_text_removed.replace(exp.as_list()[1][0], '<unk>')
    print("x_text_instance: ", x_text_instance)
    print("")
    print("x_text_removed: ", x_text_removed)
    print(exp.as_list()[0][0])
    print(exp.as_list()[1][0])
    print("")
    print('Prediction removing some features:',
          cnn.predict_text_instance([x_text_removed])[0])
    print(
        'Difference:',
        cnn.predict_text_instance([x_text_instance])[0] -
        cnn.predict_text_instance([x_text_removed])[0])
    # timestamped file name so successive runs don't overwrite each other
    timestamp = str(int(time.time()))
    static_dir = os.path.abspath(os.path.join(os.curdir, 'static'))
    oi_lime_dir = os.path.abspath(os.path.join(static_dir, 'oi_lime'))
    oi_file_path = os.path.abspath(
        os.path.join(oi_lime_dir, 'oi_' + timestamp + '.html'))
    exp.save_to_file(oi_file_path)
    return 'oi_' + timestamp + '.html'
def explain_prediction(sent, pipe, filename):
    """Explain *pipe*'s prediction for *sent* and save the report to HTML.

    Fix: the pickled label encoding was loaded with ``pickle.load(open(...))``,
    leaking the file handle; it is now loaded inside a context manager.

    Args:
        sent: text to explain.
        pipe: fitted pipeline exposing ``predict_proba``.
        filename: prefix of the ``*_label_encoding.pkl`` file to load.

    Returns:
        Result of ``Explanation.save_to_file`` (writes explanation.html).
    """
    # vect=transform_inp_sent_to_vect(sent)
    encoding_path = glob.glob(
        r'{}{}_label_encoding.pkl'.format(DIRECTORY_PATH, filename))[0]
    with open(encoding_path, 'rb') as fh:
        label_encoding = pickle.load(fh)
    labels = list(label_encoding.values())
    explainer = LimeTextExplainer(class_names=labels)
    # NOTE(review): LIME's ``labels`` argument expects integer label
    # indices; passing the encoding values works only if they are ints —
    # confirm against the pickled encoding.
    exp = explainer.explain_instance(sent, pipe.predict_proba, labels=labels)
    return exp.save_to_file(r'{}explanation.html'.format(DIRECTORY_PATH))
def limeTextExplain(data, model, class_names):
    """Explain *data* and bundle the word list with class probabilities."""
    explanation = LimeTextExplainer(class_names=class_names).explain_instance(
        data, model.predict_proba, num_features=6)
    probs = model.predict_proba([data])
    return dict(exp=explanation.as_list(),
                predictProbabilities=getPredictProbabilities(
                    [probs[0][0], probs[0][1]], class_names))
def explain(clf, X_train, y, instance, name, method):
    """Fit *clf* and render/save a LIME explanation for one instance."""
    clf.fit(X_train, y)
    lime = LimeTextExplainer(class_names=[-1, 0, 1])
    explanation = lime.explain_instance(instance,
                                        method,
                                        top_labels=1,
                                        num_features=10)
    explanation.show_in_notebook()
    explanation.save_to_file(f"../{name}_explanation.html")
def text_explanation_with_lime(x_train, instance_ind, model, class_name=None):
    """Show an in-notebook LIME explanation for one training row."""
    try:
        instance = x_train.iloc[instance_ind]
        explanation = LimeTextExplainer(
            class_names=class_name).explain_instance(instance,
                                                     model.predict_proba)
        return explanation.show_in_notebook(text=instance)
    except Exception as e:
        # best-effort: report and fall through (returns None)
        print('Model is not supported by LimeTextExplainer')
        print(e)
def interpret_data(X, y, func, class_names):
    """Time LIME explanations over the first ten documents of *X*.

    Fix: the original ignored the *X* parameter and read the module-level
    ``newsgroups_test.data`` instead; it now explains ``X[r_idx]``. (*y*
    is kept for interface compatibility but is unused.)

    Args:
        X: sequence of documents; the first ten are explained.
        y: unused (interface compatibility).
        func: classifier probability function for ``explain_instance``.
        class_names: class names forwarded to ``LimeTextExplainer``.

    Returns:
        (times, scores): per-document wall-clock seconds and LIME fit
        scores.
    """
    explainer = LimeTextExplainer(class_names=class_names)
    times, scores = [], []
    for r_idx in range(10):
        start_time = time.time()
        exp = explainer.explain_instance(X[r_idx], func, num_features=6)
        times.append(time.time() - start_time)
        scores.append(exp.score)
        print('...')
    return times, scores
def explain_text(self, labels, instance, column_name=None,
                 num_features=10, num_samples=5000):
    """Explain a text field of a prediction.

    It analyzes the prediction with LIME, and returns a report of which
    words are most impactful in contributing to certain labels.

    Args:
        labels: a list of labels to explain.
        instance: the prediction instance. It needs to conform to model's
            input. Can be a csv line string, or a dict.
        column_name: which text column to explain. Can be None if there
            is only one text column in the model input.
        num_features: maximum number of words (features) to analyze.
            Passed to LIME LimeTextExplainer directly.
        num_samples: size of the neighborhood to learn the linear model.
            Passed to LIME LimeTextExplainer directly.

    Returns:
        A LIME's lime.explanation.Explanation.

    Throws:
        ValueError if the given text column is not found in model input
        or column_name is None but there are multiple text columns in
        model input.
    """
    from lime.lime_text import LimeTextExplainer

    # validate the requested text column before doing any work
    if len(self._text_columns) > 1 and not column_name:
        raise ValueError('There are multiple text columns in the input of the model. ' +
                         'Please specify "column_name".')
    elif column_name and column_name not in self._text_columns:
        raise ValueError('Specified column_name "%s" not found in the model input.'
                         % column_name)

    text_column_name = column_name if column_name else self._text_columns[0]
    if isinstance(instance, six.string_types):
        # parse a csv line into a dict keyed by the model's headers
        instance = next(csv.DictReader([instance], fieldnames=self._headers))

    predict_fn = self._make_text_predict_fn(labels, instance, text_column_name)
    explainer = LimeTextExplainer(class_names=labels)
    exp = explainer.explain_instance(
        instance[text_column_name], predict_fn, labels=range(len(labels)),
        num_features=num_features, num_samples=num_samples)
    return exp
def test_lime_text_explainer_good_regressor(self):
    """LinearRegression should be accepted as a custom model_regressor."""
    from sklearn.datasets import fetch_20newsgroups
    newsgroups_train = fetch_20newsgroups(subset='train')
    newsgroups_test = fetch_20newsgroups(subset='test')
    # shorten target names, keeping two components for the 'misc' groups
    class_names = []
    for name in newsgroups_train.target_names:
        if 'misc' not in name:
            class_names.append(name.split('.')[-1])
        else:
            class_names.append('.'.join(name.split('.')[-2:]))
    class_names[3] = 'pc.hardware'
    class_names[4] = 'mac.hardware'
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    explainer = LimeTextExplainer(class_names=class_names)
    idx = 1340
    exp = explainer.explain_instance(newsgroups_test.data[idx],
                                     pipeline.predict_proba,
                                     num_features=6,
                                     labels=[0, 17],
                                     model_regressor=LinearRegression())
def test_lime_text_explainer_good_regressor(self):
    """Default regressor path: explanation exists with num_features items."""
    categories = ['alt.atheism', 'soc.religion.christian']
    newsgroups_train = fetch_20newsgroups(subset='train',
                                          categories=categories)
    newsgroups_test = fetch_20newsgroups(subset='test',
                                         categories=categories)
    class_names = ['atheism', 'christian']
    vectorizer = TfidfVectorizer(lowercase=False)
    train_vectors = vectorizer.fit_transform(newsgroups_train.data)
    test_vectors = vectorizer.transform(newsgroups_test.data)
    nb = MultinomialNB(alpha=.01)
    nb.fit(train_vectors, newsgroups_train.target)
    f1_score(newsgroups_test.target, nb.predict(test_vectors),
             average='weighted')
    pipeline = make_pipeline(vectorizer, nb)
    explainer = LimeTextExplainer(class_names=class_names)
    exp = explainer.explain_instance(newsgroups_test.data[83],
                                     pipeline.predict_proba,
                                     num_features=6)
    self.assertIsNotNone(exp)
    self.assertEqual(6, len(exp.as_list()))
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.datasets import fetch_20newsgroups
import matplotlib.pyplot as plt
from lime.lime_text import LimeTextExplainer

# Fit a random forest on TF-IDF vectors of two newsgroups.
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
class_names = ['atheism', 'christian']
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
rf = RandomForestClassifier(n_estimators=500)
rf.fit(train_vectors, newsgroups_train.target)
c = make_pipeline(vectorizer, rf)

# Explain one test document and plot the word weights.
explainer = LimeTextExplainer(class_names=class_names)
idx = 81
exp = explainer.explain_instance(newsgroups_test.data[idx],
                                 c.predict_proba,
                                 num_features=10)
print('Document id: %d' % idx)
print('Probability(christian) =',
      c.predict_proba([newsgroups_test.data[idx]])[0, 1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])
fig = exp.as_pyplot_figure()
plt.show()
def TextInterpret(text, predict):
    """Explain *text* with LIME, display it in the notebook, and return it."""
    explanation = LimeTextExplainer().explain_instance(text, predict)
    explanation.show_in_notebook()
    return explanation
# Probability of each class for the first test document.
print(c.predict_proba([newsgroups_test.data[0]]))

from lime.lime_text import LimeTextExplainer

explainer = LimeTextExplainer(class_names=class_names)

# Explain one document and show it alongside the ground truth.
idx = 83
exp = explainer.explain_instance(newsgroups_test.data[idx],
                                 c.predict_proba,
                                 num_features=10)
print("value to be predicted")
print(newsgroups_test.data[idx])
print(newsgroups_test.target[idx])
print(newsgroups_test.target_names)

print('Document id: %d' % idx)
print('Probability(christian) =',
      c.predict_proba([newsgroups_test.data[idx]])[0, 1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])