コード例 #1
0
ファイル: bertmodel.py プロジェクト: jake-sippy/data-staining
    def __init__(self, model_path=None, cuda_device=1):
        """Load the predictor and build the LIME and gradient explainers.

        Args:
            model_path: Path handed to ``Predictor.from_path``; when falsy,
                ``ROBERTA_MODEL_PATH`` is used instead.
            cuda_device: Device index forwarded to ``Predictor.from_path``.
        """
        self.predictor = Predictor.from_path(
            model_path or ROBERTA_MODEL_PATH, cuda_device=cuda_device)

        wordpiece_tokenizer = PretrainedTransformerTokenizer(
            model_name="roberta-base", max_length=TRANSFORMER_WORDPIECE_LIMIT)

        # Map the raw labels stored in the model vocabulary (looked up by
        # label index 0 and 1) to human-readable class names.
        readable = {"0": "Negative", "1": "Positive"}
        model = self.predictor._model
        namespace = model._label_namespace
        class_names = [
            readable[model.vocab.get_index_to_token_vocabulary(
                namespace).get(label_index)]
            for label_index in (0, 1)
        ]

        def strip_markers(s):
            # Remove the marker characters from every token text, then drop
            # the first and last tokens (presumably the sequence separators
            # added by the tokenizer -- TODO confirm).
            pieces = []
            for token in wordpiece_tokenizer.tokenize(s):
                cleaned = token.text.replace("Ġ", "")
                cleaned = cleaned.replace('Ċ', '').replace('ĉ', "")
                pieces.append(cleaned)
            return pieces[1:-1]

        self.tokenizer = strip_markers
        self.explainer_lime = LimeTextExplainer(
            class_names=class_names, split_expression=self.tokenizer)
        self.explainer_integrate = IntegratedGradient(self.predictor)
        self.explainer_simple = SimpleGradient(self.predictor)
コード例 #2
0
    def run(self, input_text, print_results=True):
        """Classify ``input_text`` and build a LIME explanation for it.

        Args:
            input_text: Text passed to ``self.classify_text`` and to LIME.
            print_results: When true, print the intents, the scores and the
                explanation for label 0.

        Returns:
            The object returned by ``LimeTextExplainer.explain_instance``.
        """
        result = self.classify_text(input_text, True)
        total = len(result)
        half = total / 2

        # Leading slice -> class names, trailing slice -> scores.
        # NOTE(review): both slices stop one element short of their half;
        # confirm against the layout returned by classify_text.
        classes = result[0:int(half - 1)]
        scores = result[int(half):total - 1]

        if print_results:
            print('Intents: ' + '\t'.join(str(c) for c in classes))
            print('Scores: ' + '\t'.join(str(s) for s in scores))

        # Explain the prediction with LIME.
        lime = LimeTextExplainer(class_names=classes)
        exp = lime.explain_instance(input_text,
                                    self.classify_text,
                                    num_features=7,
                                    top_labels=3,
                                    num_samples=1000)

        # Print the explanation for label 0.
        if print_results:
            print("")
            print('Explanation for class %s' % classes[0])
            print('\n'.join(str(item) for item in exp.as_list(label=0)))

        return exp
コード例 #3
0
    def explain_one_example(self, idx=None, num_features=5, print_out=True):
        '''Explain the prediction for a single datapoint with LIME.

        When no index is given, one is drawn at random from
        ``self.for_explanation.index``. Optionally prints the datapoint, its
        predicted probabilities and the explanation figure.

        # Arguments:
            idx: index of a datapoint in ``self.for_explanation``
                (default=None, meaning a random one)
            num_features: int, number of explanatory features (default=5)
            print_out: boolean (default=True)

        # Returns:
            exp: the explanation returned by ``explain_instance``
        '''
        if idx is None:
            idx = np.random.choice(self.for_explanation.index)

        sample = self.for_explanation[idx]
        lime = LimeTextExplainer(class_names=self.class_names)
        exp = lime.explain_instance(sample,
                                    self.predict_proba,
                                    num_features=num_features)

        if not print_out:
            return exp

        print('Tweet {}: {}'.format(idx, sample))
        print(self.predict_proba([sample]))
        print(exp.as_pyplot_figure())
        plt.show()
        return exp
コード例 #4
0
ファイル: views.py プロジェクト: sameerank/verbiage
def classifier(request, format=None):
    """Classify the request's ``description`` text and explain it with LIME.

    The stored model (``clf``) and vectorizer (``tfidf``) are fetched from
    ``Pickle`` records and chained into a pipeline. An empty description
    yields an error response with status 404; otherwise the response carries
    the predicted class, the class probabilities and the per-class LIME
    weights, with status 201.
    """
    model = Pickle.objects.get(name='clf').pickled_model
    vectorizer = Pickle.objects.get(name='tfidf').pickled_model

    input_text = request.data.get('description', 'ERROR')
    if not input_text:
        return Response({'error': 'Input is an empty string'},
                        status=status.HTTP_404_NOT_FOUND)

    standardized_text = standardize_text(input_text)
    lime = LimeTextExplainer(class_names=GRADE_CATEGORIES)
    pipeline = make_pipeline(vectorizer, model)
    exp = lime.explain_instance(standardized_text,
                                pipeline.predict_proba,
                                num_features=6,
                                labels=[0, 1, 2, 3])

    predict_probas = dict(zip(exp.class_names, exp.predict_proba))
    # The class with the highest probability is the final prediction.
    prediction = max(predict_probas, key=predict_probas.get)

    weights_by_class = {}
    for lbl in exp.available_labels():
        weights_by_class[exp.class_names[lbl]] = exp.as_list(label=lbl)

    payload = {
        'final_prediction': prediction,
        'ordered_class_names': exp.class_names,
        'predict_probas': predict_probas,
        'as_list': weights_by_class,
        'standardized_text': standardized_text,
    }
    return Response(payload, status=status.HTTP_201_CREATED)
コード例 #5
0
ファイル: expLime.py プロジェクト: CCPrism/experiments
def finalExplain_n(codes):
    """Translate each code snippet to a comment and explain it with LIME.

    For every entry of ``codes`` the snippet is passed to ``translate``, the
    resulting tokens (from index 1 up to, but excluding, the first ``'</s>'``)
    are joined into a comment, and keywords are extracted from that comment
    with ``Rake``. One LIME explanation of the code is produced per keyword.

    Args:
        codes: Iterable of code snippets.

    Returns:
        A list with one dict per snippet holding the keys ``code``,
        ``comment``, ``commentKeywords`` and ``explanations``. Each
        explanation is a dict with ``commentKeyword`` and ``lime``, where
        ``lime`` is the list returned by ``explanation.as_list()``.
    """
    # The current keyword is published through the module-level ``key``;
    # presumably ``predictorLime`` reads it -- verify against that function.
    global key

    resData = []
    r = Rake()
    exp = LimeTextExplainer(class_names=['negative', 'positive'])
    for code in codes:
        c = translate(code)

        # Skip the first token and stop at the end-of-sentence marker.
        words = []
        for token in c[1:]:
            if token == '</s>':
                break
            words.append(token)
        # Every word keeps its trailing space, as before.
        com = ''.join(word + ' ' for word in words)

        r.extract_keywords_from_text(com)
        comKeys = r.get_ranked_phrases()

        explanations = []
        for _key in comKeys:
            key = _key
            explanation = exp.explain_instance(code,
                                               predictorLime,
                                               num_features=6)
            # Store the evaluated list, not the bound ``as_list`` method.
            lime_weights = explanation.as_list()
            print(lime_weights)
            explanations.append({
                'commentKeyword': _key,
                'lime': lime_weights,
            })

        resData.append({
            'code': code,
            'comment': com,
            'commentKeywords': comKeys,
            'explanations': explanations,
        })
    return resData
コード例 #6
0
def get_lime(model, test_tokens, model_name):
    """Compute LIME word weights for every text in ``test_tokens``.

    Args:
        model: Forwarded to ``wrapper_clf_predict`` as ``model``.
        test_tokens: Iterable of whitespace-separated texts.
        model_name: Forwarded to ``wrapper_clf_predict`` as ``model_name``.

    Returns:
        A ``(features_l, scores_l)`` pair. ``features_l[i]`` is the
        space-joined string of the explained words of text ``i`` and
        ``scores_l[i]`` the matching list of weights (signed; take the
        absolute value downstream if needed). A text whose explanation is
        empty contributes ``''`` and ``[]``.
    """
    explainer = LimeTextExplainer(class_names=["genuine", "deceptive"],
                                  split_expression=r'\s+')
    # The prediction callback does not depend on the text: build it once.
    predict_fn = partial(wrapper_clf_predict,
                         model=model,
                         model_name=model_name)

    W = []
    for idx, text in enumerate(test_tokens):
        tokens = text.split()
        exp = explainer.explain_instance(text,
                                         predict_fn,
                                         num_features=len(tokens),
                                         num_samples=1000)
        weights = exp.as_list()
        weight_map = dict(weights)
        # Report texts where LIME did not return one weight per unique token.
        unique_count = len(set(tokens))
        if unique_count != len(weights):
            print(idx, unique_count, len(weight_map))
        W.append(weight_map)
        if (idx + 1) % 10 == 0:
            print('{} instances have been processed..'.format(idx + 1))

    # Build each joined string from its own dict so an empty explanation can
    # neither raise nor reuse the string of the previous text.
    features_l = [' '.join(d) for d in W]
    scores_l = [list(d.values()) for d in W]
    return features_l, scores_l
コード例 #7
0
ファイル: test_lime_text.py プロジェクト: marcotcr/lime
    def test_lime_text_tabular_not_equal_random_state(self):
        """Explainers seeded with different random states must disagree."""
        categories = ['alt.atheism', 'soc.religion.christian']
        train = fetch_20newsgroups(subset='train', categories=categories)
        test = fetch_20newsgroups(subset='test', categories=categories)
        class_names = ['atheism', 'christian']

        vectorizer = TfidfVectorizer(lowercase=False)
        train_vectors = vectorizer.fit_transform(train.data)
        test_vectors = vectorizer.transform(test.data)
        nb = MultinomialNB(alpha=.01)
        nb.fit(train_vectors, train.target)
        f1_score(test.target, nb.predict(test_vectors), average='weighted')
        pipeline = make_pipeline(vectorizer, nb)

        def explain_with(seed):
            # Same document and classifier; only the random state changes.
            lime = LimeTextExplainer(class_names=class_names,
                                     random_state=seed)
            return lime.explain_instance(test.data[83],
                                         pipeline.predict_proba,
                                         num_features=6)

        exp_1 = explain_with(10)
        exp_2 = explain_with(20)

        self.assertFalse(exp_1.as_map() == exp_2.as_map())
コード例 #8
0
ファイル: debug_parse.py プロジェクト: paper2code/bothub-nlp
 def get_result_per_word(self, text, num_samples):
     """Group the LIME weights of ``text`` by word.

     Returns a dict mapping each explained word to a list of
     ``{"intent", "relevance"}`` dicts (relevance is the LIME weight times
     100), sorted by decreasing relevance. An empty dict is returned when
     there are no intention names or when LIME raises ``ValueError``.
     """
     if not self.intention_names:
         return {}

     lime = LimeTextExplainer(class_names=self.intention_names)
     label_ids = list(range(len(self.intention_names)))
     try:
         exp = lime.explain_instance(text,
                                     self.parse,
                                     num_features=6,
                                     labels=label_ids,
                                     num_samples=num_samples)
     except ValueError:
         # Without an explanation there is nothing to aggregate.
         return {}

     per_word = {}
     for label_id in label_ids:
         for pair in exp.as_list(label=label_id):
             per_word.setdefault(pair[0], []).append({
                 "intent": self.intention_names[label_id],
                 "relevance": pair[1] * 100,
             })

     for entries in per_word.values():
         entries.sort(key=lambda entry: entry.get("relevance"), reverse=True)
     return per_word
コード例 #9
0
ファイル: debug_parse.py プロジェクト: paper2code/bothub-nlp
    def get_result_per_intent(self, text, num_samples):
        """Group the LIME weights of ``text`` by intent.

        Returns a dict mapping every intention name to a list of
        ``{"word", "relevance"}`` dicts (relevance is the LIME weight times
        100) plus one ``{"sum", "relevance": -1}`` entry carrying the sum of
        the raw weights. Each list is sorted by decreasing relevance.
        """
        names = self.intention_names
        lime = LimeTextExplainer(class_names=names)
        label_ids = list(range(len(names)))
        exp = lime.explain_instance(text,
                                    self.parse,
                                    num_features=6,
                                    labels=label_ids,
                                    num_samples=num_samples)

        per_intent = {name: [] for name in names}
        for label_id in label_ids:
            bucket = per_intent[names[label_id]]
            total = 0
            for pair in exp.as_list(label=label_id):
                bucket.append({"word": pair[0], "relevance": pair[1] * 100})
                total += pair[1]
            # Summary entry; its fixed relevance of -1 decides where it
            # lands once the list is sorted.
            bucket.append({"sum": total, "relevance": -1})

        for bucket in per_intent.values():
            bucket.sort(key=lambda entry: entry.get("relevance"),
                        reverse=True)

        return per_intent
コード例 #10
0
 def __init__(self,
              classifier,
              text,
              one_by_one=False,
              tokenise=lambda txt: txt.split(),
              class_names=None,
              mask=u"[mask]",
              threshold=0.2,
              reshape_predictions=True):
     """
     Given a classifier and a tokenisation method LimeUsd returns the toxic words and the respective offsets.
     This implementation is based on LIME.
     :param classifier: any toxicity classifier that predicts a text as toxic or not
     :param text: the textual input (sentence or document) as a string
     :param one_by_one: some classifiers may require one by one classification when scoring the "ablated" texts.
     :param tokenise: by default splits the words on empty space -- same as LIME
     :param class_names: defaults to [0, 1] when omitted or None ("toxic" is represented by 1 and "civil" by 0)
     :param mask: the pseudo token to mask the toxic word (for visualisation purposes)
     :param threshold: above this value the text is predicted toxic (default 0.2)
     :param reshape_predictions: flattens the output, some classifiers may require this to be set to False
     """
     # Build the default list per instance: a list literal in the signature
     # would be one object shared (and mutable) across all instances.
     self.class_names = [0, 1] if class_names is None else class_names
     self.classifier = classifier
     self.mask = mask
     self.one_by_one = one_by_one
     self.reshape_predictions = reshape_predictions
     self.text = text
     # The statement order below is kept as-is: the helper methods are not
     # visible here and may read attributes assigned before their call.
     self.initial_score = self.clf_predict([text])
     self.tokenise = tokenise
     self.explainer = LimeTextExplainer(class_names=self.class_names, split_expression=tokenise)
     self.words = self.tokenise(text)
     self.ablations, self.indices = self.create_ablations()
     self.scores_decrease = self.lime_explain(self.words)
     self.threshold = threshold
     self.black_list = self.get_black_list()
コード例 #11
0
    def explain(self, docs):
        """Generate one LIME explanation per document.

        Every document is explained against ``self.pipeline.predict_proba``
        with ``top_labels`` set to ``self.n_classes``.

        Parameters
        ----------
        docs : list of strings
            Documents to explain.

        Returns
        -------
        exps : list
            The objects returned by ``explain_instance``, in the order of
            ``docs``.
        """
        lime = LimeTextExplainer()
        # NOTE: a list is built eagerly; the original author suspected that a
        # generator might have misbehaved here.
        return [
            lime.explain_instance(
                doc, self.pipeline.predict_proba, top_labels=self.n_classes)
            for doc in docs
        ]
コード例 #12
0
def go():
    """Classify the ``query`` request argument and render ``go.html``.

    The template receives the query, the HTML of a LIME explanation, the
    last element of ``model`` and a dict mapping the label columns
    (``df.columns[4:]``) to the predicted labels.
    """
    # Read the user input from the query string.
    query = request.args.get('query', '')

    # Predict the classification for the query.
    print(
        "generating classification prediction for message {}...".format(query))
    classification_labels = model.predict([query])[0]
    label_columns = df.columns[4:]
    classification_results = dict(zip(label_columns, classification_labels))

    # Set up LIME with the label columns as class names.
    classes = label_columns.to_list()
    print("classes = {}".format(classes))
    lime = LimeTextExplainer(class_names=classes)
    exp = lime.explain_instance(query,
                                model.predict_proba,
                                num_features=10,
                                top_labels=3)

    # Render go.html with the prediction and its explanation.
    return render_template('go.html',
                           query=query,
                           exp=exp.as_html(),
                           model=model[-1],
                           classification_result=classification_results)
コード例 #13
0
def final_yok_classifing(sentence):
    """Return the LIME word weights for the first element of ``sentence``.

    The text is explained against ``yok_classifier_lime`` with up to 100
    features; the result is the explanation's ``as_list()`` output.
    """
    lime = LimeTextExplainer(class_names=['욕설이 아님', '욕설'])
    explanation = lime.explain_instance(sentence[0],
                                        yok_classifier_lime,
                                        num_features=100)
    return explanation.as_list()
コード例 #14
0
def explainer(args, text, num_samples: int = 20):
    """Explain ``text`` with LIME using a ``WrapedSenti`` classifier.

    Args:
        args: Passed to the ``WrapedSenti`` constructor.
        text: Text to explain.
        num_samples: Number of samples forwarded to ``explain_instance``.

    Returns:
        The explanation produced for the top predicted label.
    """
    predict_fn = WrapedSenti(args).predict

    lime = LimeTextExplainer(
        class_names=["neutral", "positive", "negative"],
        # The classifier relies on bigrams / contextual ordering, so word
        # order matters and bag-of-words sampling cannot be used.
        bow=False,
        # Tokenise on whitespace.
        split_expression=lambda doc: doc.split(),
    )

    # Predict and explain in one go.
    return lime.explain_instance(
        text,
        classifier_fn=predict_fn,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
コード例 #15
0
def explain_prediction(sent, file_name):
    """Explain ``sent`` with LIME and save the explanation as HTML.

    Class names come from ``get_categories(sent, file_name)``; labels 0 and 1
    are explained with ``spacy_prediction`` as the classifier. The file is
    written to ``DIRECTORY_PATH`` + ``explanation.html`` and the value
    returned by ``save_to_file`` is passed through.
    """
    class_names = get_categories(sent, file_name)
    lime = LimeTextExplainer(class_names=class_names)
    explanation = lime.explain_instance(sent, spacy_prediction, labels=[0, 1])

    output_path = r'{}explanation.html'.format(DIRECTORY_PATH)
    return explanation.save_to_file(output_path)
コード例 #16
0
 def __init__(self):
     """Load the pickled model and set up the LIME text explainer.

     The model is read from ``models/rf.pkl`` (relative to the current
     working directory) and five sentiment class names are registered.
     """
     # Use a context manager so the file handle is closed right after
     # loading instead of being left to the garbage collector.
     # NOTE(review): pickle.load can execute arbitrary code -- only load
     # model files from a trusted source.
     with open("models/rf.pkl", 'rb') as model_file:
         self.model = pickle.load(model_file)
     self.class_names = [
         'negative', 'somewhat negative', 'neutral', 'somewhat positive',
         'positive'
     ]
     self.explainer = LimeTextExplainer(class_names=self.class_names)
コード例 #17
0
def classify_lime(model, dataset, train_dataset, config_dict):
    """Attach per-word LIME weights for label 1 to every example.

    For each example the words are joined with spaces, explained with LIME,
    and the weights (ordered by feature id) are stored on the example as
    ``predictions``.

    Args:
        model: Forwarded to ``batch_predict``.
        dataset: Dataset whose ``examples`` are explained and annotated.
        train_dataset: Forwarded to ``batch_predict`` as ``dataset``.
        config_dict: Reads ``per_device_eval_batch_size``,
            ``lime_num_samples`` and, optionally,
            ``lime_mask_string_use_pad``.

    Returns:
        The same ``dataset`` object, with ``predictions`` set.
    """
    # Replace removed words with the pad token when requested, otherwise
    # with the mask token.
    if config_dict.get("lime_mask_string_use_pad", False):
        mask_string = tokenizer.pad_token
    else:
        mask_string = tokenizer.mask_token

    lime = LimeTextExplainer(
        class_names=(0, 1),
        # False masks words; True would remove them (worth trying too).
        bow=False,
        mask_string=mask_string,
        feature_selection="none",  # use all features
        split_expression=r"\s",
    )
    predict_fn = partial(
        batch_predict,
        model=model,
        dataset=train_dataset,
        batch_size=config_dict["per_device_eval_batch_size"],
        method="lime",
    )

    total = len(dataset)
    for i in range(total):
        if i % 50 == 0:
            logger.info("lime_sample_idx:" + str(i) + "/" + str(total))
        example = dataset.examples[i]
        exp = lime.explain_instance(
            " ".join(example.words),
            predict_fn,
            labels=(1, ),
            num_samples=config_dict["lime_num_samples"],
        )
        # Order the (feature id, weight) pairs by feature id, keep weights.
        ordered = sorted(exp.as_map()[1], key=lambda pair: pair[0])
        example.predictions = [pair[1] for pair in ordered]
    return dataset
コード例 #18
0
def explainer(method: str, path_to_file: str, text: str, lowercase: bool,
              num_samples: int) -> LimeTextExplainer:
    """Explain ``text`` with LIME using the classifier selected by ``method``.

    Args:
        method: Passed to ``explainer_class`` to pick the classifier.
        path_to_file: Passed to ``explainer_class`` alongside ``method``.
        text: Text to explain.
        lowercase: Lower-case the text first (needed by some classifiers).
        num_samples: Number of samples forwarded to ``explain_instance``.

    Returns:
        The result of ``explain_instance`` for the top predicted label.
    """
    predict_fn = explainer_class(method, path_to_file).predict
    sample = text.lower() if lowercase else text

    lime = LimeTextExplainer(
        # Class names for this case.
        class_names=[1, 2, 3, 4, 5],
        # The classifier relies on trigrams / contextual ordering, so word
        # order matters and bag-of-words sampling cannot be used.
        bow=False,
        # Tokenise on whitespace.
        split_expression=lambda doc: doc.split())

    # Predict and explain in one go.
    return lime.explain_instance(
        sample,
        classifier_fn=predict_fn,
        top_labels=1,
        num_features=20,
        num_samples=num_samples,
    )
コード例 #19
0
ファイル: explainers.py プロジェクト: mabahgat/phd-common
    def __init__(self,
                 class_names,
                 investigate_labels,
                 prediction_fn,
                 tokenize_fn=tokenize_for_lime,
                 num_features=20,
                 num_samples=20,
                 use_top_labels=True,
                 lower_case_bool=False):
        """Store the explanation settings and create the LIME explainer.

        Args:
            class_names: Class names handed to ``LimeTextExplainer``.
            investigate_labels: Stored for later use by the instance.
            prediction_fn: Stored for later use by the instance.
            tokenize_fn: When truthy, used as LIME's ``split_expression``
                (the explainer is then also seeded with ``random_state=0``).
            num_features: Stored for later use by the instance.
            num_samples: Stored for later use by the instance.
            use_top_labels: Stored for later use by the instance.
            lower_case_bool: Forwarded to the parent constructor.
        """
        self.class_names = class_names

        self.__prediction_fn = prediction_fn
        self.__investigate_labels_lst = investigate_labels
        self.__use_top_labels_bool = use_top_labels
        self.__num_features = num_features
        self.__num_samples = num_samples

        # Only a custom tokenizer brings the extra explainer options.
        lime_options = {"class_names": class_names}
        if tokenize_fn:
            lime_options["split_expression"] = tokenize_fn
            lime_options["random_state"] = 0
        self.__explainer = LimeTextExplainer(**lime_options)

        super().__init__(tokenize_fn=tokenize_fn,
                         lower_case_bool=lower_case_bool)
コード例 #20
0
def limevisual(pData, pDesc, Idx, pClassNames, pAccountName, pVec,
               nNumFeatures, nTopLabels, tLabels, pRootDir):
    """Explain one row of ``pData`` with LIME, show it and save it as HTML.

    The model is loaded with ``loadmodel`` for the row's ``Intent`` value and
    chained after ``pVec``. The explanation is shown in the notebook and
    written to a fixed HTML file.

    Returns:
        0 on success, -1 when any exception was raised (the error and its
        traceback are printed).
    """
    try:
        intents = pData['Intent']
        row = int(Idx)
        _, pModels = loadmodel(pRootDir, pAccountName, intents[row])
        pPipeModel = make_pipeline(pVec, pModels)

        # Tokens are runs of two or more word characters.
        word_pattern = re.compile(r"(?u)\b\w\w+\b")
        pExplainer = LimeTextExplainer(class_names=pClassNames,
                                       split_expression=word_pattern.findall)

        text = pData[pDesc][row]
        pExplainText = pExplainer.explain_instance(
            text,
            classifier_fn=pPipeModel.predict_proba,
            num_features=int(nNumFeatures),
            top_labels=int(nTopLabels))
        pExplainText.show_in_notebook(text=text, labels=tLabels)
        pExplainText.save_to_file(
            'C:\\Users\\tamohant\\Desktop\\Auto_synthesis_Training_data\\AutoSynthesisLite\\demo.html',
            labels=None,
            predict_proba=True,
            show_predicted_value=True)
    except Exception as e:
        print(
            '*** ERROR[001]: Error in visualiation file of Limevisual function: ',
            sys.exc_info()[0], str(e))
        print(traceback.format_exc())
        return -1
    return 0
コード例 #21
0
 def test_lime_text_explainer_bad_regressor(self):
     """Passing ``Lasso()`` as ``model_regressor`` must raise TypeError."""
     train = fetch_20newsgroups(subset='train')
     test = fetch_20newsgroups(subset='test')

     # Shorten the class names: keep the last dotted component, or the last
     # two components for the 'misc' groups.
     class_names = []
     for full_name in train.target_names:
         parts = full_name.split('.')
         if 'misc' in full_name:
             class_names.append('.'.join(parts[-2:]))
         else:
             class_names.append(parts[-1])
     class_names[3] = 'pc.hardware'
     class_names[4] = 'mac.hardware'

     vectorizer = TfidfVectorizer(lowercase=False)
     train_vectors = vectorizer.fit_transform(train.data)
     test_vectors = vectorizer.transform(test.data)
     nb = MultinomialNB(alpha=.01)
     nb.fit(train_vectors, train.target)
     f1_score(test.target, nb.predict(test_vectors), average='weighted')
     pipeline = make_pipeline(vectorizer, nb)

     lime = LimeTextExplainer(class_names=class_names)
     idx = 1340
     with self.assertRaises(TypeError):
         lime.explain_instance(
             test.data[idx],
             pipeline.predict_proba,
             num_features=6,
             labels=[0, 17],
             model_regressor=Lasso())
コード例 #22
0
ファイル: lime_eval.py プロジェクト: cmry/omesa
    def explain(self, docs):
        """Generate one LIME explanation per document.

        Every document is explained against ``self.pipeline.predict_proba``
        with ``top_labels`` set to ``self.n_classes``.

        Parameters
        ----------
        docs : list of strings
            Documents to explain.

        Returns
        -------
        exps : list
            The objects returned by ``explain_instance``, in the order of
            ``docs``.
        """
        lime = LimeTextExplainer()
        predict_fn = self.pipeline.predict_proba
        label_count = self.n_classes
        # NOTE: a list is built eagerly; the original author suspected that a
        # generator might have misbehaved here.
        return [
            lime.explain_instance(doc, predict_fn, top_labels=label_count)
            for doc in docs
        ]
コード例 #23
0
def prediction(txt, sentiment, logistic, num_features):
    """Explain ``txt`` with LIME and with the raw logistic-regression weights.

    Writes three files under ``static/outputs/``: the LIME HTML report, the
    LIME bar chart for label 1, and a bar chart of the logistic-regression
    coefficients of the words of ``txt`` that are known to the vectorizer.

    Args:
        txt: Text to explain.
        sentiment: Object exposing the fitted vectorizer as ``tfidf_vect``.
        logistic: Fitted classifier exposing ``predict_proba`` and ``coef_``.
        num_features: Number of features forwarded to LIME.
    """
    vectorizer = sentiment.tfidf_vect

    # LIME
    c = make_pipeline(vectorizer, logistic)
    explainer = LimeTextExplainer(class_names=['NEGATIVE', 'POSITIVE'])
    exp = explainer.explain_instance(txt,
                                     c.predict_proba,
                                     num_features=num_features)
    exp.save_to_file("static/outputs/output.html")
    exp.as_pyplot_figure(label=1)
    plt.savefig('static/outputs/lime_explanation_graph.png')

    # LOGISTIC REGRESSION
    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement when the vectorizer offers it and fall back otherwise.
    get_names = getattr(vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = vectorizer.get_feature_names

    # Build the word -> column lookup once instead of once per word; the
    # first position wins, matching list.index().
    feature_index = {}
    for position, name in enumerate(get_names()):
        feature_index.setdefault(name, position)
    coefs = logistic.coef_[0]

    words_with_weights = {}
    for word in re.sub(r"[^\w]", " ", txt).split():
        position = feature_index.get(word)
        if position is not None:
            words_with_weights[word] = coefs[position]

    data = pd.DataFrame.from_dict(words_with_weights, orient='index')
    # Green bars for positive coefficients, red for the rest.
    data[0].plot(kind='barh', color=(data[0] > 0).map({True: 'g', False: 'r'}))
    plt.savefig('static/outputs/log_explanation_graph.png')
コード例 #24
0
 def __init__(self, class_names, count_vectorizer):
     """Keep the class names and vectorizer and create the LIME explainer.

     LIME (https://arxiv.org/abs/1602.04938) explains selected examples, which
     helps to check that the trained model picks a label for the right reason
     (e.g. meaningful words/features).
     """
     self.__count_vectorizer = count_vectorizer
     self.__class_name = class_names
     self.__explainer = LimeTextExplainer(class_names=class_names)
コード例 #25
0
def lime_explanation(classifier, data, features=MAX_FEATURES):
    """Return the LIME explanation of ``data`` for ``classifier``.

    Args:
        classifier: Object whose ``predict_proba`` is used by LIME.
        data: Text instance to explain.
        features: Number of features in the explanation.
    """
    lime = LimeTextExplainer(class_names=CLASS_NAMES)
    options = {
        "text_instance": data,
        "classifier_fn": classifier.predict_proba,
        "num_features": features,
    }
    return lime.explain_instance(**options)
コード例 #26
0
ファイル: lime_text.py プロジェクト: sofq/xdeep
    def set_parameters(self, **kwargs):
        """Rebuild the LIME text explainer with new parameters.

        # Arguments
            **kwargs: Forwarded to ``LimeTextExplainer``. ``class_names``
                falls back to ``self.class_names`` when not given. For more
                detail, please check
                https://lime-ml.readthedocs.io/en/latest/index.html.
        """
        fallback = self.class_names
        names = kwargs.pop("class_names", fallback)
        self.explainer = LimeTextExplainer(class_names=names, **kwargs)
コード例 #27
0
def limer(example):
    """Explain ``example`` with LIME and show the result in the notebook.

    The text is first passed through ``spacing_example``; predictions come
    from ``do_inference`` and are converted to a NumPy array.
    """
    # TODO: return the output as HTML (ext -> html);
    # find an HTML API rather than the notebook one.

    def predict(s):
        return do_inference(s, True).detach().numpy()

    lime = LimeTextExplainer()
    explanation = lime.explain_instance(spacing_example(example),
                                        predict,
                                        top_labels=1)
    explanation.show_in_notebook()
コード例 #28
0
def explain_instance(headline, body):
    """Explain the combined headline and body text with LIME.

    The two parts are merged with ``combine_sentence`` and explained against
    ``classifier_fn`` using only 4 samples.
    """
    combined = combine_sentence(headline, body)
    lime = LimeTextExplainer(class_names=CLASS_NAMES)
    options = {
        "labels": [0, 1, 2, 3],
        "top_labels": 4,
        "num_samples": 4,
    }
    return lime.explain_instance(combined, classifier_fn, **options)
コード例 #29
0
 def __init__(self, class_names):
     """Create the LIME explainer and set the default explanation settings.

     ``investigate_labels`` covers every index of ``class_names``.
     """
     # Fixed defaults.
     self.num_features = 20
     self.num_samples = 20
     self.use_top_labels = True
     self.top_labels = 2

     self.class_names = class_names
     self.explainer = LimeTextExplainer(class_names=class_names)
     self.investigate_labels = [*range(len(class_names))]
コード例 #30
0
    def explain(self, text, nwords, return_weights=False):
        '''
        Rank the words of `text` by the absolute LIME weight for label 1.

        The text is run through `preprocess_text` and tokenised with
        `get_tokens` before it is explained with `LimeTextExplainer`.

        Parameters
        --------------
        text: str
            The text to explain.

        nwords: int
            Number of features requested from LIME (i.e. explanation size).

        return_weights: bool
            Set to True to also return the weights and the explanation.

        Returns
        ---------------
        word_ranking : numpy array
            Positions of the ranked words, largest absolute weight first.

        ranked_words: list
            The words found at those positions in the tokenised text.

        weights: OrderedDict, optional
            Mapping (word position -> weight) in ranking order.

        explanation: optional
            The explanation object returned by `LimeTextExplainer`.
        '''
        cleaned = preprocess_text(text)
        text_words = get_tokens(cleaned)

        # bow=False because word order is important.
        lime = LimeTextExplainer(class_names=['negative', 'positive'],
                                 feature_selection='auto',
                                 bow=False,
                                 split_expression=' ',
                                 verbose=False)
        explanation = lime.explain_instance(
            text_instance=cleaned,
            labels=[0, 1],
            classifier_fn=self.predict_texts,
            num_features=nwords,
            num_samples=self.nsamples)

        # Largest absolute weight first.
        by_magnitude = sorted(explanation.as_map()[1],
                              key=lambda pair: -abs(pair[1]))
        weights = OrderedDict(by_magnitude)
        word_ranking = np.array(list(weights))
        ranked_words = [text_words[position] for position in word_ranking]

        if not return_weights:
            return word_ranking, ranked_words
        return word_ranking, ranked_words, weights, explanation
コード例 #31
0
def model_load_and_explain(x_text_input):
    """Explain a text with LIME and save the explanation as an HTML file.

    Passing ``'default'`` explains a built-in sample review. The two
    top-ranked LIME features are replaced by ``<unk>`` to print how the
    prediction changes without them. The report is written to
    ``static/oi_lime/`` under the current directory.

    Returns:
        The file name (not the full path) of the saved HTML report.
    """
    from lime.lime_text import LimeTextExplainer

    print(max_document_length)

    default_text = '" extreme ops " exceeds expectations . good fun , good action , good acting , good dialogue , good pace , good cinematography .'
    x_text_instance = default_text if x_text_input == 'default' else x_text_input

    # The model expects a batch, hence the single-element list. The result of
    # this first call is not used.
    cnn.predict_text_instance([x_text_instance])

    lime = LimeTextExplainer(class_names=['Negative', 'Positive'])
    exp = lime.explain_instance(x_text_instance,
                                cnn.predict_text_instance,
                                num_features=6)
    top_features = exp.as_list()

    print("")
    print("output prob (Negative, Positive)")
    print('Original prediction:',
          cnn.predict_text_instance([x_text_instance])[0])

    print("")
    # Mask the two highest-ranked features.
    first_word = top_features[0][0]
    second_word = top_features[1][0]
    x_text_removed = x_text_instance.replace(first_word, '<unk>')
    x_text_removed = x_text_removed.replace(second_word, '<unk>')

    print("x_text_instance: ", x_text_instance)
    print("")
    print("x_text_removed: ", x_text_removed)
    print(first_word)
    print(second_word)
    print("")

    print('Prediction removing some features:',
          cnn.predict_text_instance([x_text_removed])[0])
    print(
        'Difference:',
        cnn.predict_text_instance([x_text_instance])[0] -
        cnn.predict_text_instance([x_text_removed])[0])

    # The file name carries the current Unix timestamp.
    file_name = 'oi_' + str(int(time.time())) + '.html'
    static_dir = os.path.abspath(os.path.join(os.curdir, 'static'))
    oi_lime_dir = os.path.abspath(os.path.join(static_dir, 'oi_lime'))
    exp.save_to_file(os.path.abspath(os.path.join(oi_lime_dir, file_name)))

    return file_name
コード例 #32
0
def explain(clf, X_train, y, instance, name, method):
    """Fit ``clf`` and explain ``instance`` with LIME.

    Args:
        clf: Estimator fitted on ``X_train`` and ``y``.
        X_train: Training inputs.
        y: Training targets.
        instance: Text to explain.
        name: Prefix of the HTML file written to the parent directory.
        method: Prediction function handed to LIME.
    """
    clf.fit(X_train, y)

    lime = LimeTextExplainer(class_names=[-1, 0, 1])
    explanation = lime.explain_instance(instance,
                                        method,
                                        num_features=10,
                                        top_labels=1)
    explanation.show_in_notebook()
    explanation.save_to_file(f"../{name}_explanation.html")
コード例 #33
0
ファイル: text_perf.py プロジェクト: albahnsen/lime
def interpret_data(X, y, func, class_names):
    """Time LIME explanations for the first ten test documents.

    ``X`` and ``y`` are accepted but not read here; the documents come from
    the module-level ``newsgroups_test.data``.

    Args:
        X: unused.
        y: unused.
        func: prediction callable forwarded to ``explain_instance``.
        class_names: class names forwarded to ``LimeTextExplainer``.

    Returns:
        A ``(times, scores)`` pair of lists: the wall-clock seconds each
        explanation took and the ``score`` attribute of each explanation.
    """
    text_explainer = LimeTextExplainer(class_names=class_names)
    durations = []
    fit_scores = []

    for doc_index in range(10):
        began = time.time()
        explanation = text_explainer.explain_instance(
            newsgroups_test.data[doc_index], func, num_features=6)
        elapsed = time.time() - began

        durations.append(elapsed)
        fit_scores.append(explanation.score)
        print('...')  # progress marker, one per explained document

    return durations, fit_scores
コード例 #34
0
    def explain_text(self, labels, instance, column_name=None, num_features=10, num_samples=5000):
        """Run LIME on one text column of a prediction instance.

        Produces a report of which words contribute most to the given labels.

        Args:
          labels: a list of labels to explain.
          instance: the prediction instance, conforming to the model's input.
              Either a csv line string or a dict.
          column_name: the text column to explain. May be None when the model
              input has exactly one text column.
          num_features: maximum number of words (features) to analyze.
              Forwarded to LIME unchanged.
          num_samples: size of the neighborhood used to learn the linear
              model. Forwarded to LIME unchanged.

        Returns:
          The object returned by LimeTextExplainer.explain_instance
          (a lime.explanation.Explanation).

        Raises:
          ValueError: if column_name is given but is not one of the model's
              text columns, or if it is omitted while the model has several
              text columns.
        """

        from lime.lime_text import LimeTextExplainer

        if column_name:
            # An explicit column must be one of the known text columns.
            if column_name not in self._text_columns:
                raise ValueError('Specified column_name "%s" not found in the model input.'
                                 % column_name)
            target_column = column_name
        else:
            # Without an explicit column the choice must be unambiguous.
            if len(self._text_columns) > 1:
                raise ValueError('There are multiple text columns in the input of the model. '
                                 'Please specify "column_name".')
            target_column = self._text_columns[0]

        record = instance
        if isinstance(record, six.string_types):
            # A csv line is parsed into a dict keyed by the model's headers.
            reader = csv.DictReader([record], fieldnames=self._headers)
            record = next(reader)

        predict_fn = self._make_text_predict_fn(labels, record, target_column)
        lime_explainer = LimeTextExplainer(class_names=labels)
        return lime_explainer.explain_instance(
            record[target_column],
            predict_fn,
            labels=range(len(labels)),
            num_features=num_features,
            num_samples=num_samples)
コード例 #35
0
ファイル: test_lime_text.py プロジェクト: cwangED/lime
 def test_lime_text_explainer_good_regressor(self):
     # Trains a TF-IDF + naive Bayes pipeline on the full 20 newsgroups
     # corpus and explains one test document with a LinearRegression
     # surrogate model.
     from sklearn.datasets import fetch_20newsgroups

     train_set = fetch_20newsgroups(subset='train')
     test_set = fetch_20newsgroups(subset='test')

     # Shorten class names: keep the last dotted component, or the last two
     # when the name contains 'misc'.
     short_names = []
     for full_name in train_set.target_names:
         parts = full_name.split('.')
         if 'misc' in full_name:
             short_names.append('.'.join(parts[-2:]))
         else:
             short_names.append(parts[-1])
     short_names[3] = 'pc.hardware'
     short_names[4] = 'mac.hardware'

     tfidf = TfidfVectorizer(lowercase=False)
     train_matrix = tfidf.fit_transform(train_set.data)
     test_matrix = tfidf.transform(test_set.data)

     classifier = MultinomialNB(alpha=.01)
     classifier.fit(train_matrix, train_set.target)
     predicted = classifier.predict(test_matrix)
     # The score is computed but its value is not used.
     f1_score(test_set.target, predicted, average='weighted')

     pipeline = make_pipeline(tfidf, classifier)
     lime_explainer = LimeTextExplainer(class_names=short_names)
     doc_index = 1340
     explanation = lime_explainer.explain_instance(
         test_set.data[doc_index],
         pipeline.predict_proba,
         num_features=6,
         labels=[0, 17],
         model_regressor=LinearRegression())
コード例 #36
0
ファイル: test_lime_text.py プロジェクト: marcotcr/lime
 def test_lime_text_explainer_good_regressor(self):
     # Trains a TF-IDF + naive Bayes pipeline on two newsgroups categories
     # and checks that LIME returns a six-feature explanation.
     wanted = ['alt.atheism', 'soc.religion.christian']
     train_set = fetch_20newsgroups(subset='train', categories=wanted)
     test_set = fetch_20newsgroups(subset='test', categories=wanted)
     display_names = ['atheism', 'christian']

     tfidf = TfidfVectorizer(lowercase=False)
     train_matrix = tfidf.fit_transform(train_set.data)
     test_matrix = tfidf.transform(test_set.data)

     classifier = MultinomialNB(alpha=.01)
     classifier.fit(train_matrix, train_set.target)
     predicted = classifier.predict(test_matrix)
     # The score is computed but its value is not used.
     f1_score(test_set.target, predicted, average='weighted')

     pipeline = make_pipeline(tfidf, classifier)
     lime_explainer = LimeTextExplainer(class_names=display_names)
     doc_index = 83
     explanation = lime_explainer.explain_instance(
         test_set.data[doc_index], pipeline.predict_proba, num_features=6)

     self.assertIsNotNone(explanation)
     self.assertEqual(6, len(explanation.as_list()))
コード例 #37
0
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.datasets import fetch_20newsgroups
import matplotlib.pyplot as plt
from lime.lime_text import LimeTextExplainer

# Example script: train a random forest on two 20-newsgroups categories and
# plot a LIME explanation for one test document.

# Restrict the corpus to two categories; class_names holds the short display
# names handed to LIME.
categories = ['alt.atheism', 'soc.religion.christian']
newsgroups_train = fetch_20newsgroups(subset='train', categories=categories)
newsgroups_test = fetch_20newsgroups(subset='test', categories=categories)
class_names = ['atheism', 'christian']

# TF-IDF features with lowercasing disabled.
vectorizer = sklearn.feature_extraction.text.TfidfVectorizer(lowercase=False)
train_vectors = vectorizer.fit_transform(newsgroups_train.data)

# 500-tree random forest fitted on the training vectors.
rf = RandomForestClassifier(n_estimators=500)
rf.fit(train_vectors, newsgroups_train.target)

# Chain vectorizer and classifier so predict_proba can be called on raw text.
c = make_pipeline(vectorizer, rf)

explainer = LimeTextExplainer(class_names=class_names)

# Explain test document 81 using at most 10 features.
idx = 81
exp = explainer.explain_instance(newsgroups_test.data[idx], c.predict_proba, num_features=10)
print('Document id: %d' % idx)
# Column 1 of predict_proba is reported as "christian"; this assumes the
# classifier's class order matches class_names -- TODO confirm.
print('Probability(christian) =', c.predict_proba([newsgroups_test.data[idx]])[0,1])
print('True class: %s' % class_names[newsgroups_test.target[idx]])

# Render the explanation as a matplotlib figure and display it.
fig = exp.as_pyplot_figure()

plt.show()
コード例 #38
0
def TextInterpret(text, predict):
	"""Explain ``text`` with a default LimeTextExplainer and display it.

	Args:
		text: the text handed to ``explain_instance``.
		predict: prediction callable handed to ``explain_instance``.

	Returns:
		The explanation object, after ``show_in_notebook`` has been called
		on it.
	"""
	result = LimeTextExplainer().explain_instance(text, predict)
	result.show_in_notebook()
	return result
コード例 #39
0
# In[6]:

from lime import lime_text
from sklearn.pipeline import make_pipeline
c = make_pipeline(vectorizer, rf)


# In[7]:

print(c.predict_proba([newsgroups_test.data[0]]))


# In[63]:

from lime.lime_text import LimeTextExplainer
explainer = LimeTextExplainer(class_names=class_names)


# In[64]:

idx = 83
exp = explainer.explain_instance(newsgroups_test.data[idx], c.predict_proba, num_features=10)
print("value to be predicted")
print(newsgroups_test.data[idx])
print(newsgroups_test.target[idx])
print(newsgroups_test.target_names)


# In[17]:

print('Document id: %d' % idx)