def do_iteration(
        uuid,
        testtype,
        data,
        model_path="/home/tomasmizera/school/diploma/src/raw-data/lstm-model-sigmoid"):
    """
    Time LIME text explanations over ``data`` for one benchmark iteration.

    uuid       - identifier that is only used to tag the printed progress lines
    testtype   - 'A' reuses a single LimeTextExplainer for every instance,
                 'B' builds a fresh LimeTextExplainer for each instance
    data       - indexable collection of text instances (``data[i]`` for
                 ``i`` in ``range(len(data))``)
    model_path - location handed to ``eh.load_lstm_model``; defaults to the
                 path that used to be hard-coded here

    Raises TypeError when ``testtype`` is neither 'A' nor 'B'.
    Returns nothing; timings are printed.
    """
    start = time.time()
    print(f"Iteration@{uuid} started")

    # Reject an unknown test type before paying for the model load.
    if testtype not in ('A', 'B'):
        raise TypeError("No such test type")

    model = eh.load_lstm_model(model_path)
    print(f"Iteration@{uuid} t1 {time.time() - start}")

    def _predict_proba_fn(_input):
        """
        Function accepting array of instances and returns a probability for each class
        _input - 1d array of instances
        Returns 2d array of [num of instances] x [num of classes] with probabilities
        """
        prediction = model.predict(_input)
        # The model output is used as the first class column and its
        # complement as the second one.
        return np.append(prediction, 1 - prediction, axis=1)

    explainer = lime_text.LimeTextExplainer(
        class_names=['Positive', 'Negative'])
    print(f"Iteration@{uuid} t2 {time.time() - start}")

    compstart = time.time()

    maxn = len(data)

    for i in range(maxn):
        if testtype == 'B':
            # Variant B measures the cost of re-creating the explainer.
            explainer = lime_text.LimeTextExplainer(
                class_names=['Positive', 'Negative'])
        explainer.explain_instance(data[i],
                                   _predict_proba_fn,
                                   num_features=100)

    compend = time.time() - compstart
    # Empty input has no meaningful per-instance time; avoid dividing by zero.
    per_instance = compend / maxn if maxn else float("nan")
    print(
        f'Iteration@{uuid} computation took {compend} secs, per iteration approx {per_instance}'
    )
예제 #2
0
    def __init__(self,
                 modelfn=None,
                 classnames=None,
                 language="english",
                 explainer=None,
                 summarizer=None,
                 fm=962,
                 topfeaturescount=100,
                 sentencescount=6,
                 logger=None):
        """
        Store the configuration and fill in default collaborators.

        ``explainer``, ``summarizer`` and ``logger`` are used as given when
        they are not None. Otherwise a ``lime_text.LimeTextExplainer`` built
        from ``classnames``, a ``TextRankSummarizer`` (stemmer and stop words
        for ``language``) and the root logger are created respectively.
        The remaining arguments are stored on the instance unchanged.
        """
        # Plain settings, kept exactly as passed in.
        self.modelfn = modelfn
        self.classnames = classnames
        self.language = language
        self.fm = fm
        self.topfeaturescount = topfeaturescount
        self.sentencescount = sentencescount

        # Explainer: fall back to a LIME text explainer for our class names.
        if explainer is None:
            explainer = lime_text.LimeTextExplainer(
                class_names=self.classnames)
        self.explainer = explainer

        # Summarizer: fall back to TextRank configured for the language.
        if summarizer is None:
            summarizer = TextRankSummarizer(Stemmer(self.language))
            summarizer.stop_words = get_stop_words(self.language)
        self.summarizer = summarizer

        # Logger: fall back to the root logger.
        self.log = logging.getLogger() if logger is None else logger
예제 #3
0
def lime_lyrics(lyrics, verbose=True):
    '''
    Function that computes the output shown on billboard.html.

    lyrics  - text of the lyrics to classify
    verbose - when true, return a LIME explanation object for the lyrics;
              otherwise return the raw ``model.predict_proba`` result

    The classifier and the vectorizer are unpickled from ``model.pkl`` and
    ``counter.pkl`` in the current working directory on every call.
    '''
    print(repr(lyrics))
    print('____________________' + lyrics + '____________________')
    # NOTE(review): pickle.load runs arbitrary code from the file; these two
    # files must only ever come from a trusted source.
    with open('model.pkl', 'rb') as fp:
        model = pickle.load(fp)
    with open('counter.pkl', 'rb') as fp:
        counter = pickle.load(fp)

    if not verbose:
        inst = counter.transform([lyrics])
        return model.predict_proba(inst)

    # Workaround noted by the original author for a LIME issue (seen with
    # sklearn 0.20.0): tokenize with an explicit callable. The pattern is
    # compiled once and its bound ``findall`` is the tokenizer, instead of
    # recompiling inside a lambda on every call.
    tokenizer = re.compile(r"(?u)\b\w\w+\b").findall
    pipe = make_pipeline(counter, model)
    class_names = ['Not on billboard', 'On billboard']
    explainer = lime_text.LimeTextExplainer(class_names=class_names,
                                            split_expression=tokenizer)

    return explainer.explain_instance(lyrics,
                                      pipe.predict_proba,
                                      num_features=12)
예제 #4
0
    def __init__(self, *argv, **kwargs):
        """
        Initialize lime text explainer object.

        All positional and keyword arguments are forwarded unchanged, first
        to the next ``__init__`` after ``LimeTextExplainer`` in the MRO and
        then to a new ``lime_text.LimeTextExplainer``, which is stored on
        ``self.explainer``.
        """
        super(LimeTextExplainer, self).__init__(*argv, **kwargs)

        # The wrapped LIME explainer is built from the very same arguments.
        self.explainer = lime_text.LimeTextExplainer(*argv, **kwargs)
예제 #5
0
def precompute_explanations(data, cnames, modelpath, vectorizerpath, outpath, label, workerid):
    """
    Explain every instance in ``data`` with LIME and pickle the results.

    data           - indexable collection of records; ``record[0]`` is the
                     text to explain, ``record[1]`` is carried into the output
    cnames         - class names handed to the LIME explainer
    modelpath      - pickle path of the classifier (needs ``predict_proba``)
    vectorizerpath - pickle path of the vectorizer (needs ``transform``)
    outpath        - directory receiving ``<workerid>.pickle``
    label          - value stored as first element of every output tuple
    workerid       - used for the output file name and the progress messages

    Returns ``workerid``.
    """
    explanator = lime_text.LimeTextExplainer(class_names=cnames)
    print(f'Worker @{workerid} started operating')

    model = eh.load_pickle_object(modelpath)
    assert(model is not None)

    vectorizer = eh.load_pickle_object(vectorizerpath)
    assert(vectorizer is not None)

    def _predict_proba_fn(_input):
        """
        Classifier callback for LIME: vectorize the raw instances, then
        return the per-class probabilities from the model.
        """
        features = vectorizer.transform(_input)
        return model.predict_proba(features)

    out = []
    total = len(data)
    for position in range(total):
        record = data[position]
        text = record[0]
        explanation = explanator.explain_instance(text, _predict_proba_fn, num_features=100)
        # Output layout: (label, second record field, explanation, text).
        out.append((label, record[1], explanation, text))

    target = os.path.join(outpath, f'{workerid}.pickle')
    with open(target, 'wb') as fout:
        pickle.dump(out, fout)

    print(f"Worker @{workerid} precomputed {total} instances with label {label}")
    return workerid
예제 #6
0
def explainInput():
    """
    Explain the submitted text with LIME and return the result as HTML.

    Reads the ``text2`` form field, lower-cases it, explains it with
    ``pl.predictFromText`` as classifier and returns LIME's HTML page for
    label 1 with a "Home" button spliced in before the closing tags.
    """
    # Text comes from the first form and is lower-cased before explaining.
    submitted = request.form['text2'].lower()

    explainer_options = dict(
        kernel_width=25,
        verbose=True,
        class_names=["positive", "negative"],
        feature_selection="lasso_path",
        split_expression=" ",
        bow=False,
    )
    explainer = lt.LimeTextExplainer(**explainer_options)

    # The classifier function in projectlib is admittedly hacky, but works.
    explanation = explainer.explain_instance(
        text_instance=submitted,
        classifier_fn=pl.predictFromText,
        labels=[0, 1],
        num_features=5,
        num_samples=1000,
    )

    page = explanation.as_html(
        labels=[1],
        predict_proba=True,
        show_predicted_value=True,
    )

    # Append a home button right before the end of the document.
    closing_with_button = "<button type=\"button\" onclick=\"window.location.href=\'/home\';\">Home</button> \n </body></html>"
    return page.replace("</body></html>", closing_with_button)
예제 #7
0
def lime_lyrics2(lyrics):
    """
    Return a LIME explanation for ``lyrics`` with newlines stripped out.

    The classifier (``model.pkl``) and vectorizer (``counter.pkl``) are
    unpickled from the working directory, chained into one pipeline and
    explained with 12 features.
    """
    flattened = lyrics.replace('\n', '')

    with open('model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
    with open('counter.pkl', 'rb') as counter_file:
        counter = pickle.load(counter_file)

    pipeline = make_pipeline(counter, model)
    explainer = lime_text.LimeTextExplainer(
        class_names=['Not on billboard', 'On billboard'])
    return explainer.explain_instance(flattened,
                                      pipeline.predict_proba,
                                      num_features=12)
예제 #8
0
파일: idf_lime.py 프로젝트: clover3/Chair
def explain_by_lime_idf(data: List[str], get_idf) -> List[List[Tuple[str, float]]]:
    """Explain an idf-weighted term-match score with LIME, one entry at a time.

    Args:
        data: whitespace-tokenizable strings; entries without a "[SEP]" token
            score 0.
        get_idf: callable mapping a stemmed term to its idf weight.

    Returns:
        One list per entry holding the ``exp.as_list()`` pairs, sorted in
        descending order by ``get_second``.
    """
    stemmer = CacheStemmer()

    def split(t):
        return t.split()

    explainer = lime_text.LimeTextExplainer(split_expression=split, bow=True)

    def solve(problem: str):
        """Score one (possibly perturbed) problem string."""
        tokens = split(problem)
        # Without a separator the string is not parsed; it scores 0.
        if "[SEP]" not in tokens:
            return 0
        e: QueryDoc = parse_problem(tokens)
        q_terms_set = set(lmap(stemmer.stem, e.query))
        tf = Counter(lmap(stemmer.stem, e.doc))
        score = 0
        for term, cnt in tf.items():
            if term in q_terms_set:
                score += log(1 + cnt) * get_idf(term)
        return score

    def evaluate_score(problems: List[str]):
        """Classifier callback for LIME: rows of [0, score]."""
        return np.array([[0, solve(problem)] for problem in problems])

    explains = []
    tick = TimeEstimator(len(data))
    for entry in data:
        assert isinstance(entry, str)
        exp = explainer.explain_instance(entry,
                                         evaluate_score,
                                         num_features=512)
        l2 = exp.as_list()
        l2.sort(key=get_second, reverse=True)
        explains.append(l2)
        tick.tick()
    return explains
예제 #9
0
def precompute_explanations(*, data, cnames, modeltype, modelpath, exp_filter, outpath, workerid, partid):
    """
    Explain every instance in ``data`` with LIME and pickle the results.

    data       - iterable of (text, label) pairs
    cnames     - class names handed to the LIME explainer
    modeltype  - "lstm" or "svm-lime"; anything else raises ValueError
    modelpath  - location of the model (for "lstm" it is also the directory
                 holding the ``iolock.lock`` file lock)
    exp_filter - int, number of features per explanation; also part of the
                 output directory and file names
    outpath    - parent directory of the ``expf:<exp_filter>`` output folder
    workerid   - only used in the progress messages
    partid     - part number used in the output file name

    Writes a list of (text, explanation, label) tuples; returns nothing.
    """
    # Check the cheap arguments before building anything expensive.
    assert (type(exp_filter) is int)
    if modeltype not in ("lstm", "svm-lime"):
        raise ValueError("Unknown model! " + modeltype)

    explanator = lime_text.LimeTextExplainer(class_names=cnames)

    print(f"Worker @{workerid} started precomputing {len(data)} instances, ef {exp_filter} part {partid}")

    # The prediction callback is chosen together with the model so the two
    # can never disagree on the model type. The former closure compared
    # against "lstm-imdb", which the loader never accepts, and therefore
    # returned None for the LSTM model.
    if modeltype == "lstm":
        with FileLock(os.path.join(modelpath, "iolock.lock")):
            model = eh.load_lstm_model(os.path.expanduser(modelpath))

        def _predict_proba_fn(_input):
            """
            Function accepting array of instances and returns a probability for each class
            _input - 1d array of instances
            Returns 2d array of [num of instances] x [num of classes] with probabilities
            """
            prediction = model.predict(_input)
            return np.append(prediction, 1 - prediction, axis=1)
    else:
        model, vectorizer = eh.load_religion_model(modelpath)

        def _predict_proba_fn(_input):
            """
            Function accepting array of instances and returns a probability for each class
            _input - 1d array of instances
            Returns 2d array of [num of instances] x [num of classes] with probabilities
            """
            return model.predict_proba(vectorizer.transform(_input))

    out = []
    for text, label in data:
        explanation = explanator.explain_instance(text, _predict_proba_fn, num_features=exp_filter)
        out.append((text, explanation, label))

    outdir = os.path.join(outpath, f"expf:{exp_filter}")
    # exist_ok already covers the "directory is there" case.
    os.makedirs(outdir, exist_ok=True)

    with open(os.path.join(outdir, f'filter:{exp_filter}-part:{partid}.pickle'), 'wb') as fout:
        pickle.dump(out, fout)

    print(f"Worker @{workerid} precomputed {len(data)} instances")
예제 #10
0
def predict(text):
    """
    Classify ``text`` and build a LIME visualization for the prediction.

    Input: text
    -------------------------------
    Output (4-tuple):
    - category_str: name of the predicted category (None for an unknown id)
    - scores: largest value found in score_pred
    - score_pred: result of model.predict_proba([text])
    - viz: LIME generated html for visualization
    """
    category_names = [
        'sport', 'business', 'tech', 'entertainment', 'politics', 'food'
    ]
    # Class id -> category name, ids being the positions in the list above.
    category_dict = dict(enumerate(category_names))

    # Load the serialized model.
    model = joblib.load('./classifier/serving/model/model_v2.joblib')

    # Predicted category and probability scores for the single input text.
    batch = [text]
    category_pred = model.predict(batch)
    score_pred = model.predict_proba([text])
    category_str = list(map(category_dict.get, category_pred))[0]
    scores = np.max(score_pred)

    # LIME explanation rendered as HTML.
    explainer = lime_text.LimeTextExplainer(class_names=category_names)
    explained = explainer.explain_instance(
        text,
        model.predict_proba,
        top_labels=3,
        num_features=10,
    )
    viz = explained.as_html(text=False, predict_proba=True)

    return category_str, scores, score_pred, viz
예제 #11
0
    def run(
            self,
            inputs: List[JsonDict],
            model: lit_model.Model,
            dataset: lit_dataset.Dataset,
            model_outputs: Optional[List[JsonDict]] = None,
            config: Optional[JsonDict] = None,
            kernel_width: int = 25,  # TODO(lit-dev): make configurable in UI.
            mask_string: str = '[MASK]',  # TODO(lit-dev): make configurable in UI.
            num_samples: int = 256,  # TODO(lit-dev): make configurable in UI.
    ) -> Optional[List[JsonDict]]:
        """Run LIME on every text segment of every input.

        Returns None (after logging a warning) when the model has no text
        input fields or no multi-class prediction field. Otherwise returns
        one dict per input, mapping each text field name to a SalienceMap
        over the whitespace-split tokens of that field.
        """
        # Only text segments from the model's input spec are explained.
        text_keys = utils.find_spec_keys(model.input_spec(), types.TextSegment)
        if not text_keys:
            logging.warning('LIME requires text inputs.')
            return None
        logging.info('Found text fields for LIME attribution: %s',
                     str(text_keys))

        # The probabilities are read from a multi-class prediction field.
        pred_keys = utils.find_spec_keys(model.output_spec(),
                                         types.MulticlassPreds)
        if not pred_keys:
            logging.warning(
                'LIME did not find a multi-class predictions field.')
            return None

        # TODO(lit-dev): configure which prob field to use.
        pred_key = pred_keys[0]
        pred_spec = cast(types.MulticlassPreds, model.output_spec()[pred_key])

        # bow=False masks words with `mask_string` instead of deleting them.
        explainer = lime_text.LimeTextExplainer(
            class_names=pred_spec.vocab,
            split_expression=str.split,
            kernel_width=kernel_width,
            mask_string=mask_string,
            bow=False)

        def _explain_field(example, field):
            """Explains one text field of `example`, other fields fixed."""
            segment = example[field]
            logging.info('Explaining: %s', segment)

            # One feature per whitespace-separated word.
            words = segment.split()

            def _predict_proba(strings: List[Text]):
                """Given raw strings, return probabilities. Used by `explainer`."""
                perturbed = [new_example(example, field, s) for s in strings]
                outputs = model.predict(perturbed)
                # <float32>[len(strings), num_labels]
                return np.array([out[pred_key] for out in outputs])

            # Perturb the string, query the model, fit the linear model.
            explanation = explainer.explain_instance(
                segment,
                _predict_proba,
                num_features=len(words),
                num_samples=num_samples)

            # Scores are laid out following the original word order.
            return dtypes.SalienceMap(words, explanation_to_array(explanation))

        # Dict[field name -> interpretations] for each input.
        return [{field: _explain_field(example, field)
                 for field in text_keys}
                for example in inputs]
예제 #12
0
    # Cut the samples of the selected instance out of the audio buffer.
    # instance_idx is used 1-based here: instance k covers the half-open range
    # [(k - 1) * n_samples, k * n_samples), so the end index itself is not
    # copied (e.g. instance 1 reads samples 0 .. n_samples - 1).
    seg_buffer = audio_buffer[(instance_idx - 1) *
                              n_samples:(instance_idx - 1) * n_samples +
                              n_samples]
    # save the instance
    #librosa.output.write_wav('input_audio_instance.wav', seg_buffer, SR)

    ss_buffer = []
    # Split the instance into n_ss consecutive temporal segments of
    # n_samples_ss samples each and collect them in a list.
    for i in range(0, n_ss):
        ss_buffer.append(seg_buffer[i * n_samples_ss:n_samples_ss * (i + 1)])

    # Using LIME/ Sound-LIME to generate temporal explanations
    class_names = ['music', 'singing']

    explainer = lime_text.LimeTextExplainer(class_names=class_names,
                                            verbose=True)

    # NOTE(review): a list of segments (not a string) plus `mean`/`stddev`
    # keyword arguments are passed here; presumably this relies on a
    # Sound-LIME-modified `lime_text` module - confirm which one is imported.
    exp = explainer.explain_instance(ss_buffer,
                                     clf.predict_proba,
                                     num_features=3,
                                     num_samples=1000,
                                     mean=mean,
                                     stddev=std)

    # Explanation for label 1, i.e. class_names[1] ('singing').
    exp_temporal_label_1 = exp.as_list(label=1)

    # Report the true class, the predicted class and the confidence.
    print()
    print('True class: %s' % class_names[int(y_testing[0][instance_idx - 1])])
    print('Predicted class:%s' % class_names[class_pred])
    print('Prediction confidence: %f' % prob)
예제 #13
0
  def test_explain_matches_original_lime(self, sentence, num_samples,
                                         num_classes, class_to_explain):
    """Tests if Citrus LIME matches the original implementation.

    A mock classifier adds a fixed random weight per token to the logit of
    `class_to_explain`. Both LIME implementations explain that classifier
    and their normalized per-token scores must agree within atol=0.01.
    """
    tokens = sentence.split()

    # Assign some weight to each token a-z.
    # Each token contributes positively/negatively to the prediction.
    rs = np.random.RandomState(seed=0)
    token_weights = {token: rs.normal() for token in tokens}
    token_weights[lime.DEFAULT_MASK_TOKEN] = 0.

    def _predict_fn(sentences):
      """Mock prediction function."""
      # Re-seeded on every call so the noise is the same for both explainers.
      noise = np.random.RandomState(seed=0)
      predictions = []
      for perturbed in sentences:
        probs = noise.normal(0., 0.1, size=num_classes)
        # To check if LIME finds the right positive/negative correlations.
        for token in perturbed.split():
          probs[class_to_explain] += token_weights[token]
        predictions.append(probs)
      return np.stack(predictions, axis=0)

    # Explain the prediction using Citrus LIME.
    explanation = lime.explain(
        sentence,
        _predict_fn,
        class_to_explain=class_to_explain,
        num_samples=num_samples,
        tokenizer=str.split,
        mask_token=lime.DEFAULT_MASK_TOKEN,
        kernel=functools.partial(
            lime.exponential_kernel, kernel_width=lime.DEFAULT_KERNEL_WIDTH))
    scores = explanation.feature_importance  # <float32>[seq_len]
    scores = utils.normalize_scores(scores, make_positive=False)

    # Explain the prediction using original LIME.
    # class_names is materialized as a list: a bare `map` object is a
    # one-shot iterator and would be empty after its first traversal.
    original_lime_explainer = lime_text.LimeTextExplainer(
        class_names=list(map(str, np.arange(num_classes))),
        mask_string=lime.DEFAULT_MASK_TOKEN,
        kernel_width=lime.DEFAULT_KERNEL_WIDTH,
        split_expression=str.split,
        bow=False)
    num_features = len(tokens)
    original_explanation = original_lime_explainer.explain_instance(
        sentence,
        _predict_fn,
        labels=(class_to_explain,),
        num_features=num_features,
        num_samples=num_samples)

    # original_explanation.local_exp is a dict that has a key class_to_explain,
    # which gives a sequence of (index, score) pairs.
    # We convert it to an array <float32>[seq_len] with a score per position.
    original_scores = np.zeros(num_features)
    for index, score in original_explanation.local_exp[class_to_explain]:
      original_scores[index] = score
    original_scores = utils.normalize_scores(
        original_scores, make_positive=False)

    # Test that Citrus LIME and original LIME match.
    np.testing.assert_allclose(scores, original_scores, atol=0.01)
예제 #14
0
        predStorage.append(pred)

    # convert to dxk ndarray
    return (np.hstack(predStorage).reshape(-1, 2))


# this works, yields an array with probabilities for both classes
#print(predictFromText(textInputList = listTexts))
#print(predictFromText(textInputList=inputText))

# Build the LIME text explainer for the two-class sentiment classifier.
# bow=False explains in terms of word positions: perturbed words are
# overwritten with the mask string (LIME's default is UNKWORDZ) instead of
# being removed. That makes sense when the location of words matters, as in
# this classifier. Texts are split on single spaces.
explainer = lt.LimeTextExplainer(kernel_width=25,
                                 verbose=True,
                                 class_names=["positive", "negative"],
                                 feature_selection="highest_weights",
                                 split_expression=" ",
                                 bow=False)

# Progress marker printed before the (slow) explanation step.
print("yo")
# Explain inputText for both labels using 5000 perturbed samples and at most
# 8 features; predictFromText supplies the class probabilities.
exp = explainer.explain_instance(text_instance=inputText,
                                 labels=[0, 1],
                                 classifier_fn=predictFromText,
                                 num_features=8,
                                 num_samples=5000)

print(exp)

# Render the explanation for both labels as an HTML string.
html = exp.as_html(labels=[0, 1],
                   predict_proba=True,
                   show_predicted_value=True)
예제 #15
0
파일: lime.py 프로젝트: clover3/Chair
def explain_by_lime_notag(data, forward_run):
    """Explain triple-sequence inputs with LIME via "virtual" text tokens.

    Every distinct ``(x0[i], x1[i], x2[i])`` triple is mapped to an integer
    id (starting at 3). Each entry becomes a space-joined string of those ids
    that LIME can perturb. Perturbed strings are mapped back to
    ``(x0, x1, x2)`` lists before ``forward_run`` is called on them.

    Args:
        data: non-empty sequence of ``(x0, x1, x2)`` entries; the length of
            the first entry's ``x0`` is used as the sequence length for all
            entries.
        forward_run: callable taking a list of ``(x0, x1, x2)`` tuples; its
            result is returned to LIME as the classifier output.

    Returns:
        A list with, per entry, the tuple of scores from
        ``exp.local_exp[0]``.
    """
    x0, x1, x2 = data[0]
    len_seq = len(x0)

    def split(s):
        return s.split()

    explainer = lime_text.LimeTextExplainer(split_expression=split, bow=False)
    token_map = {}
    # Ids below 3 are kept free; 2 stands for LIME's mask token (see reform).
    token_idx = 3

    def forward_wrap(entry):
        """Encode one entry as a string of virtual token ids."""
        nonlocal token_idx
        x0, x1, x2 = entry
        virtual_tokens = []
        for loc in range(len_seq):
            rt = x0[loc], x1[loc], x2[loc]
            if rt in token_map:
                vt = token_map[rt]
            else:
                token_map[rt] = token_idx
                vt = token_idx
                token_idx = token_idx + 1
            virtual_tokens.append(str(vt))
        return " ".join(virtual_tokens)

    print("Virtualizing data")
    v_data = [forward_wrap(e) for e in data]
    rev_token_map = dict_reverse(token_map)

    def virtual_forward_run(vtokens_vector):
        """Decode perturbed virtual strings and run the real model."""
        def reform(t):
            # 'UNKWORDZ' is what LIME writes in place of a masked word.
            if t == 'UNKWORDZ':
                return 2
            else:
                return int(t)

        new_inputs = []
        for vstr in vtokens_vector:
            x0 = []
            x1 = []
            x2 = []
            # Loop variable is named `vt` so it does not shadow `token_idx`.
            for vt in (reform(t) for t in vstr.split()):
                if vt == 2:
                    # Masked position: out-of-vocabulary id, with the other
                    # two values carried over from the previous position.
                    a = OOV_ID
                    b = x1[-1] if x1 else 0
                    # x1 and x2 grow in lockstep, so guarding on x2 is
                    # equivalent to the previous `if x1`.
                    c = x2[-1] if x2 else 1
                else:
                    a, b, c = rev_token_map[vt]
                x0.append(a)
                x1.append(b)
                x2.append(c)

            new_inputs.append((x0, x1, x2))
        return forward_run(new_inputs)

    explains = []

    print("running lime")
    tick = TimeEstimator(len(v_data))
    for entry in v_data:
        exp = explainer.explain_instance(entry,
                                         virtual_forward_run,
                                         num_features=len_seq)
        # NOTE(review): the feature indices are dropped here, so the scores
        # keep whatever order exp.local_exp[0] has - confirm callers do not
        # expect sequence order.
        _, scores = zip(*exp.local_exp[0])
        explains.append(scores)
        tick.tick()
    return explains