Example #1
0
    def get_similarity(text_1, text_2):
        """Score how similarly two annotated texts are segmented.

        Both inputs are XML strings that mark segment boundaries with
        <span> elements, e.g.
        BOSTON, MA ... <span class="highlighted" id="634541">Steven L.
        Davis pled guilty yesterday to federal charges that he stole and
        disclosed trade secrets of The Gillette Company</span>.

        Returns a (segmentation_similarity, 1 - pk, 1 - window_diff)
        tuple on success, or an error string for empty or mismatched
        input.
        """
        # Guard clause: refuse empty input up front.
        if text_1 == '' or text_2 == '':
            return 'Error Text Input Is Empty'

        # Parse both documents and strip the markup before segmenting.
        soup_a = remove_html_tags(BeautifulSoup(text_1))
        soup_b = remove_html_tags(BeautifulSoup(text_2))

        positions_a = get_segements(soup_a)
        positions_b = get_segements(soup_b)

        # Both annotations must cover the same underlying text.
        if not check_segment_length(positions_a, positions_b):
            return 'Error Source Text Was Different'

        masses_a = segeval.convert_positions_to_masses(positions_a)
        masses_b = segeval.convert_positions_to_masses(positions_b)

        # pk and window_diff are error rates, so flip them into
        # similarity-style scores (higher is better) like ss.
        similarity = float(segeval.segmentation_similarity(masses_a, masses_b))
        pk_score = 1 - float(segeval.pk(masses_a, masses_b))
        wd_score = 1 - float(segeval.window_diff(masses_a, masses_b))

        return similarity, pk_score, wd_score
Example #2
0
def evaluateSegments(reference, hypothesis):
    """Bundle several segmentation-evaluation metrics into one array.

    Returns a numpy array whose entries, in order, are:
    [third value of __getscores (presumably the F_1 score — confirm
    against __getscores), pk, window_diff, boundary_similarity,
    segmentation_similarity].
    """
    ref, hyp = __initialization(reference, hypothesis)
    f1_score = __getscores(reference, hypothesis)[2]
    metrics = [
        f1_score,
        float(segeval.pk(ref, hyp)),
        float(segeval.window_diff(ref, hyp)),
        float(segeval.boundary_similarity(ref, hyp)),
        float(segeval.segmentation_similarity(ref, hyp)),
    ]
    return np.array(metrics)
Example #3
0
    def evaluate(self, batch, preds, sent=True, word=True):
        """Compute segmentation metrics for a batch and its predictions.

        batch: Batch instance
        preds: list
        sent / word: toggle sentence-level and word-level evaluation;
        at least one must be enabled.

        Returns a dict keyed by '<level>_<metric>' (level 'w' or 's';
        metrics pk, wd, ss, bs, precision, recall, f1).

        Usage:
            >> from loader import *
            >> from modules import *
            >>
            >> model = TextSeg(lstm_dim=200, score_dim=200, bidir=True, num_layers=2)
            >> trainer = Trainer(model=model,
                                  train_dir='../data/wiki_727/train', 
                                  val_dir='../data/wiki_50/test',
                                  batch_size=10,
                                  lr=1e-3)  
            >> evalu = Metrics()
            >>
            >> batch = sample_and_batch(trainer.train_dir, trainer.batch_size, TRAIN=True)
            >> preds = trainer.predict_batch(batch)
            >> evalu(batch, preds)
        """
        assert (sent or
                word), 'Missing: choose sent- and / or word-level evaluation.'

        metric_dict = {}

        def _score(prefix, gold, guess):
            # Window/boundary agreement metrics between prediction and gold.
            metric_dict[prefix + 'pk'] = seg.pk(guess, gold)
            metric_dict[prefix + 'wd'] = seg.window_diff(
                guess, gold, lamprier_et_al_2007_fix=True)
            metric_dict[prefix + 'ss'] = seg.segmentation_similarity(guess, gold)
            metric_dict[prefix + 'bs'] = seg.boundary_similarity(guess, gold)

            # Confusion-matrix-derived precision/recall/F1.
            confusion = seg.boundary_confusion_matrix(guess, gold)
            metric_dict[prefix + 'precision'] = seg.precision(confusion)
            metric_dict[prefix + 'recall'] = seg.recall(confusion)
            metric_dict[prefix + 'f1'] = seg.fmeasure(confusion)

        if word:
            gold, guess = self._word(batch, preds)
            _score('w_', gold, guess)

        if sent:
            gold, guess = self._sent(batch, preds)
            _score('s_', gold, guess)

        return metric_dict
Example #4
0
def get_Segmentation_similarity(reference, hypothesis):
    """Evaluate the hypothesis segmentation against the reference using
    segeval's S (segmentation similarity) metric."""
    prepared_ref, prepared_hyp = __initialization(reference, hypothesis)
    return segeval.segmentation_similarity(prepared_ref, prepared_hyp)