Exemple #1
0
    def __init__(self):
        """
        Load the pre-computed syntactic parse vectors for every debate.

        For each debate in ``DEBATES`` the file
        ``../../data/parses/<CONFIG[debate.name]>_parsed.txt`` is read one
        line per sentence, in the order ``read_debates`` yields the
        sentences. The first whitespace-separated token of each line is
        dropped (presumably an identifier -- confirm against the parser
        output) and the remaining tokens are converted to floats.

        The result is stored in ``self.syntactic_parses``, keyed by
        ``sentence.debate.name + sentence.id``.

        :raises ValueError: if a token after the first one is not a float.
        """
        self.syntactic_parses = {}

        for debate in DEBATES:
            parse_path = ("../../data/parses/" + CONFIG[debate.name] +
                          "_parsed.txt")
            # The context manager closes the parse file for this debate
            # even when a line fails to convert; previously every handle
            # stayed open for the lifetime of the process.
            with open(parse_path) as parsed:
                sentences = read_debates(debate)
                for sentence in sentences:
                    # If the file has fewer lines than there are
                    # sentences, readline() returns "" and the parse is
                    # an empty list.
                    tokens = parsed.readline().strip().split()
                    parse = [float(x) for x in tokens[1:]]

                    self.syntactic_parses[sentence.debate.name +
                                          sentence.id] = parse
Exemple #2
0
    def annotate(self, debate):
        """
        Score every sentence of a debate and dump the scores to a file.

        Each sentence returned by ``read_debates(debate)`` is passed to
        ``self.cb_scraper.get_score`` and one line per sentence is written
        (and echoed to stdout) with the columns ID, Speaker, CB and Text,
        separated by ``self.SEP`` and terminated by ``self.NL``. The output
        file lives in ``CONFIG['tr_cb_anns']`` and is named
        ``CONFIG[debate.name] + self.FILE_EXT``; an existing file is
        overwritten.

        :param debate: debate (Debate enum) to get scores of sentences for.
        """
        debate_sentences = read_debates(debate)

        cb_output_name = join(CONFIG['tr_cb_anns'],
                              CONFIG[debate.name] + self.FILE_EXT)

        header = self.SEP.join(("ID", "Speaker", "CB", "Text")) + self.NL

        # Use a context manager so the file is flushed and closed even when
        # scoring a sentence or writing a line raises part-way through.
        with open(cb_output_name, 'w') as cb_output:
            cb_output.write(header)
            for sentence in debate_sentences:
                cb_score = self.cb_scraper.get_score(sentence.text)

                new_line = self.SEP.join((
                    sentence.id,
                    sentence.speaker,
                    str(cb_score),
                    sentence.text,
                )) + self.NL
                print(new_line)

                cb_output.write(new_line)
            av_p_th.append(average_precision_score(y_true=y_score, y_score=y_test))

        av_p.append(mean(av_p_th))
        precision.append(mean(precision_th))
        recall.append(mean(recall_th))

    return av_p, precision, recall, thresholds

if __name__ == '__main__':
        serialize = False
        if serialize:
            all_debates = []
            trainable_feats = counting_feat.BagOfTfIDF.FEATS + knn_similarity.TrainSearch.FEATS

            for debate in DEBATES:
                all_debates += read_debates(debate)
            all_feats = get_experimential_pipeline(all_debates, to_matrix=False).fit_transform(all_debates)
            for feat_name in all_feats[0].features.keys():
                if feat_name in trainable_feats:
                    continue
                feat_dict = {}
                for _x in all_feats:
                    feat_dict[str(_x.id) + _x.debate.name] = _x.features[feat_name]
                if os.path.isfile(CONFIG['features_dump_dir'] + feat_name):
                    old_dict = json.loads(open(CONFIG['features_dump_dir'] + feat_name).read())
                else:
                    old_dict = {}
                old_dict.update(feat_dict)
                with open(CONFIG['features_dump_dir'] + feat_name, "w") as out:
                    out.write(json.dumps(old_dict))
        else: