def __init__(self):
    """Load precomputed syntactic parse vectors for every debate sentence.

    Reads one ``<debate>_parsed.txt`` file per debate from the relative
    ``../../data/parses/`` directory and maps each sentence to its parse
    feature vector, keyed by ``debate name + sentence id``.

    Each file line is expected to be whitespace-separated: a leading token
    (skipped) followed by float features, one line per sentence in the same
    order as ``read_debates(debate)`` yields them — TODO confirm against the
    parse-file producer.
    """
    self.syntactic_parses = {}
    for debate in DEBATES:
        parse_path = "../../data/parses/" + CONFIG[debate.name] + "_parsed.txt"
        # Context manager ensures the parse file is closed even if a line
        # is malformed (the original leaked one handle per debate).
        with open(parse_path) as parsed:
            for sentence in read_debates(debate):
                # Drop the first token (an id/label), keep the float features.
                parse = [float(x) for x in parsed.readline().strip().split()[1:]]
                self.syntactic_parses[sentence.debate.name + sentence.id] = parse
def annotate(self, debate):
    """Score every sentence of a debate with ClaimBuster and dump a TSV-style file.

    Writes ``CONFIG['tr_cb_anns']/<debate name><FILE_EXT>`` with a header row
    ``ID / Speaker / CB / Text`` (joined by ``self.SEP``) followed by one row
    per sentence, and echoes each row to stdout.

    :param debate: debate (Debate enum) to get scores of sentences for.
    """
    debate_sentences = read_debates(debate)
    cb_output_name = join(CONFIG['tr_cb_anns'],
                          CONFIG[debate.name] + self.FILE_EXT)
    # Context manager guarantees the file is flushed and closed even if
    # the scraper raises mid-loop (the original only closed on success).
    with open(cb_output_name, 'w') as cb_output:
        cb_output.write("ID" + self.SEP + "Speaker" + self.SEP +
                        "CB" + self.SEP + "Text" + self.NL)
        for sentence in debate_sentences:
            # Network/scraper call; may be slow — presumably returns a numeric
            # check-worthiness score (verify against CBScraper.get_score).
            cb_score = self.cb_scraper.get_score(sentence.text)
            new_line = sentence.id + self.SEP + \
                sentence.speaker + self.SEP + \
                str(cb_score) + self.SEP + \
                sentence.text + self.NL
            print(new_line)  # progress echo, one row at a time
            cb_output.write(new_line)
av_p_th.append(average_precision_score(y_true=y_score, y_score=y_test)) av_p.append(mean(av_p_th)) precision.append(mean(precision_th)) recall.append(mean(recall_th)) return av_p, precision, recall, thresholds if __name__ == '__main__': serialize = False if serialize: all_debates = [] trainable_feats = counting_feat.BagOfTfIDF.FEATS + knn_similarity.TrainSearch.FEATS for debate in DEBATES: all_debates += read_debates(debate) all_feats = get_experimential_pipeline(all_debates, to_matrix=False).fit_transform(all_debates) for feat_name in all_feats[0].features.keys(): if feat_name in trainable_feats: continue feat_dict = {} for _x in all_feats: feat_dict[str(_x.id) + _x.debate.name] = _x.features[feat_name] if os.path.isfile(CONFIG['features_dump_dir'] + feat_name): old_dict = json.loads(open(CONFIG['features_dump_dir'] + feat_name).read()) else: old_dict = {} old_dict.update(feat_dict) with open(CONFIG['features_dump_dir'] + feat_name, "w") as out: out.write(json.dumps(old_dict)) else: