def ranker_instances(self):
    """Yield ranking instances: one group of candidates per EDU, per file.

    For every entry of ``self._files`` a ``FeatureMap`` is loaded and
    filtered with ``self._features2filter``; each of its instances is then
    converted with ``feat_map2_class_instance``.  The converted instances
    are grouped by their first node (``ana``) and the groups are visited
    in sorted key order, so the output order is reproducible.

    Yields:
        ``(edu, cand_list, f)`` tuples, where ``cand_list`` is the list of
        ``(cand, cl, feats)`` triples collected for ``edu`` (in the
        iteration order of ``feat_map._all``) and ``f`` is the current
        entry of ``self._files``.  A group in which no candidate has
        ``cl == 1`` is reported on stderr and skipped.
    """
    for f in self._files:
        print >> sys.stderr, f
        feat_map = FeatureMap(f, encoding=self._in_encoding)
        # feature filtering -- WARNING: ad hoc!
        feat_map.filter_feats(self._features2filter, mode="lose",
                              continuous=False)
        # Build the attachment point candidate lists (correct and
        # incorrect candidates together), keyed by the EDU to attach.
        cands_by_edu = {}
        for instance in feat_map._all:
            (ana, cand), (cl, feats) = feat_map2_class_instance(
                instance,
                formatting=self._formatting,
                features2cross=self._features2cross)
            # Append in place: rebuilding the list with ``+`` for every
            # instance would be quadratic in the size of the group.
            cands_by_edu.setdefault(ana, []).append((cand, cl, feats))
        # Sort by EDU to make sure data always get ordered the same way.
        for edu, cand_list in sorted(cands_by_edu.items()):
            # EDUs with several correct candidates are not filtered out;
            # only EDUs without any correct candidate are dropped.
            if any(cl == 1 for (_cand, cl, _feats) in cand_list):
                yield edu, cand_list, f
            else:
                print >> sys.stderr, (
                    "Warning: no attachment point for EDU %s! Skipping."
                    % edu)
def classifier_instances(self):
    """Yield one classification instance per feature-map entry.

    Every entry of ``self._files`` is loaded as a ``FeatureMap``, filtered
    with ``self._features2filter``, and each of its instances is converted
    with ``feat_map2_class_instance``.

    Yields:
        ``((edu_pair, path), cl, feats)`` tuples, where ``edu_pair``,
        ``cl`` and ``feats`` come from the converted instance and ``path``
        is the entry of ``self._files`` the instance was read from.
    """
    for path in self._files:
        print >> sys.stderr, path
        fmap = FeatureMap(path, encoding=self._in_encoding)
        # feature filtering -- WARNING: ad hoc!
        fmap.filter_feats(
            self._features2filter,
            mode="lose",
            continuous=False)
        for entry in fmap._all:
            converted = feat_map2_class_instance(
                entry,
                formatting=self._formatting,
                features2cross=self._features2cross)
            # The converter returns (edu_pair, (class, features)).
            edu_pair, labelled = converted
            cl, feats = labelled
            # Tag the pair with its source file.
            yield (edu_pair, path), cl, feats
test_dir = sys.argv[1] test_files = [ os.path.join(test_dir, f) for f in os.listdir(test_dir) if f.endswith(IN_SUFFIX) ] print >> sys.stderr, "Nber test files: %s." % len(test_files) correct1 = 0.0 # including multiple correct cases total1 = 0 correct2 = 0.0 # not including them total2 = 0 for tf in test_files: print >> sys.stderr, tf feat_map = FeatureMap(tf, encoding=DEF_ENCODING) correct_attachments = defaultdict(list) last_attachments = {} for instance in feat_map._all: edu2attach = instance.pop(ANA_NODE) attach_point = instance.pop(ANTE_NODE) cl = int(eval(instance.pop(CLASS_ATTR))) if cl == 1: correct_attachments[edu2attach].append(attach_point) is_last = eval(instance.get("D#LAST", "False")) if is_last: last_attachments[edu2attach] = attach_point # print edu2attach, attach_point, cl, is_last for edu, pts in correct_attachments.items(): try: