def ranker_instances(self):
    """Yield one ranking instance per EDU that has a correct attachment point.

    Every file in ``self._files`` is loaded into a ``FeatureMap`` and
    filtered with ``self._features2filter``.  Its instances are then grouped
    by the first member of the pair returned by ``feat_map2_class_instance``
    (the EDU to attach).

    Yields:
        ``(edu, cand_list, f)`` tuples, in sorted EDU order within each file.
        ``cand_list`` holds one ``(cand, cl, feats)`` tuple per candidate, in
        the order the instances were read, and ``f`` is the source file.
        EDUs without any candidate of class ``1`` are reported on stderr and
        skipped.
    """
    for f in self._files:
        print >> sys.stderr, f
        feat_map = FeatureMap(f, encoding=self._in_encoding)
        # feature filtering
        feat_map.filter_feats(self._features2filter, mode="lose",
                              continuous=False)  # WARNING: ad hoc!!!
        # build attachment point correct/incorrect candidate lists;
        # append in place rather than re-creating the list per instance
        candidates_by_edu = {}
        for instance in feat_map._all:
            (ana, cand), (cl, feats) = feat_map2_class_instance(
                instance,
                formatting=self._formatting,
                features2cross=self._features2cross)
            candidates_by_edu.setdefault(ana, []).append((cand, cl, feats))
        # generate ranking instances; sorting makes sure data always get
        # ordered the same way
        for edu, cand_list in sorted(candidates_by_edu.items()):
            if any(cl == 1 for (_cand, cl, _feats) in cand_list):
                # NOTE: EDUs with several correct attachment points are kept.
                yield edu, cand_list, f
            else:
                print >> sys.stderr, "Warning: no attachment point for EDU %s! Skipping." % edu
def ranker_instances(self):
    """Yield one ranking instance per EDU that has a correct attachment point.

    Every file in ``self._files`` is loaded into a ``FeatureMap`` and
    filtered with ``self._features2filter``.  Its instances are then grouped
    by the first member of the pair returned by ``feat_map2_class_instance``
    (the EDU to attach).

    Yields:
        ``(edu, cand_list, f)`` tuples, in sorted EDU order within each file.
        ``cand_list`` holds one ``(cand, cl, feats)`` tuple per candidate, in
        the order the instances were read, and ``f`` is the source file.
        EDUs without any candidate of class ``1`` are reported on stderr and
        skipped.
    """
    for f in self._files:
        print >> sys.stderr, f
        feat_map = FeatureMap(f, encoding=self._in_encoding)
        # feature filtering
        feat_map.filter_feats(self._features2filter, mode="lose",
                              continuous=False)  # WARNING: ad hoc!!!
        # build attachment point correct/incorrect candidate lists;
        # append in place rather than re-creating the list per instance
        candidates_by_edu = {}
        for instance in feat_map._all:
            (ana, cand), (cl, feats) = feat_map2_class_instance(
                instance,
                formatting=self._formatting,
                features2cross=self._features2cross)
            candidates_by_edu.setdefault(ana, []).append((cand, cl, feats))
        # generate ranking instances; sorting makes sure data always get
        # ordered the same way
        for edu, cand_list in sorted(candidates_by_edu.items()):
            if any(cl == 1 for (_cand, cl, _feats) in cand_list):
                # NOTE: EDUs with several correct attachment points are kept.
                yield edu, cand_list, f
            else:
                print >> sys.stderr, "Warning: no attachment point for EDU %s! Skipping." % edu
def classifier_instances(self):
    """Yield one classification instance per entry of every feature file.

    Each file in ``self._files`` is announced on stderr, loaded into a
    ``FeatureMap`` and filtered with ``self._features2filter``.  Every entry
    is then converted with ``feat_map2_class_instance``.

    Yields:
        ``((edu_pair, f), cl, feats)`` tuples, where ``f`` is the file the
        entry came from.
    """
    for path in self._files:
        print >> sys.stderr, path
        fmap = FeatureMap(path, encoding=self._in_encoding)
        # feature filtering -- WARNING: ad hoc!!!
        fmap.filter_feats(self._features2filter, mode="lose", continuous=False)
        for entry in fmap._all:
            converted = feat_map2_class_instance(
                entry,
                formatting=self._formatting,
                features2cross=self._features2cross)
            pair, (label, features) = converted
            # the file is paired with the EDU pair so callers can tell
            # instances from different files apart
            yield (pair, path), label, features
def classifier_instances(self):
    """Yield one classification instance per entry of every feature file.

    Each file in ``self._files`` is announced on stderr, loaded into a
    ``FeatureMap`` and filtered with ``self._features2filter``.  Every entry
    is then converted with ``feat_map2_class_instance``.

    Yields:
        ``((edu_pair, f), cl, feats)`` tuples, where ``f`` is the file the
        entry came from.
    """
    for path in self._files:
        print >> sys.stderr, path
        fmap = FeatureMap(path, encoding=self._in_encoding)
        # feature filtering -- WARNING: ad hoc!!!
        fmap.filter_feats(self._features2filter, mode="lose", continuous=False)
        for entry in fmap._all:
            converted = feat_map2_class_instance(
                entry,
                formatting=self._formatting,
                features2cross=self._features2cross)
            pair, (label, features) = converted
            # the file is paired with the EDU pair so callers can tell
            # instances from different files apart
            yield (pair, path), label, features