def candidate_index_sets(self, found_indices):
    """Return the candidate index sets reachable from ``found_indices``.

    Every found index is expanded with ``self.all_abstractions``; each
    resulting abstraction is then looked up in ``self.index_sets`` and the
    index sets stored under it are collected.

    Args:
        found_indices: iterable of indices found in the input.

    Returns:
        The collected index sets after being passed through
        ``utils.remove_duplicates``.
    """
    # Accumulate in place into lists owned by this call. Rebuilding the
    # list with ``+`` on every iteration copies everything gathered so far.
    abstractions = []
    for index in found_indices:
        abstractions.extend(self.all_abstractions(index))

    candidates = []
    for abstraction in abstractions:
        # Abstractions with no registered index sets contribute nothing.
        candidates.extend(self.index_sets.get(abstraction, []))

    return utils.remove_duplicates(candidates)
def parse(self, text, debug=0):
    """Parse a string and return the list of valid parses.

    Args:
        text: the string to parse.
        debug: debug level, forwarded to ``parse_tokens``.

    Returns:
        The parses produced by ``parse_tokens`` for the tokenized text.
        When more than one parse comes back they are first passed through
        ``utils.remove_duplicates``. Every returned parse has its ``text``
        attribute set to the input string.

    Raises:
        TypeError: if ``text`` is not a ``basestring`` instance.
    """
    if not isinstance(text, basestring):
        raise TypeError('%r is not a string.' % (text,))

    tokens = self.tokenize(text)
    parses = self.parse_tokens(tokens, debug=debug)

    # De-duplication is only attempted when there are several parses.
    if len(parses) > 1:
        parses = utils.remove_duplicates(parses)

    # Tag each surviving parse with the text it was derived from.
    for parsed in parses:
        parsed.text = text
    return parses
def parse(self, text, debug=0):
    """Parse ``text`` (a string) into its list of valid parses.

    The text is tokenized with ``self.tokenize`` and the tokens are handed
    to ``self.parse_tokens`` together with ``debug``. If that yields more
    than one parse, the list is run through ``utils.remove_duplicates``.
    Each parse in the returned list gets ``text`` recorded on its ``text``
    attribute.

    Raises:
        TypeError: when ``text`` is not a ``basestring`` instance.
    """
    if isinstance(text, basestring):
        found = self.parse_tokens(self.tokenize(text), debug=debug)
        # A single parse (or none) cannot contain duplicates.
        unique = utils.remove_duplicates(found) if len(found) > 1 else found
        for item in unique:
            item.text = text
        return unique

    raise TypeError('%r is not a string.' % (text,))
def parse_tokens(self, tokens, debug=0):
    """Parse a token sequence and return the ranked, filtered results.

    Steps, in order:
      1. Store ``debug`` on ``self`` and on ``self.cp_parser``.
      2. Find indices with ``self.find_indices(tokens, self.match_function)``.
      3. Score them with ``self.score_index_sets`` and sort the scored list
         in place by ``score``, highest first.
      4. Keep only results whose ``score`` is strictly greater than
         ``CUTOFF_ICP_SCORE``.
      5. Run ``utils.remove_duplicates``, treating two results as equal when
         their ``target_concept`` values compare equal.

    Args:
        tokens: the tokens to parse.
        debug: debug level; above 0 prints the tokens and final results,
            above 1 additionally prints the found indices.

    Returns:
        The list produced by step 5.
    """
    self.debug = debug
    self.cp_parser.debug = debug

    found = self.find_indices(tokens, self.match_function)
    if debug > 0:
        print('ICP parsing tokens %s' % (tokens,))
    if debug > 1:
        print('ICP found indices %s' % (found,))

    scored = self.score_index_sets(found)
    scored.sort(key=lambda entry: entry.score, reverse=True)

    # Drop everything at or below the score cutoff.
    kept = []
    for entry in scored:
        if entry.score > CUTOFF_ICP_SCORE:
            kept.append(entry)

    def same_target(first, second):
        return first.target_concept == second.target_concept

    # NOTE(review): which of several same-target results survives depends
    # on utils.remove_duplicates; presumably the first (highest-scoring)
    # one, since the list is already sorted -- confirm.
    unique = utils.remove_duplicates(kept, same_target)

    if debug > 0:
        print('ICP results: %s' % (unique,))
    return unique
def parse_tokens(self, tokens, debug=0):
    """Return the scored parse results for ``tokens``.

    Records ``debug`` on this parser and on ``self.cp_parser``, looks up
    indices for the tokens via ``self.find_indices`` (using
    ``self.match_function``), scores them with ``self.score_index_sets``,
    orders the scored results by descending ``score``, discards any whose
    ``score`` does not exceed ``CUTOFF_ICP_SCORE``, and finally collapses
    results with equal ``target_concept`` through
    ``utils.remove_duplicates``.

    With ``debug`` above 0 the tokens and the final results are printed;
    with ``debug`` above 1 the found indices are printed as well.
    """
    def by_score(candidate):
        return candidate.score

    def targets_match(left, right):
        return left.target_concept == right.target_concept

    self.debug = debug
    self.cp_parser.debug = debug
    indices = self.find_indices(tokens, self.match_function)

    if debug > 0:
        print('ICP parsing tokens %s' % (tokens,))
    if debug > 1:
        print('ICP found indices %s' % (indices,))

    ranked = self.score_index_sets(indices)
    # Sorted in place, best score first, before the cutoff is applied.
    ranked.sort(key=by_score, reverse=True)
    above_cutoff = [
        candidate for candidate in ranked
        if candidate.score > CUTOFF_ICP_SCORE
    ]
    results = utils.remove_duplicates(above_cutoff, targets_match)

    if debug > 0:
        print('ICP results: %s' % (results,))
    return results