def extract(query, choices, processor=None, scorer=None, limit=5):
    """Return the best-scoring choices for ``query``, highest score first.

    Args:
        query: Object representing the thing we want to find.
        choices: Iterable of objects to score against ``query``. Any
            iterable works, including generators and other objects without
            ``len()``. ``None`` is treated like an empty collection.
        processor: Callable ``f(choice)`` whose return value is handed to
            ``scorer`` in place of the raw choice. For example,
            ``lambda x: x[0]`` scores the first element of each choice.
            Defaults to ``utils.asciidammit``.
        scorer: Callable ``f(query, processed_choice)`` returning a numeric
            score. Defaults to ``WRatio``.
        limit: Maximum number of results to return; ``None`` returns all.

    Returns:
        A list of ``(choice, score)`` tuples ordered by descending score.
        The original (unprocessed) choice is what gets returned. Choices
        with equal scores keep their input order.
    """
    if choices is None:
        return []

    if processor is None:
        processor = utils.asciidammit
    if scorer is None:
        scorer = WRatio

    # Iterating directly (instead of checking len() first) lets unsized
    # iterables through; an empty input simply produces an empty list.
    scored = [(choice, scorer(query, processor(choice))) for choice in choices]

    # list.sort is stable even with reverse=True, so ties stay in input order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:limit]
def extract(query, choices, processor=None, scorer=None, limit=5):
    """Score every choice against ``query`` and return the top matches.

    Args:
        query: Object representing the thing we want to find.
        choices: Iterable of candidate objects. Generators and other
            iterables without ``len()`` are accepted. ``None`` yields an
            empty result.
        processor: Callable ``f(choice)`` producing the value that is
            passed to ``scorer``; e.g. ``lambda x: x[0]`` scores on the
            first element of each choice. Defaults to
            ``utils.asciidammit``.
        scorer: Callable ``f(query, processed_choice)`` returning a numeric
            score. Defaults to ``WRatio``.
        limit: Maximum number of results to return; ``None`` returns all.

    Returns:
        A list of ``(choice, score)`` tuples, highest score first, holding
        the original choices rather than their processed form. Equal
        scores keep their input order.
    """
    if choices is None:
        return []

    if processor is None:
        processor = utils.asciidammit
    if scorer is None:
        scorer = WRatio

    # No len() check: an empty iterable just produces no results, and
    # skipping it keeps unsized iterables (generators) usable.
    results = []
    for choice in choices:
        score = scorer(query, processor(choice))
        results.append((choice, score))

    # sorted() is stable with reverse=True, so tied scores keep input order.
    ranked = sorted(results, key=lambda item: item[1], reverse=True)
    return ranked[:limit]
def extractFirst(query, choices, score_cutoff, processor=None, scorer=None):
    """Return the first choice whose score reaches ``score_cutoff``.

    Choices are scored in iteration order and the search stops at the first
    hit, so later choices are never processed or scored.

    Args:
        query: Object representing the thing we want to find.
        choices: Iterable of candidate objects. Generators and other
            iterables without ``len()`` are accepted.
        score_cutoff: Minimum score (inclusive) a choice must reach.
        processor: Callable ``f(choice)`` producing the value passed to
            ``scorer``. Defaults to ``utils.asciidammit``.
        scorer: Callable ``f(query, processed_choice)`` returning a score.
            Defaults to ``WRatio``.

    Returns:
        ``(choice, score)`` for the first choice with
        ``score >= score_cutoff``; ``None`` when no choice qualifies; ``[]``
        when ``choices`` is ``None`` or an empty sized collection (legacy
        return value, kept so existing callers see no change).
    """
    if choices is None:
        return []
    try:
        nothing_to_score = len(choices) == 0
    except TypeError:
        # Unsized iterables (generators, iterators) have no len(); let the
        # loop below discover whether they are empty.
        nothing_to_score = False
    if nothing_to_score:
        return []

    if processor is None:
        processor = utils.asciidammit
    if scorer is None:
        scorer = WRatio

    for choice in choices:
        score = scorer(query, processor(choice))
        if score >= score_cutoff:
            return (choice, score)
    return None
def test_asciionly(self):
    # Smoke test with no assertions: it passes as long as nothing raises.
    # asciionly only runs on strings, so each fixture is first passed
    # through asciidammit.
    for raw in self.mixed_strings:
        utils.asciionly(utils.asciidammit(raw))
def test_asciidammit(self):
    # Smoke test with no assertions: asciidammit must handle every entry
    # of self.mixed_strings without raising.
    for sample in self.mixed_strings:
        utils.asciidammit(sample)
def test_asciidammit(self):
    # The result of asciidammit must compare equal to its own unicode()
    # conversion.
    # NOTE(review): unicode() is a Python 2 builtin; this test needs a
    # different comparison if the suite is ever run on Python 3.
    sample = "Here's a string to test! Wooooooop\r\n\t"
    cleaned = utils.asciidammit(sample)
    # assertEqual is the supported spelling; the assertEquals alias is
    # deprecated and removed from newer unittest releases.
    self.assertEqual(cleaned, unicode(cleaned))