def traverse_span(span, entities_set):
    """
    Grow the first token of ``span`` into the longest run that is a known entity.

    Starting from ``span[0]``, each following token is joined on with
    ``join_strings``; the extension is kept only while the joined string is a
    member of ``entities_set``. The first token itself is never checked
    against ``entities_set``.

    :param span: non-empty, indexable sequence of word tokens (``span[0]`` is
        read unconditionally, so an empty list raises ``IndexError``)
    :param entities_set: container of known entity strings, tested with ``in``
    :return: tuple ``(count, candidate)`` - the number of tokens consumed
        (always at least 1) followed by the joined candidate string
    """
    entity = span[0]
    words_used = 1
    for position in range(1, len(span)):
        extended = join_strings(entity, span[position])
        if extended not in entities_set:
            # The longer string is unknown, so keep what we have so far.
            break
        entity = extended
        words_used = position + 1
    return words_used, entity
def traverse_span(span, entities_set):
    """
    Concatenate leading tokens of ``span`` while the result stays in ``entities_set``.

    The candidate starts as ``span[0]`` (taken as-is, without a membership
    test). One token at a time is joined on via ``join_strings``; as soon as
    the joined string is not in ``entities_set``, or the span is exhausted,
    the traversal stops.

    :param span: non-empty, indexable sequence of word tokens
    :param entities_set: container of known entity strings, tested with ``in``
    :return: tuple ``(count, candidate)`` - note the order: first the number of
        tokens that make up the candidate, then the candidate string itself
    """
    best = span[0]
    taken = 1
    while taken < len(span):
        longer = join_strings(best, span[taken])
        if longer not in entities_set:
            break
        best, taken = longer, taken + 1
    return taken, best
def look(self, player):
    """
    Describe this location to ``player`` through ``player.tell``.

    Messages are sent in this order: the name banner (only when ``self.name``
    is truthy), the description or a fallback line, the exits (when there are
    any), the names of the other players present, and the names of the things
    in ``self.things``.

    :param player: the observer; must provide ``tell(message)``. It is left
        out of the list of players that is reported back.
    """
    # Banner with the name, followed by the description.
    if self.name:
        banner = '*** {name} ***'.format(name=self.name)
        player.tell(banner)
    player.tell(self.description or 'You see nothing here.')

    # Exits.
    if self.exits:
        exit_text = join_strings(self.exits.keys(), 'or')
        player.tell('You can go {directions}.'.format(directions=exit_text))

    # Everyone present except the observer.
    others = []
    for occupant in self.players:
        if occupant != player:
            others.append(occupant.name)
    if others:
        who = join_strings(others, 'and')
        verb = 'are' if len(others) > 1 else 'is'
        player.tell('{players} {are} here.'.format(players=who, are=verb))

    # Contents; the sentence uses "is" no matter how many things there are.
    contents = [item.name for item in self.things]
    if contents:
        listing = join_strings(contents, 'and')
        player.tell('There is {names} here.'.format(names=listing))
def get_entities_from_summary(self, entities_set):
    """
    Collect the entities found in every sentence of the summary.

    The words in ``self._list_of_words`` are joined into one string with
    ``join_strings``, split into sentences with ``nltk_tok.sent_tokenize``,
    and each sentence is handed to ``self.extract_entities`` together with
    ``entities_set``.

    Limitation noted by the original author: every word-prefix of a
    multi-word entity has to be in ``entities_set`` for the full name to be
    found, e.g. to find "Luc Mbah a Moute" the set must contain
    {"Luc", "Luc Mbah", "Luc Mbah a", "Luc Mbah a Moute"}.
    NOTE(review): this depends on ``extract_entities``, which is not visible
    here - confirm.

    :param entities_set: known entity strings, passed through unchanged to
        ``self.extract_entities``
    :return: list with the per-sentence results concatenated in sentence order
    """
    text = join_strings(*self._list_of_words)
    found = []
    for sentence in nltk_tok.sent_tokenize(text):
        found.extend(self.extract_entities(sentence, entities_set))
    return found
def get_entities_from_summary(self, entities_set):
    """
    Extract the named entities mentioned in the summary, sentence by sentence.

    ``self._list_of_words`` is joined into a single string with
    ``join_strings``; ``nltk_tok.sent_tokenize`` splits it into sentences and
    ``self.extract_entities`` is called once per sentence.

    Caveat carried over from the original documentation: a multi-word entity
    is only found when its shorter prefixes are known too, e.g. looking for
    "Stephen Curry" needs both "Stephen" and "Stephen Curry" in
    ``entities_set``.
    NOTE(review): ``extract_entities`` is not visible here - confirm that this
    still holds.

    :param entities_set: known entity strings, forwarded to
        ``self.extract_entities`` for every sentence
    :return: flat list of everything ``extract_entities`` returned, in
        sentence order
    """
    whole_summary = join_strings(*self._list_of_words)
    return [
        entity
        for sentence in nltk_tok.sent_tokenize(whole_summary)
        for entity in self.extract_entities(sentence, entities_set)
    ]
def _transform_words(list_of_words, words_limit=None):
    """
    Normalise the words of a summary and return them as a flat token list.

    Steps, in order:

    1. The words are joined with ``join_strings``, split into sentences with
       ``nltk_tok.sent_tokenize`` and every sentence is passed through
       ``Summary.transform_numbers`` (per the original documentation this
       turns number words into numerals - the helper is not visible here).
    2. Each sentence is split on whitespace; a token ending in ``’s`` is
       split into its stem and a separate ``’s`` token
       (``Barea’s`` -> ``Barea``, ``’s``).
    3. Token windows of 5 down to 1 tokens are looked up in
       ``name_transformations``; on a hit the window is replaced by the mapped
       value and skipped, otherwise the single token is kept. Per the original
       documentation this maps e.g. all variants of "Luc Mbah A Moute" to
       "Moute" - the table is not visible here.

    NOTE(review): every ``’s`` inside a matching token is removed, not only
    the trailing one, so a token that is exactly ``’s`` produces an empty
    string followed by ``’s``. Behaviour kept as-is; confirm it is intended.

    :param list_of_words: words of the summary, unpacked into ``join_strings``
    :param words_limit: optional cap on the number of returned tokens; the
        first sentence that would push the total past the cap is dropped
        together with every sentence after it. ``None`` means no cap.
    :return: list of transformed tokens
    """
    text = join_strings(*list_of_words)
    # All sentences are number-normalised up front, before any token work.
    prepared_sentences = []
    for raw_sentence in nltk_tok.sent_tokenize(text):
        prepared_sentences.append(Summary.transform_numbers(raw_sentence))

    transformed = []
    for sentence in prepared_sentences:
        # Split possessives into stem + marker.
        words = []
        for word in sentence.strip().split():
            if not word.endswith('’s'):
                words.append(word)
                continue
            words.append(word.replace('’s', ''))
            words.append("’s")

        # Replace known multi-token names, trying the widest window first.
        rewritten = []
        position = 0
        while position < len(words):
            for width in range(5, 0, -1):
                phrase = " ".join(words[position:position + width])
                if phrase in name_transformations:
                    rewritten.extend(name_transformations[phrase])
                    position += width
                    break
            else:
                # No window matched: keep the current token unchanged.
                rewritten.append(words[position])
                position += 1

        # Stop before the sentence that would exceed the cap.
        over_limit = (words_limit is not None) and (
            len(transformed) + len(rewritten) > words_limit)
        if over_limit:
            break
        transformed.extend(rewritten)
    return transformed
def look(self, command):
    """
    Handle a "look" command and report the result through ``self.tell``.

    The target is ``command.direct_object``, or ``command.indirect_object``
    for the "look at <object>" form. Depending on the target:

    - named but not found: report that there is no such thing here;
    - no target, or the current location: delegate to ``self.location.look``;
    - ``self``: show the own description and the names in ``self.things``;
    - anything else: show the target's description.

    :param command: parsed command providing ``direct_object``,
        ``indirect_object``, ``preposition`` and ``direct_object_str``
    """
    target = command.direct_object
    # Special case "look at object": the object is the indirect one.
    if command.preposition == Preposition.AT and command.indirect_object:
        target = command.indirect_object

    # Something was named but nothing matching was found.
    if not target and command.direct_object_str:
        missing = 'There is no {name} here.'.format(
            name=command.direct_object_str)
        self.tell(missing)
        return

    # Nothing specific, or the location itself: let the location describe itself.
    if not target or target == self.location:
        self.location.look(self)
        return

    # Looking at oneself also lists what is being carried.
    if target == self:
        self.tell(self.description or 'You see nothing special.')
        carried = [item.name for item in self.things]
        if carried:
            inventory = join_strings(carried, 'and')
            self.tell('You have {names}.'.format(names=inventory))
        return

    self.tell(target.description or 'You see nothing special.')
def ngram(n, string, minlen=3, maxlen=25):
    """
    Pipe ``string`` through ``ngram_tuples`` and a "_"-joining map.

    ``string`` is fed to ``ngram_tuples(n, minlen=minlen, maxlen=maxlen)`` and
    the result of that to ``map_c(utils.join_strings("_"))``.

    NOTE(review): assumes ``ngram_tuples(...)`` and
    ``utils.join_strings("_")`` return callables (curried helpers) and that
    ``map_c`` maps a callable over an iterable - none of them is visible
    here; confirm.

    :param n: forwarded to ``ngram_tuples`` as its first argument
    :param string: the input that is piped through both stages
    :param minlen: forwarded to ``ngram_tuples`` as ``minlen``
    :param maxlen: forwarded to ``ngram_tuples`` as ``maxlen``
    :return: whatever the second pipeline stage returns
    """
    make_tuples = ngram_tuples(n, minlen=minlen, maxlen=maxlen)
    join_with_underscore = map_c(utils.join_strings("_"))
    return tlz.pipe(string, make_tuples, join_with_underscore)