def _insertfull(iterable):
    """Collect the characters of every word in ``iterable``.

    Each word is passed through ``Dictionary.normalize``; every character
    of the normalized word is then normalized again on its own and added
    to ``chars``. ``chars`` and ``valid_chars`` are not defined here; they
    are resolved from the surrounding scope.

    Args:
        iterable: iterable of words.
    """
    for word in iterable:
        normalized_word = Dictionary.normalize(word)
        for raw_char in normalized_word:
            char = Dictionary.normalize(raw_char)
            # An empty/falsy ``valid_chars`` means "no restriction".
            if not valid_chars or char in valid_chars:
                chars.add(char)
def _insert(iterable):
    """Collect the characters found in every item of ``iterable``.

    Every element of each item is passed through ``Dictionary.normalize``
    and added to ``chars``. ``chars`` and ``valid_chars`` are not defined
    here; they are resolved from the surrounding scope.

    Args:
        iterable: iterable whose items are themselves iterables of
            characters.
    """
    for char_seq in iterable:
        for char in map(Dictionary.normalize, char_seq):
            # A falsy ``valid_chars`` disables filtering entirely.
            if not valid_chars or char in valid_chars:
                chars.add(char)
def index_embedding_words(embedding_file):
    """Put all the words in embedding_file into a set.

    For each line of the file, the text before the first space is passed
    through ``Dictionary.normalize`` and added to the result.

    Args:
        embedding_file: path of the embedding text file.

    Returns:
        Set of normalized words, one per distinct first token.
    """
    words = set()
    # Decode explicitly as UTF-8: without ``encoding`` the result depends on
    # the platform locale, which fails on non-ASCII tokens on some systems.
    with open(embedding_file, encoding="utf-8") as f:
        for line in f:
            # Only the first space-separated field is the token; the rest of
            # the line is ignored.
            token = line.rstrip().split(' ', 1)[0]
            words.add(Dictionary.normalize(token))
    return words
def index_embedding_chars(char_embedding_file):
    """Put all the chars in char_embedding_file into a set.

    For each line of the file, the text before the first space is passed
    through ``Dictionary.normalize`` and added to the result.

    Args:
        char_embedding_file: path of the character-embedding text file.

    Returns:
        Set of normalized characters, one per distinct first token.
    """
    chars = set()
    # Decode explicitly as UTF-8: without ``encoding`` the result depends on
    # the platform locale, which fails on non-ASCII tokens on some systems.
    with open(char_embedding_file, encoding="utf-8") as f:
        for line in f:
            # Only the first space-separated field is the token; the rest of
            # the line is ignored.
            token = line.rstrip().split(' ', 1)[0]
            chars.add(Dictionary.normalize(token))
    return chars
def top_question_words(args, examples, word_dict):
    """Count and return the most common question words in provided examples.

    Every token in each example's ``'question'`` entry is passed through
    ``Dictionary.normalize``; only tokens contained in ``word_dict`` are
    counted.

    Args:
        args: object whose ``tune_partial`` attribute is handed to
            ``Counter.most_common``.
        examples: iterable of mappings with a ``'question'`` entry that
            iterates over tokens.
        word_dict: container used for the membership filter.

    Returns:
        The result of ``Counter.most_common(args.tune_partial)``: a list of
        ``(word, count)`` pairs.
    """
    normalized_tokens = (
        Dictionary.normalize(token)
        for example in examples
        for token in example['question']
    )
    tally = Counter(token for token in normalized_tokens if token in word_dict)
    return tally.most_common(args.tune_partial)
def index_embedding_words(embedding_file):
    """Put all the words in embedding_file into a set.

    The file is read as UTF-8. For each line, the text before the first
    space is passed through ``Dictionary.normalize`` and added to the
    result.

    Reading is best-effort: if opening, decoding or normalizing fails, a
    message with the number of lines read so far is printed and the words
    collected up to that point are returned.

    Args:
        embedding_file: path of the embedding text file.

    Returns:
        Set of normalized words (possibly partial or empty after a failure).
    """
    words = set()
    counter = 0  # stays 0 when the file cannot even be opened
    try:
        with open(embedding_file, encoding="utf-8") as f:
            for counter, line in enumerate(f, 1):
                token = line.rstrip().split(' ', 1)[0]
                words.add(Dictionary.normalize(token))
    except Exception:
        # Deliberately broad to keep the best-effort behaviour, but no longer
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        # The counter is formatted rather than concatenated: ``str + int``
        # raised TypeError here and masked the original error.
        print(f"An exception occurred on the {counter} word")
    return words
def _insert(iterable):
    """Collect the words found in ``iterable``.

    Each word is passed through ``Dictionary.normalize`` and added to
    ``words``. ``words`` and ``valid_words`` are not defined here; they are
    resolved from the surrounding scope.

    Args:
        iterable: iterable of words.
    """
    for raw_word in iterable:
        word = Dictionary.normalize(raw_word)
        # An empty/falsy ``valid_words`` means "no restriction".
        if not valid_words or word in valid_words:
            words.add(word)