def int_words(words):
    """Encode a sentence as a sequence of integer word ids.

    The word-to-id mapping is read from the ``one_hot_dict`` attribute of the
    object pickled at ``../Model/wordDictionary.pkl`` (a path relative to the
    current working directory, re-read on every call). The sentence is split
    with ``Helper.splitSentence``; words missing from the mapping are encoded
    as ``0``. The encoded list is then passed through ``pad_sequence`` with
    ``Meta.max_string_len``.

    Args:
        words: The sentence to encode, in whatever form
            ``Helper.splitSentence`` accepts.

    Returns:
        Whatever ``pad_sequence`` returns for the encoded list.
    """
    # NOTE: pickle.load can execute arbitrary code embedded in the file, so
    # this dictionary file must come from a trusted source.
    with open('../Model/wordDictionary.pkl', 'rb') as file:
        vocabulary = pickle.load(file).one_hot_dict

    encoded = []
    unknown_total = 0
    token_total = 0
    for token_total, token in enumerate(Helper.splitSentence(words), start=1):
        if token in vocabulary:
            encoded.append(vocabulary[token])
            continue
        # Unknown word: keep its position but encode it as 0.
        encoded.append(0)
        unknown_total += 1

    padded = pad_sequence(encoded, Meta.max_string_len)
    # Report how many words were unknown out of the total seen.
    Helper.debug('[WARNING] dismiss: %d\ttotal: %d' % (unknown_total, token_total))
    return padded
def one_hot(self, filename, label):
    """Encode every row of a CSV file and append the samples to this object.

    For each row, the ``text`` column is split with ``Helper.splitSentence``.
    Words containing ``'http'`` are dropped. While
    ``self.wordDictionary.isFull()`` is true, the word is logged as missing
    and dropped. Every other word is translated with
    ``self.wordDictionary.lookup`` and collected into the row's feature list.
    One feature list is appended to ``self.X`` and ``label`` to ``self.Y``
    per row.

    Args:
        filename: Path of a UTF-8 CSV file whose header row contains a
            ``text`` column.
        label: Value appended to ``self.Y`` for every row of the file.

    Raises:
        KeyError: If a row has no ``text`` column.
    """
    # newline='' hands line-ending handling to the csv module, which is
    # required for quoted fields that contain embedded line breaks to be
    # parsed correctly.
    with open(filename, "r", encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            features = []
            for word in Helper.splitSentence(row['text']):
                # Punctuation stripping (word.strip(string.punctuation)) is
                # currently disabled.
                if 'http' in word:
                    continue
                # NOTE(review): once the dictionary reports full, every
                # remaining word is skipped before lookup is attempted -
                # possibly including words that already have an entry.
                # Confirm this is intended.
                if self.wordDictionary.isFull():
                    Helper.debug("[WARNING] Missing word:" + word)
                    continue
                features.append(self.wordDictionary.lookup(word))
            self.X.append(features)
            self.Y.append(label)
    Helper.debug("[INFORMATION] Total One Hot: %d" % self.wordDictionary.count())