def featurize_windows(data, start, end, window_size=1):
    """Build windowed data points from the input sequences in ``data``.

    Each sentence is handed to ``util.window_iterator`` together with
    ``window_size`` and the ``start``/``end`` items; every window that
    iterator yields is flattened into a single list by concatenating the
    window's items.

    Args:
        data: Iterable of ``(sentence, labels)`` pairs.
        start: Forwarded to ``window_iterator`` as ``beg`` (presumably the
            padding item placed before the sentence -- confirm in ``util``).
        end: Forwarded to ``window_iterator`` as ``end`` (presumably the
            padding item placed after the sentence -- confirm in ``util``).
        window_size: Forwarded to ``window_iterator`` unchanged. Defaults
            to 1.

    Returns:
        A list with one ``(windowed_sentence, labels)`` tuple per input
        pair, where ``windowed_sentence`` is a list of flattened windows
        and ``labels`` is passed through untouched.
    """
    # Import once per call instead of once per sentence; it stays at
    # function scope so importing this module does not require ``util``.
    from util import window_iterator

    ret = []
    for sentence, labels in data:
        # sum(window, []) concatenates the window's items, so each item
        # must itself be a list.
        sentence_ = [
            sum(window, [])
            for window in window_iterator(
                sentence, window_size, beg=start, end=end
            )
        ]
        ret.append((sentence_, labels))
    return ret
def featurize_windows(data, start, end, window_size=1):
    """Use the input sequences in ``data`` to construct windowed data points.

    For every ``(sentence, labels)`` pair, the sentence is replaced by a
    list of flattened windows obtained from ``util.window_iterator``; the
    labels are kept as they are.

    Args:
        data: Iterable of ``(sentence, labels)`` pairs.
        start: Value passed to ``window_iterator`` as its ``beg`` keyword
            (presumably the item used to pad the front -- TODO confirm).
        end: Value passed to ``window_iterator`` as its ``end`` keyword
            (presumably the item used to pad the back -- TODO confirm).
        window_size: Value passed to ``window_iterator`` as its second
            positional argument. Defaults to 1.

    Returns:
        A list of ``(windows, labels)`` tuples in the same order as
        ``data``; each entry of ``windows`` is one window whose items have
        been concatenated into a single list.
    """
    # The import is loop-invariant, so resolve it once up front rather than
    # re-executing the import statement for every sentence.
    from util import window_iterator

    featurized = []
    for sentence, labels in data:
        windows = window_iterator(sentence, window_size, beg=start, end=end)
        # Each window is a sequence of lists; sum(..., []) joins them into
        # one flat list.
        flattened = [sum(window, []) for window in windows]
        featurized.append((flattened, labels))
    return featurized
def featurize_windows(data, start, end, window_size=1):
    """Extract token windows from every sentence in ``data``.

    The ids of ``<s>`` and ``</s>`` are added before and after each
    sentence so that every window has the same length (translated from the
    original docstring; the padding itself is done by ``window_iterator``).

    Args:
        data: Iterable of ``(sentence, labels)`` pairs. Per the original
            comment, tokens are already numeric ids at this point.
        start: Forwarded to ``window_iterator`` as ``beg``.
        end: Forwarded to ``window_iterator`` as ``end``.
        window_size: Forwarded to ``window_iterator``. Previously this name
            was read inside the body without being a parameter; it is now
            an explicit argument defaulting to 1, so three-argument callers
            keep working.

    Returns:
        A list of ``(windowed_sentence, labels)`` tuples, one per input
        pair, where ``windowed_sentence`` is a list of flattened windows.
    """
    # NOTE: ``window_iterator`` is not imported in this block; it is
    # expected to be available from the enclosing module scope.
    ret = []
    for sentence, labels in data:
        # Everything is a numeric id at this point.
        # sum(window, []) concatenates the window's items into one list,
        # so each item must itself be a list.
        sentence_ = [
            sum(window, [])
            for window in window_iterator(
                sentence, window_size, beg=start, end=end
            )
        ]
        ret.append((sentence_, labels))
    return ret