def get_wiki_curated_NDWdata():
    """Return the curated wiki samples whose labels fail ``Dictionary.isDW``.

    Loads ``(X, y)`` from ``get_wiki_curated_data()``, skips every entry
    whose label is the empty string, and keeps only the positions for which
    ``Dictionary().isDW(label)`` is falsy.

    Returns:
        A ``(X, y)`` pair: ``X`` is the loaded feature container indexed by
        the list of kept positions, and ``y`` is a list of the matching
        labels in their original order.
    """
    features, labels = get_wiki_curated_data()
    lookup = Dictionary()

    # Empty labels are dropped before the dictionary is consulted, so
    # ``isDW`` is only ever called on non-empty labels.
    kept_positions = [
        pos
        for pos, label in enumerate(labels)
        if label != '' and not lookup.isDW(label)
    ]

    kept_labels = [labels[pos] for pos in kept_positions]
    # NOTE(review): indexing with a list of ints presumably relies on X being
    # a NumPy array / SciPy sparse matrix -- confirm against the loader.
    kept_features = features[kept_positions]
    return kept_features, kept_labels
def get_wiki_bow_DWdata():
    """Split the bag-of-words wiki data by the result of ``Dictionary.isDW``.

    Loads ``(X, y)`` from ``get_wiki_bow_data()`` and ignores every entry
    whose label is the empty string. The remaining positions are divided
    into those whose label passes ``Dictionary().isDW(label)`` and those
    whose label does not.

    Returns:
        A 4-tuple ``(X, y, NDW_X, NDW_y)``: the first pair holds the features
        and labels for which ``isDW`` was truthy, the second pair holds the
        features and labels for which it was falsy. Labels are plain lists;
        features are the loaded container indexed by a list of positions.
    """
    features, labels = get_wiki_bow_data()
    lookup = Dictionary()

    dw_positions = []
    ndw_positions = []
    for pos, label in enumerate(labels):
        if label != '':
            # One dictionary lookup per non-empty label decides its bucket.
            bucket = dw_positions if lookup.isDW(label) else ndw_positions
            bucket.append(pos)

    # NOTE(review): list-of-int indexing presumably relies on X being a
    # NumPy array / SciPy sparse matrix -- confirm against the loader.
    ndw_labels = [labels[pos] for pos in ndw_positions]
    ndw_features = features[ndw_positions]
    dw_labels = [labels[pos] for pos in dw_positions]
    dw_features = features[dw_positions]
    return dw_features, dw_labels, ndw_features, ndw_labels