def preprocess_data(input_list):
    """Turn every entry of ``input_list`` into an ``Instance``.

    Each entry is first passed through ``TextUtils.remove_blank`` and the
    result is handed to ``TextUtils.tokenize``. The token sequence that comes
    back is wrapped in an ``Instance`` constructed with ``tag=-1``.

    Args:
        input_list: Iterable of raw lines to process (presumably strings --
            confirm against the callers and ``TextUtils``).

    Returns:
        A list with one ``Instance`` per input entry, in input order.
    """
    # NOTE(review): tag=-1 looks like a placeholder for "no label yet" --
    # confirm against the definition of Instance.
    return [
        Instance(TextUtils.tokenize(TextUtils.remove_blank(raw_line)), tag=-1)
        for raw_line in input_list
    ]