def tokenizing(csv_import_path, csv_export_path):
    # CSV and MeCabTokenizer are helper classes assumed to be defined
    # elsewhere in this project: CSV wraps reading/writing CSV files,
    # MeCabTokenizer wraps a MeCab tagger.
    csv_obj = CSV(csv_import_path)
    csv_data = csv_obj.get_data()

    # Flatten every cell of the CSV into one list of sentences.
    sentence_arr = []
    for row in csv_data:
        for cell in row:
            sentence_arr.append(cell)

    tokenizer = MeCabTokenizer(tagger='-Ochasen')
    output_arr = []
    stop_words = ['。', '、', '・']  # Japanese punctuation to drop

    for sentence in sentence_arr:
        # Walk MeCab's node list and collect surface forms, skipping
        # BOS/EOS nodes (which have an empty surface) and punctuation.
        tokens = tokenizer.parse_to_node(sentence)
        surface = []
        while tokens:
            if tokens.surface and tokens.surface not in stop_words:
                surface.append(tokens.surface)
            tokens = tokens.next
        if surface:
            output_arr.append([sentence, " ".join(surface)])

    # Write rows of [original sentence, space-joined tokens].
    csv_obj.export(csv_export_path, output_arr)
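Since the CSV and MeCabTokenizer helpers are not shown here, below is a minimal, self-contained sketch of the same pipeline built only on the standard csv module and mecab-python3's MeCab.Tagger / parseToNode API. The function name tokenize_csv is hypothetical, and the helpers above are assumed to behave equivalently; paths and the -Ochasen option mirror the function above.

import csv

import MeCab


def tokenize_csv(csv_import_path, csv_export_path):
    # Hypothetical stand-in for the CSV / MeCabTokenizer helpers above.
    tagger = MeCab.Tagger('-Ochasen')
    stop_words = {'。', '、', '・'}  # punctuation to drop, as in the original

    # Read every cell of the input CSV as one sentence.
    with open(csv_import_path, newline='', encoding='utf-8') as f:
        sentences = [cell for row in csv.reader(f) for cell in row]

    output_rows = []
    for sentence in sentences:
        node = tagger.parseToNode(sentence)
        surfaces = []
        while node:
            # BOS/EOS nodes have an empty surface; skip them and punctuation.
            if node.surface and node.surface not in stop_words:
                surfaces.append(node.surface)
            node = node.next
        if surfaces:
            output_rows.append([sentence, ' '.join(surfaces)])

    # Write rows of [original sentence, space-joined tokens].
    with open(csv_export_path, 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(output_rows)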