# --- Build the city-spots corpus and persist it as pickle + JSON ---
# Reads spots data (default: ../data/spots.json relative to this script),
# builds a per-city summary, pickles the corpus document, and writes a
# UTF-8 JSON summary next to the input file.

# load tours data
path = args.path
if not path:
    path = os.path.join(os.path.dirname(__file__), "../data/spots.json")
cityspots = CitySpots.load(path)

# create city information data: one {"id", "spots"} record per city
# (comprehension instead of manual append loop; `j` no longer shadowed below)
cityspotsj = [
    {
        "id": cs.city.code,
        "spots": [s.code for s in cs.spots],
    }
    for cs in cityspots
]

# save as document (pickled corpus next to the input file)
cityspots_doc = CitySpots.to_doc(cityspots)
print("show city spots corpus")
cityspots_doc.show_vocab(limit=20)
doc_path = os.path.join(os.path.dirname(path), "cityspots_doc.pickle")
p = PickleResource(doc_path)
p.save(cityspots_doc)

# save as json file; write bytes explicitly so the file is always UTF-8
# regardless of platform default encoding
json_text = json.dumps(cityspotsj, indent=2, ensure_ascii=False)
data_path = os.path.join(os.path.dirname(path), "cityspots.json")
with open(data_path, "wb") as f:
    f.write(json_text.encode("utf-8"))
# --- Trim the vocabulary of a pickled corpus document ---
# Loads the doc, applies optional frequency / document-ratio cuts, removes
# fixed part-of-speech classes and an optional ignore-word list, prints the
# resulting vocabulary, and optionally saves a "*_edited.pickle" copy.
resource = PickleResource(path)
doc = resource.load()

if args.freq > 0:
    doc.cut_frequent(args.freq)

# drop verbs/adverbs and name/region/suffix sub-classes (Japanese POS tags)
doc.cut_pos({
    "pos": ["動詞", "副詞"],
    "class1": ["接尾", "副詞可能"],
    "class2": ["人名", "地域", "副詞可能"],
})

if args.under > 0:
    doc.cut_under(args.under)
if args.above > 0:
    doc.cut_above(args.above)

if args.ignore:
    # ignore-list file lives next to the input path; first column is the word
    ignore_resource = FileResource(os.path.join(os.path.dirname(path), args.ignore))
    for row in ignore_resource.load():
        doc.remove_vocab(row[0])

doc.show_vocab(show_pos=True)

if args.save:
    base = os.path.splitext(os.path.basename(path))[0]
    edited_path = os.path.join(os.path.dirname(path), "./" + base + "_edited.pickle")
    PickleResource(edited_path).save(doc)