def ttl_to_txt(cli, args):
    """Dump the text of every sentence in a TTL document, optionally to a file.

    Reads the TTL doc at ``args.path`` (format ``args.ttl_format``) and, when
    ``args.output`` is given, writes one sentence per line to that path.
    """
    ttl_doc = read_ttl(args.path, ttl_format=args.ttl_format)
    print("In doc: {} | Sentences: {}".format(args.path, len(ttl_doc)))
    sent_texts = [sentence.text for sentence in ttl_doc]
    if args.output:
        chio.write_file(args.output, '\n'.join(sent_texts))
        print("Written {} lines to {}".format(len(sent_texts), args.output))
    print("Done")
def _ensure_config(config_path='~/.jamdict/config.json', mkdir=True):
    """Ensure a Jamdict configuration file exists at *config_path*.

    If the file is missing, a fresh one is generated from ``CONFIG_TEMPLATE``.

    :param config_path: location of the config file (``~`` is expanded)
    :param mkdir: when True, create the containing directory if needed
    """
    _path = Path(os.path.expanduser(config_path))
    # auto create config dir
    if mkdir:
        # parents=True so a custom config_path nested inside a not-yet-existing
        # directory tree does not make mkdir() raise FileNotFoundError
        _path.parent.mkdir(parents=True, exist_ok=True)
    if not _path.exists():
        _logger = logging.getLogger(__name__)  # bind once; used twice below
        default_config = read_file(CONFIG_TEMPLATE)
        _logger.warning(
            f"Jamdict configuration file could not be found. A new configuration file will be generated at {_path}"
        )
        _logger.debug(f"Default config: {default_config}")
        write_file(_path, default_config)
def test_io_with_pathlib(self):
    """Verify that chio I/O helpers accept pathlib.Path objects."""
    print("Make sure that io functions works with pathlib.Path")
    rows = [['name', 'foo'], ['age', '18']]
    # round-trip through a JSON text file
    temp_json = Path(TEST_DATA) / 'temp.json'
    chio.write_file(temp_json, json.dumps(rows))
    self.assertEqual(json.loads(chio.read_file(temp_json)), rows)
    # round-trip through a TSV file
    temp_csv = Path(TEST_DATA) / 'temp.csv'
    chio.write_tsv(temp_csv, rows)
    self.assertEqual(chio.read_tsv(temp_csv), rows)
def read_config():
    """Return the jamdict configuration, generating a default config file when none is found."""
    found = __app_config.config or __app_config.locate_config()
    if not found:
        # no config anywhere -> generate one from the bundled template
        _confdir = os.path.expanduser('~/.jamdict/')
        if not os.path.exists(_confdir):
            os.makedirs(_confdir)
        _cfg_loc = os.path.join(_confdir, 'config.json')
        _default = read_file(CONFIG_TEMPLATE)
        getLogger().warning("Jamdict configuration file could not be found. A new configuration file will be generated at {}".format(_cfg_loc))
        getLogger().debug("Default config: {}".format(_default))
        write_file(_cfg_loc, _default)
    # the actual reading (and caching) is done by the AppConfig object
    return __app_config.config
def save(self, path, encoding='utf-8', xml_declaration=None,
         default_namespace=None, short_empty_elements=True, *args, **kwargs):
    """Serialize this ELAN document and write it out as an EAF file.

    All serialization options are forwarded to :meth:`to_xml_bin`.
    """
    _xml_bytes = self.to_xml_bin(
        encoding=encoding,
        xml_declaration=xml_declaration,
        default_namespace=default_namespace,
        short_empty_elements=short_empty_elements,
        *args, **kwargs)
    chio.write_file(path, _xml_bytes, encoding=encoding)
def test_file_rw(self):
    """Verify chio read/write round-trips for plain-text and gzip files."""
    plain_path = os.path.join(TEST_DATA, 'test.txt')
    gz_path = os.path.join(TEST_DATA, 'test.txt.gz')
    txt = 'ユニコード大丈夫だよ。'
    txtz = 'This is a zipped text file.'
    # write one file in binary mode, and one via the .gz path (auto-gzipped)
    chio.write_file(content=txt, mode='wb', path=plain_path)
    chio.write_file(gz_path, content=txtz)
    # the .gz path must have produced a genuine gzip file
    with gzip.open(gz_path, mode='rt') as infile:
        self.assertEqual(infile.read(), txtz)
    # verify written content with default, text, and binary read modes
    for _p in (plain_path, gz_path):
        self.assertTrue(chio.is_file(_p))
    self.assertEqual(chio.read_file(plain_path), txt)
    self.assertEqual(chio.read_file(gz_path), txtz)
    self.assertEqual(chio.read_file(plain_path, mode='r'), txt)
    self.assertEqual(chio.read_file(gz_path, mode='r'), txtz)
    self.assertIsInstance(chio.read_file(plain_path, mode='rb'), bytes)
    self.assertIsInstance(chio.read_file(gz_path, mode='rb'), bytes)
def patch_sids(cli, args):
    """Patch sentence identifiers in a gold MRS document.

    When an ID file (``args.idfile``) is given, each line must be
    ``<ident><TAB><text>``; sentence texts are verified against the MRS
    document before idents are patched in (``args.both`` additionally
    patches the sentence ID).  Without an ID file, ``patch_gold_sid()``
    is used instead.  The patched document is written to ``args.output``
    when given, otherwise printed to stdout.
    """
    if args.gold:
        print("Gold MRS file: {}".format(args.gold))
        sent_ids = []
        if args.idfile:
            print("ID file: {}".format(args.idfile))
            idlines = chio.read_file(args.idfile).splitlines()
            for line in idlines:
                idx, text = line.split('\t', maxsplit=1)
                sent_ids.append((idx, text))
            print("Found {} sentences in ID file".format(len(sent_ids)))
        sents = Document.from_file(args.gold)
        if sent_ids:
            if len(sent_ids) != len(sents):
                # NOTE(review): only a warning — the zip() below silently
                # truncates to the shorter sequence; confirm this is intended
                print("Wrong sent ID files - Found ID: {} | Found MRS: {}".format(len(sent_ids), len(sents)))
            print("Verifying sentences' text")
            for ((sid, stext), mrs_sent) in zip(sent_ids, sents):
                if stext and stext != mrs_sent.text:
                    print("Invalid sentence text: sentID: {} | {} <> {}".format(sid, stext, mrs_sent.text))
                    # was exit(): that helper comes from the site module and may be
                    # unavailable (frozen apps, -S); raise SystemExit directly instead
                    raise SystemExit()
            print("Sentences are verified, proceed to patch sent idents")
            for ((sid, stext), mrs_sent) in zip(sent_ids, sents):
                mrs_sent.ident = sid
                if args.both:
                    mrs_sent.ID = sid
        else:
            # no ID file: derive sentence idents from the gold document itself
            patch_gold_sid(sents)
        if args.output:
            print("Sentence idents are patched, writing to output XML file to: {}...".format(args.output))
            chio.write_file(args.output, sents.to_xml_str())
        else:
            print(sents.to_xml_str())
        print("Done")
    else:
        print("No document to patch")
"./test_data/processed/test.m4a") # ----------------------------------------------------------------------------- # cutting audio file by timestamps # ----------------------------------------------------------------------------- media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_before_10.ogg", to_ts="00:00:10") media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_after_10.ogg", from_ts="00:00:15") media.cut("./test_data/processed/test.m4a", "./test_data/processed/test_10-15.ogg", from_ts="00:00:10", to_ts="00:00:15") # -------------------------------------------------------------------------------------------- # More complex use case # Read an ELAN transcription file and: # 1. Cut all utterances into separated ogg files # 2. Write annotation text into separated text files # 3. Write all utterances into a CSV file with annotation IDs and individual audio filenames # -------------------------------------------------------------------------------------------- eaf = elan.read_eaf("./test_data/fables_01_03_aesop_64kb.eaf") csv_rows = [["annID", "Text", "Filename"]] for ann in eaf["Story"]: csv_rows.append([ann.ID, ann.text, f"test_{ann.ID}.ogg"]) chio.write_file(f"./test_data/processed/test_{ann.ID}.txt", ann.text) eaf.cut(ann, f"./test_data/processed/test_{ann.ID}.ogg") chio.write_csv("./test_data/processed/test_sentences.csv", csv_rows)