Esempio n. 1
0
 def _convert_transcription(self, in_file, out_file):
     """Rewrite a ``fileid|word`` listing as a phone transcription file.

     Every line of ``in_file`` is split on ``|`` into a file id and a word.
     The word is passed to ``text.word2phone`` and the result is emitted as
     ``<s> PHONES </s> (fileid)``. Output lines are joined with newlines
     (no trailing newline) and written to ``out_file``.

     Args:
         in_file: Path of the input listing, one ``fileid|word`` per line.
         out_file: Path of the transcription file to (over)write.

     Raises:
         ValueError: If a line does not contain exactly one ``|``.
     """
     # Context managers close the handles deterministically; the original
     # left both files open until garbage collection.
     with open(in_file) as src:
         lines = src.read().splitlines()

     output = []
     for line in lines:
         fileid, word = line.split("|")
         phone = text.word2phone(word)
         output.append("<s> {} </s> ({})".format(phone, fileid))

     # The output is opened only after every line converted successfully,
     # so a failure above never truncates an existing out_file.
     with open(out_file, "w") as dst:
         dst.write("\n".join(output))
 def _make_cleaned_text(self):
     """Write the phone-level text file ``<tmp_folder>/etc/text``.

     Reads the first ``N`` lines of ``<corpus_folder>/train/text``
     (``N`` is a name defined outside this method), splits each line on
     ``|`` into a file id and a word, converts the word with
     ``text.word2phone`` and writes one ``<s> PHONES </s>`` line per
     input line. The file id is not included in the output. Lines are
     joined with newlines (no trailing newline).

     Raises:
         ValueError: If a line does not contain exactly one ``|``.
     """
     in_file = "{}/train/text".format(self.corpus_folder)
     out_file = "{}/etc/text".format(self.tmp_folder)

     # Context managers close the handles deterministically; the original
     # left both files open until garbage collection.
     with open(in_file) as src:
         lines = src.read().splitlines()[:N]

     output = []
     for line in lines:
         # The file id is unpacked only to enforce the two-field format.
         _fileid, word = line.split("|")
         phone = text.word2phone(word)
         output.append("<s> {} </s>".format(phone))

     with open(out_file, "w") as dst:
         dst.write("\n".join(output))
 def _make_dictionary(self):
     """Build ``tmp.dic`` and ``tmp.phone`` under ``<tmp_folder>/etc``.

     Reads the first ``N`` lines of ``<corpus_folder>/train/text``
     (``N`` is a name defined outside this method). Each line is split on
     ``|`` into a file id and a word; the word is converted with
     ``text.word2phone`` and the result is split on whitespace into tokens.

     * ``tmp.dic`` gets one line per distinct token, sorted: the token
       padded to a 20-column field, followed by the token's characters
       separated by single spaces.
     * ``tmp.phone`` gets the sorted set of all characters that occur in
       the tokens, one per line, followed by ``SIL``.

     Neither file ends with a trailing newline.

     Raises:
         ValueError: If a line does not contain exactly one ``|``.
     """
     in_file = "{}/train/text".format(self.corpus_folder)
     # Context managers close the handles deterministically; the original
     # left all three files open until garbage collection.
     with open(in_file) as src:
         lines = src.read().splitlines()[:N]

     # Collect the distinct tokens produced for the whole corpus slice.
     phones = set()
     for line in lines:
         _fileid, word = line.split("|")
         phones.update(text.word2phone(word).split())

     # create .dic files
     dic_lines = []
     phone_units = set()
     for p in sorted(phones):
         units = list(p)
         phone_units.update(units)
         # 19-column pad plus an explicit space is byte-identical to the
         # former "{:20s}{}" for tokens up to 19 characters, and still
         # separates token and units when the token is 20+ characters.
         dic_lines.append("{:<19s} {}".format(p, " ".join(units)))

     with open("{}/etc/tmp.dic".format(self.tmp_folder), "w") as dic_file:
         dic_file.write("\n".join(dic_lines))

     # "SIL" is appended after sorting so it is always the last entry.
     unit_lines = sorted(phone_units)
     unit_lines.append("SIL")
     with open("{}/etc/tmp.phone".format(self.tmp_folder), "w") as phone_file:
         phone_file.write("\n".join(unit_lines))