Esempio n. 1
0
   labs = io.load_txt_dir(args.txtdir, args.comma_is_pause)
   if args.dict == None:
     raise SiReError("No path to dictionary. Please use -dict option.")
   args.dictionary = dictionary.Dictionary(args.dict[1], args.dict[0])
   #The phoneme set used must match the dictionary.
   args.phoneme_features = args.dictionary.phoneme_feats
 elif args.intype == "hts_lab":
   labs = io.open_labdir_line_by_line(args.inpath)
   args.intype = "hts_mlf"
 elif args.intype == "sire_lab":
   labs = io.open_labdir_line_by_line(args.inpath)
 else:
   if not os.path.exists(args.inpath):
     raise SiReError("Input path to mlf does no exist!")
   mlf = open(args.inpath, "r").readlines()
   labs = io.parse_mlf(mlf, args.intype)
 
 #Used if we make questions fitted to a dataset
 #We use qfile as the path to the file later.
 if args.questions == True:
   parser.add_argument('-qfile', type=str, help="A variable used to store the question file.", default=None)
   args.qfile = open(args.qpath, "w")
   parser.add_argument('-qfileutt', type=str, help="A variable used to store the GV question file.", default=None)
   args.qfileutt = open(args.qpath+"_utt", "w")
 
 for lab in labs:
   print "Making full context label for {0}".format(lab[0])
   #Make an utt
   utt = utterance.Utterance(lab, args)
   #This writes out the label and also the questions
   write_context_utt(utt, args)
Esempio n. 2
0
        nargs=argparse.REMAINDER,
        default='-order 4 -interpolate -gt3min 1 -wbdiscount -debug 3'.split())
    parser.add_argument(
        '-f',
        action='store_true',
        help="Force overwrite of outputpath file if it exists.")
    parser.add_argument(
        '-no_syll_stress',
        action='store_true',
        help="Replace syllable stress markers with a boundary marker sb.")
    args = parser.parse_args()

    wf = io.open_writefile_safe(os.path.join(args.outpath, "sents.txt"),
                                args.f)

    labs = io.parse_mlf(io.open_file_line_by_line(args.input_mlf), "align_mlf")

    labs = get_phoneme_strings(labs, args.no_syll_stress)

    for lab in labs:
        wf.write(" ".join(lab) + "\n")
    wf.close()

    txtpath = os.path.join(args.outpath, "sents.txt")

    lmpath = os.path.join(args.outpath, "ngram.lm")

    #This allows for people to pass their own options to the ngram binary
    options = " " + " ".join(args.lm_binary_options)
    subprocess.call(args.ngram_binary_path + " -text " + txtpath + " -lm " +
                    lmpath + options,
Esempio n. 3
0
        '-collapse_closure',
        action="store_true",
        help=
        "Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs."
    )
    parser.add_argument('-f',
                        action="store_true",
                        help="Force overwrite of files in output dir.")
    args = parser.parse_args()

    if args.merge_hvite_state_with_sp_align_mlf != None:
        state_mlf = sire_io.open_file_line_by_line(
            args.merge_hvite_state_with_sp_align_mlf[0])
        phone_mlf = sire_io.open_file_line_by_line(
            args.merge_hvite_state_with_sp_align_mlf[1])
        state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf")
        phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf")
        merged_utts = merge_hvite_state_with_sp_align_mlf(
            state_utts, phone_utts)
        if args.f == True:
            wf = sire_io.open_writefile_safe(
                args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True)
        else:
            wf = sire_io.open_writefile_safe(
                args.merge_hvite_state_with_sp_align_mlf[2])
        wf.write("#!MLF!#\n")
        for utt in merged_utts:
            wf.write("\"*/" + utt.pop(0) + ".rec\"\n")
            for phone in utt:
                for state in phone:
                    wf.write(" ".join(state) + "\n")
Esempio n. 4
0
            raise SiReError("No path to dictionary. Please use -dict option.")
        args.dictionary = dictionary.Dictionary(args.dict[1], args.dict[0])
        #The phoneme set used must match the dictionary.
        args.phoneme_features = args.dictionary.phoneme_feats
    elif args.intype == "hts_lab":
        labs = io.open_labdir_line_by_line(args.inpath)
        # print "This is a lab", len(labs[0])
        # labs is a list of lists; each inner list is one of the labs.
        args.intype = "hts_mlf"
    elif args.intype == "sire_lab":
        labs = io.open_labdir_line_by_line(args.inpath)
    else:
        if not os.path.exists(args.inpath):
            raise SiReError("Input path to mlf does no exist!")
        mlf = open(args.inpath, "r").readlines()
        labs = io.parse_mlf(mlf, args.intype)

    #Used if we make questions fitted to a dataset
    #We use qfile as the path to the file later.
    if args.questions == True:
        parser.add_argument('-qfile',
                            type=str,
                            help="A variable used to store the question file.",
                            default=None)
        args.qfile = open(args.qpath, "w")
        parser.add_argument(
            '-qfileutt',
            type=str,
            help="A variable used to store the GV question file.",
            default=None)
        args.qfileutt = open(args.qpath + "_utt", "w")
Esempio n. 5
0
        s_lab_count += 1
    merged.append(c_merge)
  return merged

if __name__ == "__main__":
  # Command-line entry point. Parses the options below and, when
  # -merge_hvite_state_with_sp_align_mlf is given, merges the state-level MLF
  # with the phone-level MLF and writes the result as a new MLF.
  parser = argparse.ArgumentParser(description='Utility file convertion related methods.')
  parser.add_argument('-merge_hvite_state_with_full_context', nargs=3, help="Merge an HVite state level alignment MLF with full-context labels in a directory and output state-level full-context labels to another.", metavar=('mlf_path', 'lab_dir', 'out_dir'))
  parser.add_argument('-merge_hvite_state_with_sp_align_mlf', nargs=3, help="Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.", metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path'))
  parser.add_argument('-collapse_closure', action="store_true", help="Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs.")
  parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.")
  args = parser.parse_args()

  if args.merge_hvite_state_with_sp_align_mlf is not None:
    # The option carries exactly three values (nargs=3), in metavar order.
    state_mlf_path, phone_mlf_path, out_mlf_path = args.merge_hvite_state_with_sp_align_mlf

    # Read both inputs before parsing either one.
    state_mlf = sire_io.open_file_line_by_line(state_mlf_path)
    phone_mlf = sire_io.open_file_line_by_line(phone_mlf_path)
    state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf")
    phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf")
    merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts)

    # Only pass overwrite when -f was given; otherwise the helper's own
    # default overwrite behaviour applies.
    if args.f:
      wf = sire_io.open_writefile_safe(out_mlf_path, overwrite=True)
    else:
      wf = sire_io.open_writefile_safe(out_mlf_path)

    # Close the output file even if a write fails part-way through.
    try:
      wf.write("#!MLF!#\n")
      for utt in merged_utts:
        # The first element of each utterance is its name; the remaining
        # elements are phones, each a sequence of states.
        utt_name = utt.pop(0)
        wf.write("\"*/"+utt_name+".rec\"\n")
        for phone in utt:
          for state in phone:
            # Each state is a sequence of strings, written space-separated.
            wf.write(" ".join(state)+"\n")
        # A line holding a single "." terminates the utterance entry.
        wf.write(".\n")
    finally:
      wf.close()