Example #1
0
def write_word_score_files(scores, args):
    """Split per-sentence word scores and write one ``.scored`` file per txt file.

    ``scores`` is a single string whose entries are separated by blank lines.
    The last entry (the overall stats) is discarded. From every remaining
    entry the first line and the last three lines are dropped, and each kept
    line is reduced to ``"<second field> <second-to-last field>"``.

    Entries are paired with the ``.txt`` files in ``args.txtdir`` purely by
    position in ``os.listdir`` order, so the directory must be unchanged
    since scoring. Each output file is created in ``args.outdirpath`` with
    the ``txt`` suffix replaced by ``scored``; ``args.f`` is handed to
    ``io.open_writefile_safe`` as its second argument.

    Args:
        scores: Scorer output as one string.
        args: Object exposing ``txtdir``, ``outdirpath`` and ``f``.

    Raises:
        IndexError: If a kept line has fewer than two whitespace-separated
            fields, or there are more ``.txt`` files than scored entries.
    """
    # First split on empty lines and then split on lines.
    entries = [entry.split("\n") for entry in scores.split("\n\n")]

    # Pop the overall stats.
    entries.pop(-1)

    # Take away unnecessaries and fix each line in each entry.
    word_scores = []
    for entry in entries:
        kept = []
        for line in entry[1:-3]:
            fields = line.split()
            kept.append(fields[1] + " " + fields[-2])
        word_scores.append(kept)

    # Match up each scored sent with a txt file.
    # This relies on positions when listing txt dir, so it will fail if
    # txtdir has been modified since scoring.
    # Could also rely on content <- safer see TODO.
    entry_index = 0
    for name in os.listdir(args.txtdir):
        # Test the real suffix: a substring match would also accept names
        # such as "a.txt.bak", which would consume a score entry and produce
        # a mangled output name.
        if not name.endswith(".txt"):
            continue
        wf = io.open_writefile_safe(
            os.path.join(args.outdirpath, name[:-3] + "scored"), args.f)
        try:
            for line in word_scores[entry_index]:
                wf.write(line + "\n")
        finally:
            # Release the handle even if a write or the entry lookup fails.
            wf.close()
        entry_index += 1
Example #2
0
def write_word_score_files(scores, args):
    """Write the per-word scores of each scored sentence to its own file.

    The ``scores`` string is split into entries on blank lines and the final
    entry (overall stats) is thrown away. Within each entry the first line
    and the last three lines are ignored; every other line is split on
    whitespace and reduced to its second and second-to-last fields, joined
    by one space.

    The n-th entry is written for the n-th ``.txt`` file returned by
    ``os.listdir(args.txtdir)``, so the pairing is positional and the
    directory must not have changed since scoring. The output file lives in
    ``args.outdirpath`` and carries the txt file's name with ``txt``
    replaced by ``scored``. ``args.f`` is forwarded as the second argument of
    ``io.open_writefile_safe``.

    Args:
        scores: Scorer output as one string.
        args: Object exposing ``txtdir``, ``outdirpath`` and ``f``.

    Raises:
        IndexError: If a kept line has fewer than two fields, or a ``.txt``
            file has no corresponding scored entry.
    """
    # First split on empty lines and then split on lines.
    blocks = [block.split("\n") for block in scores.split("\n\n")]

    # Pop the overall stats.
    blocks.pop(-1)

    # Take away unnecessaries, then keep only "<field 1> <field -2>" per line.
    per_sentence = []
    for block in blocks:
        reduced = []
        for raw in block[1:-3]:
            parts = raw.split()
            reduced.append(parts[1] + " " + parts[-2])
        per_sentence.append(reduced)

    # Match up each scored sent with a txt file.
    # This relies on positions when listing txt dir.
    # Will fail if txtdir has been modified since scoring.
    # Could also rely on content <- safer see TODO.
    position = 0
    for filename in os.listdir(args.txtdir):
        # Only genuine ".txt" suffixes count; a plain substring test would
        # let e.g. "a.txt.bak" steal a score entry and shift every later
        # pairing by one.
        if filename.endswith(".txt"):
            out_path = os.path.join(args.outdirpath, filename[:-3] + "scored")
            wf = io.open_writefile_safe(out_path, args.f)
            try:
                for row in per_sentence[position]:
                    wf.write(row + "\n")
            finally:
                # Always close, including when the entry lookup raises.
                wf.close()
            position += 1
Example #3
0
def combine_txt(indirpath, outfilepath, overwrite=False):
    """Combine the entries loaded from a txt directory into a single file.

    Every item returned by ``io.load_txt_dir(indirpath)`` becomes one output
    line made of the item's elements from index 1 onwards, joined by single
    spaces. Element 0 is dropped (presumably the source file's identifier;
    confirm against ``load_txt_dir``).

    Args:
        indirpath: Directory handed to ``io.load_txt_dir``.
        outfilepath: Path handed to ``io.open_writefile_safe``.
        overwrite: Forwarded to ``io.open_writefile_safe`` as its second
            argument.
    """
    lines = io.load_txt_dir(indirpath)

    wf = io.open_writefile_safe(outfilepath, overwrite)
    try:
        for line in lines:
            wf.write(" ".join(line[1:]) + "\n")
    finally:
        # Close the handle even when a write fails part-way through.
        wf.close()
Example #4
0
def create_lattices_and_list(txtlist, outdirpath, dictionary, overwrite=False):
    """Write one phoneme SLF lattice per entry plus a ``lattices.list`` index.

    For each ``txt`` in ``txtlist``, ``txt[0]`` names the output file
    (``<txt[0]>.phoneme_slf`` inside ``outdirpath``) and ``txt[1:]`` is
    passed to ``lattice_tools.make_phoneme_slf`` together with
    ``dictionary``. Once every lattice has been written, their paths are
    written one per line to ``lattices.list`` in ``outdirpath``.

    Args:
        txtlist: Iterable of sequences; element 0 is the output base name,
            the remaining elements are the lattice input.
        outdirpath: Directory receiving the lattices and the list file.
        dictionary: Passed through to ``lattice_tools.make_phoneme_slf``.
        overwrite: Forwarded to ``io.open_writefile_safe`` for every file
            written here, including ``lattices.list``.
    """
    path_list = []
    for txt in txtlist:
        path = os.path.join(outdirpath, txt[0] + ".phoneme_slf")
        # Make the slf
        slf = lattice_tools.make_phoneme_slf(txt[1:], dictionary, pronoun_variant=True, no_syll_stress=True)
        # Write it out
        wf = io.open_writefile_safe(path, overwrite)
        try:
            for l in slf:
                wf.write(l)
        finally:
            # Do not leak the handle if a write fails.
            wf.close()
        # Everything has gone well so we add the path
        path_list.append(path)
    # Write out the path file. It honours ``overwrite`` like the lattices do;
    # previously the flag was dropped here, so the index file was opened with
    # the helper's default even when the caller asked for overwriting.
    wf = io.open_writefile_safe(os.path.join(outdirpath, "lattices.list"), overwrite)
    try:
        for p in path_list:
            wf.write(p + "\n")
    finally:
        wf.close()
Example #5
0
def combine_txt(indirpath, outfilepath, overwrite=False):
    """Flatten a directory of txt entries into one line-per-entry file.

    ``io.load_txt_dir(indirpath)`` supplies the entries. For each entry,
    everything after its first element is joined with single spaces and
    written as one line to ``outfilepath``. The first element is skipped
    (presumably an identifier for the source file; verify in
    ``load_txt_dir``).

    Args:
        indirpath: Directory handed to ``io.load_txt_dir``.
        outfilepath: Path handed to ``io.open_writefile_safe``.
        overwrite: Forwarded to ``io.open_writefile_safe`` as its second
            argument.
    """
    entries = io.load_txt_dir(indirpath)

    wf = io.open_writefile_safe(outfilepath, overwrite)
    try:
        for entry in entries:
            wf.write(" ".join(entry[1:]) + "\n")
    finally:
        # Guarantee the output handle is released if a write raises.
        wf.close()
Example #6
0
def create_lattices_and_list(txtlist, outdirpath, dictionary, overwrite=False):
    """Generate phoneme SLF lattices and an index file listing their paths.

    Each ``txt`` in ``txtlist`` yields one file named
    ``<txt[0]>.phoneme_slf`` in ``outdirpath``; its content is whatever
    ``lattice_tools.make_phoneme_slf`` returns for ``txt[1:]`` and
    ``dictionary``, written item by item. A path is recorded only after its
    lattice was written, and all recorded paths are finally written, one per
    line, to ``lattices.list`` in ``outdirpath``.

    Args:
        txtlist: Iterable of sequences; element 0 is the output base name,
            the remaining elements are the lattice input.
        outdirpath: Directory receiving the lattices and the list file.
        dictionary: Passed through to ``lattice_tools.make_phoneme_slf``.
        overwrite: Forwarded to ``io.open_writefile_safe`` for every file
            written here, including ``lattices.list``.
    """
    path_list = []
    for txt in txtlist:
        path = os.path.join(outdirpath, txt[0] + ".phoneme_slf")
        # Make the slf
        slf = lattice_tools.make_phoneme_slf(txt[1:],
                                             dictionary,
                                             pronoun_variant=True,
                                             no_syll_stress=True)
        # Write it out
        wf = io.open_writefile_safe(path, overwrite)
        try:
            for l in slf:
                wf.write(l)
        finally:
            # Close even on a failed write so the handle is not leaked.
            wf.close()
        # Everything has gone well so we add the path
        path_list.append(path)
    # Write out the path file, passing ``overwrite`` along. The flag used to
    # be omitted here, so the index file did not follow the caller's choice
    # the way the lattice files did.
    wf = io.open_writefile_safe(os.path.join(outdirpath, "lattices.list"),
                                overwrite)
    try:
        for p in path_list:
            wf.write(p + "\n")
    finally:
        wf.close()
Example #7
0
        type=str,
        help=
        "Additional arguments to be sent to the ngram binary as options. Overwrites the defaults options: -order 4 -interpolate -gt3min 1 -wbdiscount -debug 3",
        nargs=argparse.REMAINDER,
        default='-order 4 -interpolate -gt3min 1 -wbdiscount -debug 3'.split())
    parser.add_argument(
        '-f',
        action='store_true',
        help="Force overwrite of outputpath file if it exists.")
    parser.add_argument(
        '-no_syll_stress',
        action='store_true',
        help="Replace syllable stress markers with a boundary marker sb.")
    args = parser.parse_args()

    wf = io.open_writefile_safe(os.path.join(args.outpath, "sents.txt"),
                                args.f)

    labs = io.parse_mlf(io.open_file_line_by_line(args.input_mlf), "align_mlf")

    labs = get_phoneme_strings(labs, args.no_syll_stress)

    for lab in labs:
        wf.write(" ".join(lab) + "\n")
    wf.close()

    txtpath = os.path.join(args.outpath, "sents.txt")

    lmpath = os.path.join(args.outpath, "ngram.lm")

    #This allows for people to pass their own options to the ngram binary
    options = " " + " ".join(args.lm_binary_options)
Example #8
0
  # Build the output MLF in memory as a list of lines, starting with the
  # MLF header line.
  mlf = ["#!MLF!#\n"]
  
  for slf in slfs:
    # Substring test: any name containing ".slf" is treated as a lattice.
    if ".slf" in slf:
      # Run the lattice tool on this lattice with the language model and
      # capture its output (stderr is merged into stdout).
      # NOTE(review): the command is assembled by string concatenation and
      # executed with shell=True, so paths or options containing shell
      # metacharacters are interpreted by the shell.
      # NOTE(review): on Python 3 check_output returns bytes unless text
      # mode is requested, which would make the str concatenations below
      # fail; presumably written for Python 2 -- confirm.
      bestpath = subprocess.check_output(args.latticetoolpath+" -in-lattice "+os.path.join(args.inpath, slf)+" -lm "+args.lmpath+" "+args.options, stderr=subprocess.STDOUT, shell=True)
      bestpath = bestpath.split()
      # The first token of the output names the utterance; the remaining
      # tokens are the symbols along the path.
      mlf.append("\"*/"+bestpath.pop(0)+".rec\"\n")
      # Timestamps are fabricated: each ordinary symbol gets a 10000-unit
      # span, while markers and boundaries get zero duration.
      faketime = 0
      for p in bestpath:
        if p == "<s>" or p == "</s>":
          # Sentence start/end tokens are not written to the MLF.
          pass
        elif p in [".", "sp"]:
          mlf.append(str(faketime)+" "+str(faketime)+" "+p+" 0.0 "+p+"\n")
        elif p in ["#1", "#2"]:
          # We first add the stress marker and then a "." to mark the boundary.
          # Without the dot load_utterance from align mlf will not detect the syllable boundary.
          mlf.append(str(faketime)+" "+str(faketime)+" "+p+" 0.0 "+p+"\n")
          mlf.append(str(faketime)+" "+str(faketime)+" . 0.0 .\n")
        else:
          mlf.append(str(faketime)+" "+str(faketime+10000)+" "+p+" 0.0 "+p+"\n")
          faketime+=10000
      # A lone "." line terminates this utterance's entry.
      mlf.append(".\n")
  
  
  # Write the accumulated lines out; no overwrite argument is passed, so the
  # helper's default applies.
  wf = sire_io.open_writefile_safe(args.outmlfpath)
  
  for l in mlf:
    wf.write(l)
  
  wf.close()
Example #9
0
            mlf.append("\"*/" + bestpath.pop(0) + ".rec\"\n")
            faketime = 0
            for p in bestpath:
                if p == "<s>" or p == "</s>":
                    pass
                elif p in [".", "sp"]:
                    mlf.append(
                        str(faketime) + " " + str(faketime) + " " + p +
                        " 0.0 " + p + "\n")
                elif p in ["#1", "#2"]:
                    # We first add the stress marker and then a "." to mark the boundary.
                    # Without the dot load_utterance from align mlf will not detect the syllable boundary.
                    mlf.append(
                        str(faketime) + " " + str(faketime) + " " + p +
                        " 0.0 " + p + "\n")
                    mlf.append(
                        str(faketime) + " " + str(faketime) + " . 0.0 .\n")
                else:
                    mlf.append(
                        str(faketime) + " " + str(faketime + 10000) + " " + p +
                        " 0.0 " + p + "\n")
                    faketime += 10000
            mlf.append(".\n")

    wf = sire_io.open_writefile_safe(args.outmlfpath)

    for l in mlf:
        wf.write(l)

    wf.close()
Example #10
0
    parser.add_argument('-f',
                        action="store_true",
                        help="Force overwrite of files in output dir.")
    args = parser.parse_args()

    # Merge mode: the option carries three values, used below as
    # [0] the MLF parsed as "state_align_mlf", [1] the MLF parsed as
    # "hts_mlf", and [2] the path of the merged MLF to write.
    if args.merge_hvite_state_with_sp_align_mlf != None:
        state_mlf = sire_io.open_file_line_by_line(
            args.merge_hvite_state_with_sp_align_mlf[0])
        phone_mlf = sire_io.open_file_line_by_line(
            args.merge_hvite_state_with_sp_align_mlf[1])
        state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf")
        phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf")
        merged_utts = merge_hvite_state_with_sp_align_mlf(
            state_utts, phone_utts)
        # With -f the output is opened with overwrite=True; otherwise the
        # helper's default overwrite behaviour applies.
        if args.f == True:
            wf = sire_io.open_writefile_safe(
                args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True)
        else:
            wf = sire_io.open_writefile_safe(
                args.merge_hvite_state_with_sp_align_mlf[2])
        wf.write("#!MLF!#\n")
        for utt in merged_utts:
            # The first element of each merged utterance is its name; pop(0)
            # removes it so that only the phones remain to iterate over.
            wf.write("\"*/" + utt.pop(0) + ".rec\"\n")
            for phone in utt:
                # Each state is a sequence of strings, written as one
                # space-separated line.
                for state in phone:
                    wf.write(" ".join(state) + "\n")
            # A lone "." line closes the utterance entry.
            wf.write(".\n")
        wf.close()

    if args.merge_hvite_state_with_full_context != None:
        full_context_labs = sire_io.open_labdir_line_by_line(
            args.merge_hvite_state_with_full_context[1])
Example #11
0
if __name__ == "__main__":
  parser = argparse.ArgumentParser(description='Utility file convertion related methods.')
  parser.add_argument('-merge_hvite_state_with_full_context', nargs=3, help="Merge an HVite state level alignment MLF with full-context labels in a directory and output state-level full-context labels to another.", metavar=('mlf_path', 'lab_dir', 'out_dir'))
  parser.add_argument('-merge_hvite_state_with_sp_align_mlf', nargs=3, help="Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.", metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path'))
  parser.add_argument('-collapse_closure', action="store_true", help="Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs.")
  parser.add_argument('-f', action="store_true", help="Force overwrite of files in output dir.")
  args = parser.parse_args()
  
  # Merge mode: the three option values are used below as [0] the MLF parsed
  # as "state_align_mlf", [1] the MLF parsed as "hts_mlf", and [2] the path
  # of the merged MLF to write.
  if args.merge_hvite_state_with_sp_align_mlf != None:
    state_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[0])
    phone_mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_sp_align_mlf[1])
    state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf")
    phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf")
    merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts)
    # With -f the output is opened with overwrite=True; otherwise the
    # helper's default overwrite behaviour applies.
    if args.f == True:
      wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True)
    else:
      wf = sire_io.open_writefile_safe(args.merge_hvite_state_with_sp_align_mlf[2])
    wf.write("#!MLF!#\n")
    for utt in merged_utts:
      # The first element of each merged utterance is its name; pop(0)
      # removes it so that only the phones remain to iterate over.
      wf.write("\"*/"+utt.pop(0)+".rec\"\n")
      for phone in utt:
        # Each state is a sequence of strings, written as one
        # space-separated line.
        for state in phone:
          wf.write(" ".join(state)+"\n")
      # A lone "." line closes the utterance entry.
      wf.write(".\n")
    wf.close()
  
  if args.merge_hvite_state_with_full_context != None:
    full_context_labs = sire_io.open_labdir_line_by_line(args.merge_hvite_state_with_full_context[1])
    mlf = sire_io.open_file_line_by_line(args.merge_hvite_state_with_full_context[0])
    state_labs = sire_io.parse_mlf(mlf, "align_mlf")