Esempio n. 1
0
def read_stanford_dependency_parses(dirpath):
    """Read every ``.relations`` file found directly inside ``dirpath``.

    A directory entry is picked up when its name contains ``.relations``.
    Each such file is read with ``io.open_file_line_by_line`` and stored
    under the part of its name that precedes the first ``.rel``.

    Args:
        dirpath: Directory to scan (not searched recursively).

    Returns:
        A dict mapping the shortened file name to whatever
        ``io.open_file_line_by_line`` returned for that file.
    """
    return {
        entry.split(".rel")[0]: io.open_file_line_by_line(
            os.path.join(dirpath, entry))
        for entry in os.listdir(dirpath)
        if ".relations" in entry
    }
Esempio n. 2
0
def read_stanford_dependency_parses(dirpath):
  """Collect the contents of all ``.relations`` files in ``dirpath``.

  Entries whose name does not contain ``.relations`` are ignored. For the
  rest, the text before the first ``.rel`` in the file name becomes the key
  and the result of ``io.open_file_line_by_line`` on the file is the value.

  Args:
    dirpath: Directory to scan (only its direct entries are considered).

  Returns:
    A dict from shortened file name to the loaded file content.
  """
  parses = {}
  for fname in os.listdir(dirpath):
    # Skip anything that is not a relations file.
    if ".relations" not in fname:
      continue
    key = fname.split(".rel")[0]
    full_path = os.path.join(dirpath, fname)
    parses[key] = io.open_file_line_by_line(full_path)
  return parses
Esempio n. 3
0
        nargs=argparse.REMAINDER,
        default='-order 4 -interpolate -gt3min 1 -wbdiscount -debug 3'.split())
    parser.add_argument(
        '-f',
        action='store_true',
        help="Force overwrite of outputpath file if it exists.")
    parser.add_argument(
        '-no_syll_stress',
        action='store_true',
        help="Replace syllable stress markers with a boundary marker sb.")
    args = parser.parse_args()

    wf = io.open_writefile_safe(os.path.join(args.outpath, "sents.txt"),
                                args.f)

    labs = io.parse_mlf(io.open_file_line_by_line(args.input_mlf), "align_mlf")

    labs = get_phoneme_strings(labs, args.no_syll_stress)

    for lab in labs:
        wf.write(" ".join(lab) + "\n")
    wf.close()

    txtpath = os.path.join(args.outpath, "sents.txt")

    lmpath = os.path.join(args.outpath, "ngram.lm")

    #This allows for people to pass their own options to the ngram binary
    options = " " + " ".join(args.lm_binary_options)
    subprocess.call(args.ngram_binary_path + " -text " + txtpath + " -lm " +
                    lmpath + options,
Esempio n. 4
0
        help=
        "Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.",
        metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path'))
    parser.add_argument(
        '-collapse_closure',
        action="store_true",
        help=
        "Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs."
    )
    parser.add_argument('-f',
                        action="store_true",
                        help="Force overwrite of files in output dir.")
    args = parser.parse_args()

    if args.merge_hvite_state_with_sp_align_mlf != None:
        state_mlf = sire_io.open_file_line_by_line(
            args.merge_hvite_state_with_sp_align_mlf[0])
        phone_mlf = sire_io.open_file_line_by_line(
            args.merge_hvite_state_with_sp_align_mlf[1])
        state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf")
        phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf")
        merged_utts = merge_hvite_state_with_sp_align_mlf(
            state_utts, phone_utts)
        if args.f == True:
            wf = sire_io.open_writefile_safe(
                args.merge_hvite_state_with_sp_align_mlf[2], overwrite=True)
        else:
            wf = sire_io.open_writefile_safe(
                args.merge_hvite_state_with_sp_align_mlf[2])
        wf.write("#!MLF!#\n")
        for utt in merged_utts:
            wf.write("\"*/" + utt.pop(0) + ".rec\"\n")
Esempio n. 5
0
      else:
        c_merge.append(s_lab[s_lab_count])
        s_lab_count += 1
    merged.append(c_merge)
  return merged

if __name__ == "__main__":
  # Command-line entry point: declare the supported options, parse them and
  # run the merge that was requested.
  parser = argparse.ArgumentParser(description='Utility file convertion related methods.')
  parser.add_argument(
    '-merge_hvite_state_with_full_context',
    nargs=3,
    help="Merge an HVite state level alignment MLF with full-context labels in a directory and output state-level full-context labels to another.",
    metavar=('mlf_path', 'lab_dir', 'out_dir'))
  parser.add_argument(
    '-merge_hvite_state_with_sp_align_mlf',
    nargs=3,
    help="Merge an HVite state level alignment MLF which does not contain SP and syllable stress information with a phone level alignment ready mlf which does and output a state-level with SP and syllable stress.",
    metavar=('state_mlf_path', 'phone_mlf_path', 'out_mlf_path'))
  parser.add_argument(
    '-collapse_closure',
    action="store_true",
    help="Collapses stops split into closure and release into one when merging state_align_labs with full_context_labs.")
  parser.add_argument(
    '-f',
    action="store_true",
    help="Force overwrite of files in output dir.")
  args = parser.parse_args()

  # An option that was not given on the command line is left at argparse's
  # default of None, so test identity with None rather than equality.
  if args.merge_hvite_state_with_sp_align_mlf is not None:
    # nargs=3 guarantees exactly three values, in metavar order.
    state_mlf_path, phone_mlf_path, out_mlf_path = args.merge_hvite_state_with_sp_align_mlf
    state_mlf = sire_io.open_file_line_by_line(state_mlf_path)
    phone_mlf = sire_io.open_file_line_by_line(phone_mlf_path)
    state_utts = sire_io.parse_mlf(state_mlf, "state_align_mlf")
    phone_utts = sire_io.parse_mlf(phone_mlf, "hts_mlf")
    merged_utts = merge_hvite_state_with_sp_align_mlf(state_utts, phone_utts)
    # Only pass overwrite when -f was given; otherwise the helper's own
    # default behaviour applies.
    if args.f:
      wf = sire_io.open_writefile_safe(out_mlf_path, overwrite=True)
    else:
      wf = sire_io.open_writefile_safe(out_mlf_path)
    wf.write("#!MLF!#\n")
    for utt in merged_utts:
      # The first item of each utterance is written as its "*/<name>.rec"
      # header; popping it leaves only the per-phone entries in utt.
      wf.write("\"*/"+utt.pop(0)+".rec\"\n")
      for phone in utt:
        for state in phone:
          # One output line per state: its fields joined by single spaces.
          wf.write(" ".join(state)+"\n")
      # A line holding a single "." is written after each utterance.
      wf.write(".\n")