예제 #1
0
def main(options, args):
    """Build a translator from ``options`` and run the requested passes.

    The sample loader is chosen by ``options.phoneme_to_phoneme``.  The
    translator is either a ``MemoryTranslator`` fed from
    ``options.fakeTranslator`` or a ``Translator`` wrapping the model
    returned by ``SequiturTool.procureModel``.

    Args:
        options: Option object; the attributes read here are
            ``phoneme_to_phoneme``, ``fakeTranslator``, ``testSample``,
            ``applySample`` and ``stack_limit``.
        args: Not referenced by this function.

    Returns:
        ``1`` when no model could be procured, otherwise ``None``.
    """
    load_sample = loadP2PSample if options.phoneme_to_phoneme else loadG2PSample

    if not options.fakeTranslator:
        model = SequiturTool.procureModel(options, load_sample, log=stdout)
        if not model:
            return 1
        # A Translator is only constructed when a later pass will use it.
        if options.testSample or options.applySample:
            translator = Translator(model)
            if options.stack_limit:
                translator.setStackLimit(options.stack_limit)
        # Drop the local reference to the model once the translator holds it.
        del model
    else:
        translator = MemoryTranslator(load_sample(options.fakeTranslator))

    if options.testSample:
        mainTest(translator, load_sample(options.testSample), options)
        translator.reportStats(sys.stdout)

    if options.applySample:
        mainApply(translator, options)
        translator.reportStats(sys.stderr)
예제 #2
0
def main(options, args):
    """Entry point: obtain a translator, then test and/or apply it.

    Args:
        options: Option object providing ``phoneme_to_phoneme``,
            ``fakeTranslator``, ``testSample``, ``applySample`` and
            ``stack_limit``; it is also forwarded to
            ``SequiturTool.procureModel``, ``mainTest`` and ``mainApply``.
        args: Not referenced by this function.

    Returns:
        ``1`` if ``SequiturTool.procureModel`` yields a falsy model;
        ``None`` in every other case.
    """
    sample_loader = (
        loadG2PSample if not options.phoneme_to_phoneme else loadP2PSample
    )

    fake_path = options.fakeTranslator
    if fake_path:
        translator = MemoryTranslator(sample_loader(fake_path))
    else:
        model = SequiturTool.procureModel(options, sample_loader, log=stdout)
        if not model:
            return 1
        translator_needed = options.testSample or options.applySample
        if translator_needed:
            translator = Translator(model)
            stack_limit = options.stack_limit
            if stack_limit:
                translator.setStackLimit(stack_limit)
        del model

    test_path = options.testSample
    if test_path:
        mainTest(translator, sample_loader(test_path), options)
        # Test statistics go to standard output ...
        translator.reportStats(sys.stdout)

    if options.applySample:
        mainApply(translator, options)
        # ... while apply statistics go to standard error.
        translator.reportStats(sys.stderr)
예제 #3
0
    def __init__(self, dict_path=__dict_path__, model_path=__model_path__):
        """Load the g2p model and the dictionary file.

        Args:
            dict_path: Path of the dictionary file (``~`` is expanded).
                Every line is split on single spaces; the first field
                becomes the key in ``self._dict_`` and the remaining
                fields its value list.
            model_path: Path of the model file (``~`` is expanded) passed
                to ``SequiturTool.procureModel`` as ``modelFile``.

        If the model cannot be loaded, an error is logged and
        initialisation stops early: ``self._dict_`` stays empty and
        ``self.__model__`` keeps the falsy value that was returned.
        """
        self._dict_ = dict()
        dict_path = os.path.expanduser(dict_path)
        model_path = os.path.expanduser(model_path)
        self.__dict_path__ = dict_path
        self.__model_path__ = model_path

        # Options for loading an existing model only (no training sample,
        # no new model file).
        sequitur_options = Values()
        sequitur_options.resume_from_checkpoint = False
        sequitur_options.modelFile = model_path
        sequitur_options.shouldRampUp = False
        sequitur_options.trainSample = False
        sequitur_options.shouldTranspose = False
        sequitur_options.newModelFile = False
        sequitur_options.shouldSelfTest = False
        self.__model__ = SequiturTool.procureModel(sequitur_options, None)
        if not self.__model__:
            logger.error('Can\'t load g2p model.')
            return
        self.__model__ = Translator(self.__model__)

        # The context manager closes the file handle deterministically;
        # the previous ``open(...).readlines()`` left it to the garbage
        # collector.
        with open(dict_path) as dict_file:
            for line in dict_file:
                fields = line.strip('\n').split(' ')
                self._dict_[fields[0]] = fields[1:]
예제 #4
0
def g2pMain(options, args):
    """Obtain a translator and, if requested, translate a single word.

    Args:
        options: Option object; ``fakeTranslator``, ``testSample``,
            ``applySample``, ``applyWord`` and ``stack_limit`` are read
            here, and the object is forwarded to
            ``SequiturTool.procureModel`` and ``g2pApplyWord``.
        args: Not referenced by this function.

    Returns:
        ``1`` when no model could be procured, the result of
        ``g2pApplyWord`` when ``options.applyWord`` is truthy, and
        ``None`` otherwise.
    """
    import locale

    sample_loader = loadG2PSample
    writer_factory = codecs.getwriter(locale.getpreferredencoding())

    def wrap(stream):
        # Prefer the underlying binary buffer when the stream exposes one.
        target = stream.buffer if hasattr(stream, 'buffer') else stream
        return writer_factory(target, errors='backslashreplace')

    log_stdout = wrap(sys.stdout)
    # Built for parity with stdout; not used further in this function.
    log_stderr = wrap(sys.stderr)

    if not options.fakeTranslator:
        model = SequiturTool.procureModel(options, sample_loader,
                                          log=log_stdout)
        if not model:
            return 1
        if options.testSample or options.applySample or options.applyWord:
            translator = Translator(model)
            if options.stack_limit:
                translator.setStackLimit(options.stack_limit)
        del model
    else:
        translator = MemoryTranslator(sample_loader(options.fakeTranslator))

    if options.applyWord:
        return g2pApplyWord(translator, options, log_stdout)
예제 #5
0
def transliterate(model, word):
    """Translate ``word`` with the Sequitur model selected by ``model``.

    Args:
        model: Model key, either ``'pythainlp_lexicon'`` or
            ``'wiktionary_phonemic'``; any other key raises ``KeyError``.
        word: The word to translate; it is passed to the translator as a
            tuple of its characters.

    Returns:
        The translator's output joined with the model's connector string
        (``''`` or ``'-'``), or ``1`` if the model could not be procured.
    """

    class Struct:
        def __init__(self, **entries):
            self.__dict__.update(entries)

    model_files = {
        'pythainlp_lexicon': './lib/model-7',
        'wiktionary_phonemic': './lib/tha-pt-b-7',
    }
    connectors = {
        'pythainlp_lexicon': '',
        'wiktionary_phonemic': '-',
    }

    # Both lookups happen before any model work, so an unknown key fails
    # immediately with KeyError.
    model_file = model_files[model]
    connector = connectors[model]

    # Start with every option set to None, then override the handful that
    # carry a real value.
    settings = dict.fromkeys((
        'profile', 'resource_usage', 'psyco', 'tempdir', 'trainSample',
        'develSample', 'testSample', 'checkpoint', 'resume_from_checkpoint',
        'shouldTranspose', 'modelFile', 'newModelFile',
        'shouldTestContinuously', 'shouldSelfTest', 'lengthConstraints',
        'shouldSuppressNewMultigrams', 'viterbi', 'shouldRampUp',
        'shouldWipeModel', 'shouldInitializeWithCounts', 'minIterations',
        'maxIterations', 'eager_discount_adjustment', 'fixed_discount',
        'encoding', 'phoneme_to_phoneme', 'test_segmental', 'testResult',
        'applySample', 'applyWord', 'variants_mass', 'variants_number',
        'fakeTranslator', 'stack_limit',
    ))
    settings.update(
        modelFile=model_file,
        minIterations=20,
        maxIterations=100,
        encoding='UTF-8',
        applyWord=word,
    )
    options = Struct(**settings)

    loaded_model = SequiturTool.procureModel(options, g2p.loadG2PSample)
    if not loaded_model:
        return 1
    translator = g2p.Translator(loaded_model)
    del loaded_model

    pieces = translator(tuple(word))
    return connector.join(pieces)
예제 #6
0
def main(options, args):
    """Translate every line of ``options.applySample`` and print the result.

    The model is procured first; nothing else happens when
    ``options.applySample`` is falsy.  Otherwise all whitespace-separated
    tokens of the file are passed to ``addUnknowns`` together with the
    model, and each line is then translated as a tuple of its tokens.  A
    line whose translation raises ``TranslationFailure`` is printed as
    ``<translation-failed/>``.

    Args:
        options: Option object; ``applySample`` is read here and the
            object is forwarded to ``SequiturTool.procureModel``.
        args: Not referenced by this function.
    """
    model = SequiturTool.procureModel(options, loadSample)
    if not options.applySample:
        return

    lines = gopen(options.applySample).readlines()
    vocabulary = Set([token for text in lines for token in text.split()])
    addUnknowns(model, vocabulary)

    translator = Translator(model)
    for text in lines:
        try:
            output = " ".join(translator(tuple(text.split())))
        except translator.TranslationFailure:
            output = "<translation-failed/>"
        print(output)
예제 #7
0
def main(options, args):
    """Translate every line of ``options.applySample`` and print the result.

    The model is procured first.  When ``options.applySample`` is set,
    all whitespace-separated tokens of that file are handed to
    ``addUnknowns`` together with the model, then each line is translated
    as a tuple of its tokens and printed space-joined.  A line whose
    translation raises ``TranslationFailure`` is printed as
    ``<translation-failed/>``.

    Args:
        options: Option object; ``applySample`` is read here and the
            object is forwarded to ``SequiturTool.procureModel``.
        args: Not referenced by this function.
    """
    model = SequiturTool.procureModel(options, loadSample)
    if options.applySample:
        lines = gopen(options.applySample).readlines()
        words = Set([word for line in lines for word in line.split()])
        addUnknowns(model, words)
        translator = Translator(model)
        for line in lines:
            left = tuple(line.split())
            try:
                result = translator(left)
            except translator.TranslationFailure:
                print('<translation-failed/>')
            else:
                # Single-argument, parenthesized print: same output as the
                # old print statement on Python 2, valid syntax on Python 3.
                print(' '.join(result))
예제 #8
0
def main(options, args):
    """Obtain a translator and run the test, apply and apply-word passes.

    Args:
        options: Option object; the attributes read here are
            ``phoneme_to_phoneme``, ``fakeTranslator``, ``testSample``,
            ``applySample``, ``applyWord``, ``stack_limit`` and
            ``encoding``.
        args: Not referenced by this function.

    Returns:
        ``1`` when no model could be procured, otherwise ``None``.
    """
    import locale

    sample_loader = (
        loadP2PSample if options.phoneme_to_phoneme else loadG2PSample
    )

    # The console writers use the locale's preferred encoding:
    # options.encoding relates to the lexicon, not to standard I/O.
    writer_factory = codecs.getwriter(locale.getpreferredencoding())

    def wrap(stream):
        # Prefer the underlying binary buffer when the stream exposes one.
        target = stream.buffer if hasattr(stream, 'buffer') else stream
        return writer_factory(target, errors='backslashreplace')

    log_stdout = wrap(sys.stdout)
    log_stderr = wrap(sys.stderr)

    if not options.fakeTranslator:
        model = SequiturTool.procureModel(options, sample_loader,
                                          log=log_stdout)
        if not model:
            return 1
        if options.testSample or options.applySample or options.applyWord:
            translator = Translator(model)
            if options.stack_limit:
                translator.setStackLimit(options.stack_limit)
        del model
    else:
        translator = MemoryTranslator(sample_loader(options.fakeTranslator))

    if options.testSample:
        test_sample = sample_loader(options.testSample)
        mainTest(translator, test_sample, options, log_stdout)
        translator.reportStats(log_stdout)

    if options.applySample:
        # Results are written through gOpenOut('-', ...) using the lexicon
        # encoding, falling back to the module default.
        apply_out = gOpenOut('-', options.encoding or defaultEncoding)
        mainApply(translator, options, apply_out)
        translator.reportStats(log_stderr)

    if options.applyWord:
        mainApplyWord(translator, options, log_stdout)
예제 #9
0
def load_g2p(model_path):
    """Load the g2p model stored at ``model_path``.

    Args:
        model_path: Path assigned to the ``modelFile`` option passed to
            ``SequiturTool.procureModel``.

    Returns:
        The model returned by ``SequiturTool.procureModel``.

    If no model is returned, a message is printed and the process exits
    with status 1 via ``sys.exit``.
    """
    opts = Values()
    settings = (
        ('modelFile', model_path),
        ('resume_from_checkpoint', False),
        ('shouldRampUp', False),
        ('trainSample', False),
        ('shouldTranspose', False),
        ('shouldSelfTest', False),
        ('newModelFile', False),
    )
    for name, value in settings:
        setattr(opts, name, value)

    model = SequiturTool.procureModel(opts, None)
    if model:
        return model
    print('Can\'t load g2p model.')
    sys.exit(1)
예제 #10
0
    def __init__(self, modelfn=SEQUITUR_MODEL):
        """Procure the model in ``modelfn`` and wrap it in a translator.

        Args:
            modelfn: Value assigned to the ``modelFile`` option passed to
                ``SequiturTool.procureModel``.

        Sets ``self.model`` to the procured model and ``self.translator``
        to a ``Translator`` built from it.
        """
        options = SeqOptionsObject()
        option_values = (
            ('resume_from_checkpoint', False),
            ('modelFile', modelfn),
            ('shouldRampUp', False),
            ('trainSample', None),
            ('shouldTranspose', False),
            ('newModelFile', None),
            ('shouldSelfTest', False),
        )
        for name, value in option_values:
            setattr(options, name, value)

        self.model = SequiturTool.procureModel(
            options, loadG2PSample, log=sys.stdout)
        self.translator = Translator(self.model)
예제 #11
0
 def __init__(self, model_path):
     """Procure the model at ``model_path`` and build ``self.translator``.

     Args:
         model_path: Value assigned to the ``modelFile`` option passed to
             ``SequiturTool.procureModel``.

     Side effects: rebinds the module globals ``defaultEncoding`` (to
     ``'ISO-8859-15'``) and ``stdout`` / ``stderr`` (to stream writers
     for that encoding wrapping ``sys.stdout`` / ``sys.stderr``).
     """
     global defaultEncoding, stdout, stderr
     import codecs

     class options(object):
         pass

     settings = options()
     # Every option the tool may look at is present; these carry no value.
     for unset in (
             'testSample', 'trainSample', 'shouldInitializeWithCounts',
             'psyco', 'stack_limit', 'shouldTranspose', 'shouldRampUp',
             'resume_from_checkpoint', 'lengthConstraints', 'checkpoint',
             'eager_discount_adjustment', 'fakeTranslator', 'tempdir',
             'profile', 'variants_number', 'testResult', 'variants_mass',
             'shouldSuppressNewMultigrams', 'develSample',
             'shouldWipeModel', 'resource_usage', 'test_segmental',
             'fixed_discount', 'newModelFile', 'shouldSelfTest', 'viterbi',
             'shouldTestContinuously', 'phoneme_to_phoneme'):
         setattr(settings, unset, None)
     settings.modelFile = model_path
     settings.encoding = 'ISO-8859-15'
     settings.applySample = 'args.txt'
     settings.maxIterations = 100
     settings.minIterations = 20

     defaultEncoding = settings.encoding
     stream_writer = codecs.getwriter(settings.encoding)
     stdout = stream_writer(sys.stdout)
     stderr = stream_writer(sys.stderr)

     model = SequiturTool.procureModel(settings, loadG2PSample, log=stdout)
     self.translator = Translator(model)