Example #1
0
def train_model(args):
    train_begin = time.time()
    trace('making vocaburaries ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab) 

    trace('begin training ...')
    model = TransSegmentationModel.new(vocab, args.context, args.hidden, args.labels, args.eta)

    for epoch in range(args.epoch):
        epoch_beg = time.time() 
        trace('START epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        total_loss = 0

        model.init_optimizer()

        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue

                text = ' '.join(word_list)
                letters = ''.join(word_list)
                labels, accum_loss_f = model.train(text)
                total_loss += accum_loss_f
                trained += 1
                hyp = make_hyp(letters, labels)
                
                """for 1sentence output
                trace("accum_loss : %lf"% (accum_loss_f))
                trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
                trace('trained %d: '% trained)
                trace(text)
                trace(hyp)
                """
                """
                if trained % 100 == 0:
                    trace('  %8d' % trained)
                """
        trace('FINISHED epoch %d/%d: ' % (epoch + 1, args.epoch))
        trace('total_loss : %lf'%total_loss)
        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))
        epoch_time = time.time() - epoch_beg
        trace('elapsed_time/1epoch : %lf'%epoch_time)

    trace('finished.')
    elapsed_time = time.time() - train_begin
    trace('train_time : %lf'%elapsed_time)
    trace('')
Example #2
0
def train_model(args):
    train_begin = time.time()
    trace('making vocaburaries ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)

    trace('begin training ...')
    model = TransSegmentationModel.new(vocab, args.context, args.hidden,
                                       args.labels, args.eta)

    for epoch in range(args.epoch):
        epoch_beg = time.time()
        trace('START epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        total_loss = 0

        model.init_optimizer()

        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue

                text = ' '.join(word_list)
                letters = ''.join(word_list)
                labels, accum_loss_f = model.train(text)
                total_loss += accum_loss_f
                trained += 1
                hyp = make_hyp(letters, labels)
                """for 1sentence output
                trace("accum_loss : %lf"% (accum_loss_f))
                trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
                trace('trained %d: '% trained)
                trace(text)
                trace(hyp)
                """
                """
                if trained % 100 == 0:
                    trace('  %8d' % trained)
                """
        trace('FINISHED epoch %d/%d: ' % (epoch + 1, args.epoch))
        trace('total_loss : %lf' % total_loss)
        trace('saving model ...')
        model.save(args.model + '.%03d' % (epoch + 1))
        epoch_time = time.time() - epoch_beg
        trace('elapsed_time/1epoch : %lf' % epoch_time)

    trace('finished.')
    elapsed_time = time.time() - train_begin
    trace('train_time : %lf' % elapsed_time)
    trace('')
def train_model(args):
    trace('making vocabularies ...')
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)

    trace('start training ...')
    model = RNNSegmentationModel.new(vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0

        model.init_optimizer()

        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue

                text = ' '.join(word_list)
                letters = ''.join(word_list)
                scores = model.train(text)
                trained += 1
                hyp = make_hyp(letters, scores)
                
                trace(trained)
                trace(text)
                trace(hyp)
                trace(' '.join('%+.1f' % x for x in scores))
                
                if trained % 100 == 0:
                    trace('  %8d' % trained)

        trace('saveing model ...')
        model.save(args.model + '.%03d' % (epoch + 1))

    trace('finished.')
Example #4
0
def train_model(args):
    trace("making vocabularies ...")
    vocab = Vocabulary.new(gens.letter_list(args.corpus), args.vocab)

    trace("start training ...")
    model = SegmentationModel.new(vocab, args.context, args.hidden)

    for epoch in range(args.epoch):
        trace("epoch %d/%d: " % (epoch + 1, args.epoch))
        trained = 0

        model.init_optimizer()

        with open(args.corpus) as fp:
            for text in fp:
                word_list = text.split()
                if not word_list:
                    continue

                text = " ".join(word_list)
                letters = "".join(word_list)
                scores = model.train(text)
                trained += 1
                hyp = make_hyp(letters, scores)

                trace(trained)
                trace(text)
                trace(hyp)
                trace(" ".join("%+.1f" % x for x in scores))

                if trained % 100 == 0:
                    trace("  %8d" % trained)

        trace("saveing model ...")
        model.save(args.model + ".%03d" % (epoch + 1))

    trace("finished.")