Code example #1
def test_mobilenetv2_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_ascend_quant
    print("training configure: {}".format(config))

    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    network = quantizer.quantize(network)

    # get learning rate
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimizer
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()), lr, config.momentum,
                      config.weight_decay)
    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    export_time_used = 650
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    assert train_time < export_time_used
    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
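The snippet above is an excerpt and omits its imports. A minimal set it appears to rely on is sketched below; the MindSpore imports follow the 1.x-era APIs used in the code, while the project-local modules (network, dataset, learning-rate and callback helpers) are hypothetical paths, not part of MindSpore.

import numpy as np
import mindspore.nn as nn
from mindspore import context, Tensor
from mindspore.common import set_seed
from mindspore.train.model import Model
from mindspore.compression.quant import QuantizationAwareTraining  # assumption: 1.x quantization API
# Project-local helpers -- the module paths below are placeholders:
# from src.mobilenetV2 import mobilenetV2
# from src.dataset import create_dataset
# from src.lr_generator import get_lr
# from src.utils import Monitor, CrossEntropyWithLabelSmooth
# from src.config import config_ascend_quant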
Code example #2
def test_resnet50_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)
    #loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    net = quant.convert_quant_network(net,
                                      bn_fold=True,
                                      per_channel=[True, False],
                                      symmetric=[True, False])

    # get learning rate
    lr = Tensor(
        get_lr(lr_init=config.lr_init,
               lr_end=0.0,
               lr_max=config.lr_max,
               warmup_epochs=config.warmup_epochs,
               total_epochs=config.epoch_size,
               steps_per_epoch=step_size,
               lr_decay_mode='cosine'))

    # define optimizer
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                   config.momentum, config.weight_decay, config.loss_scale)

    # define model
    #model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)

    callbacks = [monitor]
    model.train(epoch_size,
                dataset,
                callbacks=callbacks,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.40
    avg_step_loss = np.mean(np.array(monitor.losses))

    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
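Both tests construct a project-local Monitor callback and later read monitor.losses and monitor.step_mseconds; the class itself is not part of the excerpt. A minimal stand-in with that surface, built on MindSpore's Callback API, might look like the sketch below (the attribute handling is an assumption, not the project's actual implementation):

import time
import numpy as np
from mindspore.train.callback import Callback

class Monitor(Callback):
    # Minimal sketch: record per-step loss and step time so the tests above can
    # read monitor.losses and monitor.step_mseconds.
    def __init__(self, lr_init=None, step_threshold=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.step_threshold = step_threshold
        self.losses = []
        self.step_mseconds = 0.0

    def step_begin(self, run_context):
        self._step_start = time.time()

    def step_end(self, run_context):
        self.step_mseconds = (time.time() - self._step_start) * 1000.0
        out = run_context.original_args().net_outputs
        if isinstance(out, (tuple, list)):  # net_outputs may be a tuple of Tensors
            out = out[0]
        self.losses.append(float(np.mean(out.asnumpy())))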
Code example #3
    try:

        email_msgs_study = []

        (_, _, monitor_files) = next(walk(monitor_path))

        mlog.info(
            'The following monitor configs are to be processed (count = {}) in the monitor folder {}: {}'
            .format(len(monitor_files), monitor_path, monitor_files))

        for mnt_config in monitor_files:
            mlog.info('-------------------------------------------')
            mlog.info('Start processing monitor config file: "{}".'.format(
                mnt_config))
            try:
                mnt = Monitor(Path(monitor_path) / mnt_config, mlog)
                mnt.start_monitor()
            except Exception as ex:
                # report unexpected error to log file
                _str = 'Unexpected Error "{}" occurred during processing monitor config file: {}\n{} ' \
                    .format(ex, mnt_config, traceback.format_exc())
                mlog.error(_str)
            mlog.info('Finish processing monitor config file: "{}".'.format(
                mnt_config))
            # create a dictionary to feed into template for preparing an email body
            template_feeder = {
                'source_file_path':
                '{}/{}'.format(str(mnt.mtr_source_dir),
                               str(mnt.mtr_source_file)),
                'source_identified':
                str(mnt.mtr_source_path),
Code example #4
File: models.py  Project: Yevgnen/seq2seq
    def train(self, train_x, train_y, epoch=100, batch_size=128,
              validation_data=None, valid_freq=100, patience=10,
              monitor=False):

        # Define the symbolic train model
        batch_index = T.iscalar('batch_index')
        x = T.matrix('x', dtype=train_x.get_value(borrow=True).dtype)
        y = T.vector('y', dtype=train_y.get_value(borrow=True).dtype)
        loss = self.loss(x, y)
        updates = self.optimizer.get_updates(loss, self.params)

        sample_num = train_x.get_value(borrow=True).shape[0]
        train_batch_num = int(np.ceil(sample_num / batch_size))
        train_fn = theano.function(
            inputs=[batch_index],
            outputs=loss,
            updates=updates,
            givens={
                x: train_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                y: train_y[batch_index * batch_size: (batch_index + 1) * batch_size]
            }
        )
        train_acc_fn = theano.function([], self.score(train_x, train_y))

        # Initialization for validation
        if validation_data is not None:
            valid = True
            (valid_x, valid_y) = validation_data
            # Note: no `updates` here -- evaluating the validation loss must not modify the parameters.
            valid_fn = theano.function(
                inputs=[batch_index],
                outputs=loss,
                givens={
                    x: valid_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                    y: valid_y[batch_index * batch_size: (batch_index + 1) * batch_size]
                }
            )
            valid_acc_fn = theano.function([], self.score(valid_x, valid_y))
            valid_batch_num = int(np.ceil(valid_x.get_value(borrow=True).shape[0] / batch_size))
            valid_losses = []
            valid_acces = []
            best_valid_acc = 0
            p = 0
        else:
            valid = False
            valid_losses = None
            valid_acces = None

        # Initialization for monitor
        if monitor:
            m = Monitor(monitor_acc=True)

        train_losses = []
        train_acces = []
        stop = False
        iterations = epoch * train_batch_num
        for iter in range(iterations):
            i = int(iter / train_batch_num)  # current epoch
            j = iter % train_batch_num       # batch index

            # Train on a batch
            train_loss = train_fn(j)
            train_losses.append(train_loss)
            self.logger.info('TRAINING - Epoch({0:4d} / {1:4d}), train loss: {2}'.format(i + 1, epoch, train_loss))

            # Validating
            if valid and iter % valid_freq == 0:
                valid_loss = np.mean([valid_fn(k) for k in range(valid_batch_num)])
                valid_losses.append(valid_loss)

                train_acc = train_acc_fn()
                train_acces.append(train_acc)
                valid_acc = valid_acc_fn()
                valid_acces.append(valid_acc)
                self.logger.info('VALIDATING - Iteration ({0}), valid loss: {1}'.format(iter, valid_loss))
                self.logger.info('VALIDATING - Iteration ({0}), train acc: {1}'.format(iter, train_acc))
                self.logger.info('VALIDATING - Iteration ({0}), valid acc: {1}'.format(iter, valid_acc))

                # Early stopping: reset patience when validation accuracy improves
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    p = 0
                else:
                    p += 1
                    if p >= patience:
                        stop = True

            if monitor:
                m.update(train_losses, valid_losses, valid_freq, train_acces, valid_acces)

            if stop:
                break

        if monitor:
            m.save()
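For reference, the train() method above expects its datasets as Theano shared variables so the givens={...} slicing can index them by batch. A hedged usage sketch follows; X, y, X_val, y_val and model are placeholders, not names from the project.

import numpy as np
import theano

# Wrap numpy arrays as shared variables so batches can be sliced on the device.
train_x = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
train_y = theano.shared(np.asarray(y, dtype='int32'), borrow=True)
valid_x = theano.shared(np.asarray(X_val, dtype=theano.config.floatX), borrow=True)
valid_y = theano.shared(np.asarray(y_val, dtype='int32'), borrow=True)

model.train(train_x, train_y, epoch=50, batch_size=64,
            validation_data=(valid_x, valid_y), valid_freq=100,
            patience=10, monitor=True)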
Code example #5
File: models.py  Project: Yevgnen/seq2seq
    def train(self, train_x, mask_train_x, train_y, mask_train_y, epoch=10, batch_size=128,
              validation_data=None, valid_freq=100, patience=10,
              monitor=False, epoch_end_callback=None):

        # Define the symbolic train model
        batch_index = T.iscalar('batch_index')
        x = T.imatrix('x')
        y = T.imatrix('y')
        m_x = T.imatrix('m_x')
        m_y = T.imatrix('m_y')
        loss = self.loss(x, m_x, y, m_y)
        updates = self.optimizer.get_updates(loss, self.params)

        sample_num = train_x.get_value(borrow=True).shape[0]
        train_batch_num = int(np.ceil(sample_num / batch_size))
        train_fn = theano.function(
            inputs=[batch_index],
            outputs=loss,
            updates=updates,
            givens={
                x: train_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                y: train_y[batch_index * batch_size: (batch_index + 1) * batch_size],
                m_x: mask_train_x[:, batch_index * batch_size: (batch_index + 1) * batch_size],
                m_y: mask_train_y[:, batch_index * batch_size: (batch_index + 1) * batch_size]
            }
        )

        # Initialization for validation
        if validation_data is not None:
            valid = True
            (valid_x, mask_valid_x, valid_y, mask_valid_y) = validation_data
            # Note: no `updates` here -- evaluating the validation loss must not modify the parameters.
            valid_fn = theano.function(
                inputs=[batch_index],
                outputs=loss,
                givens={
                    x: valid_x[batch_index * batch_size: (batch_index + 1) * batch_size],
                    y: valid_y[batch_index * batch_size: (batch_index + 1) * batch_size],
                    m_x: mask_valid_x[:, batch_index * batch_size: (batch_index + 1) * batch_size],
                    m_y: mask_valid_y[:, batch_index * batch_size: (batch_index + 1) * batch_size]
                }
            )
            valid_losses = []
            valid_batch_num = int(np.ceil(valid_x.get_value(borrow=True).shape[0] / batch_size))
            best_valid_loss = np.inf
            p = 0
        else:
            valid = False
            valid_losses = None

        # Initialization for monitor
        if monitor:
            m = Monitor(monitor_acc=False)

        train_losses = []
        stop = False
        iterations = epoch * train_batch_num
        for iter in range(iterations):
            i = int(iter / train_batch_num)  # current epoch
            j = iter % train_batch_num       # batch index

            # Train on a batch
            train_loss = train_fn(j)
            train_losses.append(train_loss)
            self.logger.info('TRAINING - Epoch({0:4d} / {1:4d}), train loss: {2}'.format(i + 1, epoch, train_loss))

            if valid and iter % valid_freq == 0:
                valid_loss = np.mean([valid_fn(k) for k in range(valid_batch_num)])
                valid_losses.append(valid_loss)

                self.logger.info('VALIDATING - Iteration ({0}), valid loss: {1}'.format(iter, valid_loss))

                # Early stopping: reset patience when validation loss improves
                if valid_loss < best_valid_loss:
                    best_valid_loss = valid_loss
                    p = 0
                else:
                    p += 1
                    if p >= patience:
                        stop = True

            if monitor:
                m.update(train_losses, valid_losses, valid_freq)

            if stop:
                break

            if iter % train_batch_num == 0 and epoch_end_callback and callable(epoch_end_callback):
                epoch_end_callback()

        if monitor:
            m.save()

        return np.asarray(train_losses).reshape(epoch, train_batch_num)
Code example #6
def run_simultrans(model,
                   options_file=None,
                   config=None,
                   policy=None,
                   id=None,
                   remote=False):

    WORK = config['workspace']

    # check hidden folders
    paths = [
        '.policy', '.pretrained', '.log', '.config', '.images', '.translate'
    ]
    for p in paths:
        p = WORK + p
        if not os.path.exists(p):
            os.mkdir(p)

    if id is not None:
        fcon = WORK + '.config/{}.conf'.format(id)
        if os.path.exists(fcon):
            print 'load config files'
            policy, config = pkl.load(open(fcon, 'r'))

    # ============================================================================== #
    # load model model_options
    # ============================================================================== #
    _model = model.split('/')[-1]

    if options_file is not None:
        with open(options_file, 'rb') as f:
            options = pkl.load(f)
    else:
        with open('%s.pkl' % model, 'rb') as f:
            options = pkl.load(f)

    print 'merge configuration into options'
    for w in config:
        if (w in options) and (config[w] is not None):
            options[w] = config[w]

    print 'load options...'
    for w, p in sorted(options.items(), key=lambda x: x[0]):
        print '{}: {}'.format(w, p)

    # load detail settings from option file:
    dictionary, dictionary_target = options['dictionaries']

    def _iter(fname):
        with open(fname, 'r') as f:
            for line in f:
                words = line.strip().split()
                x = map(lambda w: word_dict[w] if w in word_dict else 1, words)
                x = map(lambda ii: ii if ii < options['n_words'] else 1, x)
                x += [0]
                yield x

    def _check_length(fname):
        f = open(fname, 'r')
        count = 0
        for _ in f:
            count += 1
        f.close()

        return count

    # load source dictionary and invert
    with open(dictionary, 'rb') as f:
        word_dict = pkl.load(f)
    word_idict = dict()
    for kk, vv in word_dict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # load target dictionary and invert
    with open(dictionary_target, 'rb') as f:
        word_dict_trg = pkl.load(f)
    word_idict_trg = dict()
    for kk, vv in word_dict_trg.iteritems():
        word_idict_trg[vv] = kk
    word_idict_trg[0] = '<eos>'
    word_idict_trg[1] = 'UNK'

    ## use additional input for the policy network
    options['pre'] = config['pre']

    # ================================================================================= #
    # Build a Simultaneous Translator
    # ================================================================================= #

    # allocate model parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # print 'build the model for computing cost (full source sentence).'
    trng, use_noise, \
    _x, _x_mask, _y, _y_mask, \
    opt_ret, \
    cost, f_cost = build_model(tparams, options)
    print 'done'

    # functions for sampler
    f_sim_ctx, f_sim_init, f_sim_next = build_simultaneous_sampler(
        tparams, options, trng)

    # function for finetune
    if config['finetune'] != 'nope':
        f_fine_init, f_fine_cost, f_fine_update = build_fine(
            tparams,
            options,
            fullmodel=True if config['finetune'] == 'full' else False)

    def _translate(src,
                   trg,
                   train=False,
                   samples=config['sample'],
                   greedy=False):
        ret = simultaneous_decoding(
            f_sim_ctx,
            f_sim_init,
            f_sim_next,
            f_cost,
            _policy,
            src,
            trg,
            word_idict_trg,
            step=config['step'],
            peek=config['peek'],
            sidx=config['s0'],
            n_samples=samples,
            reward_config={
                'target': config['target'],
                'gamma': config['gamma'],
                'Rtype': config['Rtype'],
                'maxsrc': config['maxsrc'],
                'greedy': greedy,
                'upper': config['upper']
            },
            train=train,
            use_forget=config['forget'],
            use_newinput=config['pre'],
            use_coverage=config['coverage'],
            on_groundtruth=0 if config['finetune'] == 'nope' else 10)

        return ret

    # check the ID:
    policy['base'] = _model
    _policy = Policy(trng,
                     options,
                     policy,
                     config,
                     n_in=options['readout_dim'] +
                     1 if config['coverage'] else options['readout_dim'],
                     n_out=3 if config['forget'] else 2,
                     recurrent=policy['recurrent'],
                     id=id)

    # make the dataset ready for training & validation
    # train_    = options['datasets'][0]
    # train_num = _check_length
    trainIter = TextIterator(options['datasets'][0],
                             options['datasets'][1],
                             options['dictionaries'][0],
                             options['dictionaries'][1],
                             n_words_source=options['n_words_src'],
                             n_words_target=options['n_words'],
                             batch_size=config['batchsize'],
                             maxlen=options['maxlen'])

    train_num = trainIter.num

    validIter = TextIterator(options['valid_datasets'][0],
                             options['valid_datasets'][1],
                             options['dictionaries'][0],
                             options['dictionaries'][1],
                             n_words_source=options['n_words_src'],
                             n_words_target=options['n_words'],
                             batch_size=64,
                             cache=10,
                             maxlen=1000000)

    valid_num = validIter.num

    valid_ = options['valid_datasets'][0]
    valid_num = _check_length(valid_)
    print 'training set {} lines / validation set {} lines'.format(
        train_num, valid_num)
    print 'use the reward function {}'.format(chr(config['Rtype'] + 65))

    # ================================================================================= #
    # Main Loop: Run
    # ================================================================================= #
    print 'Start Simultaneous Translator...'
    probar = Progbar(train_num / config['batchsize'], with_history=False)
    monitor = None
    if remote:
        monitor = Monitor(root='http://localhost:9000')

    # freqs
    save_freq = 200
    sample_freq = 10
    valid_freq = 200
    valid_size = 200
    display_freq = 50
    finetune_freq = 5

    history, last_it = _policy.load()
    action_space = ['W', 'C', 'F']
    Log_avg = {}
    time0 = timer()
    pipe = PIPE(['x', 'x_mask', 'y', 'y_mask', 'c_mask'])

    for it, (srcs,
             trgs) in enumerate(trainIter):  # only one sentence each iteration
        if it < last_it:  # go over the scanned lines.
            continue

        # for validation
        # doing the whole validation!!
        reference = []
        system = []

        reference2 = []
        system2 = []

        if it % valid_freq == 0:
            print 'start validation'

            collections = [[], [], [], [], []]
            probar_v = Progbar(valid_num / 64 + 1)
            for ij, (srcs, trgs) in enumerate(validIter):

                # new_srcs, new_trgs = [], []

                # for src, trg in zip(srcs, trgs):
                #     if len(src) < config['s0']:
                #         continue  # ignore when the source sentence is less than sidx. we don't use the policy\
                #     else:
                #         new_srcs += [src]
                #         new_trgs += [trg]

                # if len(new_srcs) == 0:
                #     continue
                # srcs, trgs = new_srcs, new_trgs

                statistics = _translate(srcs,
                                        trgs,
                                        train=False,
                                        samples=1,
                                        greedy=True)

                quality, delay, reward = zip(*statistics['track'])
                reference += statistics['Ref']
                system += statistics['Sys']

                # print ' '.join(reference[-1][0])
                # print ' '.join(system[-1])

                # compute the average consective waiting length
                def _consective(action):
                    waits = []
                    temp = 0
                    for a in action:
                        if a == 0:
                            temp += 1
                        elif temp > 0:
                            waits += [temp]
                            temp = 0

                    if temp > 0:
                        waits += [temp]

                    mean = numpy.mean(waits)
                    gec = numpy.max(
                        waits)  # numpy.prod(waits) ** (1./len(waits))
                    return mean, gec

                def _max_length(action):
                    _cur = 0
                    _end = 0
                    _max = 0
                    for it, a in enumerate(action):
                        if a == 0:
                            _cur += 1
                        elif a == 2:
                            _end += 1

                        temp = _cur - _end
                        if temp > _max:
                            _max = temp
                    return _max

                maxlen = [
                    _max_length(action) for action in statistics['action']
                ]
                means, gecs = zip(*(_consective(action)
                                    for action in statistics['action']))

                collections[0] += quality
                collections[1] += delay
                collections[2] += means
                collections[3] += gecs
                collections[4] += maxlen

                values = [('quality', numpy.mean(quality)),
                          ('delay', numpy.mean(delay)),
                          ('wait_mean', numpy.mean(means)),
                          ('wait_max', numpy.mean(gecs)),
                          ('max_len', numpy.mean(maxlen))]
                probar_v.update(ij + 1, values=values)

            validIter.reset()
            valid_bleu, valid_delay, valid_wait, valid_wait_gec, valid_mx = [
                numpy.mean(a) for a in collections
            ]
            print 'Iter = {}: AVG BLEU = {}, DELAY = {}, WAIT(MEAN) = {}, WAIT(MAX) = {}, MaxLen={}'.format(
                it, valid_bleu, valid_delay, valid_wait, valid_wait_gec,
                valid_mx)

            print 'Compute the Corpus BLEU={} (greedy)'.format(
                corpus_bleu(reference, system))

            with open(WORK + '.translate/test.txt', 'w') as fout:
                for sys in system:
                    fout.write('{}\n'.format(' '.join(sys)))

            with open(WORK + '.translate/ref.txt', 'w') as fout:
                for ref in reference:
                    fout.write('{}\n'.format(' '.join(ref[0])))

        if config['upper']:
            print 'done'
            import sys
            sys.exit(-1)

        # training set sentence tuning
        new_srcs, new_trgs = [], []
        for src, trg in zip(srcs, trgs):
            if len(src) <= config['s0']:
                continue  # skip source sentences no longer than sidx; the policy is not used for them
            else:
                new_srcs += [src]
                new_trgs += [trg]

        if len(new_srcs) == 0:
            continue

        srcs, trgs = new_srcs, new_trgs
        try:
            statistics, info, pipe_t = _translate(srcs, trgs, train=True)
        except Exception:
            print 'translated an empty sentence, skipping.'
            continue

        # samples, scores, actions, rewards, info, pipe_t = _translate(srcs, trgs, train=True)
        # print pipe_t

        if config['finetune'] != 'nope':

            for idx, act in enumerate(pipe_t['action']):
                _start = 0
                _end = 0
                _mask = [0 for _ in srcs[0]]
                _cmask = []

                pipe.messages['x'] += srcs
                pipe.messages['y'] += [pipe_t['sample'][idx]]

                for a in act:
                    # print _start, _end
                    if a == 0:
                        _mask[_start] = 1
                        _start += 1
                    elif a == 2:
                        _mask[_end] = 0
                        _end += 1
                    else:
                        _cmask.append(_mask)
                # print numpy.asarray(_cmask).shape

                pipe.messages['c_mask'].append(_cmask)

            if it % finetune_freq == (finetune_freq - 1):
                num = len(pipe.messages['x'])
                max_x = max([len(v) for v in pipe.messages['x']])
                max_y = max([len(v) for v in pipe.messages['y']])

                xx, xx_mask = _padding(pipe.messages['x'],
                                       shape=(max_x, num),
                                       return_mask=True,
                                       dtype='int64')
                yy, yy_mask = _padding(pipe.messages['y'],
                                       shape=(max_y, num),
                                       return_mask=True,
                                       dtype='int64')
                cc_mask = _padding(pipe.messages['c_mask'],
                                   shape=(max_y, num,
                                          max_x)).transpose([0, 2, 1])

                # fine-tune the EncDec of translation
                if config['finetune'] == 'full':
                    cost = f_fine_cost(xx, xx_mask, yy, yy_mask, cc_mask)
                elif config['finetune'] == 'decoder':
                    cost = f_fine_cost(xx, xx_mask, yy, yy_mask, cc_mask)
                else:
                    raise NotImplementedError

                print '\nIter={} || cost = {}'.format(it, cost[0])
                f_fine_update(0.00001)
                pipe.reset()

        if it % sample_freq == 0:

            print '\nModel:{} has been trained for {} hours'.format(
                _policy.id, (timer() - time0) / 3600.)
            print 'source: ', _bpe2words(_seqs2words([srcs[0]], word_idict))[0]
            print 'target: ', _bpe2words(_seqs2words([trgs[0]],
                                                     word_idict_trg))[0]

            # obtain the translation results
            samples = _bpe2words(
                _seqs2words(statistics['sample'], word_idict_trg))

            # obtain the delay (normalized)
            # delays = _action2delay(srcs[0], statistics['action'])

            c = 0
            for j in xrange(len(samples)):

                if statistics['secs'][j][0] == 0:
                    if c < 5:
                        c += 1

                    print '---ID: {}'.format(_policy.id)
                    print 'sample: ', samples[j]
                    # print 'action: ', ','.join(
                    #     ['{}({})'.format(action_space[t], f)
                    #      for t, f in
                    #          zip(statistics['action'][j], statistics['forgotten'][j])])

                    print 'action: ', ','.join([
                        '{}'.format(action_space[t])
                        for t in statistics['action'][j]
                    ])

                    print 'quality:', statistics['track'][j][0]
                    print 'delay:', statistics['track'][j][1]
                    # print 'score:', statistics['score'][j]
                    break

        values = [(w, info[w]) for w in info]
        probar.update(it + 1, values=values)

        # NaN detector
        for w in info:
            if numpy.isnan(info[w]) or numpy.isinf(info[w]):
                raise RuntimeError, 'NaN/INF is detected!! {} : ID={}'.format(
                    w, id)

        # remote display
        if remote:
            logs = {
                'R': info['R'],
                'Q': info['Q'],
                'D': info['D'],
                'P': float(info['P'])
            }
            # print logs
            for w in logs:
                Log_avg[w] = Log_avg.get(w, 0) + logs[w]

            if it % display_freq == (display_freq - 1):
                for w in Log_avg:
                    Log_avg[w] /= display_freq

                monitor.display(it + 1, Log_avg)
                Log_avg = dict()

        # save the history & model
        history += [info]
        if it % save_freq == 0:
            _policy.save(history, it)
Code example #7
def run_simultrans(model,
                   options_file=None,
                   config=None,
                   id=None,
                   remote=False):

    WORK = config['workspace']

    # check hidden folders
    paths = [
        '.policy', '.pretrained', '.log', '.config', '.images', '.translate'
    ]
    for p in paths:
        p = WORK + p
        if not os.path.exists(p):
            os.mkdir(p)

    if id is not None:
        fcon = WORK + '.config/{}.conf'.format(id)
        if os.path.exists(fcon):
            print 'load config files'
            policy, config = pkl.load(open(fcon, 'r'))

    # ============================================================================== #
    # load model model_options
    # ============================================================================== #
    _model = model.split('/')[-1]

    if options_file is not None:
        with open(options_file, 'rb') as f:
            options = pkl.load(f)
    else:
        with open('%s.pkl' % model, 'rb') as f:
            options = pkl.load(f)

    print 'merge configuration into options'
    for w in config:
        # if (w in options) and (config[w] is not None):
        options[w] = config[w]

    print 'load options...'
    for w, p in sorted(options.items(), key=lambda x: x[0]):
        print '{}: {}'.format(w, p)

    # load detail settings from option file:
    dictionary, dictionary_target = options['dictionaries']

    # load source dictionary and invert
    with open(dictionary, 'rb') as f:
        word_dict = pkl.load(f)
    word_idict = dict()
    for kk, vv in word_dict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # load target dictionary and invert
    with open(dictionary_target, 'rb') as f:
        word_dict_trg = pkl.load(f)
    word_idict_trg = dict()
    for kk, vv in word_dict_trg.iteritems():
        word_idict_trg[vv] = kk
    word_idict_trg[0] = '<eos>'
    word_idict_trg[1] = 'UNK'

    options['pre'] = config['pre']

    # ========================================================================= #
    # Build a Simultaneous Translator
    # ========================================================================= #

    # allocate model parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # print 'build the model for computing cost (full source sentence).'
    trng, use_noise, \
    _x, _x_mask, _y, _y_mask, \
    opt_ret, \
    cost, f_cost = build_model(tparams, options)
    print 'done'

    # functions for sampler
    f_sim_ctx, f_sim_init, f_sim_next = build_simultaneous_sampler(
        tparams, options, trng)

    # function for finetune the underlying model
    if options['finetune']:
        ff_init, ff_cost, ff_update = build_simultaneous_model(tparams,
                                                               options,
                                                               rl=True)
        funcs = [
            f_sim_ctx, f_sim_init, f_sim_next, f_cost, ff_init, ff_cost,
            ff_update
        ]

    else:
        funcs = [f_sim_ctx, f_sim_init, f_sim_next, f_cost]

    # build a res-predictor
    if options['predict']:
        params_act = get_actor('gru')[0](options,
                                         prefix='pdt',
                                         nin=options['dim'])
        pass

    # check the ID:
    options['base'] = _model
    agent = Policy(trng,
                   options,
                   n_in=options['readout_dim'] +
                   1 if options['coverage'] else options['readout_dim'],
                   n_out=3 if config['forget'] else 2,
                   recurrent=options['recurrent'],
                   id=id)

    # make the dataset ready for training & validation
    trainIter = TextIterator(options['datasets'][0],
                             options['datasets'][1],
                             options['dictionaries'][0],
                             options['dictionaries'][1],
                             n_words_source=options['n_words_src'],
                             n_words_target=options['n_words'],
                             batch_size=config['batchsize'],
                             maxlen=options['maxlen'])

    train_num = trainIter.num

    validIter = TextIterator(options['valid_datasets'][0],
                             options['valid_datasets'][1],
                             options['dictionaries'][0],
                             options['dictionaries'][1],
                             n_words_source=options['n_words_src'],
                             n_words_target=options['n_words'],
                             batch_size=20,
                             cache=10,
                             maxlen=1000000)

    valid_num = validIter.num
    print 'training set {} lines / validation set {} lines'.format(
        train_num, valid_num)
    print 'use the reward function {}'.format(chr(config['Rtype'] + 65))

    # ========================================================================== #
    # Main Loop: Run
    # ========================================================================== #
    print 'Start Simultaneous Translator...'
    monitor = None
    if remote:
        monitor = Monitor(root='http://localhost:9000')

    # freqs
    save_freq = 200
    sample_freq = 10
    valid_freq = 200
    valid_size = 200
    display_freq = 50
    finetune_freq = 5

    history, last_it = agent.load()
    action_space = ['W', 'C', 'F']
    Log_avg = {}
    time0 = timer()

    pipe = OrderedDict()
    for key in ['x', 'x_mask', 'y', 'y_mask', 'c_mask']:
        pipe[key] = []

    def _translate(src,
                   trg,
                   samples=None,
                   train=False,
                   greedy=False,
                   show=False,
                   full=False):
        time0 = time.time()
        if full:
            options1 = copy.copy(options)
            options1['upper'] = True
        else:
            options1 = options

        ret = simultaneous_decoding(funcs, agent, options1, src, trg,
                                    word_idict_trg, samples, greedy, train)

        if show:
            info = ret[1]
            values = [(w, float(info[w])) for w in info if w != 'advantages']
            print ' , '.join(['{}={:.3f}'.format(k, f) for k, f in values]),
            print '...{}s'.format(time.time() - time0)

        return ret

    for it, (srcs,
             trgs) in enumerate(trainIter):  # only one sentence each iteration
        if it < last_it:  # go over the scanned lines.
            continue

        # for validation
        # doing the whole validation!!
        reference = []
        system = []

        if it % valid_freq == (valid_freq - 1):
            print 'start validation'

            collections = [[], [], [], [], []]
            probar_v = Progbar(valid_num / 20 + 1)
            for ij, (srcs, trgs) in enumerate(validIter):

                statistics = _translate(srcs,
                                        trgs,
                                        samples=1,
                                        train=False,
                                        greedy=True)

                quality, delay, reward = zip(*statistics['track'])
                reference += statistics['Ref']
                system += statistics['Sys']

                # compute the average consecutive waiting length
                def _consective(action):
                    waits = []
                    temp = 0
                    for a in action:
                        if a == 0:
                            temp += 1
                        elif temp > 0:
                            waits += [temp]
                            temp = 0

                    if temp > 0:
                        waits += [temp]

                    mean = numpy.mean(waits)
                    gec = numpy.max(
                        waits)  # numpy.prod(waits) ** (1./len(waits))
                    return mean, gec

                def _max_length(action):
                    _cur = 0
                    _end = 0
                    _max = 0
                    for it, a in enumerate(action):
                        if a == 0:
                            _cur += 1
                        elif a == 2:
                            _end += 1

                        temp = _cur - _end
                        if temp > _max:
                            _max = temp
                    return _max

                maxlen = [
                    _max_length(action) for action in statistics['action']
                ]
                means, gecs = zip(*(_consective(action)
                                    for action in statistics['action']))

                collections[0] += quality
                collections[1] += delay
                collections[2] += means
                collections[3] += gecs
                collections[4] += maxlen

                values = [('quality', numpy.mean(quality)),
                          ('delay', numpy.mean(delay)),
                          ('wait_mean', numpy.mean(means)),
                          ('wait_max', numpy.mean(gecs)),
                          ('max_len', numpy.mean(maxlen))]
                probar_v.update(ij + 1, values=values)

            validIter.reset()
            valid_bleu, valid_delay, valid_wait, valid_wait_gec, valid_mx = [
                numpy.mean(a) for a in collections
            ]
            print 'Iter = {}: AVG BLEU = {}, DELAY = {}, WAIT(MEAN) = {}, WAIT(MAX) = {}, MaxLen={}'.format(
                it, valid_bleu, valid_delay, valid_wait, valid_wait_gec,
                valid_mx)

            print 'Compute the Corpus BLEU={} (greedy)'.format(
                corpus_bleu(reference, system))

            with open(WORK + '.translate/test.txt', 'w') as fout:
                for sys in system:
                    fout.write('{}\n'.format(' '.join(sys)))

            with open(WORK + '.translate/ref.txt', 'w') as fout:
                for ref in reference:
                    fout.write('{}\n'.format(' '.join(ref[0])))

            history += [collections]
            print 'done'

        if options['upper']:
            print 'done'
            import sys
            sys.exit(-1)

        # training set sentence tuning
        new_srcs, new_trgs = [], []
        for src, trg in zip(srcs, trgs):
            if len(src) <= options['s0']:
                continue  # ignore when the source sentence is less than sidx.
            else:
                new_srcs += [src]
                new_trgs += [trg]

        if len(new_srcs) == 0:
            continue

        srcs, trgs = new_srcs, new_trgs
        statistics, info = _translate(srcs, trgs, train=True, show=True)

        if it % sample_freq == 0:

            # obtain the translation results
            samples = _bpe2words(
                _seqs2words(statistics['sample'], word_idict_trg,
                            statistics['action'], 1))
            sources = _bpe2words(
                _seqs2words(statistics['SWord'], word_idict,
                            statistics['action'], 0))
            targets = _bpe2words(
                _seqs2words(statistics['TWord'], word_idict_trg))

            # obtain the delay (normalized)
            # delays = _action2delay(srcs[0], statistics['action'])

            c = 0
            for j in xrange(len(samples)):

                if statistics['seq_info'][j][0] == 0:
                    if c < (config['sample'] / 2.):
                        c += 1
                        continue

                    print '--Iter: {}'.format(it)
                    print 'source: ', sources[j]
                    print 'sample: ', samples[j]
                    print 'target: ', targets[j]
                    print 'quality:', statistics['track'][j][0]
                    print 'delay:', statistics['track'][j][1]
                    print 'reward:', statistics['track'][j][2]
                    break

        # NaN detector
        #for w in info:
        #    if numpy.isnan(info[w]) or numpy.isinf(info[w]):
        #        raise RuntimeError, 'NaN/INF is detected!! {} : ID={}'.format(w, id)

        # remote display
        if remote:
            logs = {
                'R': info['R'],
                'Q': info['Q'],
                'D': info['D'],
                'P': float(info['P'])
            }
            if 'a_cost' in info:
                logs['A'] = info['a_cost']

            print logs
            for w in logs:
                Log_avg[w] = Log_avg.get(w, 0) + logs[w]

            if it % display_freq == (display_freq - 1):
                for w in Log_avg:
                    Log_avg[w] /= display_freq

                monitor.display(it + 1, Log_avg)
                Log_avg = dict()

        # save the history & model
        history += [info]
        if it % save_freq == 0:
            agent.save(history, it)
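Examples #6 and #7 only construct Monitor(root='http://localhost:9000') and call monitor.display(step, averaged_logs); the class itself is not shown. Purely as an illustration of that interface, a minimal hypothetical stand-in that pushes the averaged metrics to such a server could look like this (the endpoint path and payload format are assumptions, not the project's code):

import json
import urllib2  # Python 2, matching the snippets above

class Monitor(object):
    # Hypothetical remote monitor: POST averaged training metrics to a plotting server.
    def __init__(self, root='http://localhost:9000'):
        self.root = root

    def display(self, step, logs):
        payload = json.dumps({'step': step, 'metrics': logs})
        req = urllib2.Request(self.root + '/update', data=payload,
                              headers={'Content-Type': 'application/json'})
        try:
            urllib2.urlopen(req, timeout=2)
        except Exception:
            pass  # monitoring must never break the training loop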
Code example #8
    def train(self,
              train_x,
              train_y,
              epoch=100,
              batch_size=128,
              validation_data=None,
              valid_freq=100,
              patience=10,
              monitor=False):

        # Define the symbolic train model
        batch_index = T.iscalar('batch_index')
        x = T.matrix('x', dtype=train_x.get_value(borrow=True).dtype)
        y = T.vector('y', dtype=train_y.get_value(borrow=True).dtype)
        loss = self.loss(x, y)
        updates = self.optimizer.get_updates(loss, self.params)

        sample_num = train_x.get_value(borrow=True).shape[0]
        train_batch_num = int(np.ceil(sample_num / batch_size))
        train_fn = theano.function(
            inputs=[batch_index],
            outputs=loss,
            updates=updates,
            givens={
                x: train_x[batch_index * batch_size:(batch_index + 1) *
                           batch_size],
                y: train_y[batch_index * batch_size:(batch_index + 1) *
                           batch_size]
            })
        train_acc_fn = theano.function([], self.score(train_x, train_y))

        # Initialization for validation
        if validation_data is not None:
            valid = True
            (valid_x, valid_y) = validation_data
            # Note: no `updates` here -- evaluating the validation loss must not modify the parameters.
            valid_fn = theano.function(
                inputs=[batch_index],
                outputs=loss,
                givens={
                    x:
                    valid_x[batch_index * batch_size:(batch_index + 1) *
                            batch_size],
                    y:
                    valid_y[batch_index * batch_size:(batch_index + 1) *
                            batch_size]
                })
            valid_acc_fn = theano.function([], self.score(valid_x, valid_y))
            valid_batch_num = int(
                np.ceil(valid_x.get_value(borrow=True).shape[0] / batch_size))
            valid_losses = []
            valid_acces = []
            best_valid_acc = 0
            p = 0
        else:
            valid = False
            valid_losses = None
            valid_acces = None

        # Initialization for monitor
        if monitor:
            m = Monitor(monitor_acc=True)

        train_losses = []
        train_acces = []
        stop = False
        iterations = epoch * train_batch_num
        for iter in range(iterations):
            i = int(iter / train_batch_num)  # current epoch
            j = iter % train_batch_num  # batch index

            # Train on a batch
            train_loss = train_fn(j)
            train_losses.append(train_loss)
            self.logger.info(
                'TRAINING - Epoch({0:4d} / {1:4d}), train loss: {2}'.format(
                    i + 1, epoch, train_loss))

            # Validating
            if valid and iter % valid_freq == 0:
                valid_loss = np.mean(
                    [valid_fn(k) for k in range(valid_batch_num)])
                valid_losses.append(valid_loss)

                train_acc = train_acc_fn()
                train_acces.append(train_acc)
                valid_acc = valid_acc_fn()
                valid_acces.append(valid_acc)
                self.logger.info(
                    'VALIDATING - Iteration ({0}), valid loss: {1}'.format(
                        iter, valid_loss))
                self.logger.info(
                    'VALIDATING - Iteration ({0}), train acc: {1}'.format(
                        iter, train_acc))
                self.logger.info(
                    'VALIDATING - Iteration ({0}), valid acc: {1}'.format(
                        iter, valid_acc))

                # Early stopping: reset patience when validation accuracy improves
                if valid_acc > best_valid_acc:
                    best_valid_acc = valid_acc
                    p = 0
                else:
                    p += 1
                    if p >= patience:
                        stop = True

            if monitor:
                m.update(train_losses, valid_losses, valid_freq, train_acces,
                         valid_acces)

            if stop:
                break

        if monitor:
            m.save()
Code example #9
    def train(self,
              train_x,
              mask_train_x,
              train_y,
              mask_train_y,
              epoch=10,
              batch_size=128,
              validation_data=None,
              valid_freq=100,
              patience=10,
              monitor=False,
              epoch_end_callback=None):

        # Define the symbolic train model
        batch_index = T.iscalar('batch_index')
        x = T.imatrix('x')
        y = T.imatrix('y')
        m_x = T.imatrix('m_x')
        m_y = T.imatrix('m_y')
        loss = self.loss(x, m_x, y, m_y)
        updates = self.optimizer.get_updates(loss, self.params)

        sample_num = train_x.get_value(borrow=True).shape[0]
        train_batch_num = int(np.ceil(sample_num / batch_size))
        train_fn = theano.function(
            inputs=[batch_index],
            outputs=loss,
            updates=updates,
            givens={
                x:
                train_x[batch_index * batch_size:(batch_index + 1) *
                        batch_size],
                y:
                train_y[batch_index * batch_size:(batch_index + 1) *
                        batch_size],
                m_x:
                mask_train_x[:, batch_index * batch_size:(batch_index + 1) *
                             batch_size],
                m_y:
                mask_train_y[:, batch_index * batch_size:(batch_index + 1) *
                             batch_size]
            })

        # Initialization for validation
        if validation_data is not None:
            valid = True
            (valid_x, mask_valid_x, valid_y, mask_valid_y) = validation_data
            # Note: no `updates` here -- evaluating the validation loss must not modify the parameters.
            valid_fn = theano.function(
                inputs=[batch_index],
                outputs=loss,
                givens={
                    x:
                    valid_x[batch_index * batch_size:(batch_index + 1) *
                            batch_size],
                    y:
                    valid_y[batch_index * batch_size:(batch_index + 1) *
                            batch_size],
                    m_x:
                    mask_valid_x[:, batch_index *
                                 batch_size:(batch_index + 1) * batch_size],
                    m_y:
                    mask_valid_y[:, batch_index *
                                 batch_size:(batch_index + 1) * batch_size]
                })
            valid_losses = []
            valid_batch_num = int(
                np.ceil(valid_x.get_value(borrow=True).shape[0] / batch_size))
            best_valid_loss = np.inf
            p = 0
        else:
            valid = False
            valid_losses = None

        # Initialization for monitor
        if monitor:
            m = Monitor(monitor_acc=False)

        train_losses = []
        stop = False
        iterations = epoch * train_batch_num
        for iter in range(iterations):
            i = int(iter / train_batch_num)  # current epoch
            j = iter % train_batch_num  # batch index

            # Train on a batch
            train_loss = train_fn(j)
            train_losses.append(train_loss)
            self.logger.info(
                'TRAINING - Epoch({0:4d} / {1:4d}), train loss: {2}'.format(
                    i + 1, epoch, train_loss))

            if valid and iter % valid_freq == 0:
                valid_loss = np.mean(
                    [valid_fn(k) for k in range(valid_batch_num)])
                valid_losses.append(valid_loss)

                self.logger.info(
                    'VALIDATING - Iteration ({0}), valid loss: {1}'.format(
                        iter, valid_loss))

                # Early stopping: reset patience when validation loss improves
                if valid_loss < best_valid_loss:
                    best_valid_loss = valid_loss
                    p = 0
                else:
                    p += 1
                    if p >= patience:
                        stop = True

            if monitor:
                m.update(train_losses, valid_losses, valid_freq)

            if stop:
                break

            if iter % train_batch_num == 0 and epoch_end_callback and callable(
                    epoch_end_callback):
                epoch_end_callback()

        if monitor:
            m.save()

        return np.asarray(train_losses).reshape(epoch, train_batch_num)
Code example #10
    print(
        "\n[info] Creating generator and discriminator architectures for GAN...\n"
    )
    gen = Generator(num_classes, image_size, bn=True)
    disc = Discriminator(num_classes, image_size, min_neurons, bn_epsilon=1e-5)

    print("\n[info] Pre-training or loading pre-trained discriminator...\n")
    disc.pretrain(train_gen,
                  valid_gen,
                  pretrain_iterations,
                  pretrain_learning_rate,
                  retrain=False)
    ''' start training GAN for real '''
    print("\n[info] Start training GAN...\n")
    trial_name = 'GANGoghBN'
    monitor = Monitor(trial_name)
    checkpoint = 0

    if checkpoint > 0:
        success = gen.load('{}{}'.format(trial_name, checkpoint))
        success &= disc.load('{}{}'.format(trial_name, checkpoint))
        if not success:
            checkpoint = 0

    gen_opt = tf.keras.optimizers.Adam(learning_rate=gen_learning_rate,
                                       beta_1=.5,
                                       beta_2=.9)
    disc_opt = tf.keras.optimizers.Adam(learning_rate=disc_learning_rate,
                                        beta_1=.5,
                                        beta_2=.9)
    for i in range(checkpoint, train_iterations):
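The excerpt ends where the training loop begins, so the loop body is not shown. Purely to illustrate how the two Adam optimizers defined above are typically wired into an alternating GAN update, a hedged sketch follows; every name in it other than gen_opt and disc_opt (gen.model, disc.model, the conditional inputs, the WGAN-style losses) is an assumption, not the project's code.

import tensorflow as tf

@tf.function
def train_step(real_images, labels, noise_dim=128):
    noise = tf.random.normal([tf.shape(real_images)[0], noise_dim])
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake_images = gen.model([noise, labels], training=True)         # hypothetical call signature
        real_logits = disc.model([real_images, labels], training=True)  # hypothetical call signature
        fake_logits = disc.model([fake_images, labels], training=True)
        d_loss = tf.reduce_mean(fake_logits) - tf.reduce_mean(real_logits)  # WGAN-style critic loss
        g_loss = -tf.reduce_mean(fake_logits)
    disc_grads = d_tape.gradient(d_loss, disc.model.trainable_variables)
    gen_grads = g_tape.gradient(g_loss, gen.model.trainable_variables)
    disc_opt.apply_gradients(zip(disc_grads, disc.model.trainable_variables))
    gen_opt.apply_gradients(zip(gen_grads, gen.model.trainable_variables))
    return g_loss, d_loss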