# Training driver: builds the logger, loss trackers, parameter saver and the
# two update steps, registers them on the optimizer, runs the optimization and
# finally calls cuda_device_synchronize on every CUDA device.
logger = get_logger('train.log')
momentum_policy = FixedValuePolicy(0.95)

train_loss_tracker = TrainLossTracker(model, 100, logger)
valid_tracker = ValidTracker(model, 500, logger)
loss_tracker = LossForValidTracker(logger)
valid_tracker.add_observer(loss_tracker)
saver = Hdf5Saver(p.trainable_parameters, 5000, 'ptb_parameters.hdf5', logger)

# Work on a private copy so that 'embd_W' can be taken out without touching
# p.trainable_parameters: 'embd_W' gets its own SparseSgdStep, every remaining
# parameter is handed to a single NagStep.
trainable_parameters = dict(p.trainable_parameters)
sparse_sgd_step = SparseSgdStep(
    [trainable_parameters.pop('embd_W')],
    FixedValuePolicy(0.01),
)
nag_step = NagStep(
    trainable_parameters.values(),
    FixedValuePolicy(0.01),
    momentum_policy,
)
# Variant left commented out in the original script:
# nag_step = SgdStep(trainable_parameters.values(), learning_rate_policy)

# data_block receives the blocking contexts of both update steps
# (NagStep's first, then SparseSgdStep's).
data_block.blocking_contexts = (
    nag_step.blocking_contexts + sparse_sgd_step.blocking_contexts
)

criterion = MaxIterCriterion(20000)
optimizer = Optimizer(criterion, model)
# Registration order is kept exactly as in the original script.
for observer in (
    sparse_sgd_step,
    nag_step,
    train_loss_tracker,
    valid_tracker,
    saver,
    criterion,
):
    optimizer.add_observer(observer)
optimizer.optimize()

# Select each CUDA device in turn and synchronize it.
device_count = cudart.cuda_get_device_count()
for device_id in xrange(device_count):
    cudart.cuda_set_device(device_id)
    cudart.cuda_device_synchronize()
# NOTE(review): this line is whitespace-collapsed and begins mid-statement: the
# leading "c_fwd_repeat_block, ... seq_sce_block])" is the tail of a list whose
# opening is outside this view, so the code below is left byte-for-byte as is.
# After that tail the line sets up training: a 'train.log' logger, a
# FixedValuePolicy(0.95) momentum policy, train/validation loss trackers, an
# Hdf5Saver writing 'ptb_parameters.hdf5', a SparseSgdStep for 'embd_W' and a
# NagStep for the remaining trainable parameters, an Optimizer built with
# MaxIterCriterion(20000) on which those objects are registered as observers,
# then optimizer.optimize() and a cuda_device_synchronize call per CUDA device.
# NOTE(review): as written, the inline "#" before "nag_step = SgdStep(...)"
# turns the rest of the line into a comment; presumably only that one statement
# was commented out in the original layout -- confirm when restoring line breaks.
c_fwd_repeat_block, h_fwd_repeat_block, fwd_lstm_block, c_bwd_repeat_block, h_bwd_repeat_block, bwd_lstm_block, seq_hstack, seq_dot_block, seq_sce_block]) logger = get_logger('train.log') momentum_policy = FixedValuePolicy(0.95) train_loss_tracker = TrainLossTracker(model, 100, logger) valid_tracker = ValidTracker(model, 500, logger) loss_tracker = LossForValidTracker(logger) valid_tracker.add_observer(loss_tracker) saver = Hdf5Saver(p.trainable_parameters, 5000, 'ptb_parameters.hdf5', logger) trainable_parameters = dict(p.trainable_parameters) sparse_sgd_step = SparseSgdStep([trainable_parameters['embd_W']], FixedValuePolicy(0.01)) del trainable_parameters['embd_W'] nag_step = NagStep(trainable_parameters.values(), FixedValuePolicy(0.01), momentum_policy) # nag_step = SgdStep(trainable_parameters.values(), learning_rate_policy) data_block.blocking_contexts = nag_step.blocking_contexts + sparse_sgd_step.blocking_contexts criterion = MaxIterCriterion(20000) optimizer = Optimizer(criterion, model) optimizer.add_observer(sparse_sgd_step) optimizer.add_observer(nag_step) optimizer.add_observer(train_loss_tracker) optimizer.add_observer(valid_tracker) optimizer.add_observer(saver) optimizer.add_observer(criterion) optimizer.optimize() for device_id in xrange(cudart.cuda_get_device_count()): cudart.cuda_set_device(device_id) cudart.cuda_device_synchronize()
# Training driver: one learning-rate policy shared by a SparseSgdStep (for
# 'embd_W') and a NagStep (for the other trainable parameters), a scheduled
# momentum policy, a parameter saver and a train-loss tracker, all registered
# as observers on the optimizer.
logger = get_logger('ukr_char_lstm_train.log')

# Alternative values left commented out in the original script:
# learning_rate_policy = FixedValuePolicy(0.0005)
# momentum_policy = ScheduledValuePolicy({0: 0.9}, 'momentum', logger)
learning_rate_policy = FixedValuePolicy(0.000001)
momentum_policy = ScheduledValuePolicy({0: 0.99}, 'momentum', logger)
saver = Hdf5Saver(p.parameters, 200, 'ukr_char_lstm.hdf5', logger)
criterion = MaxIterCriterion(5000000000)

sgd_step = SparseSgdStep([p['embd_W']], learning_rate_policy)
# NagStep gets a copy of the trainable parameters with 'embd_W' removed.
nag_params = dict(p.trainable_parameters)
nag_params.pop('embd_W')
nag_step = NagStep(nag_params.values(), learning_rate_policy, momentum_policy)


class DeppendSetter(object):
    """Observer whose notify() reassigns data_block.blocking_contexts.

    The new value is nag_step's blocking contexts followed by sgd_step's,
    both read from the module-level names at call time.
    """

    def notify(self):
        data_block.blocking_contexts = (
            nag_step.blocking_contexts + sgd_step.blocking_contexts
        )


# Perform the initial assignment through the same code path that is later
# triggered by the optimizer.
dependency_setter = DeppendSetter()
dependency_setter.notify()

train_loss_tracker = TrainLossTracker(model, 25, logger)

optimizer = Optimizer(criterion, model)
# Registration order is kept exactly as in the original script.
for observer in (
    momentum_policy,
    sgd_step,
    nag_step,
    dependency_setter,
    train_loss_tracker,
    saver,
    criterion,
):
    optimizer.add_observer(observer)
optimizer.optimize()

# Select each CUDA device in turn and synchronize it.
device_count = cudart.cuda_get_device_count()
for device_id in xrange(device_count):
    cudart.cuda_set_device(device_id)
    cudart.cuda_device_synchronize()
# Validation, checkpointing and optimization setup. `logger`, `model`, `p`,
# `data_block` and `train_loss_tracker` are defined earlier in the script,
# outside this fragment.
valid_loss_tracker = ValidLossTracker(logger)
validator = Validator(model, 16000)
validator.add_observer(valid_loss_tracker)
saver = Hdf5Saver(p.trainable_parameters, 2000, 'drop_auto.hdf5', logger)

# Work on a private copy so that 'embd_W' can be taken out without touching
# p.trainable_parameters: 'embd_W' gets its own SparseSgdStep, every remaining
# parameter is handed to a single NagStep.
trainable_parameters = dict(p.trainable_parameters)
sparse_sgd_step = SparseSgdStep(
    [trainable_parameters.pop('embd_W')],
    FixedValuePolicy(0.01),
)
nag_step = NagStep(
    trainable_parameters.values(),
    FixedValuePolicy(0.01),
    FixedValuePolicy(0.9),
)


class DependencySetter(object):
    """Observer whose notify() reassigns data_block.blocking_contexts.

    The new value is nag_step's blocking contexts followed by
    sparse_sgd_step's, both read from the module-level names at call time.
    """

    def notify(self):
        data_block.blocking_contexts = (
            nag_step.blocking_contexts + sparse_sgd_step.blocking_contexts
        )


# Perform the initial assignment through the same code path that is later
# triggered by the optimizer.
dependency_setter = DependencySetter()
dependency_setter.notify()

criterion = MaxIterCriterion(2000000)
optimizer = Optimizer(criterion, model)
# Registration order is kept exactly as in the original script.
for observer in (
    sparse_sgd_step,
    nag_step,
    dependency_setter,
    train_loss_tracker,
    validator,
    saver,
    criterion,
):
    optimizer.add_observer(observer)
optimizer.optimize()

# Select each CUDA device in turn and synchronize it.
device_count = cudart.cuda_get_device_count()
for device_id in xrange(device_count):
    cudart.cuda_set_device(device_id)
    cudart.cuda_device_synchronize()
# Optimization setup: one learning-rate policy shared by a SparseSgdStep (for
# 'embd_W') and a NagStep (for the other trainable parameters), a scheduled
# momentum policy, a parameter saver and a train-loss tracker, all registered
# as observers on the optimizer. `logger`, `model`, `p` and `data_block` are
# defined earlier in the script, outside this fragment.

# Alternative values left commented out in the original script:
# learning_rate_policy = FixedValuePolicy(0.0005)
# momentum_policy = ScheduledValuePolicy({0: 0.9}, 'momentum', logger)
learning_rate_policy = FixedValuePolicy(0.000001)
momentum_policy = ScheduledValuePolicy({0: 0.99}, 'momentum', logger)
saver = Hdf5Saver(p.parameters, 200, 'ukr_char_lstm.hdf5', logger)
criterion = MaxIterCriterion(5000000000)

sgd_step = SparseSgdStep([p['embd_W']], learning_rate_policy)
# NagStep gets a copy of the trainable parameters with 'embd_W' removed.
nag_params = dict(p.trainable_parameters)
nag_params.pop('embd_W')
nag_step = NagStep(nag_params.values(), learning_rate_policy, momentum_policy)


class DeppendSetter(object):
    """Observer whose notify() reassigns data_block.blocking_contexts.

    The new value is nag_step's blocking contexts followed by sgd_step's,
    both read from the module-level names at call time.
    """

    def notify(self):
        data_block.blocking_contexts = (
            nag_step.blocking_contexts + sgd_step.blocking_contexts
        )


# Perform the initial assignment through the same code path that is later
# triggered by the optimizer.
dependency_setter = DeppendSetter()
dependency_setter.notify()

train_loss_tracker = TrainLossTracker(model, 25, logger)

optimizer = Optimizer(criterion, model)
# Registration order is kept exactly as in the original script.
for observer in (
    momentum_policy,
    sgd_step,
    nag_step,
    dependency_setter,
    train_loss_tracker,
    saver,
    criterion,
):
    optimizer.add_observer(observer)
optimizer.optimize()

# Select each CUDA device in turn and synchronize it.
device_count = cudart.cuda_get_device_count()
for device_id in xrange(device_count):
    cudart.cuda_set_device(device_id)
    cudart.cuda_device_synchronize()