def translate_model(queue, rqueue, pid, configuration, normalize):
    rng = numpy.random.RandomState(1234)
    enc_dec = EncoderDecoder(rng, **configuration)
    enc_dec.build_sampler()
    enc_dec.load(path=configuration['saveto_best'])

    search_model = BeamSearch(enc_dec=enc_dec,
                              beam_size=configuration['beam_size'],
                              maxlen=3 * configuration['seq_len_src'],
                              stochastic=False,
                              configuration=configuration)

    def _translate(seq):
        outputs, scores = search_model.apply(numpy.array(seq).T)
        if normalize:
            # normalize scores by hypothesis length before picking the best one
            lengths = numpy.array([len(s) for s in outputs])
            scores = scores / lengths
        sidx = numpy.argmin(scores)
        # strip the end-of-sentence token
        return outputs[sidx][:-1]

    while True:
        req = queue.get()
        if req is None:
            break

        idx, x = req[0], req[1]
        print pid, '-', idx
        seq = _translate(x)
        rqueue.put((idx, seq))

    return
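# A minimal sketch (not part of the original script) of how translate_model is
# typically driven: a parent process fills a task queue with (index, sentence)
# pairs, spawns worker processes, and collects (index, translation) results.
# The helper name, the worker count, and the 'jobs' argument are illustrative
# assumptions, not code from this repository.
from multiprocessing import Process, Queue

def parallel_translate(jobs, configuration, normalize=False, n_workers=2):
    queue, rqueue = Queue(), Queue()
    procs = [Process(target=translate_model,
                     args=(queue, rqueue, pid, configuration, normalize))
             for pid in range(n_workers)]
    for p in procs:
        p.start()
    for idx, seq in enumerate(jobs):
        queue.put((idx, seq))
    for _ in procs:
        queue.put(None)  # poison pill: each worker exits on None
    results = [rqueue.get() for _ in jobs]
    for p in procs:
        p.join()
    # restore the original sentence order
    return [seq for _, seq in sorted(results)]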
parser.add_argument("--model", help="Model to use")
parser.add_argument('source', type=str)
parser.add_argument('target', type=str)
parser.add_argument('alignment', type=str)
args = parser.parse_args()

configuration = getattr(configurations, args.proto)()
if args.state:
    configuration.update(eval(open(args.state).read()))
logger.info("\nModel options:\n{}".format(pprint.pformat(configuration)))

rng = numpy.random.RandomState(1234)
enc_dec = EncoderDecoder(rng, **configuration)
enc_dec.build_sampler()

# option to use another trained model than the configured best checkpoint
if args.model:
    enc_dec.load(path=args.model)
else:
    enc_dec.load(path=configuration['saveto_best'])

test_align = Align(enc_dec=enc_dec,
                   with_attention=configuration['with_attention'],
                   with_coverage=configuration['with_coverage'],
                   coverage_dim=configuration['coverage_dim'],
                   coverage_type=configuration['coverage_type'],
                   max_fertility=configuration['max_fertility'],
def main(configuration, ps_device=None, devices=None):
    prefer_to_model_parallel = configuration['prefer_to_model_parallel']
    l1_reg_weight = configuration['l1_reg_weight']
    l2_reg_weight = configuration['l2_reg_weight']

    # time_steps * nb_samples
    src = K.placeholder(shape=(None, None), dtype='int32')
    src_mask = K.placeholder(shape=(None, None))
    trg = K.placeholder(shape=(None, None), dtype='int32')
    trg_mask = K.placeholder(shape=(None, None))
    # for fast training of new parameters
    ite = K.placeholder(ndim=0)

    enc_dec = EncoderDecoder(**configuration)

    softmax_output_num_sampled = configuration['softmax_output_num_sampled']

    if devices:
        if prefer_to_model_parallel:
            enc_dec.build_trainer_with_model_parallel(
                src, src_mask, trg, trg_mask, ite, ps_device, devices,
                l1_reg_weight=l1_reg_weight,
                l2_reg_weight=l2_reg_weight)
        else:
            # clone the inputs, one copy per device
            src = [K.placeholder(shape=(None, None), dtype='int32') for _ in devices]
            src_mask = [K.placeholder(shape=(None, None)) for _ in devices]
            trg = [K.placeholder(shape=(None, None), dtype='int32') for _ in devices]
            trg_mask = [K.placeholder(shape=(None, None)) for _ in devices]

            enc_dec.build_trainer_with_data_parallel(
                src, src_mask, trg, trg_mask, ite, devices,
                l1_reg_weight=l1_reg_weight,
                l2_reg_weight=l2_reg_weight,
                softmax_output_num_sampled=softmax_output_num_sampled)
    else:
        enc_dec.build_trainer(
            src, src_mask, trg, trg_mask, ite,
            l1_reg_weight=l1_reg_weight,
            l2_reg_weight=l2_reg_weight,
            softmax_output_num_sampled=softmax_output_num_sampled)

    enc_dec.build_sampler()

    if configuration['reload']:
        enc_dec.load()

    '''
    # commented out for fast training
    sample_search = BeamSearch(enc_dec=enc_dec,
                               configuration=configuration,
                               beam_size=1,
                               maxlen=configuration['seq_len_src'],
                               stochastic=True)
    valid_search = BeamSearch(enc_dec=enc_dec,
                              configuration=configuration,
                              beam_size=configuration['beam_size'],
                              maxlen=3 * configuration['seq_len_src'],
                              stochastic=False)

    sampler = Sampler(sample_search, **configuration)
    bleuvalidator = BleuValidator(valid_search, **configuration)
    '''

    # train function
    train_fn = enc_dec.train_fn

    # train data
    ds = DStream(**configuration)

    # valid data
    '''
    # commented out for fast training
    vs = get_devtest_stream(data_type='valid', input_file=None, **configuration)
    '''

    iters = args.start
    valid_bleu_best = -1
    epoch_best = -1
    iters_best = -1
    max_epochs = configuration['finish_after']

    for epoch in range(max_epochs):
        for x, x_mask, y, y_mask in ds.get_iterator():
            last_time = time.time()

            # for data parallelism, split each mini-batch into len(devices) parts
            if devices and not prefer_to_model_parallel:
                # skip mini-batches with fewer samples than devices
                num_devices = len(devices)
                num_samples = len(x)
                if num_samples < num_devices:
                    logger.warn(
                        'epoch %d \t updates %d ignored current mini-batch, since its number of samples (%d) < the number of devices (%d)'
                        % (epoch, iters, num_samples, num_devices))
                    continue

                inputs = []
                for data in (x, x_mask, y, y_mask):
                    parts = split(data, num_devices)
                    parts = [item.T for item in parts]
                    inputs.extend(parts)
            else:
                inputs = [x.T, x_mask.T, y.T, y_mask.T]

            tc = train_fn(inputs)
            cur_time = time.time()
            iters += 1

            num_of_words = np.prod(x.shape)
            duration = cur_time - last_time
            words_per_sec = int(num_of_words / duration)
            logger.info(
                'epoch %d \t updates %d train cost %.4f use time %.4f sec, %d words/sec'
                % (epoch, iters, tc[0], duration, words_per_sec))

            if devices and not prefer_to_model_parallel:
                # per-device costs are only available with data parallelism;
                # model parallelism returns the total cost only
                for i, device in enumerate(devices):
                    logger.info('epoch %d \t updates %d device %s train cost %.4f'
                                % (epoch, iters, device, tc[i + 1]))

            '''
            # commented out for fast training
            if iters % configuration['save_freq'] == 0:
                enc_dec.save()

            if iters % configuration['sample_freq'] == 0:
                sampler.apply(x, y)

            if iters < configuration['val_burn_in']:
                continue

            if (iters <= configuration['val_burn_in_fine'] and iters % configuration['valid_freq'] == 0) \
                    or (iters > configuration['val_burn_in_fine'] and iters % configuration['valid_freq_fine'] == 0):
                valid_bleu = bleuvalidator.apply(vs, configuration['valid_out'])
                os.system('mkdir -p results/%d' % iters)
                os.system('mv %s* %s results/%d' % (configuration['valid_out'], configuration['saveto'], iters))
                logger.info('valid_test \t epoch %d \t updates %d valid_bleu %.4f'
                            % (epoch, iters, valid_bleu))
                if valid_bleu > valid_bleu_best:
                    valid_bleu_best = valid_bleu
                    epoch_best = epoch
                    iters_best = iters
                    enc_dec.save(path=configuration['saveto_best'])
            '''

    logger.info('final result: epoch %d \t updates %d valid_bleu_best %.4f'
                % (epoch_best, iters_best, valid_bleu_best))
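# The split() helper used in the data-parallel branch above is defined
# elsewhere in this codebase. The sketch below only illustrates the behaviour
# assumed here (dividing a mini-batch along the sample axis into one chunk per
# device); the name split_batch and the example shapes are hypothetical.
import numpy as np

def split_batch(data, num_parts):
    # np.array_split tolerates batch sizes that are not a multiple of num_parts
    return np.array_split(data, num_parts, axis=0)

# e.g. a batch of 7 samples over 2 devices -> chunks of 4 and 3 samples,
# each later transposed to (time_steps, samples) before the device call:
#   chunks = split_batch(np.zeros((7, 30)), 2)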
def main(configuration, ps_device=None, devices=None):
    mkl_multinode = configuration['mkl_multinode']
    if mkl_multinode == True:
        mlsl_obj = mlsl.MLSL()
        mlsl_obj.init()
        node_idx = mlsl_obj.get_process_idx()
        node_num = mlsl_obj.get_process_count()
        print 'rank ', node_idx
        print 'nodes ', node_num
        dist = mlsl_obj.create_distribution(node_num, 1)
    else:
        mlsl_obj = None
        dist = None

    prefer_to_model_parallel = configuration['prefer_to_model_parallel']
    l1_reg_weight = configuration['l1_reg_weight']
    l2_reg_weight = configuration['l2_reg_weight']

    # time_steps * nb_samples
    src = K.placeholder(shape=(None, None), dtype='int32')
    src_mask = K.placeholder(shape=(None, None))
    trg = K.placeholder(shape=(None, None), dtype='int32')
    trg_mask = K.placeholder(shape=(None, None))
    # for fast training of new parameters
    ite = K.placeholder(ndim=0)

    enc_dec = EncoderDecoder(**configuration)

    softmax_output_num_sampled = configuration['softmax_output_num_sampled']

    if devices:
        if prefer_to_model_parallel:
            enc_dec.build_trainer_with_model_parallel(
                src, src_mask, trg, trg_mask, ite, ps_device, devices,
                l1_reg_weight=l1_reg_weight,
                l2_reg_weight=l2_reg_weight)
        else:
            # clone the inputs, one copy per device
            src = [K.placeholder(shape=(None, None), dtype='int32') for _ in devices]
            src_mask = [K.placeholder(shape=(None, None)) for _ in devices]
            trg = [K.placeholder(shape=(None, None), dtype='int32') for _ in devices]
            trg_mask = [K.placeholder(shape=(None, None)) for _ in devices]

            enc_dec.build_trainer_with_data_parallel(
                src, src_mask, trg, trg_mask, ite, devices,
                l1_reg_weight=l1_reg_weight,
                l2_reg_weight=l2_reg_weight,
                softmax_output_num_sampled=softmax_output_num_sampled)
    else:
        enc_dec.build_trainer(
            src, src_mask, trg, trg_mask, ite,
            l1_reg_weight=l1_reg_weight,
            l2_reg_weight=l2_reg_weight,
            softmax_output_num_sampled=softmax_output_num_sampled,
            mlsl_obj=mlsl_obj,
            dist=dist)

    enc_dec.build_sampler()

    if configuration['reload']:
        enc_dec.load()

    '''
    # commented out for fast training
    sample_search = BeamSearch(enc_dec=enc_dec,
                               configuration=configuration,
                               beam_size=1,
                               maxlen=configuration['seq_len_src'],
                               stochastic=True)
    valid_search = BeamSearch(enc_dec=enc_dec,
                              configuration=configuration,
                              beam_size=configuration['beam_size'],
                              maxlen=3 * configuration['seq_len_src'],
                              stochastic=False)

    sampler = Sampler(sample_search, **configuration)
    bleuvalidator = BleuValidator(valid_search, **configuration)
    '''

    # train function
    train_fn = enc_dec.train_fn

    # train data
    ds = DStream(**configuration)

    # valid data
    '''
    # commented out for fast training
    vs = get_devtest_stream(data_type='valid', input_file=None, **configuration)
    '''

    iters = args.start
    valid_bleu_best = -1
    epoch_best = -1
    iters_best = -1
    max_epochs = configuration['finish_after']
    logger.info("epochs %d" % (max_epochs))

    fn = 'nmt_mkl_log'
    if mkl_multinode == True:
        if node_idx == 0:
            file = open(fn, 'w', 0)
            last_time = time.time()
            print('mkl multinode')
    else:
        file = open(fn, 'w', 0)
        last_time = time.time()
        print('mkl single node')

    for epoch in range(max_epochs):
        for x, x_mask, y, y_mask in ds.get_iterator():
            iter_count = 0
            #last_time = time.time()
            # for data parallelism, split each mini-batch into len(devices) parts
            if devices and not prefer_to_model_parallel:
                # skip mini-batches with fewer samples than devices
                num_devices = len(devices)
                num_samples = len(x)
                if num_samples < num_devices:
                    logger.warn(
                        'epoch %d \t updates %d ignored current mini-batch, since its number of samples (%d) < the number of devices (%d)'
                        % (epoch, iters, num_samples, num_devices))
                    continue

                inputs = []
                for data in (x, x_mask, y, y_mask):
                    parts = split(data, num_devices)
                    parts = [item.T for item in parts]
                    inputs.extend(parts)
            else:
                inputs = [x.T, x_mask.T, y.T, y_mask.T]

            #print('train start')
            tc = train_fn(inputs)
            #print('train finish')
            iters += 1
            #cur_time = time.time()
            #duration = cur_time - last_time
            #num_of_words = np.prod(x.shape)
            #words_per_sec = int(num_of_words / duration)
            #logger.info('epoch %d \t updates %d train cost %.4f use time %.4f sec, %d words/sec, data x %s, data y %s'
            #            % (epoch, iters, tc[0], duration, words_per_sec, x.shape, y.shape))

            '''
            # commented out for fast training
            if iters % configuration['save_freq'] == 0:
                enc_dec.save()

            if iters % configuration['sample_freq'] == 0:
                sampler.apply(x, y)

            if iters < configuration['val_burn_in']:
                continue

            if (iters <= configuration['val_burn_in_fine'] and iters % configuration['valid_freq'] == 0) \
                    or (iters > configuration['val_burn_in_fine'] and iters % configuration['valid_freq_fine'] == 0):
                valid_bleu = bleuvalidator.apply(vs, configuration['valid_out'])
                os.system('mkdir -p results/%d' % iters)
                os.system('mv %s* %s results/%d' % (configuration['valid_out'], configuration['saveto'], iters))
                logger.info('valid_test \t epoch %d \t updates %d valid_bleu %.4f'
                            % (epoch, iters, valid_bleu))
                if valid_bleu > valid_bleu_best:
                    valid_bleu_best = valid_bleu
                    epoch_best = epoch
                    iters_best = iters
                    enc_dec.save(path=configuration['saveto_best'])
            '''

            '''
            if mkl_multinode and node_idx == 0:
                file.write(str(tc[0]) + '\n')
            else:
                file.write(str(tc[0]) + '\n')
            '''
            iter_count += 1

    if mkl_multinode == True:
        if node_idx == 0:
            file.close()
            cur_time = time.time()
            duration = cur_time - last_time
            print('time one epoch ', duration)
    else:
        file.close()
        cur_time = time.time()
        duration = cur_time - last_time
        print('time one epoch ', duration)

    if mkl_multinode == True:
        mlsl_obj.delete_distribution(dist)
        mlsl_obj.finalize()
configuration = getattr(configurations, args.proto)()
# added by Zhaopeng Tu, 2016-05-12
if args.state:
    configuration.update(eval(open(args.state).read()))
logger.info("\nModel options:\n{}".format(pprint.pformat(configuration)))

batch_size = configuration['batch_size']

src = T.lmatrix()
src_mask = T.matrix()
trg = T.lmatrix()
trg_mask = T.matrix()

rng = numpy.random.RandomState(1234)
enc_dec = EncoderDecoder(rng, **configuration)
enc_dec.build_trainer(src, src_mask, trg, trg_mask)
enc_dec.build_sampler()

if configuration['reload']:
    enc_dec.load()

sample_search = BeamSearch(enc_dec=enc_dec,
                           configuration=configuration,
                           beam_size=1,
                           maxlen=configuration['seq_len_src'],
                           stochastic=True)
valid_search = BeamSearch(enc_dec=enc_dec,
                          configuration=configuration,
                          beam_size=configuration['beam_size'],
                          maxlen=3 * configuration['seq_len_src'],
                          stochastic=False)
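# Note on the --state override used above: configuration.update(eval(open(args.state).read()))
# expects a file whose contents eval() to a Python dict literal, whose entries are
# merged over the prototype configuration. A hypothetical override file might contain:
#
#   {'batch_size': 80, 'beam_size': 10, 'reload': True}
#
# The keys shown are illustrative; they must match keys produced by the prototype
# function in configurations.py.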
def main(configuration, is_chief=False):
    l1_reg_weight = configuration['l1_reg_weight']
    l2_reg_weight = configuration['l2_reg_weight']

    # time_steps * nb_samples
    src = K.placeholder(shape=(None, None), dtype='int32')
    src_mask = K.placeholder(shape=(None, None))
    trg = K.placeholder(shape=(None, None), dtype='int32')
    trg_mask = K.placeholder(shape=(None, None))
    # for fast training of new parameters
    ite = K.placeholder(ndim=0)

    enc_dec = EncoderDecoder(**configuration)

    softmax_output_num_sampled = configuration['softmax_output_num_sampled']

    enc_dec.build_trainer(
        src, src_mask, trg, trg_mask, ite,
        l1_reg_weight=l1_reg_weight,
        l2_reg_weight=l2_reg_weight,
        softmax_output_num_sampled=softmax_output_num_sampled)

    enc_dec.build_sampler()

    # the chief worker is responsible for initializing and loading model states
    if is_chief:
        init_op = tf.initialize_all_variables()
        init_fn = K.function(inputs=[], outputs=[init_op])
        init_fn([])

        if configuration['reload']:
            enc_dec.load()

    sample_search = BeamSearch(enc_dec=enc_dec,
                               configuration=configuration,
                               beam_size=1,
                               maxlen=configuration['seq_len_src'],
                               stochastic=True)
    valid_search = BeamSearch(enc_dec=enc_dec,
                              configuration=configuration,
                              beam_size=configuration['beam_size'],
                              maxlen=3 * configuration['seq_len_src'],
                              stochastic=False)

    sampler = Sampler(sample_search, **configuration)
    bleuvalidator = BleuValidator(valid_search, **configuration)

    # train function
    train_fn = enc_dec.train_fn
    if configuration['with_reconstruction'] and configuration['with_fast_training']:
        fast_train_fn = enc_dec.fast_train_fn

    # train data
    ds = DStream(**configuration)

    # valid data
    vs = get_devtest_stream(data_type='valid', input_file=None, **configuration)

    iters = args.start
    valid_bleu_best = -1
    epoch_best = -1
    iters_best = -1
    max_epochs = configuration['finish_after']

    # TODO: use a global iteration counter, and let only the chief save the model
    for epoch in range(max_epochs):
        for x, x_mask, y, y_mask in ds.get_iterator():
            last_time = time.time()

            if configuration['with_reconstruction'] and configuration['with_fast_training'] \
                    and iters < configuration['fast_training_iterations']:
                if configuration['fix_base_parameters'] and not configuration['with_tied_weights']:
                    tc = fast_train_fn([x.T, x_mask.T, y.T, y_mask.T])
                else:
                    tc = fast_train_fn([x.T, x_mask.T, y.T, y_mask.T, iters])
            else:
                tc = train_fn([x.T, x_mask.T, y.T, y_mask.T])

            cur_time = time.time()
            iters += 1
            logger.info('epoch %d \t updates %d train cost %.4f use time %.4f'
                        % (epoch, iters, tc[0], cur_time - last_time))

            if iters % configuration['save_freq'] == 0:
                enc_dec.save()

            if iters % configuration['sample_freq'] == 0:
                sampler.apply(x, y)

            if iters < configuration['val_burn_in']:
                continue

            if (iters <= configuration['val_burn_in_fine'] and iters % configuration['valid_freq'] == 0) \
                    or (iters > configuration['val_burn_in_fine'] and iters % configuration['valid_freq_fine'] == 0):
                valid_bleu = bleuvalidator.apply(vs, configuration['valid_out'])
                os.system('mkdir -p results/%d' % iters)
                os.system('mv %s* %s results/%d' % (configuration['valid_out'], configuration['saveto'], iters))
                logger.info('valid_test \t epoch %d \t updates %d valid_bleu %.4f'
                            % (epoch, iters, valid_bleu))
                if valid_bleu > valid_bleu_best:
                    valid_bleu_best = valid_bleu
                    epoch_best = epoch
                    iters_best = iters
                    enc_dec.save(path=configuration['saveto_best'])

    logger.info('final result: epoch %d \t updates %d valid_bleu_best %.4f'
                % (epoch_best, iters_best, valid_bleu_best))
def build_model(configuration):
    model = EncoderDecoder(**configuration)
    return model
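# A brief usage sketch for build_model, assuming the prototype function
# 'get_config_search_coverage' from configurations.py (as used by the server
# script below):
#
#   configuration = getattr(configurations, 'get_config_search_coverage')()
#   model = build_model(configuration)
#   model.build_sampler()  # compile the sampling / beam-search graph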
if __name__ == "__main__":
    args = parse_args()

    logger = logging.getLogger(__name__)
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.INFO)

    server_address = ('', args.port)
    httpd = BaseHTTPServer.HTTPServer(server_address, MTReqHandler)

    configuration = getattr(configurations, 'get_config_search_coverage')()
    if args.state:
        configuration.update(eval(open(args.state).read()))
    logger.info("\nModel options:\n{}".format(pprint.pformat(configuration)))

    enc_dec = EncoderDecoder(**configuration)
    enc_dec.build_sampler()
    enc_dec.load(path=args.model_path)

    search = BeamSearch(enc_dec=enc_dec,
                        beam_size=args.beam_size,
                        maxlen=3 * configuration['seq_len_src'],
                        stochastic=False,
                        configuration=configuration)
    sampler = Sampler(search, **configuration)
    httpd.sampler = sampler

    logger.info('Server starting..')
    httpd.serve_forever()