def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Helper: Construct a Transformer model from hyperparameters.

    :param src_vocab: source vocabulary size
    :param tgt_vocab: target vocabulary size
    :param N: number of encoder and decoder layers
    :param d_model: model (embedding) dimensionality
    :param d_ff: hidden size of the position-wise feed-forward network
    :param h: number of attention heads
    :param dropout: dropout rate
    :return: the assembled EncoderDecoder model
    """
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg (Xavier uniform distribution).
    # BUG FIX: the reference link below sat in a bare triple-quoted string (a no-op
    # statement masquerading as a comment); it is now a real comment, translated.
    # See https://zhuanlan.zhihu.com/p/74274453 (weight initialization, Xavier
    # uniform distribution).
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
def encode(labels, classes):
    """Encode every label via an EncoderDecoder initialized from *classes*.

    :param labels: iterable of labels to encode
    :param classes: class set used to build the encode/decode maps
    :return: list of encoded labels, in input order
    """
    codec = EncoderDecoder()
    codec.initialize_encode_and_decode_maps_from(classes)
    return [codec.encode(label) for label in labels]
def __init__(self, parameter_dict):
    """Configure the image-caption model from *parameter_dict* and build the network.

    :param parameter_dict: hyperparameter/config mapping (keys: "x", "first_word",
        "target", "vocab", "embed", "epoch", "hidden", "minibatch",
        "generation_limit", "use_gpu", "gpu_id", "choose_model")
    """
    self.parameter_dict = parameter_dict
    # Pull configuration values off the dict in their original order.
    for attr, key in (
        ("id2image", "x"),
        ("first_word", "first_word"),
        ("target", "target"),
        ("vocab", "vocab"),
        ("embed", "embed"),
        ("epoch", "epoch"),
        ("hidden", "hidden"),
        ("minibatch", "minibatch"),
        ("generation_limit", "generation_limit"),
        ("use_gpu", "use_gpu"),
        ("gpu_id", "gpu_id"),
        ("choose_model", "choose_model"),
    ):
        setattr(self, attr, parameter_dict[key])
    self.common_function = CommonFunction()
    self.model = "ChainerImageCaption"
    self.trg_batch = []
    self.trg_vocab = []
    self.is_training = True
    # NOTE(review): this overwrites the generation_limit just read from
    # parameter_dict — confirm whether the dict entry is ever meant to apply.
    self.generation_limit = 0
    self.encdec = EncoderDecoder(self.vocab, self.embed, self.hidden,
                                 self.choose_model, self.use_gpu, self.gpu_id)
    if self.use_gpu:
        self.encdec.to_gpu()
    self.__set_gpu()
def make_model(src_vocab, tgt_vocab, emb_size=256, hidden_size=512, num_layers=1, dropout=0.1):
    """Helper: build a GRU encoder-decoder with Bahdanau attention.

    :param src_vocab: source vocabulary size
    :param tgt_vocab: target vocabulary size
    :param emb_size: word-embedding dimensionality
    :param hidden_size: RNN hidden-state size
    :param num_layers: number of RNN layers
    :param dropout: dropout rate
    :return: the model, moved to CUDA when USE_CUDA is set
    """
    attention = BahdanauAttention(hidden_size)
    encoder = Encoder(emb_size, hidden_size, num_layers=num_layers, dropout=dropout)
    decoder = Decoder(emb_size, hidden_size, attention,
                      num_layers=num_layers, dropout=dropout)
    src_embed = nn.Embedding(src_vocab, emb_size)
    tgt_embed = nn.Embedding(tgt_vocab, emb_size)
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed,
                           Generator(hidden_size, tgt_vocab))
    if USE_CUDA:
        return model.cuda()
    return model
def train(self):
    """Train the encoder-decoder on the parallel corpus, then save vocabularies,
    spec and weights under the self.model prefix.
    """
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        # Seed encoder and decoder embeddings from a pre-trained word2vec model.
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        # Fresh generators every epoch; batches are sorted in windows of
        # 100 * minibatch sentence pairs to reduce padding.
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
            self.minibatch)
        # NOTE: the optimizer is recreated each epoch, so AdaGrad accumulators reset.
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        # Index of the sample printed for progress inspection.
        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            # If you use the ipython notebook you have to use the forward function:
            # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            hyp_batch, loss = self.forward_implement(
                src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K
    trace('saving model ...')
    prefix = self.model
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', encdec)
    trace('finished.')
def test(self):
    """Translate the source corpus with the saved model and write the
    hypotheses to self.target (one sentence per line)."""
    trace('loading model ...')
    src_vocab = Vocabulary.load(self.model + '.srcvocab')
    trg_vocab = Vocabulary.load(self.model + '.trgvocab')
    encdec = EncoderDecoder.load_spec(self.model + '.spec')
    serializers.load_hdf5(self.model + '.weights', encdec)
    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as fp:
        for src_batch in gens.batch(gens.word_list(self.source), self.minibatch):
            src_batch = fill_batch(src_batch)
            batch_size = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + batch_size))
            hyp_batch = self.forward(src_batch, None, src_vocab, trg_vocab,
                                     encdec, False, self.generation_limit)
            for source_index, hyp in enumerate(hyp_batch):
                # Guarantee a sentinel exists, then cut at the first one.
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print("src : " + "".join(src_batch[source_index]).replace("</s>", ""))
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=fp)
            generated += batch_size
    trace('finished.')
def test(self):
    """Load the trained model and emit one translation per source sentence
    into self.target."""
    trace('loading model ...')
    src_vocab = Vocabulary.load(self.model + '.srcvocab')
    trg_vocab = Vocabulary.load(self.model + '.trgvocab')
    encdec = EncoderDecoder.load_spec(self.model + '.spec')
    serializers.load_hdf5(self.model + '.weights', encdec)
    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as out:
        batches = gens.batch(gens.word_list(self.source), self.minibatch)
        for raw_batch in batches:
            src_batch = fill_batch(raw_batch)
            n = len(src_batch)
            trace('sample %8d - %8d ...' % (generated + 1, generated + n))
            hyp_batch = self.forward(src_batch, None, src_vocab, trg_vocab,
                                     encdec, False, self.generation_limit)
            for pos, hyp in enumerate(hyp_batch):
                # Append a sentinel so index() always succeeds, then truncate there.
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print("src : " + "".join(src_batch[pos]).replace("</s>", ""))
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=out)
            generated += n
    trace('finished.')
def __predict_sentence(self, src_batch):
    """Run one forward translation pass over *src_batch* with the saved model.

    :param src_batch: batch of source word sequences
    :return: batch of hypothesis word sequences
    """
    dialogue = EncoderDecoderModelForwardSlack(self.parameter)
    src_vocab = Vocabulary.load(self.model_name + '.srcvocab')
    trg_vocab = Vocabulary.load(self.model_name + '.trgvocab')
    model = EncoderDecoder.load_spec(self.model_name + '.spec')
    # NOTE(review): weights are loaded from dialogue.model while spec/vocabs come
    # from self.model_name — confirm both always resolve to the same prefix.
    serializers.load_hdf5(dialogue.model + '.weights', model)
    return dialogue.forward(src_batch, None, src_vocab, trg_vocab,
                            model, False, self.generation_limit)
def train(self):
    """Train the encoder-decoder on the parallel corpus and save vocabularies,
    spec and weights under the self.model prefix.
    """
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
    trace('making model ...')
    encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
    if self.word2vecFlag:
        # Seed embeddings from a pre-trained word2vec model.
        self.copy_model(self.word2vec, encdec.enc)
        self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
    else:
        # Without word2vec the freshly built model is discarded and the
        # pre-existing self.encdec (e.g. loaded from disk) is trained instead.
        encdec = self.encdec
    for epoch in range(self.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        trained = 0
        # Batches are sorted in windows of 100 * minibatch pairs to reduce padding.
        gen1 = gens.word_list(self.source)
        gen2 = gens.word_list(self.target)
        gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
        # NOTE: optimizer is recreated each epoch, so AdaGrad state resets.
        opt = optimizers.AdaGrad(lr = 0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        random_number = random.randint(0, self.minibatch - 1)
        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()
            # Only the first batch of each epoch is printed for inspection.
            if trained == 0:
                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)
            trained += K
    trace('saving model ...')
    prefix = self.model
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    encdec.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', encdec)
    trace('finished.')
def _make_model(self, num_tgt_chars, N, d_model, d_ff, h, dropout):
    """Assemble the transformer recognition model.

    :param num_tgt_chars: output space
    :param N: number of decoder and encoder layers
    :param d_model: model dimensionality
    :param d_ff: hidden size of the feed-forward neural network
    :param h: number of attention heads
    :param dropout: dropout rate
    :return: model
    """
    clone = copy.deepcopy
    self_attn = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    pos_encoding = PositionalEncoding(d_model, dropout)
    # Convolutional backbone is selected by configuration.
    if self.config.USE_RESNET:
        feature_extractor = ResNet(block=BasicBlock,
                                   layers=self.config.RESNET_LAYERS,
                                   d_model=self.config.D_MODEL)
    else:
        feature_extractor = FeatureExtractionNetwork(d_model=self.config.D_MODEL)
    # Two-entry embedding for the decoding direction (forward/backward).
    direction_embed = Embeddings(d_model, 2)
    model = EncoderDecoder(
        encoder=Encoder(EncoderLayer(d_model, clone(self_attn), clone(feed_forward), dropout), N),
        decoder=Decoder(DecoderLayer(d_model, clone(self_attn), clone(self_attn),
                                     clone(feed_forward), dropout), N),
        tgt_embed=nn.Sequential(Embeddings(d_model, num_tgt_chars), clone(pos_encoding)),
        generator=PredictionLayer(d_model, num_tgt_chars),
        feature_extractor=feature_extractor,
        prediction_layer=PredictionLayer(d_model, len(Dataset.CHAR_ID_MAP)),
        bidirectional_decoding=self.config.BIDIRECTIONAL_DECODING,
        direction_embed=direction_embed,
        device=self.device)
    # Xavier-normal init on every weight matrix; 1-D params (biases) untouched.
    for param in model.parameters():
        if param.dim() > 1:
            nn.init.xavier_normal_(param)
    logging.info("Model created")
    return model
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Helper: Construct a Transformer model from hyperparameters.

    :param src_vocab: source vocabulary size
    :param tgt_vocab: target vocabulary size
    :param N: number of encoder and decoder layers
    :param d_model: model dimensionality
    :param d_ff: hidden size of the position-wise feed-forward network
    :param h: number of attention heads
    :param dropout: dropout rate
    :return: the assembled EncoderDecoder model
    """
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    # BUG FIX: d_ff was accepted but never used; the feed-forward network needs it.
    ff = PositionWiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        # BUG FIX: the decoder layer takes both self-attention and source-attention
        # sublayers (cf. the sibling make_model in this file); the second c(attn)
        # was missing.
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        # BUG FIX: c(position) was wrongly nested inside Embeddings(...) — the
        # positional encoding belongs in the Sequential after the embedding, as
        # in the source-side line above.
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))
    for p in model.parameters():
        if p.dim() > 1:
            # xavier_uniform is a deprecated alias; use the in-place variant.
            nn.init.xavier_uniform_(p)
    return model
def make_model(src_vocab, tgt_vocab, emb_size=300, hidden_size=512, num_layers=1, dropout=0.1):
    """Helper: build a GRU encoder-decoder with Bahdanau attention.

    :param src_vocab: source vocabulary (sized embeddings are built from it)
    :param tgt_vocab: target vocabulary; its length sets the generator output size
    :param emb_size: word-embedding dimensionality
    :param hidden_size: RNN hidden-state size
    :param num_layers: number of RNN layers
    :param dropout: dropout rate
    :return: the model, on CUDA when available
    """
    attention = BahdanauAttention(hidden_size)
    use_cuda = torch.cuda.is_available()
    encoder = Encoder(emb_size, hidden_size, num_layers=num_layers, dropout=dropout)
    decoder = Decoder(emb_size, hidden_size, attention,
                      num_layers=num_layers, dropout=dropout)
    src_embed = BatchWordEmbeddings(src_vocab, emb_dim=emb_size, use_cuda=use_cuda)
    tgt_embed = BatchWordEmbeddings(tgt_vocab, emb_dim=emb_size, use_cuda=use_cuda)
    generator = Generator(hidden_size, len(tgt_vocab))
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed, generator)
    if use_cuda:
        model = model.cuda()
    return model
def make_model(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Construct the full Transformer encoder-decoder from hyperparameters.

    :param src_vocab: source vocabulary size
    :param tgt_vocab: target vocabulary size
    :param N: number of encoder and decoder layers
    :param d_model: model dimensionality
    :param d_ff: feed-forward hidden size
    :param h: number of attention heads
    :param dropout: dropout rate
    :return: the assembled EncoderDecoder model
    """
    clone = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    encoder = Encoder(EncoderLayer(d_model, clone(attn), clone(ff), dropout), N)
    decoder = Decoder(DecoderLayer(d_model, clone(attn), clone(attn),
                                   clone(ff), dropout), N)
    src_embed = nn.Sequential(Embeddings(d_model, src_vocab), clone(position))
    tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab), clone(position))
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed,
                           Generator(d_model, tgt_vocab))
    # Glorot / fan_avg initialization for every weight matrix — this was
    # important in the reference implementation.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
def __judge_print(self):
    """Dispatch on the newest Slack message: answer 'chainer:' queries with a
    predicted sentence; retrain the model on 'chainer_train'."""
    if not (len(self.data) >= 1 and "text" in self.data[0]):
        return
    print(self.data[0]["text"])
    if "chainer:" in self.data[0]["text"]:
        # Translate the incoming sentence ...
        src_batch = self.__input_sentence()
        hyp_batch = self.__predict_sentence(src_batch)
        # ... and post the cleaned-up prediction back to the channel.
        word = ''.join(hyp_batch[0]).replace("</s>", "")
        print(self.sc.api_call("chat.postMessage", user=self.usr,
                               channel=self.chan, text=word))
    if "chainer_train" in self.data[0]["text"]:
        # Reload the persisted network and continue training without word2vec init.
        self.__setting_parameter()
        model = EncoderDecoder.load_spec(self.model_name + '.spec')
        dialogue = EncoderDecoderModelForwardSlack(self.parameter)
        serializers.load_hdf5(dialogue.model + '.weights', model)
        dialogue.encdec = model
        dialogue.word2vecFlag = False
        dialogue.train()
def test(self):
    """Generate captions with the saved model and write them to self.target.

    Loads the vocabulary, spec and weights saved under model/<self.model>,
    encodes the image, then decodes up to self.generation_limit words.
    """
    trace('loading model ...')
    self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
    # BUG FIX: `trg_batch` was an undefined local name here (NameError at
    # runtime); the batch lives on the instance as self.trg_batch — the same
    # attribute passed to __forward_word below.
    self.batch_size = len(self.trg_batch)
    encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
    serializers.load_hdf5("model/" + self.model + '.weights', encdec)
    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as fp:
        self.__forward_img()
        trace('sample %8d ...' % (generated + 1))
        hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)
        for hyp in hyp_batch:
            # Ensure a sentinel exists, then truncate at the first one.
            hyp.append('</s>')
            hyp = hyp[:hyp.index('</s>')]
            print('hyp : ' + ''.join(hyp))
            print(' '.join(hyp), file=fp)
    trace('finished.')
def __judge_print(self):
    """React to the latest Slack event: 'chainer:' triggers translate-and-post,
    'chainer_train' triggers a retraining run."""
    if len(self.data) >= 1 and "text" in self.data[0]:
        message = self.data[0]["text"]
        print(message)
        if "chainer:" in message:
            # Predict a reply for the incoming sentence and post it.
            hyp_batch = self.__predict_sentence(self.__input_sentence())
            reply = ''.join(hyp_batch[0]).replace("</s>", "")
            print(
                self.sc.api_call("chat.postMessage", user=self.usr,
                                 channel=self.chan, text=reply))
        if "chainer_train" in message:
            # Reload the saved network and fine-tune it without word2vec seeding.
            self.__setting_parameter()
            model = EncoderDecoder.load_spec(self.model_name + '.spec')
            dialogue = EncoderDecoderModelForwardSlack(self.parameter)
            serializers.load_hdf5(dialogue.model + '.weights', model)
            dialogue.encdec = model
            dialogue.word2vecFlag = False
            dialogue.train()
def test(self):
    """Decode captions with the persisted model and write them to self.target.

    Restores vocabulary/spec/weights from the model/ directory, encodes the
    image, then generates up to self.generation_limit words per hypothesis.
    """
    trace('loading model ...')
    self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
    # BUG FIX: the original read `len(trg_batch)`, but no local `trg_batch`
    # exists in this method — a guaranteed NameError. The decoded batch is the
    # instance attribute self.trg_batch used two statements below.
    self.batch_size = len(self.trg_batch)
    encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
    serializers.load_hdf5("model/" + self.model + '.weights', encdec)
    trace('generating translation ...')
    generated = 0
    with open(self.target, 'w') as fp:
        self.__forward_img()
        trace('sample %8d ...' % (generated + 1))
        hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)
        for hyp in hyp_batch:
            # Append a sentinel so index() always succeeds, then cut there.
            hyp.append('</s>')
            hyp = hyp[:hyp.index('</s>')]
            print('hyp : ' + ''.join(hyp))
            print(' '.join(hyp), file=fp)
    trace('finished.')
# Imports from EncoderDecoder import EncoderDecoder import argparse import sys # Set up Encoder/Decoder object. ed = EncoderDecoder() # Set up parser object and arguments. parser = argparse.ArgumentParser(description='Encode/Decode your data.') parser.add_argument('-e', '--encode', metavar='string', type=str, nargs='+', help='Pass string to encode.') parser.add_argument('-d', '--decode', metavar='int', type=int, nargs='+', help='Pass array of integers to decode.') # Handling no input. if len(sys.argv) == 1: parser.print_help() parser.exit() args = parser.parse_args() # Decode if (args.decode):
class EncoderDecoderModel:
    """Chainer image-captioning driver: builds an EncoderDecoder network and
    provides per-epoch training, persistence, and caption generation."""

    def __init__(self, parameter_dict):
        """Read hyperparameters from *parameter_dict* and construct the network."""
        self.parameter_dict = parameter_dict
        self.id2image = parameter_dict["x"]
        self.first_word = parameter_dict["first_word"]
        self.target = parameter_dict["target"]
        self.vocab = parameter_dict["vocab"]
        self.embed = parameter_dict["embed"]
        self.epoch = parameter_dict["epoch"]
        self.hidden = parameter_dict["hidden"]
        self.minibatch = parameter_dict["minibatch"]
        self.generation_limit = parameter_dict["generation_limit"]
        self.use_gpu = parameter_dict["use_gpu"]
        self.gpu_id = parameter_dict["gpu_id"]
        self.choose_model = parameter_dict["choose_model"]
        self.common_function = CommonFunction()
        self.model = "ChainerImageCaption"
        self.trg_batch = []
        self.trg_vocab = []
        self.is_training = True
        # NOTE(review): overwrites the generation_limit read from parameter_dict
        # above — confirm which value is intended.
        self.generation_limit = 0
        self.encdec = EncoderDecoder(self.vocab, self.embed, self.hidden,
                                     self.choose_model, self.use_gpu, self.gpu_id)
        if self.use_gpu:
            self.encdec.to_gpu()
        self.__set_gpu()

    def __set_gpu(self):
        # Point the XP array backend at CPU or the configured GPU.
        XP.set_library(self.use_gpu, self.gpu_id)

    def __forward_img(self):
        # Encode the image features into the network state.
        x = XP.farray(self.id2image.data)
        return self.encdec.encode(x)

    def __forward_word(self, trg_batch, encdec, is_training, generation_limit):
        """Decode word-by-word from the '<s>' token.

        Training mode returns (loss, hyp_batch); otherwise returns hyp_batch
        only, decoding until generation_limit or until every hypothesis ends
        with '</s>'.
        """
        trg_stoi = self.trg_vocab.stoi
        trg_itos = self.trg_vocab.itos
        t = XP.iarray([trg_stoi('<s>') for _ in range(self.batch_size)])
        hyp_batch = [[] for _ in range(self.batch_size)]
        trg_len = len(trg_batch[0]) if trg_batch else 0
        if is_training:
            loss = XP.fzeros(())
            for l in range(trg_len):
                y = encdec.decode(t)
                # Teacher forcing: next input is the gold word, not the prediction.
                t = XP.iarray([
                    trg_stoi(trg_batch[k][l]) for k in range(self.batch_size)
                ])
                loss += functions.softmax_cross_entropy(y, t)
                output = cuda.to_cpu(y.data.argmax(1))
                for k in range(self.batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))
            return loss, hyp_batch
        else:
            while len(hyp_batch[0]) < generation_limit:
                y = encdec.decode(t)
                output = cuda.to_cpu(y.data.argmax(1))
                # Greedy decoding: feed the argmax back in as the next input.
                t = self.common_function.my_array(output, np.int32)
                for k in range(self.batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))
                if all(hyp_batch[k][-1] ==
                        '</s>' for k in range(self.batch_size)):
                    break
            return hyp_batch

    def train(self, epoch):
        """Run one training epoch over the target captions."""
        trace('making vocabularies ...')
        self.trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
        trace('making model ...')
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        gen1 = gens.word_list(self.target)
        gen = gens.batch(gen1, self.minibatch)
        for trg_batch in gen:
            self.batch_size = len(trg_batch)
            self.trg_batch = fill_batch(trg_batch)
            # Skip the trailing partial minibatch.
            if len(trg_batch) != self.minibatch:
                break
            self.encdec.clear(self.batch_size)
            self.__forward_img()
            self.encdec.reset(self.batch_size)
            loss, hyp_batch = self.__forward_word(self.trg_batch, self.encdec, True, 0)
            loss.backward()
            opt.update()
            K = len(self.trg_batch) - 2
            self.print_out(K, hyp_batch, epoch)

    def save_model(self):
        """Persist target vocabulary, spec and weights under model/."""
        trace('saving model ...')
        prefix = self.model
        self.trg_vocab.save("model/" + prefix + '.trgvocab')
        self.encdec.save_spec("model/" + prefix + '.spec')
        serializers.save_hdf5("model/" + prefix + '.weights', self.encdec)
        trace('finished.')

    def test(self):
        """Generate captions with a saved model and write them to self.target."""
        trace('loading model ...')
        self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
        # NOTE(review): `trg_batch` is undefined here (NameError at runtime) —
        # presumably self.trg_batch was intended; confirm against callers.
        self.batch_size = len(trg_batch)
        encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
        serializers.load_hdf5("model/" + self.model + '.weights', encdec)
        trace('generating translation ...')
        generated = 0
        with open(self.target, 'w') as fp:
            self.__forward_img()
            trace('sample %8d ...'
                  % (generated + 1))
            hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)
            for hyp in hyp_batch:
                # Ensure a sentinel exists, then truncate at the first one.
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=fp)
        trace('finished.')

    def print_out(self, K, hyp_batch, epoch):
        """Trace one target/hypothesis sample pair (index clamped to batch size)."""
        if len(self.trg_batch) - 2 < K:
            K = len(self.trg_batch) - 2
        if len(hyp_batch) - 2 < K:
            K = len(hyp_batch) - 2
        trace('epoch %3d/%3d, sample %8d' % (epoch, self.epoch, K + 1))
        # trace('epoch %3d/%3d, sample %8d' % (i_epoch + 1, self.epoch, trained + 1))
        trace(' trg = ' + ' '.join([x if x != '</s>' else '*' for x in self.trg_batch[K]]))
        trace(' hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[K]]))
# Local imports from DefaultCNN import DefaultCNN from PureCNN import PureCNN from EncoderDecoder import EncoderDecoder from ShipDataset import ShipDataset import utils GPU_AVAILABLE = torch.cuda.is_available() and torch.cuda.device_count() > 0 # Images are 768x768 IMAGE_SIZE = 768 EPOCH = 3 BATCH_SIZE = 32 LEARNING_RATE = 0.0001 model = EncoderDecoder() if GPU_AVAILABLE: model = model.cuda() # Define the mask for the train/validate split data = pd.read_csv("../data/train_ship_segmentations_v2.csv") mask = np.random.rand(len(data)) < 0.9 mask2 = np.random.rand(len(data)) >= 0.9 total_count = len(data) train_count = (mask == True).sum() val_count = total_count - train_count val_count = (mask2 == False).sum() data = None # Prepare the dataset and the dataloader train_data = ShipDataset(
class EncoderDecoderModel:
    """Image-captioning wrapper around a Chainer EncoderDecoder: construction,
    per-epoch training, model persistence, and greedy caption generation."""

    def __init__(self, parameter_dict):
        """Configure the model from *parameter_dict* and build the network."""
        self.parameter_dict = parameter_dict
        self.id2image = parameter_dict["x"]
        self.first_word = parameter_dict["first_word"]
        self.target = parameter_dict["target"]
        self.vocab = parameter_dict["vocab"]
        self.embed = parameter_dict["embed"]
        self.epoch = parameter_dict["epoch"]
        self.hidden = parameter_dict["hidden"]
        self.minibatch = parameter_dict["minibatch"]
        self.generation_limit = parameter_dict["generation_limit"]
        self.use_gpu = parameter_dict["use_gpu"]
        self.gpu_id = parameter_dict["gpu_id"]
        self.choose_model = parameter_dict["choose_model"]
        self.common_function = CommonFunction()
        self.model = "ChainerImageCaption"
        self.trg_batch = []
        self.trg_vocab = []
        self.is_training = True
        # NOTE(review): clobbers the generation_limit read from parameter_dict
        # a few lines above — verify which value should win.
        self.generation_limit = 0
        self.encdec = EncoderDecoder(self.vocab, self.embed, self.hidden,
                                     self.choose_model, self.use_gpu, self.gpu_id)
        if self.use_gpu:
            self.encdec.to_gpu()
        self.__set_gpu()

    def __set_gpu(self):
        # Select CPU or GPU array backend via XP.
        XP.set_library(self.use_gpu, self.gpu_id)

    def __forward_img(self):
        # Feed the image features through the encoder.
        x = XP.farray(self.id2image.data)
        return self.encdec.encode(x)

    def __forward_word(self, trg_batch, encdec, is_training, generation_limit):
        """Word-level decoding loop starting from '<s>'.

        Returns (loss, hyp_batch) when training (teacher forcing), otherwise
        hyp_batch from greedy decoding bounded by generation_limit / '</s>'.
        """
        trg_stoi = self.trg_vocab.stoi
        trg_itos = self.trg_vocab.itos
        t = XP.iarray([trg_stoi('<s>') for _ in range(self.batch_size)])
        hyp_batch = [[] for _ in range(self.batch_size)]
        trg_len = len(trg_batch[0]) if trg_batch else 0
        if is_training:
            loss = XP.fzeros(())
            for l in range(trg_len):
                y = encdec.decode(t)
                # Next decoder input is the gold word (teacher forcing).
                t = XP.iarray([trg_stoi(trg_batch[k][l]) for k in range(self.batch_size)])
                loss += functions.softmax_cross_entropy(y, t)
                output = cuda.to_cpu(y.data.argmax(1))
                for k in range(self.batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))
            return loss, hyp_batch
        else:
            while len(hyp_batch[0]) < generation_limit:
                y = encdec.decode(t)
                output = cuda.to_cpu(y.data.argmax(1))
                # Greedy: feed the argmax prediction back as the next input.
                t = self.common_function.my_array(output, np.int32)
                for k in range(self.batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))
                if all(hyp_batch[k][-1] == '</s>'
                        for k in range(self.batch_size)):
                    break
            return hyp_batch

    def train(self, epoch):
        """Train for one epoch over the target captions."""
        trace('making vocabularies ...')
        self.trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)
        trace('making model ...')
        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        gen1 = gens.word_list(self.target)
        gen = gens.batch(gen1, self.minibatch)
        for trg_batch in gen:
            self.batch_size = len(trg_batch)
            self.trg_batch = fill_batch(trg_batch)
            # Drop the final partial minibatch.
            if len(trg_batch) != self.minibatch:
                break
            self.encdec.clear(self.batch_size)
            self.__forward_img()
            self.encdec.reset(self.batch_size)
            loss, hyp_batch = self.__forward_word(self.trg_batch, self.encdec, True, 0)
            loss.backward()
            opt.update()
            K = len(self.trg_batch) - 2
            self.print_out(K, hyp_batch, epoch)

    def save_model(self):
        """Save target vocabulary, spec and weights under model/."""
        trace('saving model ...')
        prefix = self.model
        self.trg_vocab.save("model/" + prefix + '.trgvocab')
        self.encdec.save_spec("model/" + prefix + '.spec')
        serializers.save_hdf5("model/" + prefix + '.weights', self.encdec)
        trace('finished.')

    def test(self):
        """Decode captions with a saved model and write them to self.target."""
        trace('loading model ...')
        self.trg_vocab = Vocabulary.load("model/" + self.model + '.trgvocab')
        # NOTE(review): `trg_batch` is an undefined local here (NameError) —
        # likely self.trg_batch was meant; verify before running.
        self.batch_size = len(trg_batch)
        encdec = EncoderDecoder.load_spec("model/" + self.model + '.spec')
        serializers.load_hdf5("model/" + self.model + '.weights', encdec)
        trace('generating translation ...')
        generated = 0
        with open(self.target, 'w') as fp:
            self.__forward_img()
            trace('sample %8d ...'
                  % (generated + 1))
            hyp_batch = self.__forward_word(self.trg_batch, encdec, False, self.generation_limit)
            for hyp in hyp_batch:
                # Add a sentinel so index() cannot fail, then truncate there.
                hyp.append('</s>')
                hyp = hyp[:hyp.index('</s>')]
                print('hyp : ' + ''.join(hyp))
                print(' '.join(hyp), file=fp)
        trace('finished.')

    def print_out(self, K, hyp_batch, epoch):
        """Trace a single target/hypothesis pair, clamping K into range."""
        if len(self.trg_batch) - 2 < K:
            K = len(self.trg_batch) - 2
        if len(hyp_batch) - 2 < K:
            K = len(hyp_batch) - 2
        trace('epoch %3d/%3d, sample %8d' % (epoch, self.epoch, K + 1))
        # trace('epoch %3d/%3d, sample %8d' % (i_epoch + 1, self.epoch, trained + 1))
        trace(' trg = ' + ' '.join([x if x != '</s>' else '*' for x in self.trg_batch[K]]))
        trace(' hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[K]]))