def prepare_training(self, batch_size, data_engine, collate_fn):
    # Build the training data loader over the given data engine.
    self.train_data_loader = DataLoader(
        data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)
    # Only parameters that require gradients are handed to the optimizer.
    self.parameters = filter(lambda p: p.requires_grad, self.lm.parameters())
    self.optimizer = build_optimizer(
        self.config["optimizer"], self.parameters, self.config["learning_rate"])
    # Start a fresh training log with a CSV header.
    with open(self.log_file, 'w') as fw:
        fw.write("epoch,train_loss,valid_loss\n")
def __init__(self, batch_size, optimizer, learning_rate, train_data_engine,
             test_data_engine, dim_hidden, dim_embedding, vocab_size=None,
             attr_vocab_size=None, n_layers=1, bidirectional=False,
             model_dir="./model", log_dir="./log", is_load=True,
             replace_model=True, model='nlu-nlg', schedule='iterative',
             device=None, dir_name='test', f1_per_sample=False,
             dim_loss=False, with_intent=True, nlg_path=None):
    # Initialize attributes
    # model_dir = os.path.join(model_dir, dir_name)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    self.model_dir = model_dir
    self.log_dir = log_dir
    self.dir_name = dir_name
    self.device = get_device(device)

    self.maskpredict = MaskPredict(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        # attr_vocab_size=attr_vocab_size,
        vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        # n_slot_key=len(train_data_engine.slot_vocab),
        n_slot_key=len(train_data_engine.nlg_slot_vocab),
        n_intent=len(train_data_engine.intent_vocab),
        n_layers=n_layers,
        bidirectional=False,
        batch_size=batch_size)

    self.optimizer = torch.optim.Adam(
        self.maskpredict.parameters(), 3e-4, weight_decay=1e-4)
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer, step_size=5, gamma=0.1)

    if is_load:
        print_time_info("Loading marginal model from %s" % self.model_dir)
        self.load_model(self.model_dir)
    else:
        pass
        # self.nlg = NLGRNN(
        #     dim_embedding=dim_embedding,
        #     dim_hidden=dim_hidden,
        #     # attr_vocab_size=attr_vocab_size,
        #     vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        #     # n_slot_key=len(train_data_engine.slot_vocab),
        #     n_slot_key=len(train_data_engine.nlg_slot_vocab),
        #     n_intent=len(train_data_engine.intent_vocab),
        #     n_layers=n_layers,
        #     bidirectional=False,
        #     batch_size=batch_size)
        # pretrained_nlg = torch.load(nlg_path)
        # self.maskpredict.load_encoder(pretrained_nlg)

    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    self.train_nlg_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=train_data_engine.collate_fn_nlg,
        pin_memory=True)
    self.test_nlg_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=test_data_engine.collate_fn_nlg,
        pin_memory=True)

    self.maskpredict_parameters = filter(lambda p: p.requires_grad,
                                         self.maskpredict.parameters())
    self.maskpredict_optimizer = build_optimizer(
        optimizer, self.maskpredict_parameters, learning_rate)

    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_log_path = os.path.join(self.log_dir, "valid_log.csv")
    self.test_result_path = os.path.join(self.log_dir, "test_result.txt")
    with open(self.train_log_path, 'w') as file:
        file.write("epoch,loss\n")
    with open(self.valid_log_path, 'w') as file:
        file.write("epoch,loss\n")
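# Illustrative sketch (hypothetical helper, not part of the original trainer): with
# the StepLR schedule configured above (step_size=5, gamma=0.1) and one scheduler
# step per epoch, the Adam learning rate of 3e-4 drops tenfold every 5 epochs.
def _preview_lr_schedule_example(n_epochs=12):
    import torch  # local import so the sketch stays self-contained
    model = torch.nn.Linear(4, 4)  # stand-in module
    opt = torch.optim.Adam(model.parameters(), 3e-4, weight_decay=1e-4)
    sched = torch.optim.lr_scheduler.StepLR(opt, step_size=5, gamma=0.1)
    lrs = []
    for _ in range(n_epochs):
        lrs.append(opt.param_groups[0]["lr"])
        sched.step()
    return lrs  # 3e-4 for epochs 0-4, 3e-5 for 5-9, 3e-6 afterwards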
def __init__(self,
             batch_size,
             optimizer,
             learning_rate,
             train_data_engine,
             test_data_engine,
             dim_hidden,
             dim_embedding,
             vocab_size=None,
             n_layers=1,
             model_dir="./model",
             log_dir="./log",
             is_load=True,
             replace_model=True,
             device=None,
             dir_name='test'):
    # Initialize attributes
    self.data_engine = train_data_engine
    self.n_layers = n_layers
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.dim_hidden = dim_hidden
    self.dim_embedding = dim_embedding
    self.vocab_size = vocab_size
    self.dir_name = dir_name
    self.device = get_device(device)

    self.lm = LMRNN(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        attr_vocab_size=None,
        vocab_size=vocab_size,
        n_layers=n_layers,
        bidirectional=False)
    self.lm.to(self.device)

    self.parameters = filter(lambda p: p.requires_grad, self.lm.parameters())
    self.optimizer = build_optimizer(optimizer, self.parameters, learning_rate)

    self.model_dir, self.log_dir = handle_model_dirs(
        model_dir, log_dir, dir_name, replace_model, is_load)

    if is_load:
        self.load_model(self.model_dir)

    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    self.train_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn_nl,
        pin_memory=True)
    self.test_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn_nl,
        pin_memory=True)
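# Illustrative sketch (an assumption, not the repo's actual helper): build_optimizer,
# used throughout these constructors, presumably maps an optimizer-name string onto a
# torch.optim class roughly as follows; the real implementation lives elsewhere in the
# repository and may differ.
def _build_optimizer_example(name, parameters, learning_rate):
    import torch  # local import so the sketch stays self-contained
    name = name.lower()
    if name == "adam":
        return torch.optim.Adam(parameters, lr=learning_rate)
    if name == "sgd":
        return torch.optim.SGD(parameters, lr=learning_rate)
    if name == "rmsprop":
        return torch.optim.RMSprop(parameters, lr=learning_rate)
    raise ValueError("Unknown optimizer: %s" % name)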
def __init__(self, batch_size, optimizer, learning_rate, train_data_engine,
             test_data_engine, dim_hidden, dim_embedding, vocab_size=None,
             attr_vocab_size=None, n_layers=1, bidirectional=False,
             model_dir="./model", log_dir="./log", is_load=True,
             replace_model=True, device=None, dir_name='test',
             with_intent=True):
    # Initialize attributes
    self.data_engine = train_data_engine
    self.n_layers = n_layers
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.dim_hidden = dim_hidden
    self.dim_embedding = dim_embedding
    self.vocab_size = vocab_size
    self.attr_vocab_size = attr_vocab_size
    self.dir_name = dir_name
    self.with_intent = with_intent
    self.device = get_device(device)

    self.nlu = NLURNN(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        slot_vocab_size=len(train_data_engine.nlu_slot_vocab),
        intent_vocab_size=len(train_data_engine.intent_vocab),
        n_layers=n_layers,
        bidirectional=bidirectional)
    self.nlg = NLGRNN(
        dim_embedding=dim_embedding,
        dim_hidden=dim_hidden,
        vocab_size=train_data_engine.tokenizer.get_vocab_size(),
        n_slot_key=len(train_data_engine.nlg_slot_vocab),
        n_intent=len(train_data_engine.intent_vocab),
        n_layers=n_layers,
        bidirectional=False,
        batch_size=batch_size)
    self.nlu.to(self.device)
    self.nlg.to(self.device)

    # Initialize data loaders and optimizers
    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    self.test_result_path = os.path.join(self.log_dir, "test_result.txt")
    self.nlu_output_file = None
    """
    self.train_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn_nlg,
        pin_memory=True)
    self.test_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn_nlg,
        pin_memory=True)
    """
    self.train_nlu_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=train_data_engine.collate_fn_nlu,
        pin_memory=True)
    self.train_nlg_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=train_data_engine.collate_fn_nlg,
        pin_memory=True)
    self.test_nlu_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=False,
        collate_fn=test_data_engine.collate_fn_nlu,
        pin_memory=True)
    self.test_nlg_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=False,
        collate_fn=test_data_engine.collate_fn_nlg,
        pin_memory=True)

    # nlu parameters optimization
    self.nlu_parameters = filter(lambda p: p.requires_grad,
                                 self.nlu.parameters())
    self.nlu_optimizer = build_optimizer(optimizer,
                                         self.nlu_parameters,
                                         learning_rate)
    # nlg parameters optimization
    self.nlg_parameters = filter(lambda p: p.requires_grad,
                                 self.nlg.parameters())
    self.nlg_optimizer = build_optimizer(optimizer,
                                         self.nlg_parameters,
                                         learning_rate)
    print_time_info("Model create complete")

    self.model_dir, self.log_dir = handle_model_dirs(
        model_dir, log_dir, dir_name, replace_model, is_load)

    if is_load:
        print_time_info("Loading model from directory %s" % self.model_dir)
        self.load_model(self.model_dir)

    print_time_info("Model create completed.")

    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_log_path = os.path.join(self.log_dir, "valid_log.csv")
    with open(self.train_log_path, 'w') as file:
        file.write(
            "epoch,nlu_loss,nlg_loss,intent_acc,slot_f1,bleu,rouge(1,2,L)\n"
        )
    with open(self.valid_log_path, 'w') as file:
        file.write(
            "epoch,nlu_loss,nlg_loss,intent_acc,slot_f1,bleu,rouge(1,2,L)\n"
        )

    # Initialize batch count
    self.batches = 0
def __init__(self, batch_size, en_optimizer, de_optimizer, en_learning_rate,
             de_learning_rate, attn_method, train_data_engine, test_data_engine,
             use_embedding, en_use_attr_init_state, en_hidden_size=100,
             de_hidden_size=100, en_vocab_size=None, de_vocab_size=None,
             vocab_size=None, en_embedding_dim=None, de_embedding_dim=None,
             embedding_dim=None, embeddings=None, en_embedding=True,
             share_embedding=True, n_decoders=2, cell="GRU", n_en_layers=1,
             n_de_layers=1, bidirectional=False, feed_last=False,
             repeat_input=False, batch_norm=False, model_dir="./model",
             log_dir="./log", is_load=True, check_mem_usage_batches=0,
             replace_model=True, finetune_embedding=False, model_config=None):
    # Initialize attributes
    self.data_engine = train_data_engine
    self.check_mem_usage_batches = check_mem_usage_batches
    self.n_decoders = n_decoders
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.en_embedding_dim = en_embedding_dim
    self.de_embedding_dim = de_embedding_dim
    self.embedding_dim = embedding_dim
    self.repeat_input = repeat_input
    self.de_hidden_size = de_hidden_size
    self.bidirectional = bidirectional
    self.dir_name = model_config.dir_name
    self.h_attn = model_config.h_attn

    # embedding layer setting
    if not en_embedding:
        en_embed = None
        de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
        if use_embedding:
            de_embed.weight = embeddings
            if not finetune_embedding:
                de_embed.weight.requires_grad = False
    else:
        if share_embedding:
            embed = nn.Embedding(vocab_size, embedding_dim)
            if use_embedding:
                embed.weight = embeddings
                if not finetune_embedding:
                    embed.weight.requires_grad = False
            en_embed = embed
            de_embed = embed
        else:
            en_embed = nn.Embedding(en_vocab_size, en_embedding_dim)
            de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
            if use_embedding:
                # in E2ENLG dataset, only decoder use word embedding
                de_embed.weight = embeddings
                if not finetune_embedding:
                    de_embed.weight.requires_grad = False

    self.encoder = EncoderRNN(
        en_embedding=en_embedding,
        embedding=en_embed,
        en_vocab_size=en_vocab_size,
        en_embedding_dim=(embedding_dim
                          if share_embedding and en_embedding
                          else en_embedding_dim),
        hidden_size=en_hidden_size,
        n_layers=n_en_layers,
        bidirectional=bidirectional,
        cell=cell)
    self.cell = cell
    self.decoders = []
    for n in range(n_decoders):
        decoder = DecoderRNN(
            embedding=de_embed,
            de_vocab_size=de_vocab_size,
            de_embedding_dim=(embedding_dim
                              if share_embedding and en_embedding
                              else self.de_embedding_dim),
            en_hidden_size=en_hidden_size,
            de_hidden_size=de_hidden_size,
            n_en_layers=n_en_layers,
            n_de_layers=n_de_layers,
            bidirectional=bidirectional,
            feed_last=(True if feed_last and n > 0 else False),
            batch_norm=batch_norm,
            attn_method=attn_method,
            cell=cell,
            h_attn=self.h_attn,
            index=n)
        self.decoders.append(decoder)

    self.encoder = self.encoder.cuda() if use_cuda else self.encoder
    self.decoders = [
        decoder.cuda() if use_cuda else decoder
        for decoder in self.decoders
    ]

    # Initialize data loaders and optimizers
    self.train_data_engine = train_data_engine
    self.test_data_engine = test_data_engine
    self.train_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)
    self.test_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)

    # encoder parameters optimization
    self.encoder_parameters = filter(lambda p: p.requires_grad,
                                     self.encoder.parameters())
    self.encoder_optimizer = build_optimizer(en_optimizer,
                                             self.encoder_parameters,
                                             en_learning_rate)

    # decoder parameters optimization
    decoder_parameters = []
    for decoder in self.decoders:
        decoder_parameters.extend(list(decoder.parameters()))
    self.decoder_parameters = filter(lambda p: p.requires_grad,
                                     decoder_parameters)
    self.decoder_optimizer = build_optimizer(de_optimizer,
                                             self.decoder_parameters,
                                             de_learning_rate)

    print_time_info("Model create complete")

    if not replace_model:
        self.model_dir = os.path.join(self.model_dir, self.dir_name)
    if not os.path.isdir(self.model_dir):
        os.makedirs(self.model_dir)
    else:
        if not is_load:
            check_dir(self.model_dir)
    self.log_dir = os.path.join(self.log_dir, self.dir_name)
    if not os.path.isdir(self.log_dir):
        os.makedirs(self.log_dir)
        os.makedirs(os.path.join(self.log_dir, "validation"))
    if not is_load:
        with open(os.path.join(self.log_dir, "model_config"), "w+") as f:
            for arg in vars(model_config):
                f.write("{}: {}\n".format(arg, str(getattr(model_config, arg))))

    if is_load:
        self.load_model(self.model_dir)

    # Initialize the log files
    self.logger = Logger(self.log_dir)
    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_batch_log_path = os.path.join(self.log_dir, "valid_batch_log.csv")
    self.valid_epoch_log_path = os.path.join(self.log_dir, "valid_epoch_log.csv")
    with open(self.train_log_path, 'w') as file:
        file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
    with open(self.valid_batch_log_path, 'w') as file:
        file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
    with open(self.valid_epoch_log_path, 'w') as file:
        file.write("epoch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")

    # Initialize batch count
    self.batches = 0
    self.en_use_attr_init_state = en_use_attr_init_state
def __init__(self, batch_size, en_optimizer, de_optimizer, en_learning_rate,
             de_learning_rate, attn_method, train_data_engine, test_data_engine,
             use_embedding, en_use_attr_init_state, en_hidden_size=100,
             de_hidden_size=100, en_vocab_size=None, de_vocab_size=None,
             vocab_size=None, en_embedding_dim=None, de_embedding_dim=None,
             embedding_dim=None, embeddings=None, en_embedding=True,
             share_embedding=True, n_decoders=2, cell="GRU", n_en_layers=1,
             n_de_layers=1, bidirectional=False, feed_last=False,
             repeat_input=False, batch_norm=False, model_dir="./model",
             log_dir="./log", is_load=True, check_mem_usage_batches=0,
             replace_model=True, finetune_embedding=False, model_config=None):
    # Initialize attributes
    self.data_engine = train_data_engine
    self.check_mem_usage_batches = check_mem_usage_batches
    self.n_decoders = n_decoders
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.en_embedding_dim = en_embedding_dim
    self.de_embedding_dim = de_embedding_dim
    self.embedding_dim = embedding_dim
    self.repeat_input = repeat_input

    # Initialize embeddings, encoders and decoders
    """
    Several options here matter mainly for the E2E dataset. (They can also be
    used with dialogue-generation datasets such as CMDC, but this is NOT
    RECOMMENDED.)

    1) en_embedding (default True):
       If on, an embedding layer is added to the encoder; otherwise one-hot
       vectors are fed directly into the encoder's RNN. The decoder always has
       an embedding layer, since it is assumed to output natural language and
       an embedding layer is preferable to feeding one-hot vectors into the RNN.

    2) share_embedding (default True):
       If on, make sure the encoder and decoder inputs live in the same vector
       space (e.g. both natural language); otherwise the results will be odd
       (training may run without errors, but a shared embedding layer makes no
       sense). When on, the embedding dimension is embedding_dim and the
       vocabulary size is vocab_size; en_embedding_dim, de_embedding_dim,
       en_vocab_size and de_vocab_size are ignored.

    3) use_embedding (default True):
       When on:
       (1) If share_embedding is on, the shared embedding is initialized with
           the embeddings passed into the model.
       (2) If en_embedding is on but share_embedding is off, only the decoder
           embedding is initialized with the pre-trained embeddings; the
           encoder embedding is trained from scratch. (This combination is NOT
           APPROPRIATE for dialogue-generation datasets: it makes little sense
           to initialize only the decoder embedding when the encoder and
           decoder operate in the same vector space.)

    Since these options are not disjoint, the combinations below are the ones
    that are reasonable to test and compare (see the illustrative sketch after
    this constructor for how the embedding layers are wired):

    1) en_embedding=True,  share_embedding=True,          use_embedding=True  (dialogue generation)
    2) en_embedding=True,  share_embedding=True,          use_embedding=False (dialogue generation)
    3) en_embedding=True,  share_embedding=False,         use_embedding=True  (semantic form to NL)
    4) en_embedding=False, share_embedding=X (don't care), use_embedding=True  (semantic form to NL)
    5) en_embedding=True,  share_embedding=False,         use_embedding=False (semantic form to NL)
    6) en_embedding=False, share_embedding=X (don't care), use_embedding=False (semantic form to NL)
    """

    # embedding layer setting
    if not en_embedding:
        en_embed = None
        de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
        if use_embedding:
            de_embed.weight = embeddings
            if not finetune_embedding:
                de_embed.weight.requires_grad = False
    else:
        if share_embedding:
            embed = nn.Embedding(vocab_size, embedding_dim)
            if use_embedding:
                embed.weight = embeddings
                if not finetune_embedding:
                    embed.weight.requires_grad = False
            en_embed = embed
            de_embed = embed
        else:
            en_embed = nn.Embedding(en_vocab_size, en_embedding_dim)
            de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
            if use_embedding:
                # in the E2E NLG dataset, only the decoder uses word embeddings
                de_embed.weight = embeddings
                if not finetune_embedding:
                    de_embed.weight.requires_grad = False

    self.encoder = EncoderRNN(
        en_embedding=en_embedding,
        embedding=en_embed,
        en_vocab_size=en_vocab_size,
        en_embedding_dim=(embedding_dim
                          if share_embedding and en_embedding
                          else en_embedding_dim),
        hidden_size=en_hidden_size,
        n_layers=n_en_layers,
        bidirectional=bidirectional,
        cell=cell)
    self.cell = cell
    self.decoders = []
    for n in range(n_decoders):
        decoder = DecoderRNN(
            embedding=de_embed,
            de_vocab_size=de_vocab_size,
            de_embedding_dim=(embedding_dim
                              if share_embedding and en_embedding
                              else self.de_embedding_dim),
            en_hidden_size=en_hidden_size,
            de_hidden_size=de_hidden_size,
            n_en_layers=n_en_layers,
            n_de_layers=n_de_layers,
            bidirectional=bidirectional,
            feed_last=(True if feed_last and n > 0 else False),
            batch_norm=batch_norm,
            attn_method=attn_method,
            cell=cell)
        self.decoders.append(decoder)

    self.encoder = self.encoder.cuda() if use_cuda else self.encoder
    self.decoders = [
        decoder.cuda() if use_cuda else decoder
        for decoder in self.decoders
    ]

    # Initialize data loaders and optimizers
    self.train_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)
    self.test_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)

    # encoder parameters optimization
    self.encoder_parameters = filter(lambda p: p.requires_grad,
                                     self.encoder.parameters())
    self.encoder_optimizer = build_optimizer(en_optimizer,
                                             self.encoder_parameters,
                                             en_learning_rate)

    # decoder parameters optimization
    decoder_parameters = []
    for decoder in self.decoders:
        decoder_parameters.extend(list(decoder.parameters()))
    self.decoder_parameters = filter(lambda p: p.requires_grad,
                                     decoder_parameters)
    self.decoder_optimizer = build_optimizer(de_optimizer,
                                             self.decoder_parameters,
                                             de_learning_rate)

    print_time_info("Model create complete")

    # check directory and model existence
    Y, M, D, h, m, s = get_time()
    if not replace_model:
        self.model_dir = os.path.join(
            self.model_dir,
            "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))
    if not os.path.isdir(self.model_dir):
        os.makedirs(self.model_dir)
    else:
        if not is_load:
            check_dir(self.model_dir)
    self.log_dir = os.path.join(
        self.log_dir,
        "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))
    if not os.path.isdir(self.log_dir):
        os.makedirs(self.log_dir)
        os.makedirs(os.path.join(self.log_dir, "validation"))
    with open(os.path.join(self.log_dir, "model_config"), "w+") as f:
        for arg in vars(model_config):
            f.write("{}: {}\n".format(arg, str(getattr(model_config, arg))))

    if is_load:
        self.load_model(model_dir)

    # Initialize the log files
    self.logger = Logger(self.log_dir)
    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_batch_log_path = os.path.join(self.log_dir, "valid_batch_log.csv")
    self.valid_epoch_log_path = os.path.join(self.log_dir, "valid_epoch_log.csv")
    with open(self.train_log_path, 'w') as file:
        file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
    with open(self.valid_batch_log_path, 'w') as file:
        file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
    with open(self.valid_epoch_log_path, 'w') as file:
        file.write("epoch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")

    # Initialize batch count
    self.batches = 0
    self.en_use_attr_init_state = en_use_attr_init_state
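# Illustrative sketch (added for clarity; the function name is hypothetical and it is
# not part of the original model): how the share_embedding option described in the
# docstring above wires the embedding layers. With sharing, the encoder and decoder
# reference the same nn.Embedding instance, so the weights are tied; without sharing,
# each side gets its own table (e.g. semantic-form input vs. natural-language output).
def _build_embeddings_example(vocab_size=100, embedding_dim=50,
                              en_vocab_size=40, de_vocab_size=100,
                              share_embedding=True):
    import torch.nn as nn  # local import so the sketch stays self-contained
    if share_embedding:
        embed = nn.Embedding(vocab_size, embedding_dim)
        return embed, embed  # same object -> tied weights
    return (nn.Embedding(en_vocab_size, embedding_dim),
            nn.Embedding(de_vocab_size, embedding_dim))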
def __init__(self, batch_size, en_optimizer, de_optimizer, en_learning_rate,
             de_learning_rate, attn_method, train_data_engine, test_data_engine,
             use_embedding, en_hidden_size, de_hidden_size, en_vocab_size,
             de_vocab_size, vocab_size, embedding_dim, embeddings, n_decoders,
             n_en_layers, n_de_layers, bidirectional, feed_last, repeat_input,
             model_dir, log_dir, finetune_embedding, model_config):
    # Initialize attributes
    self.data_engine = train_data_engine
    self.n_decoders = n_decoders
    self.log_dir = log_dir
    self.model_dir = model_dir
    self.embedding_dim = embedding_dim
    self.repeat_input = repeat_input

    # Initialize embeddings, encoders and decoders
    # embedding layer setting
    de_embed = nn.Embedding(de_vocab_size, embedding_dim)
    if use_embedding:
        de_embed.weight = embeddings
        if not finetune_embedding:
            de_embed.weight.requires_grad = False

    self.encoder = EncoderRNN(
        en_vocab_size=en_vocab_size,
        hidden_size=en_hidden_size,
        n_layers=n_en_layers,
        bidirectional=bidirectional)
    self.decoders = []
    for n in range(n_decoders):
        decoder = DecoderRNN(
            embedding=de_embed,
            de_vocab_size=de_vocab_size,
            de_embedding_dim=embedding_dim,
            en_hidden_size=en_hidden_size,
            de_hidden_size=de_hidden_size,
            n_en_layers=n_en_layers,
            n_de_layers=n_de_layers,
            attn_method=attn_method,
            bidirectional=bidirectional,
            feed_last=(True if feed_last and n > 0 else False))
        self.decoders.append(decoder)

    self.encoder = self.encoder.cuda() if use_cuda else self.encoder
    self.decoders = [
        decoder.cuda() if use_cuda else decoder
        for decoder in self.decoders
    ]

    # Initialize data loaders and optimizers
    self.train_data_loader = DataLoader(
        train_data_engine,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)
    self.test_data_loader = DataLoader(
        test_data_engine,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        drop_last=True,
        collate_fn=collate_fn,
        pin_memory=True)

    # encoder parameters optimization
    self.encoder_parameters = filter(lambda p: p.requires_grad,
                                     self.encoder.parameters())
    self.encoder_optimizer = build_optimizer(en_optimizer,
                                             self.encoder_parameters,
                                             en_learning_rate)

    # decoder parameters optimization
    decoder_parameters = []
    for decoder in self.decoders:
        decoder_parameters.extend(list(decoder.parameters()))
    self.decoder_parameters = filter(lambda p: p.requires_grad,
                                     decoder_parameters)
    self.decoder_optimizer = build_optimizer(de_optimizer,
                                             self.decoder_parameters,
                                             de_learning_rate)

    print_time_info("Model create complete")

    # check directory and model existence
    Y, M, D, h, m, s = get_time()
    self.model_dir = os.path.join(
        self.model_dir,
        "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))
    if not os.path.isdir(self.model_dir):
        os.makedirs(self.model_dir)
    else:
        check_dir(self.model_dir)
    self.log_dir = os.path.join(
        self.log_dir,
        "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))
    if not os.path.isdir(self.log_dir):
        os.makedirs(self.log_dir)
        os.makedirs(os.path.join(self.log_dir, "validation"))
    with open(os.path.join(self.log_dir, "model_config"), "w+") as f:
        for arg in vars(model_config):
            f.write("{}: {}\n".format(arg, str(getattr(model_config, arg))))

    # Initialize the log files
    self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
    self.valid_batch_log_path = os.path.join(self.log_dir, "valid_batch_log.csv")
    self.valid_epoch_log_path = os.path.join(self.log_dir, "valid_epoch_log.csv")
    with open(self.train_log_path, 'w') as file:
        file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
    with open(self.valid_batch_log_path, 'w') as file:
        file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
    with open(self.valid_epoch_log_path, 'w') as file:
        file.write("epoch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")

    # Initialize batch count
    self.batches = 0