Example #1
    def prepare_training(self, batch_size, data_engine, collate_fn):
        self.train_data_loader = DataLoader(data_engine,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=1,
                                            drop_last=True,
                                            collate_fn=collate_fn,
                                            pin_memory=True)

        self.parameters = filter(lambda p: p.requires_grad,
                                 self.lm.parameters())
        self.optimizer = build_optimizer(self.config["optimizer"],
                                         self.parameters,
                                         self.config["learning_rate"])

        with open(self.log_file, 'w') as fw:
            fw.write("epoch,train_loss,valid_loss\n")
Example #2
    def __init__(self,
                 batch_size,
                 optimizer,
                 learning_rate,
                 train_data_engine,
                 test_data_engine,
                 dim_hidden,
                 dim_embedding,
                 vocab_size=None,
                 attr_vocab_size=None,
                 n_layers=1,
                 bidirectional=False,
                 model_dir="./model",
                 log_dir="./log",
                 is_load=True,
                 replace_model=True,
                 model='nlu-nlg',
                 schedule='iterative',
                 device=None,
                 dir_name='test',
                 f1_per_sample=False,
                 dim_loss=False,
                 with_intent=True,
                 nlg_path=None):

        # Initialize attributes
        # model_dir = os.path.join(model_dir, dir_name)

        if not os.path.isdir(model_dir):
            os.makedirs(model_dir)

        self.model_dir = model_dir
        self.log_dir = log_dir
        self.dir_name = dir_name

        self.device = get_device(device)

        self.maskpredict = MaskPredict(
            dim_embedding=dim_embedding,
            dim_hidden=dim_hidden,
            # attr_vocab_size=attr_vocab_size,
            vocab_size=train_data_engine.tokenizer.get_vocab_size(),
            # n_slot_key=len(train_data_engine.slot_vocab),
            n_slot_key=len(train_data_engine.nlg_slot_vocab),
            n_intent=len(train_data_engine.intent_vocab),
            n_layers=n_layers,
            bidirectional=False,
            batch_size=batch_size)

        self.optimizer = torch.optim.Adam(self.maskpredict.parameters(),
                                          3e-4,
                                          weight_decay=1e-4)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=5,
                                                         gamma=0.1)

        if is_load:
            print_time_info("Loading marginal model from %s" % self.model_dir)
            self.load_model(self.model_dir)
        else:
            pass
            # self.nlg = NLGRNN(
            #     dim_embedding=dim_embedding,
            #     dim_hidden=dim_hidden,
            #     # attr_vocab_size=attr_vocab_size,
            #     vocab_size=train_data_engine.tokenizer.get_vocab_size(),
            #     # n_slot_key=len(train_data_engine.slot_vocab),
            #     n_slot_key=len(train_data_engine.nlg_slot_vocab),
            #     n_intent=len(train_data_engine.intent_vocab),
            #     n_layers=n_layers,
            #     bidirectional=False,
            #     batch_size=batch_size)
            # pretrained_nlg = torch.load(nlg_path)
            # self.maskpredict.load_encoder(pretrained_nlg)

        self.train_data_engine = train_data_engine
        self.test_data_engine = test_data_engine

        self.train_nlg_data_loader = DataLoader(
            train_data_engine,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,
            drop_last=True,
            collate_fn=train_data_engine.collate_fn_nlg,
            pin_memory=True)

        self.test_nlg_data_loader = DataLoader(
            test_data_engine,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
            drop_last=True,
            collate_fn=test_data_engine.collate_fn_nlg,
            pin_memory=True)

        self.maskpredict_parameters = filter(lambda p: p.requires_grad,
                                             self.maskpredict.parameters())
        self.maskpredict_optimizer = build_optimizer(
            optimizer, self.maskpredict_parameters, learning_rate)

        self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
        self.valid_log_path = os.path.join(self.log_dir, "valid_log.csv")

        self.test_result_path = os.path.join(self.log_dir, "test_result.txt")

        with open(self.train_log_path, 'w') as file:
            file.write("epoch,loss\n")
        with open(self.valid_log_path, 'w') as file:
            file.write("epoch,loss\n")
Example #3
    def __init__(
            self,
            batch_size,
            optimizer,
            learning_rate,
            train_data_engine,
            test_data_engine,
            dim_hidden,
            dim_embedding,
            vocab_size=None,
            n_layers=1,
            model_dir="./model",
            log_dir="./log",
            is_load=True,
            replace_model=True,
            device=None,
            dir_name='test'
    ):

        # Initialize attributes
        self.data_engine = train_data_engine
        self.n_layers = n_layers
        self.log_dir = log_dir
        self.model_dir = model_dir
        self.dim_hidden = dim_hidden
        self.dim_embedding = dim_embedding
        self.vocab_size = vocab_size
        self.dir_name = dir_name

        self.device = get_device(device)

        self.lm = LMRNN(
            dim_embedding=dim_embedding,
            dim_hidden=dim_hidden,
            attr_vocab_size=None,
            vocab_size=vocab_size,
            n_layers=n_layers,
            bidirectional=False
        )

        self.lm.to(self.device)

        self.parameters = filter(
                lambda p: p.requires_grad, self.lm.parameters())
        self.optimizer = build_optimizer(
                optimizer, self.parameters, learning_rate)

        self.model_dir, self.log_dir = handle_model_dirs(
            model_dir, log_dir, dir_name, replace_model, is_load
        )

        if is_load:
            self.load_model(self.model_dir)

        self.train_data_engine = train_data_engine
        self.test_data_engine = test_data_engine
        self.train_data_loader = DataLoader(
                train_data_engine,
                batch_size=batch_size,
                shuffle=True,
                num_workers=1,
                drop_last=True,
                collate_fn=collate_fn_nl,
                pin_memory=True)

        self.test_data_loader = DataLoader(
                test_data_engine,
                batch_size=batch_size,
                shuffle=False,
                num_workers=1,
                drop_last=True,
                collate_fn=collate_fn_nl,
                pin_memory=True)
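
get_device is a project helper whose implementation is not shown; a hedged stand-in with the usual CUDA-fallback behavior (an assumption, not the project's actual code) would be:

    import torch

    def get_device(device=None):
        # honor an explicit device string, otherwise fall back to CUDA when available
        if device is not None:
            return torch.device(device)
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = torch.nn.Linear(4, 4).to(get_device())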
Example #4
    def __init__(self,
                 batch_size,
                 optimizer,
                 learning_rate,
                 train_data_engine,
                 test_data_engine,
                 dim_hidden,
                 dim_embedding,
                 vocab_size=None,
                 attr_vocab_size=None,
                 n_layers=1,
                 bidirectional=False,
                 model_dir="./model",
                 log_dir="./log",
                 is_load=True,
                 replace_model=True,
                 device=None,
                 dir_name='test',
                 with_intent=True):

        # Initialize attributes
        self.data_engine = train_data_engine
        self.n_layers = n_layers
        self.log_dir = log_dir
        self.model_dir = model_dir
        self.dim_hidden = dim_hidden
        self.dim_embedding = dim_embedding
        self.vocab_size = vocab_size
        self.attr_vocab_size = attr_vocab_size
        self.dir_name = dir_name
        self.with_intent = with_intent
        self.device = get_device(device)

        self.nlu = NLURNN(
            dim_embedding=dim_embedding,
            dim_hidden=dim_hidden,
            vocab_size=train_data_engine.tokenizer.get_vocab_size(),
            slot_vocab_size=len(train_data_engine.nlu_slot_vocab),
            intent_vocab_size=len(train_data_engine.intent_vocab),
            n_layers=n_layers,
            bidirectional=bidirectional)

        self.nlg = NLGRNN(
            dim_embedding=dim_embedding,
            dim_hidden=dim_hidden,
            vocab_size=train_data_engine.tokenizer.get_vocab_size(),
            n_slot_key=len(train_data_engine.nlg_slot_vocab),
            n_intent=len(train_data_engine.intent_vocab),
            n_layers=n_layers,
            bidirectional=False,
            batch_size=batch_size)

        self.nlu.to(self.device)
        self.nlg.to(self.device)

        # Initialize data loaders and optimizers
        self.train_data_engine = train_data_engine
        self.test_data_engine = test_data_engine
        self.test_result_path = os.path.join(self.log_dir, "test_result.txt")
        self.nlu_output_file = None
        """
        self.train_data_loader = DataLoader(
                train_data_engine,
                batch_size=batch_size,
                shuffle=True,
                num_workers=1,
                drop_last=True,
                collate_fn=collate_fn_nlg,
                pin_memory=True)

        self.test_data_loader = DataLoader(
                test_data_engine,
                batch_size=batch_size,
                shuffle=False,
                num_workers=1,
                drop_last=True,
                collate_fn=collate_fn_nlg,
                pin_memory=True)
        """
        self.train_nlu_data_loader = DataLoader(
            train_data_engine,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,
            drop_last=True,
            collate_fn=train_data_engine.collate_fn_nlu,
            pin_memory=True)
        self.train_nlg_data_loader = DataLoader(
            train_data_engine,
            batch_size=batch_size,
            shuffle=True,
            num_workers=1,
            drop_last=True,
            collate_fn=train_data_engine.collate_fn_nlg,
            pin_memory=True)

        self.test_nlu_data_loader = DataLoader(
            test_data_engine,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
            drop_last=False,
            collate_fn=test_data_engine.collate_fn_nlu,
            pin_memory=True)

        self.test_nlg_data_loader = DataLoader(
            test_data_engine,
            batch_size=batch_size,
            shuffle=False,
            num_workers=1,
            drop_last=False,
            collate_fn=test_data_engine.collate_fn_nlg,
            pin_memory=True)

        # nlu parameters optimization
        self.nlu_parameters = filter(lambda p: p.requires_grad,
                                     self.nlu.parameters())
        self.nlu_optimizer = build_optimizer(optimizer, self.nlu_parameters,
                                             learning_rate)
        # nlg parameters optimization
        self.nlg_parameters = filter(lambda p: p.requires_grad,
                                     self.nlg.parameters())
        self.nlg_optimizer = build_optimizer(optimizer, self.nlg_parameters,
                                             learning_rate)

        print_time_info("Model create complete")

        self.model_dir, self.log_dir = handle_model_dirs(
            model_dir, log_dir, dir_name, replace_model, is_load)

        if is_load:
            print_time_info("Loading model from directory %s" % self.model_dir)
            self.load_model(self.model_dir)

        print_time_info("Model create completed.")

        self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
        self.valid_log_path = os.path.join(self.log_dir, "valid_log.csv")

        with open(self.train_log_path, 'w') as file:
            file.write(
                "epoch,nlu_loss,nlg_loss,intent_acc,slot_f1,bleu,rouge(1,2,L)\n"
            )
        with open(self.valid_log_path, 'w') as file:
            file.write(
                "epoch,nlu_loss,nlg_loss,intent_acc,slot_f1,bleu,rouge(1,2,L)\n"
            )

        # Initialize batch count
        self.batches = 0
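
Example #4 serves one data engine through two DataLoaders that differ only in their collate_fn, so the same dataset yields NLU-shaped and NLG-shaped batches. A toy sketch of that pattern (the data and collate functions here are hypothetical):

    from torch.utils.data import DataLoader

    data = [("book a table", {"intent": "reserve"})] * 8

    def collate_fn_nlu(batch):
        # text -> labels direction
        return [text for text, _ in batch], [label for _, label in batch]

    def collate_fn_nlg(batch):
        # labels -> text direction
        return [label for _, label in batch], [text for text, _ in batch]

    nlu_loader = DataLoader(data, batch_size=4, collate_fn=collate_fn_nlu)
    nlg_loader = DataLoader(data, batch_size=4, collate_fn=collate_fn_nlg)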
Example #5
    def __init__(self,
                 batch_size,
                 en_optimizer,
                 de_optimizer,
                 en_learning_rate,
                 de_learning_rate,
                 attn_method,
                 train_data_engine,
                 test_data_engine,
                 use_embedding,
                 en_use_attr_init_state,
                 en_hidden_size=100,
                 de_hidden_size=100,
                 en_vocab_size=None,
                 de_vocab_size=None,
                 vocab_size=None,
                 en_embedding_dim=None,
                 de_embedding_dim=None,
                 embedding_dim=None,
                 embeddings=None,
                 en_embedding=True,
                 share_embedding=True,
                 n_decoders=2,
                 cell="GRU",
                 n_en_layers=1,
                 n_de_layers=1,
                 bidirectional=False,
                 feed_last=False,
                 repeat_input=False,
                 batch_norm=False,
                 model_dir="./model",
                 log_dir="./log",
                 is_load=True,
                 check_mem_usage_batches=0,
                 replace_model=True,
                 finetune_embedding=False,
                 model_config=None):

        # Initialize attributes
        self.data_engine = train_data_engine
        self.check_mem_usage_batches = check_mem_usage_batches
        self.n_decoders = n_decoders
        self.log_dir = log_dir
        self.model_dir = model_dir
        self.en_embedding_dim = en_embedding_dim
        self.de_embedding_dim = de_embedding_dim
        self.embedding_dim = embedding_dim
        self.repeat_input = repeat_input
        self.de_hidden_size = de_hidden_size
        self.bidirectional = bidirectional
        self.dir_name = model_config.dir_name
        self.h_attn = model_config.h_attn

        # embedding layer setting
        if not en_embedding:
            en_embed = None
            de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
            if use_embedding:
                de_embed.weight = embeddings
                if not finetune_embedding:
                    de_embed.weight.requires_grad = False
        else:
            if share_embedding:
                embed = nn.Embedding(vocab_size, embedding_dim)
                if use_embedding:
                    embed.weight = embeddings
                    if not finetune_embedding:
                        embed.weight.requires_grad = False
                en_embed = embed
                de_embed = embed
            else:
                en_embed = nn.Embedding(en_vocab_size, en_embedding_dim)
                de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
                if use_embedding:
                    # in E2ENLG dataset, only decoder use word embedding
                    de_embed.weight = embeddings
                    if not finetune_embedding:
                        de_embed.weight.requires_grad = False

        self.encoder = EncoderRNN(
            en_embedding=en_embedding,
            embedding=en_embed,
            en_vocab_size=en_vocab_size,
            en_embedding_dim=(embedding_dim if share_embedding and en_embedding
                              else en_embedding_dim),
            hidden_size=en_hidden_size,
            n_layers=n_en_layers,
            bidirectional=bidirectional,
            cell=cell)

        self.cell = cell
        self.decoders = []
        for n in range(n_decoders):
            decoder = DecoderRNN(
                embedding=de_embed,
                de_vocab_size=de_vocab_size,
                de_embedding_dim=(embedding_dim if share_embedding
                                  and en_embedding else self.de_embedding_dim),
                en_hidden_size=en_hidden_size,
                de_hidden_size=de_hidden_size,
                n_en_layers=n_en_layers,
                n_de_layers=n_de_layers,
                bidirectional=bidirectional,
                feed_last=(feed_last and n > 0),
                batch_norm=batch_norm,
                attn_method=attn_method,
                cell=cell,
                h_attn=self.h_attn,
                index=n)
            self.decoders.append(decoder)

        self.encoder = self.encoder.cuda() if use_cuda else self.encoder
        self.decoders = [
            decoder.cuda() if use_cuda else decoder
            for decoder in self.decoders
        ]

        # Initialize data loaders and optimizers
        self.train_data_engine = train_data_engine
        self.test_data_engine = test_data_engine
        self.train_data_loader = DataLoader(train_data_engine,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=1,
                                            drop_last=True,
                                            collate_fn=collate_fn,
                                            pin_memory=True)

        self.test_data_loader = DataLoader(test_data_engine,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           num_workers=1,
                                           drop_last=True,
                                           collate_fn=collate_fn,
                                           pin_memory=True)

        # encoder parameters optimization
        self.encoder_parameters = filter(lambda p: p.requires_grad,
                                         self.encoder.parameters())
        self.encoder_optimizer = build_optimizer(en_optimizer,
                                                 self.encoder_parameters,
                                                 en_learning_rate)
        # decoder parameters optimization
        decoder_parameters = []
        for decoder in self.decoders:
            decoder_parameters.extend(list(decoder.parameters()))
        self.decoder_parameters = filter(lambda p: p.requires_grad,
                                         decoder_parameters)
        self.decoder_optimizer = build_optimizer(de_optimizer,
                                                 self.decoder_parameters,
                                                 de_learning_rate)

        print_time_info("Model create complete")

        if not replace_model:
            self.model_dir = os.path.join(self.model_dir, self.dir_name)

        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)
        else:
            if not is_load:
                check_dir(self.model_dir)
        self.log_dir = os.path.join(self.log_dir, self.dir_name)

        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
            os.makedirs(os.path.join(self.log_dir, "validation"))

        if not is_load:
            with open(os.path.join(self.log_dir, "model_config"), "w+") as f:
                for arg in vars(model_config):
                    f.write("{}: {}\n".format(arg,
                                              str(getattr(model_config, arg))))

        if is_load:
            self.load_model(self.model_dir)

        # Initialize the log files
        self.logger = Logger(self.log_dir)
        self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
        self.valid_batch_log_path = os.path.join(self.log_dir,
                                                 "valid_batch_log.csv")
        self.valid_epoch_log_path = os.path.join(self.log_dir,
                                                 "valid_epoch_log.csv")

        with open(self.train_log_path, 'w') as file:
            file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
        with open(self.valid_batch_log_path, 'w') as file:
            file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
        with open(self.valid_epoch_log_path, 'w') as file:
            file.write("epoch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")

        # Initialize batch count
        self.batches = 0
        self.en_use_attr_init_state = en_use_attr_init_state
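
Example #5 keeps separate optimizers for the encoder and the decoder stack, so each side can use its own optimizer type and learning rate. A minimal sketch with plain SGD standing in for build_optimizer (note that filter() returns a one-shot iterator, so it must not be reused after the optimizer consumes it):

    import torch

    encoder = torch.nn.Linear(8, 16)
    decoders = [torch.nn.Linear(16, 8) for _ in range(2)]

    # encoder parameters get their own optimizer
    encoder_params = filter(lambda p: p.requires_grad, encoder.parameters())
    encoder_optimizer = torch.optim.SGD(encoder_params, lr=1e-3)

    # all decoder parameters share a second optimizer
    decoder_params = []
    for decoder in decoders:
        decoder_params.extend(list(decoder.parameters()))
    decoder_optimizer = torch.optim.SGD(
        (p for p in decoder_params if p.requires_grad), lr=1e-4)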
Example #6
    def __init__(self,
                 batch_size,
                 en_optimizer,
                 de_optimizer,
                 en_learning_rate,
                 de_learning_rate,
                 attn_method,
                 train_data_engine,
                 test_data_engine,
                 use_embedding,
                 en_use_attr_init_state,
                 en_hidden_size=100,
                 de_hidden_size=100,
                 en_vocab_size=None,
                 de_vocab_size=None,
                 vocab_size=None,
                 en_embedding_dim=None,
                 de_embedding_dim=None,
                 embedding_dim=None,
                 embeddings=None,
                 en_embedding=True,
                 share_embedding=True,
                 n_decoders=2,
                 cell="GRU",
                 n_en_layers=1,
                 n_de_layers=1,
                 bidirectional=False,
                 feed_last=False,
                 repeat_input=False,
                 batch_norm=False,
                 model_dir="./model",
                 log_dir="./log",
                 is_load=True,
                 check_mem_usage_batches=0,
                 replace_model=True,
                 finetune_embedding=False,
                 model_config=None):

        # Initialize attributes
        self.data_engine = train_data_engine
        self.check_mem_usage_batches = check_mem_usage_batches
        self.n_decoders = n_decoders
        self.log_dir = log_dir
        self.model_dir = model_dir
        self.en_embedding_dim = en_embedding_dim
        self.de_embedding_dim = de_embedding_dim
        self.embedding_dim = embedding_dim
        self.repeat_input = repeat_input

        # Initialize embeddings, encoders and decoders
        """
        There are some available options here, most of which matter when using
        E2E dataset.
        (You still can use them while using dialogue generation dataset
        like CMDC, but it's NOT RECOMMENDED.)

        1) en_embedding (default True):
            If the option is on, we're going to add embedding layer into
            encoder; otherwise, the one-hot vectors are directly fed into
            encoder's RNN.
            For now, the decoder always has an embedding layer; this is
            because that we assumed that the decoder should always output the
            natural language, and it's reasonable that using an embedding layer
            instead of directly pass one-hot vectors into RNN.
        2) share_embedding (default True):
            If the option is on, first you should make sure that the input of
            encoder and decoder are in same vector space,
            (e.g. both natural language); otherwise, it will cause some strange
            result, (it is possible that you can train the model without any
            error, but the shared embedding layer doesn't make sense, as you
            should know.)
            When the option is on, the embedding dimension will be the argument
            embedding_dim, and the vocabulary size will be vocab_size; the
            argument en_embedding_dim, de_embedding_dim, en_vocab_size and
            de_vocab_size won't be used.
        3) use_embedding (default True):
            When the option is on:
            (1) If share_embedding option is on, the shared embedding will be
            initialized with the embeddings we pass into the model.
            (2) If en_embedding is on while share_embedding option being off,
            only the embedding in decoder will be initialized with the
            pre-trained embeddings, and the encoder embeddings will be trained
            from scratch (this combination of options is NOT APPROPRIATE when
            using dialogue generation dataset, as you should know, it's kind
            of strange that we only initialize the embedding in decoder when
            both input and output of the encoder and decoder are in same vector
            space.)

        As mentioned above, since that the options are not disjoint, I'll list
        some possible combination below, which are reasonable to be tested and
        compared:

        1) en_embedding=True, share_embedding=True, \
                use_embedding=True (dialogue generation)
        2) en_embedding=True, share_embedding=True, \
                use_embedding=False (dialogue generation)
        3) en_embedding=True, share_embedding=False, \
                use_embedding=True (semantic form to NL)
        4) en_embedding=False, share_embedding=X(don't care), \
                use_embedding=True (semantic form to NL)
        5) en_embedding=True, share_embedding=False, \
                use_embedding=False (semantic form to NL)
        6) en_embedding=False, share_embedding=X(don't care), \
                use_embedding=False (semantic form to NL)

        """
        # embedding layer setting
        if not en_embedding:
            en_embed = None
            de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
            if use_embedding:
                de_embed.weight = embeddings
                if not finetune_embedding:
                    de_embed.weight.requires_grad = False
        else:
            if share_embedding:
                embed = nn.Embedding(vocab_size, embedding_dim)
                if use_embedding:
                    embed.weight = embeddings
                    if not finetune_embedding:
                        embed.weight.requires_grad = False
                en_embed = embed
                de_embed = embed
            else:
                en_embed = nn.Embedding(en_vocab_size, en_embedding_dim)
                de_embed = nn.Embedding(de_vocab_size, de_embedding_dim)
                if use_embedding:
                    # in E2ENLG dataset, only decoder use word embedding
                    de_embed.weight = embeddings
                    if not finetune_embedding:
                        de_embed.weight.requires_grad = False

        self.encoder = EncoderRNN(
            en_embedding=en_embedding,
            embedding=en_embed,
            en_vocab_size=en_vocab_size,
            en_embedding_dim=(embedding_dim if share_embedding and en_embedding
                              else en_embedding_dim),
            hidden_size=en_hidden_size,
            n_layers=n_en_layers,
            bidirectional=bidirectional,
            cell=cell)

        self.cell = cell
        self.decoders = []
        for n in range(n_decoders):
            decoder = DecoderRNN(
                embedding=de_embed,
                de_vocab_size=de_vocab_size,
                de_embedding_dim=(embedding_dim if share_embedding
                                  and en_embedding else self.de_embedding_dim),
                en_hidden_size=en_hidden_size,
                de_hidden_size=de_hidden_size,
                n_en_layers=n_en_layers,
                n_de_layers=n_de_layers,
                bidirectional=bidirectional,
                feed_last=(feed_last and n > 0),
                batch_norm=batch_norm,
                attn_method=attn_method,
                cell=cell)
            self.decoders.append(decoder)

        self.encoder = self.encoder.cuda() if use_cuda else self.encoder
        self.decoders = [
            decoder.cuda() if use_cuda else decoder
            for decoder in self.decoders
        ]

        # Initialize data loaders and optimizers
        self.train_data_loader = DataLoader(train_data_engine,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=1,
                                            drop_last=True,
                                            collate_fn=collate_fn,
                                            pin_memory=True)

        self.test_data_loader = DataLoader(test_data_engine,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           num_workers=1,
                                           drop_last=True,
                                           collate_fn=collate_fn,
                                           pin_memory=True)

        # encoder parameters optimization
        self.encoder_parameters = filter(lambda p: p.requires_grad,
                                         self.encoder.parameters())
        self.encoder_optimizer = build_optimizer(en_optimizer,
                                                 self.encoder_parameters,
                                                 en_learning_rate)
        # decoder parameters optimization
        decoder_parameters = []
        for decoder in self.decoders:
            decoder_parameters.extend(list(decoder.parameters()))
        self.decoder_parameters = filter(lambda p: p.requires_grad,
                                         decoder_parameters)
        self.decoder_optimizer = build_optimizer(de_optimizer,
                                                 self.decoder_parameters,
                                                 de_learning_rate)

        print_time_info("Model create complete")
        # check directory and model existence
        Y, M, D, h, m, s = get_time()
        if not replace_model:
            self.model_dir = os.path.join(
                self.model_dir,
                "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))

        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)
        else:
            if not is_load:
                check_dir(self.model_dir)

        self.log_dir = os.path.join(
            self.log_dir,
            "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))

        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
            os.makedirs(os.path.join(self.log_dir, "validation"))

        with open(os.path.join(self.log_dir, "model_config"), "w+") as f:
            for arg in vars(model_config):
                f.write("{}: {}\n".format(arg, str(getattr(model_config,
                                                           arg))))

        if is_load:
            self.load_model(model_dir)

        # Initialize the log files
        self.logger = Logger(self.log_dir)
        self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
        self.valid_batch_log_path = os.path.join(self.log_dir,
                                                 "valid_batch_log.csv")
        self.valid_epoch_log_path = os.path.join(self.log_dir,
                                                 "valid_epoch_log.csv")

        with open(self.train_log_path, 'w') as file:
            file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
        with open(self.valid_batch_log_path, 'w') as file:
            file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
        with open(self.valid_epoch_log_path, 'w') as file:
            file.write("epoch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")

        # Initialize batch count
        self.batches = 0

        self.en_use_attr_init_state = en_use_attr_init_state
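
The docstring above lists six flag combinations. A minimal standalone sketch of combination 1 (en_embedding=True, share_embedding=True, use_embedding=True), with toy sizes and a random tensor standing in for the pre-trained embeddings:

    import torch
    import torch.nn as nn

    vocab_size, embedding_dim = 1000, 64
    embeddings = nn.Parameter(torch.randn(vocab_size, embedding_dim))

    # share_embedding=True: encoder and decoder reuse one nn.Embedding,
    # so gradients from both sides update the same weight matrix.
    embed = nn.Embedding(vocab_size, embedding_dim)
    embed.weight = embeddings            # use_embedding=True: pre-trained init
    embed.weight.requires_grad = False   # finetune_embedding=False: freeze
    en_embed = de_embed = embed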
Example #7
    def __init__(self, batch_size, en_optimizer, de_optimizer,
                 en_learning_rate, de_learning_rate, attn_method,
                 train_data_engine, test_data_engine, use_embedding,
                 en_hidden_size, de_hidden_size, en_vocab_size, de_vocab_size,
                 vocab_size, embedding_dim, embeddings, n_decoders,
                 n_en_layers, n_de_layers, bidirectional, feed_last,
                 repeat_input, model_dir, log_dir, finetune_embedding,
                 model_config):

        # Initialize attributes
        self.data_engine = train_data_engine
        self.n_decoders = n_decoders
        self.log_dir = log_dir
        self.model_dir = model_dir
        self.embedding_dim = embedding_dim
        self.repeat_input = repeat_input

        # Initialize embeddings, encoders and decoders
        # embedding layer setting
        de_embed = nn.Embedding(de_vocab_size, embedding_dim)
        if use_embedding:
            de_embed.weight = embeddings
            if not finetune_embedding:
                de_embed.weight.requires_grad = False

        self.encoder = EncoderRNN(en_vocab_size=en_vocab_size,
                                  hidden_size=en_hidden_size,
                                  n_layers=n_en_layers,
                                  bidirectional=bidirectional)

        self.decoders = []
        for n in range(n_decoders):
            decoder = DecoderRNN(
                embedding=de_embed,
                de_vocab_size=de_vocab_size,
                de_embedding_dim=embedding_dim,
                en_hidden_size=en_hidden_size,
                de_hidden_size=de_hidden_size,
                n_en_layers=n_en_layers,
                n_de_layers=n_de_layers,
                attn_method=attn_method,
                bidirectional=bidirectional,
                feed_last=(feed_last and n > 0))
            self.decoders.append(decoder)

        self.encoder = self.encoder.cuda() if use_cuda else self.encoder
        self.decoders = [
            decoder.cuda() if use_cuda else decoder
            for decoder in self.decoders
        ]

        # Initialize data loaders and optimizers
        self.train_data_loader = DataLoader(train_data_engine,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            num_workers=1,
                                            drop_last=True,
                                            collate_fn=collate_fn,
                                            pin_memory=True)

        self.test_data_loader = DataLoader(test_data_engine,
                                           batch_size=batch_size,
                                           shuffle=False,
                                           num_workers=1,
                                           drop_last=True,
                                           collate_fn=collate_fn,
                                           pin_memory=True)

        # encoder parameters optimization
        self.encoder_parameters = filter(lambda p: p.requires_grad,
                                         self.encoder.parameters())
        self.encoder_optimizer = build_optimizer(en_optimizer,
                                                 self.encoder_parameters,
                                                 en_learning_rate)
        # decoder parameters optimization
        decoder_parameters = []
        for decoder in self.decoders:
            decoder_parameters.extend(list(decoder.parameters()))
        self.decoder_parameters = filter(lambda p: p.requires_grad,
                                         decoder_parameters)
        self.decoder_optimizer = build_optimizer(de_optimizer,
                                                 self.decoder_parameters,
                                                 de_learning_rate)

        print_time_info("Model create complete")
        # check directory and model existence
        Y, M, D, h, m, s = get_time()
        self.model_dir = os.path.join(
            self.model_dir,
            "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))

        if not os.path.isdir(self.model_dir):
            os.makedirs(self.model_dir)
        else:
            check_dir(self.model_dir)

        self.log_dir = os.path.join(
            self.log_dir,
            "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(Y, M, D, h, m, s))

        if not os.path.isdir(self.log_dir):
            os.makedirs(self.log_dir)
            os.makedirs(os.path.join(self.log_dir, "validation"))

        with open(os.path.join(self.log_dir, "model_config"), "w+") as f:
            for arg in vars(model_config):
                f.write("{}: {}\n".format(arg, str(getattr(model_config,
                                                           arg))))

        # Initialize the log files
        self.train_log_path = os.path.join(self.log_dir, "train_log.csv")
        self.valid_batch_log_path = os.path.join(self.log_dir,
                                                 "valid_batch_log.csv")
        self.valid_epoch_log_path = os.path.join(self.log_dir,
                                                 "valid_epoch_log.csv")

        with open(self.train_log_path, 'w') as file:
            file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
        with open(self.valid_batch_log_path, 'w') as file:
            file.write("epoch, batch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")
        with open(self.valid_epoch_log_path, 'w') as file:
            file.write("epoch, loss, avg-bleu, avg-rouge(1,2,L,BE)\n")

        # Initialize batch count
        self.batches = 0
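
Examples #6 and #7 name the model and log directories with a YYYYMMDD_hhmmss timestamp taken from a project helper get_time(). A self-contained equivalent using datetime (an assumption; get_time's actual implementation is not shown here):

    import os
    from datetime import datetime

    now = datetime.now()
    stamp = "{}{:0>2}{:0>2}_{:0>2}{:0>2}{:0>2}".format(
        now.year, now.month, now.day, now.hour, now.minute, now.second)
    model_dir = os.path.join("./model", stamp)   # e.g. ./model/20240131_094502
    os.makedirs(model_dir, exist_ok=True)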