Code example #1
    def __init__(self, model_file_path):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.eval_data_path, self.vocab, mode='eval',
                               batch_size=config.batch_size, single_pass=True)
        time.sleep(15)
        model_name = os.path.basename(model_file_path)

        eval_dir = os.path.join(config.log_root, 'eval_%s' % (model_name))
        if not os.path.exists(eval_dir):
            os.mkdir(eval_dir)
        self.summary_writer = tf.summary.FileWriter(eval_dir)

        self.model = Model(model_file_path, is_eval=True)
Code example #2
    def __init__(self):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.train_data_path,
                               self.vocab,
                               mode='train',
                               batch_size=config.batch_size,
                               single_pass=False)
        time.sleep(15)

        train_dir = os.path.join(config.log_root,
                                 'train_%d' % (int(time.time())))
        if not os.path.exists(train_dir):
            os.mkdir(train_dir)

        self.model_dir = os.path.join(train_dir, 'model')
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

        self.summary_writer = tf.summary.FileWriter(train_dir)
Code example #3
def main():

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Starting seq2seq_attention in %s mode...', (args.mode))

    args.model_path = os.path.join(args.model_path, args.exp_name)
    if not os.path.exists(args.model_path):
        if args.mode == "train":
            os.makedirs(args.model_path)
        else:
            raise Exception(
                "Logdir %s doesn't exist. Run in train mode to create it." %
                (args.model_path))


    # Load the dataset: build the source vocabulary
    src_vocab = utils.Vocab(args.src_vocab_path, args.src_vocab_size)
    # Build the target vocabulary
    tgt_vocab = utils.Vocab(args.tgt_vocab_path, args.tgt_vocab_size)
    # Batch the dataset, feeding examples through a concurrent input queue
    batcher = Batcher(args.data_path, src_vocab, tgt_vocab, args)

    if args.model == "vanilla":
        model_class = VanillaSeq2seqModel
    elif args.model == "sep_dec":
        model_class = SeparateDecoderModel
    elif args.model == "shd_dec":
        model_class = SharedDecoderModel

    tf.set_random_seed(111)

    if args.mode == 'train':
        model = model_class(args, src_vocab, tgt_vocab)
        setup_training(model, batcher)
    elif args.mode == 'eval':
        model = model_class(args, src_vocab, tgt_vocab)
        run_eval(model, batcher, args.ckpt_id)
    elif args.mode == "decode":
        args.batch_size = args.beam_size
        args.arg_max_dec_steps = 1
        args.kp_max_dec_steps = 1
        model = model_class(args, src_vocab, tgt_vocab)
        decoder = BeamSearchDecoder(model, batcher, src_vocab, tgt_vocab,
                                    args.ckpt_id)
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
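The `args` namespace used throughout this main() is not shown in the snippet. As a point of reference only, a minimal argparse sketch covering just the flags referenced above might look like the following; the flag names come from the snippet, while the defaults are illustrative assumptions:

import argparse

def get_args():
    # Only the flags referenced in code example #3; defaults are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', default='train', choices=['train', 'eval', 'decode'])
    parser.add_argument('--model', default='vanilla', choices=['vanilla', 'sep_dec', 'shd_dec'])
    parser.add_argument('--exp_name', default='exp_1')
    parser.add_argument('--model_path', default='log')
    parser.add_argument('--data_path', default='data/train.bin')
    parser.add_argument('--src_vocab_path', default='data/src_vocab')
    parser.add_argument('--src_vocab_size', type=int, default=50000)
    parser.add_argument('--tgt_vocab_path', default='data/tgt_vocab')
    parser.add_argument('--tgt_vocab_size', type=int, default=50000)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--beam_size', type=int, default=4)
    parser.add_argument('--ckpt_id', default=None)
    return parser.parse_args()

args = get_args()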
Code example #4
class Train:
    def __init__(self):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.train_data_path,
                               self.vocab,
                               mode='train',
                               batch_size=config.batch_size,
                               single_pass=False)
        time.sleep(15)  # give the batcher's background queue time to fill

        train_dir = os.path.join(config.log_root,
                                 'train_%d' % (int(time.time())))
        if not os.path.exists(train_dir):
            os.mkdir(train_dir)

        self.model_dir = os.path.join(train_dir, 'model')
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

        self.summary_writer = tf.summary.FileWriter(train_dir)

    def save_model(self, moving_avg_loss, iter):
        state = {
            'iter': iter,
            'encoder_state_dict': self.model.encoder.state_dict(),
            'decoder_state_dict': self.model.decoder.state_dict(),
            'reduce_state_dict': self.model.reduce_state.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'current_loss': moving_avg_loss
        }
        model_save_path = os.path.join(
            self.model_dir, 'model_%d_%d' % (iter, int(time.time())))
        torch.save(state, model_save_path)

    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)

        params = list(self.model.encoder.parameters()) + list(self.model.decoder.parameters()) + \
                 list(self.model.reduce_state.parameters())
        initial_lr = config.lr_coverage if config.do_coverage else config.lr
        self.optimizer = Adagrad(
            params,
            lr=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc)

        start_iter, start_loss = 0, 0
        if model_file_path is not None:
            state = torch.load(model_file_path,
                               map_location=lambda storage, location: storage)
            start_iter = state['iter']
            start_loss = state['current_loss']

            # Once training switches to the coverage objective, a fresh optimizer
            # state is needed, so the saved optimizer state is only restored while
            # coverage is still disabled. This controls the timing of that switch.
            if not config.do_coverage:
                self.optimizer.load_state_dict(state['optimizer'])
                if use_cuda:
                    # `opt_state` avoids shadowing the checkpoint dict `state` above.
                    for opt_state in self.optimizer.state.values():
                        for k, v in opt_state.items():
                            if torch.is_tensor(v):
                                opt_state[k] = v.cuda()

        return start_iter, start_loss

    def train_one_batch(self, batch):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, context_v, coverage = \
            get_encoder_variables(batch, use_cuda)
        # dec_lens_var: decoder target sequence lengths for the batch
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_decoder_variables(batch, use_cuda)

        self.optimizer.zero_grad()

        if 0 in enc_lens:
            print('=================')
            print(enc_batch.shape)
            print(enc_lens)
            print(enc_batch)
            print('=================')
        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
            enc_batch, enc_lens)
        d_hc = self.model.reduce_state(encoder_hidden)  # initial decoder (h, c)

        step_losses = []
        # for step in tqdm.tqdm(range(min(max_dec_len, config.max_dec_steps))):
        for step in range(min(max_dec_len, config.max_dec_steps)):
            d_inp = dec_batch[:, step]  # Teacher forcing
            final_dist, d_hc, context_v, attn_dist, p_gen, next_coverage = self.model.decoder(
                d_inp, d_hc, encoder_outputs, encoder_feature,
                enc_padding_mask, context_v, extra_zeros,
                enc_batch_extend_vocab, coverage, step)
            target = target_batch[:, step]
            # Gather the predicted probability of each step's target id
            gold_probs = torch.gather(final_dist, 1,
                                      target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            if config.do_coverage:
                step_coverage_loss = torch.sum(torch.min(attn_dist, coverage),
                                               1)  # cumulative encoder attention used as the coverage loss; see the original paper
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                coverage = next_coverage

            step_mask = dec_padding_mask[:, step]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)

        loss.backward()

        self.norm = clip_grad_norm_(self.model.encoder.parameters(),
                                    config.max_grad_norm)
        clip_grad_norm_(self.model.decoder.parameters(), config.max_grad_norm)
        clip_grad_norm_(self.model.reduce_state.parameters(),
                        config.max_grad_norm)

        self.optimizer.step()

        return loss.item()

    def trainIters(self, n_iters, model_file_path=None):
        iter, moving_avg_loss = self.setup_train(model_file_path)
        start = time.time()
        pbar = tqdm.tqdm(total=n_iters)
        while iter < n_iters:
            batch = self.batcher.next_batch()
            loss = self.train_one_batch(batch)

            moving_avg_loss = calc_moving_avg_loss(loss, moving_avg_loss,
                                                   self.summary_writer, iter)
            iter += 1
            pbar.update(1)

            if iter % 100 == 0:
                self.summary_writer.flush()
            print_interval = 100
            if iter % print_interval == 0:
                print('steps %d, seconds for %d batch: %.2f , loss: %f' %
                      (iter, print_interval, time.time() - start, loss))
                start = time.time()
            if iter % 5000 == 0:
                self.save_model(moving_avg_loss, iter)
        pbar.close()
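A minimal way to drive this class (a sketch only; `config.max_iterations` and the checkpoint path are assumptions, not part of the snippet):

if __name__ == '__main__':
    trainer = Train()
    # Pass a checkpoint path as model_file_path to resume training;
    # config.max_iterations is an assumed config entry, not shown above.
    trainer.trainIters(config.max_iterations, model_file_path=None)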
Code example #5
def main():

    utils.print_config(args)

    if 'train' not in args.mode:
        args.keep_rate = 1.0
    args.use_pretrain = (args.use_pretrain == 'True')
    args.use_aux_task = (args.use_aux_task == 'True')

    if args.mode == 'lm_train':
        args.model = 'lm'
        args.data_path = "./data/wikitext/wikitext-103/processed_wiki_train.bin"
        args.use_pretrain = False

    args.model_path = os.path.join(args.model_path, args.exp_name).format(
        args.model)  # model_path default: "data/log/{}"

    if not os.path.exists(args.model_path):
        if 'train' not in args.mode:
            print(args.model_path)
            raise ValueError("model_path does not exist; run in train mode to create it")
        os.makedirs(args.model_path)
    with open(os.path.join(args.model_path, 'config.json'),
              'w',
              encoding='utf8') as f:
        json.dump(vars(args), f)

    print("Default models path: {}".format(args.model_path))

    print('code start/ {} mode / {} models'.format(args.mode, args.model))
    utils.assign_specific_gpu(args.gpu_nums)

    vocab = utils.Vocab()

    vardicts = utils.get_pretrain_weights(
        args.true_pretrain_ckpt_path
    ) if args.use_pretrain and args.mode == 'train' else None

    if args.mode == 'decode':
        if args.model == 'mmi_bidi': args.beam_size = args.mmi_bsize
        args.batch_size = args.beam_size

    modelhps = deepcopy(args)
    if modelhps.mode == 'decode':
        modelhps.max_dec_len = 1

    if args.model == 'vanilla':
        model = BaseModel(vocab, modelhps)
    elif args.model == 'mmi_bidi':
        if args.mode == 'decode':
            bw_graph = tf.Graph()
            with bw_graph.as_default():
                bw_model = BaseModel(vocab, args)

            bw_sess = tf.Session(graph=bw_graph, config=utils.gpu_config())

            with bw_sess.as_default():
                with bw_graph.as_default():
                    bidi_ckpt_path = utils.load_ckpt(bw_model.hps,
                                                     bw_model.saver, bw_sess)

            fw_graph = tf.Graph()
            with fw_graph.as_default():
                modelhps.model_path = modelhps.model_path.replace(
                    'mmi_bidi', 'vanilla')
                modelhps.model = 'vanilla'
                fw_model = BaseModel(vocab, modelhps)
            fw_sess = tf.Session(graph=fw_graph)
            with fw_sess.as_default():
                with fw_graph.as_default():
                    ckpt_path = utils.load_ckpt(fw_model.hps, fw_model.saver,
                                                fw_sess)
        else:
            model = BaseModel(vocab, modelhps)

    elif args.model == 'lm':
        model = LMModel(vocab, modelhps)
    elif args.model == 'embmin':
        model = DiverEmbMin(vocab, modelhps)
    else:
        raise ValueError
    print('models load end')

    if args.mode in ['train', 'lm_train']:
        train(model, vocab, vardicts)
    elif args.mode == 'decode':

        if args.model == 'mmi_bidi':
            batcher = Batcher(
                vocab, bw_model.hps.data_path.replace('train_', 'test_'), args)
            decoder = BeamsearchDecoder(fw_model,
                                        batcher,
                                        vocab,
                                        fw_sess=fw_sess,
                                        bw_model=bw_model,
                                        bw_sess=bw_sess,
                                        bidi_ckpt_path=bidi_ckpt_path)
        else:
            batcher = Batcher(vocab,
                              model.hps.data_path.replace('train_', 'test_'),
                              args)
            decoder = BeamsearchDecoder(model, batcher, vocab)
        decoder.decode()
    elif args.mode == 'eval':
        pass
Code example #6
class Evaluate(object):
    """在eval data上计算损失。"""
    def __init__(self, model_file_path):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.eval_data_path, self.vocab, mode='eval',
                               batch_size=config.batch_size, single_pass=True)
        time.sleep(15)
        model_name = os.path.basename(model_file_path)

        eval_dir = os.path.join(config.log_root, 'eval_%s' % (model_name))
        if not os.path.exists(eval_dir):
            os.mkdir(eval_dir)
        self.summary_writer = tf.summary.FileWriter(eval_dir)

        self.model = Model(model_file_path, is_eval=True)

    def eval_one_batch(self, batch):
        "train_one_batch不进行back propagation"
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, context_v, coverage = \
            get_encoder_variables(batch, use_cuda)
        # dec_lens_var: decoder target sequence lengths for the batch
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_decoder_variables(batch, use_cuda)

        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(enc_batch, enc_lens)
        d_hc = self.model.reduce_state(encoder_hidden)  # initial decoder (h, c)

        step_losses = []
        for step in range(min(max_dec_len, config.max_dec_steps)):
            d_inp = dec_batch[:, step]  # Teacher forcing
            final_dist, d_hc, context_v, attn_dist, p_gen, next_coverage = self.model.decoder(d_inp,
                                                                                              d_hc,
                                                                                              encoder_outputs,
                                                                                              encoder_feature,
                                                                                              enc_padding_mask,
                                                                                              context_v,
                                                                                              extra_zeros,
                                                                                              enc_batch_extend_vocab,
                                                                                              coverage,
                                                                                              step)
            target = target_batch[:, step]
            gold_probs = torch.gather(final_dist, 1, target.unsqueeze(1)).squeeze()
            step_loss = -torch.log(gold_probs + config.eps)
            if config.do_coverage:
                step_coverage_loss = torch.sum(torch.min(attn_dist, coverage), 1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                coverage = next_coverage

            step_mask = dec_padding_mask[:, step]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_step_losses = torch.sum(torch.stack(step_losses, 1), 1)
        batch_avg_loss = sum_step_losses / dec_lens_var
        loss = torch.mean(batch_avg_loss)

        return loss.item()  # loss.data[0] fails on 0-dim tensors in modern PyTorch

    def run_eval(self):
        moving_avg_loss, iter = 0, 0
        start = time.time()
        batch = self.batcher.next_batch()
        while batch is not None:
            loss = self.eval_one_batch(batch)

            moving_avg_loss = calc_moving_avg_loss(loss, moving_avg_loss, self.summary_writer, iter)
            iter += 1

            if iter % 100 == 0:
                self.summary_writer.flush()
            print_interval = 1000
            if iter % print_interval == 0:
                print('steps %d, seconds for %d batch: %.2f , loss: %f' % (
                    iter, print_interval, time.time() - start, moving_avg_loss))
                start = time.time()
            batch = self.batcher.next_batch()
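Usage mirrors the Train class above; a minimal sketch, assuming the argument is the path of a checkpoint written by Train.save_model:

import sys

if __name__ == '__main__':
    # e.g. <log_root>/train_<timestamp>/model/model_<iter>_<timestamp>
    evaluator = Evaluate(sys.argv[1])
    evaluator.run_eval()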
Code example #7
def train(model, vocab, pretrain_vardicts=None):
    train_data_loader = Batcher(vocab, model.hps.data_path, args)
    valid_data_loader = Batcher(vocab,
                                model.hps.data_path.replace('train_', 'dev_'),
                                args)
    if model.hps.mode == 'lm_train':
        valid_data_loader = Batcher(
            vocab, model.hps.data_path.replace('train_', 'valid_'), args)

    with tf.Session(config=utils.gpu_config()) as sess:
        train_logdir, dev_logdir = os.path.join(args.model_path,
                                                'logdir/train'), os.path.join(
                                                    args.model_path,
                                                    'logdir/dev')
        train_savedir = os.path.join(args.model_path, 'train/')
        print("[*] Train save directory is: {}".format(train_savedir))
        if not os.path.exists(train_logdir): os.makedirs(train_logdir)
        if not os.path.exists(dev_logdir): os.makedirs(dev_logdir)
        if not os.path.exists(train_savedir): os.makedirs(train_savedir)

        summary_writer1 = tf.summary.FileWriter(train_logdir, sess.graph)
        summary_writer2 = tf.summary.FileWriter(dev_logdir, sess.graph)
        """
        Initialize with pretrain variables
        """
        if model.hps.use_pretrain:
            assign_ops, uninitialized_varlist = utils.assign_pretrain_weights(
                pretrain_vardicts)
            sess.run(assign_ops)
            sess.run(tf.initialize_variables(uninitialized_varlist))
        else:
            sess.run(tf.global_variables_initializer())
        posterior = [0 for _ in range(model.hps.matrix_num)]
        prior = [0 for _ in range(model.hps.matrix_num)]
        step = 0
        while True:  # 6978 samples per epoch
            beg_time = time()

            batch = train_data_loader.next_batch()
            sample_per_epoch = 857899 if 'lm' in model.hps.mode else 6978

            if model.hps.mode == 'lm_train':
                res = model.run_step(batch, sess, is_train=True)
            else:
                res = model.run_step(
                    batch,
                    sess,
                    is_train=True,
                    freeze_layer=(
                        model.hps.use_pretrain
                        and step < sample_per_epoch / model.hps.batch_size))
            loss, summaries, step = res['loss'], res['summaries'], res[
                'global_step']
            if model.hps.model == 'posterior':
                gumbel = res['posterior']
                gumbel_prior = res['prior']
                selected = np.argsort(-gumbel)
                selected_poste = [int(el[0]) for el in selected]
                selected_prior = [
                    int(el[0]) for el in np.argsort(-gumbel_prior)
                ]
                posterior = [
                    el1 + el2 for el1, el2 in zip(posterior, selected_poste)
                ]
                prior = [el1 + el2 for el1, el2 in zip(prior, selected_prior)]
                print("prior: {}  posterior: {}".format(prior, posterior))
            elif model.hps.model == 'embmin':
                dist = res['selected_emb_idx']
                for tmp in dist:
                    prior[tmp] += 1
                print(prior)

            end_time = time()
            print("{} epoch, {} step, {}sec, {} loss".format(
                int(step * model.hps.batch_size / sample_per_epoch), step,
                round(end_time - beg_time, 3), round(loss, 3)))
            summary_writer1.add_summary(summaries, step)

            if step % 5 == 0:
                dev_batch = valid_data_loader.next_batch()
                res = model.run_step(dev_batch, sess, is_train=False)
                loss, summaries, step = res['loss'], res['summaries'], res[
                    'global_step']
                assert step % 5 == 0
                print("[VALID] {} loss".format(round(loss, 3)))
                summary_writer2.add_summary(summaries, step)

            if step == 10 or step % 2000 == 0:
                model.saver.save(sess, train_savedir, global_step=step)

            if int(step * model.hps.batch_size /
                   sample_per_epoch) > model.hps.max_epoch:
                model.saver.save(sess, train_savedir, global_step=step)
                print("training end")
                break
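For reference, this train() is the function that code example #5 dispatches to: in 'train' or 'lm_train' mode its main() builds the vocab, the model, and (optionally) the pretrained variable dicts, then hands them over. A minimal sketch of that hand-off, using the names from example #5:

vocab = utils.Vocab()
vardicts = utils.get_pretrain_weights(args.true_pretrain_ckpt_path) \
    if args.use_pretrain and args.mode == 'train' else None
model = BaseModel(vocab, deepcopy(args))
train(model, vocab, vardicts)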