Code Example #1
File: train.py Project: sonamghosh/whack_2018
def da_rnn(train_data: TrainData, n_targs: int, 
           encoder_hidden_size=64, decoder_hidden_size=64,
           T=10, learning_rate=0.01, batch_size=128):
    
    train_cfg = TrainConfig(T, int(train_data.feats.shape[0] * 0.7), batch_size, nn.MSELoss())
    logger.info(f"Training size: {train_cfg.train_size:d}.")

    enc_kwargs = {"input_size": train_data.feats.shape[1], "hidden_size": encoder_hidden_size, "T": T}
    encoder = Encoder(**enc_kwargs).to(device)
    with open(os.path.join("data", "enc_kwargs.json"), "w") as f:
        json.dump(enc_kwargs, f, indent=4)

    dec_kwargs = {"encoder_hidden_size": encoder_hidden_size,
                  "decoder_hidden_size": decoder_hidden_size, "T": T, "out_feats": n_targs}
    decoder = Decoder(**dec_kwargs).to(device)
    with open(os.path.join("data", "dec_kwargs.json"), "w") as f:
        json.dump(dec_kwargs, f, indent=4)

    encoder_optimizer = optim.Adam(
                        params=[p for p in encoder.parameters() if p.requires_grad],
                        lr=learning_rate)
    decoder_optimizer = optim.Adam(
                        params=[p for p in decoder.parameters() if p.requires_grad],
                        lr=learning_rate)
    da_rnn_net = DaRnnNet(encoder, decoder, encoder_optimizer, decoder_optimizer)

    return train_cfg, da_rnn_net
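
TrainConfig and DaRnnNet are small containers defined elsewhere in this project. A minimal sketch using collections.namedtuple, assuming field names that match the attribute accesses above (train_cfg.train_size, etc.); the exact definitions in the repository may differ:

import collections

# Hypothetical reconstruction of the two containers used by da_rnn above.
TrainConfig = collections.namedtuple("TrainConfig", ["T", "train_size", "batch_size", "loss_func"])
DaRnnNet = collections.namedtuple("DaRnnNet", ["encoder", "decoder", "enc_opt", "dec_opt"])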
Code Example #2
def instantiate_model(config, tokenizer):
    configure_devices(config)
    model = Model(config)
    optimizer = transformers.AdamW(model.parameters(), lr=config.learning_rate, weight_decay=0)
    metrics = None

    if config.continue_training:
        state_dict = torch.load(config.continue_training, map_location='cpu')
        model.load_state_dict(state_dict['model'])
        if 'optimizer_state_dict' in state_dict:
            optimizer.load_state_dict(state_dict['optimizer_state_dict'])
            for g in optimizer.param_groups:
                g['lr'] = config.learning_rate
        
        try:
            print(f"Loaded model:\nEpochs: {state_dict['epoch']}\nLoss: {state_dict['loss']}\n", 
                  f"Recall: {state_dict['rec']}\nMRR: {state_dict['mrr']}")
        except KeyError:
            pass
        
    if config.use_cuda:
        model = model.cuda()
        optimizer_to(optimizer, config.device)
        model = torch.nn.DataParallel(model, device_ids=config.devices)
    return model, optimizer, metrics
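
optimizer_to is not part of PyTorch; it is typically a small helper that moves every tensor in the optimizer state onto the target device, so an optimizer whose state was loaded with map_location='cpu' can drive GPU training. A minimal sketch under that assumption (the project's actual helper may differ):

import torch

def optimizer_to(optimizer, device):
    # Walk the per-parameter state (e.g. Adam's exp_avg / exp_avg_sq buffers)
    # and move each tensor to the device, mirroring model.to(device).
    for state in optimizer.state.values():
        for k, v in state.items():
            if torch.is_tensor(v):
                state[k] = v.to(device)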
Code Example #3
def train(model_path=None):
    dataloader = DataLoader(Augmentation())
    encoder = Encoder()
    dict_len = len(dataloader.data.dictionary)
    decoder = DecoderWithAttention(dict_len)

    if cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    # if model_path:
    #   text_generator.load_state_dict(torch.load(model_path))
    train_iter = 1
    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=cfg.encoder_learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=cfg.decoder_learning_rate)

    val_bleu = list()
    losses = list()
    while True:
        batch_image, batch_label = dataloader.get_next_batch()
        batch_image = torch.from_numpy(batch_image).type(torch.FloatTensor)
        batch_label = torch.from_numpy(batch_label).type(torch.LongTensor)
        if cuda:
            batch_image = batch_image.cuda()
            batch_label = batch_label.cuda()
        # print(batch_image.size())
        # print(batch_label.size())

        print('Training')
        output = encoder(batch_image)
        # print('encoder output:', output.size())
        predictions, alphas = decoder(output, batch_label)

        loss = cal_loss(predictions, batch_label, alphas, 1)

        decoder_optimizer.zero_grad()
        encoder_optimizer.zero_grad()
        loss.backward()
        decoder_optimizer.step()
        encoder_optimizer.step()

        print('Iter', train_iter, '| loss:',
              loss.cpu().data.numpy(), '| batch size:', cfg.batch_size,
              '| encoder learning rate:', cfg.encoder_learning_rate,
              '| decoder learning rate:', cfg.decoder_learning_rate)
        losses.append(loss.cpu().data.numpy())
        if train_iter % cfg.save_model_iter == 0:
            val_bleu.append(val_eval(encoder, decoder, dataloader))
            torch.save(
                encoder.state_dict(), './models/train/encoder_' +
                cfg.pre_train_model + '_' + str(train_iter) + '.pkl')
            torch.save(decoder.state_dict(),
                       './models/train/decoder_' + str(train_iter) + '.pkl')
            np.save('./result/train_bleu4.npy', val_bleu)
            np.save('./result/losses.npy', losses)

        if train_iter == cfg.train_iter:
            break
        train_iter += 1
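
cal_loss is defined elsewhere; given that it receives the attention weights alphas and a final weight argument of 1, it plausibly combines word-level cross-entropy with the doubly stochastic attention regularization from "Show, Attend and Tell". A sketch under that assumption (the signature and tensor shapes are guesses, not the project's exact code):

import torch.nn.functional as F

def cal_loss(predictions, targets, alphas, alpha_c=1.0):
    # predictions: (batch, seq_len, vocab); targets: (batch, seq_len)
    ce = F.cross_entropy(predictions.reshape(-1, predictions.size(-1)),
                         targets.reshape(-1))
    # Penalize attention maps whose weights do not sum to ~1 over time,
    # encouraging the decoder to attend to every image region once.
    att_reg = alpha_c * ((1.0 - alphas.sum(dim=1)) ** 2).mean()
    return ce + att_reg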
Code Example #4
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    train_path = params.train_path
    assert torch.cuda.is_available()

    print("loading_data...")
    # At training time, load the previously built vocabulary if one exists
    if os.path.exists("vocab.json"):
        vocab = Vocabulary()
        with open('vocab.json', 'r') as fp:
            vocab.stoi = json.load(fp)

        for key, value in vocab.stoi.items():
            vocab.itos.append(key)
    else:
        vocab = build_vocab(train_path, n_vocab)
        # save vocab
        with open('vocab.json', 'w') as fp:
            json.dump(vocab.stoi, fp)

    train_X, train_y, train_K = load_data(train_path, vocab)
    train_loader = get_data_loader(train_X, train_y, train_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer,
                                vocab).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()

    if args.restore:
        encoder = init_model(encoder, restore=params.encoder_restore)
        Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
        manager = init_model(manager, restore=params.manager_restore)
        decoder = init_model(decoder, restore=params.decoder_restore)

    # TODO: every embedding here is currently independent; following the Transformer
    # reference implementation, parameters could be shared by direct assignment:
    #if emb_src_trg_weight_sharing:
    #   self.encoder.src_word_emb.weight = self.decoder.trg_word_emb.weight

    model = [encoder, Kencoder, manager, decoder]
    parameters = list(encoder.parameters()) + list(Kencoder.parameters()) + \
                 list(manager.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    # pre_train knowledge manager
    print("start pre-training")
    pre_train(model, optimizer, train_loader, args)
    print("start training")
    train(model, optimizer, train_loader, args)

    # save final model
    save_models(model, params.all_restore)
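
init_model is used above to restore each module from its checkpoint path. A minimal sketch of such a helper, assuming it simply loads a state dict when the file exists (the project's version may log or handle devices differently):

import os
import torch

def init_model(net, restore):
    # Restore saved weights if the checkpoint exists; otherwise return
    # the freshly initialized network unchanged.
    if restore is not None and os.path.exists(restore):
        net.load_state_dict(torch.load(restore, map_location="cpu"))
    return net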
Code Example #5
def train(train_loader, val_loader, epochnum, save_path='.', save_freq=None):
    iter_size = len(train_loader)
    net = Encoder()
    net.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=2e-4)

    for epoch in range(epochnum):
        print('epoch : {}'.format(epoch))
        net.train()
        train_loss = 0
        train_correct = 0
        total = 0
        net.training = True
        for i, data in enumerate(train_loader):
            sys.stdout.write('iter : {} / {}\r'.format(i, iter_size))
            sys.stdout.flush()
            #print('iter: {} / {}'.format(i, iter_size))
            inputs, labels = data
            inputs, labels = Variable(inputs.cuda()), labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, Variable(labels))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            pred = (torch.max(outputs.data, 1)[1])
            train_correct += (pred == labels).sum().item()
            total += labels.size(0)
        sys.stdout.write(' ' * 20 + '\r')
        sys.stdout.flush()

        print('train_loss:{}, train_acc:{:.2%}'.format(train_loss / total,
                                                       train_correct / total))
        val_loss = 0
        val_correct = 0
        total = 0
        net.training = False
        for data in val_loader:
            net.eval()
            inputs, labels = data
            inputs, labels = Variable(inputs).cuda(), labels.cuda()
            outputs = net(inputs)
            pred = torch.max(outputs.data, 1)[1]
            total += labels.size(0)
            loss = criterion(outputs, Variable(labels))
            val_loss += loss.item()
            val_correct += (pred == labels).sum().item()

        print('val_loss:{}, val_acc:{:.2%}'.format(val_loss / total,
                                                   val_correct / total))
        optimizer.param_groups[0]['lr'] *= np.exp(-0.4)
        if save_freq and epoch % save_freq == save_freq - 1:
            net_name = os.path.join(save_path, 'epoch_{}'.format(epoch))
            torch.save(net, net_name)
    torch.save(net, os.path.join(save_path, 'trained_net'))
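
The line optimizer.param_groups[0]['lr'] *= np.exp(-0.4) decays the learning rate by a fixed factor (about 0.67) once per epoch by mutating the optimizer in place. The same schedule can be expressed with a built-in scheduler; a sketch:

import numpy as np
import torch.optim as optim

# Equivalent exponential decay; call scheduler.step() once per epoch
# instead of editing param_groups by hand.
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=float(np.exp(-0.4)))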
Code Example #6
def load_encoder(obs_space, args, freeze=True):
    enc = Encoder(obs_space, args.dim,
                  use_conv=args.use_conv)
    enc_state = torch.load(args.dynamics_module, map_location=lambda storage,
                           loc: storage)['enc']
    enc.load_state_dict(enc_state)
    enc.eval()
    if freeze:
        for p in enc.parameters():
            p.requires_grad = False
    return enc
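
A usage sketch for the frozen encoder: with requires_grad disabled and eval() set, it acts as a fixed feature extractor (obs_space, args, and obs below are placeholders for objects from the surrounding training script):

enc = load_encoder(obs_space, args, freeze=True)
with torch.no_grad():      # no gradients flow through a frozen encoder
    features = enc(obs)    # obs: a batch of observations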
Code Example #7
    def __init__(self, encoder: Encoder, decoder: DecoderPythonCRF, entries: EntriesProcessor,
                 teacher_forcing_ratio=0.5, learning_rate=0.01,
                 max_input_length=40, max_output_length=20, device=None):
        self.encoder = encoder
        self.decoder = decoder
        self.entries = entries
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.encoder_optimizer = optim.Adam(encoder.parameters())
        self.decoder_optimizer = optim.Adam(decoder.parameters())
        self.max_input_length = max_input_length
        self.max_output_length = max_output_length
        if device is None:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = device
Code Example #8
class PretrainingTrainer:
    def __init__(self):
        self.preprocessor = None
        self.model = None
        self.optimizer = None

    def setup_preprocessed_data(self):
        self.preprocessor = Preprocess()
        self.preprocessor.setup()

    def setup_model(self):
        # Create multilingual vocabulary
        self.model = Encoder()

        if con.CUDA:
            self.model = self.model.cuda()

    def setup_scheduler_optimizer(self):
        lr_rate = 0.001
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=lr_rate,
                                    weight_decay=0)

    def train_model(self):
        train_loader = self.preprocessor.train_loaders
        batch_size = 8

        self.model.train()
        train_loss = 0
        batch_correct = 0
        total_correct = 0
        index = 0
        for hrl_src, lrl_src, hrl_att, lrl_att in train_loader:
            logits = self.model(hrl_src)
            print(logits.shape)
            break
            # self.optimizer.zero_grad()
            # batch_loss.backward()
            # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
            # self.optimizer.step()
            # batch_correct += self.evaluate(masked_outputs=masked_outputs, masked_lm_ids=masked_lm_ids)
            # total_correct += (8 * 20)

    def run_pretraining(self):
        self.setup_preprocessed_data()
        self.setup_model()
        self.setup_scheduler_optimizer()
        self.train_model()
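
train_model above is a debugging stub: it prints the shape of the first batch's logits and breaks before the commented-out optimization step. A sketch of what the completed step might look like, assuming a cross-entropy objective over the logits (the target tensor and loss are assumptions based on the commented lines, not the project's code):

criterion = torch.nn.CrossEntropyLoss()
for hrl_src, lrl_src, hrl_att, lrl_att in train_loader:
    logits = self.model(hrl_src)
    # Hypothetical target: predict the low-resource-language tokens.
    batch_loss = criterion(logits.view(-1, logits.size(-1)), lrl_src.view(-1))
    self.optimizer.zero_grad()
    batch_loss.backward()
    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
    self.optimizer.step()
    train_loss += batch_loss.item()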
Code Example #9
def main():
    args = parse_arguments()
    n_vocab = params.n_vocab
    n_layer = params.n_layer
    n_hidden = params.n_hidden
    n_embed = params.n_embed
    n_batch = args.n_batch
    temperature = params.temperature
    train_path = params.train_path
    assert torch.cuda.is_available()

    print("loading_data...")
    vocab = build_vocab(train_path, n_vocab)

    # save vocab
    with open('vocab.json', 'w') as fp:
        json.dump(vocab.stoi, fp)

    train_X, train_y, train_K = load_data(train_path, vocab)
    train_loader = get_data_loader(train_X, train_y, train_K, n_batch)
    print("successfully loaded")

    encoder = Encoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()
    Kencoder = KnowledgeEncoder(n_vocab, n_embed, n_hidden, n_layer,
                                vocab).cuda()
    manager = Manager(n_hidden, n_vocab, temperature).cuda()
    decoder = Decoder(n_vocab, n_embed, n_hidden, n_layer, vocab).cuda()

    if args.restore:
        encoder = init_model(encoder, restore=params.encoder_restore)
        Kencoder = init_model(Kencoder, restore=params.Kencoder_restore)
        manager = init_model(manager, restore=params.manager_restore)
        decoder = init_model(decoder, restore=params.decoder_restore)

    model = [encoder, Kencoder, manager, decoder]
    parameters = list(encoder.parameters()) + list(Kencoder.parameters()) + \
                 list(manager.parameters()) + list(decoder.parameters())
    optimizer = optim.Adam(parameters, lr=args.lr)

    # pre_train knowledge manager
    print("start pre-training")
    pre_train(model, optimizer, train_loader, args)
    print("start training")
    train(model, optimizer, train_loader, args)

    # save final model
    save_models(model, params.all_restore)
Code Example #10
File: main_eval.py Project: ryparmar/master-thesis
def instantiate_model(config, tokenizer):
    configure_devices(config)
    model = Model(config)
    optimizer = transformers.AdamW(model.parameters(),
                                   lr=config.learning_rate,
                                   weight_decay=0)
    last_epoch = 0
    epoch_avg_loss = 0
    if config.continue_training:
        state_dict = torch.load(config.continue_training, map_location='cpu')
        model.load_state_dict(state_dict['model'])
        if 'optimizer_state_dict' in state_dict:
            optimizer.load_state_dict(state_dict['optimizer_state_dict'])
        last_epoch = state_dict['epoch']
        # epoch_avg_loss = state_dict['loss']
        # del state_dict # TODO TEST
    if config.use_cuda:
        model = model.cuda()
        optimizer_to(optimizer, config.device)
        model = torch.nn.DataParallel(model, device_ids=config.devices)
    return model, optimizer, last_epoch, epoch_avg_loss
Code Example #11
File: main.py Project: parth-collab/icp-block-mdp
def main(args):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    args.work_dir = os.path.join(
        args.work_dir,
        args.domain_name + "_" + args.task_name,
        args.exp_name,
        str(args.seed),
    )
    os.makedirs(args.work_dir, exist_ok=True)
    with open(os.path.join(args.work_dir, "args.json"), "w") as f:
        json.dump(vars(args), f, sort_keys=True, indent=4)

    train_envs = [
        utils.make_env(np.random.randint(0, 255), args)
        for i in range(args.num_envs)
    ]
    eval_envs = [
        utils.make_env(np.random.randint(0, 255), args) for i in range(5)
    ]
    print("Train env backgrounds: ",
          [train_env.bg_color for train_env in train_envs])
    print("Eval env backgrounds: ",
          [eval_env.bg_color for eval_env in eval_envs])

    obs_shape = train_envs[0].observation_space.shape
    action_size = train_envs[0].action_space.shape[0]

    phi = Encoder(obs_shape, args.encoder_feature_dim).to(device)
    model = DynamicsModel(args.encoder_feature_dim, action_size).to(device)
    decoders = [
        Decoder(obs_shape, args.encoder_feature_dim).to(device)
        for i in range(args.num_envs)
    ]
    opt = torch.optim.Adam(list(phi.parameters()) + list(model.parameters()),
                           lr=args.lr)
    # Flatten all decoder parameters into one list; np.concatenate would try
    # to coerce the tensors into numpy arrays.
    decoder_opt = torch.optim.Adam(
        [p for decoder in decoders for p in decoder.parameters()],
        lr=args.lr)

    train_replay_buffer = utils.ReplayBuffer(
        obs_shape=train_envs[0].observation_space.shape,
        action_shape=train_envs[0].action_space.shape,
        capacity=args.replay_buffer_capacity,
        batch_size=args.batch_size,
        device=device,
    )
    eval_replay_buffer = utils.ReplayBuffer(
        obs_shape=train_envs[0].observation_space.shape,
        action_shape=train_envs[0].action_space.shape,
        capacity=args.replay_buffer_capacity,
        batch_size=args.batch_size,
        device=device,
    )

    logging_dict = {
        "model_error": [],
        "decoding_error": [],
        "eval_model_error": [],
        "steps": [],
    }

    # collect data across environments
    for env_id in range(args.num_envs):
        train_replay_buffer = utils.collect_random_data(
            train_envs[env_id],
            env_id,
            args.num_samples,
            train_replay_buffer,
            save_video=args.save_video,
        )
        eval_replay_buffer = utils.collect_random_data(eval_envs[env_id],
                                                       env_id,
                                                       args.num_samples,
                                                       eval_replay_buffer)

    # Train loop
    for iteration in range(args.num_iters):
        model_error = 0
        decoder_error = 0
        for i in range(args.num_envs):
            obses, actions, rewards, next_obses, not_dones = train_replay_buffer.sample(i)
            latent = phi(obses)
            pred_next_latent = model(latent, actions)
            true_next_latent = phi(next_obses).detach()
            error_e = F.mse_loss(pred_next_latent, true_next_latent)
            model_error += error_e

            if args.one_decoder:
                pred_next_obses = decoders[0](
                    pred_next_latent)  # only use one decoder
            else:
                pred_next_obses = decoders[i](pred_next_latent)
            decoder_error_e = F.mse_loss(pred_next_obses, next_obses)
            decoder_error += decoder_error_e

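        # decoder_error.backward() below runs a second backward pass through
        # the same graph (phi and model), so the first pass must keep the
        # graph alive via retain_graph=True.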
        opt.zero_grad()
        model_error.backward(retain_graph=True)
        opt.step()

        decoder_opt.zero_grad()
        decoder_error.backward()
        decoder_opt.step()
        if iteration % args.log_interval == 0:
            with torch.no_grad():
                logging_dict["steps"].append(iteration)
                logging_dict["model_error"].append(model_error.item())
                logging_dict["decoding_error"].append(decoder_error.item())
                print(
                    f"Iteration {iteration}: Mean train set model error: {model_error.item()}, decoding error: {decoder_error.item()}"
                )

                # Evaluate on test environment
                (
                    obses,
                    actions,
                    rewards,
                    next_obses,
                    not_dones,
                ) = eval_replay_buffer.sample()
                with torch.no_grad():
                    latent = phi(obses)
                    pred_next_latent = model(latent, actions)
                    true_next_latent = phi(next_obses).detach()
                    test_error = F.mse_loss(pred_next_latent, true_next_latent)
                logging_dict["eval_model_error"].append(test_error.item())
                print(f"Mean test set error: {test_error}")
            torch.save(logging_dict,
                       os.path.join(args.work_dir, "logging_dict.pt"))
Code Example #12
    for x in ['train', 'val']
}
dataloaders = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=32,
                                   shuffle=True)
    for x in ["train", "val"]
}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

encoder = Encoder(8).to(device)
model_ft = Decoder(2048, 8, 32, 64, 128, 31, 30).to(device)
criterion = nn.CrossEntropyLoss().to(device)
plist = [{
    'params': encoder.parameters(),
    'lr': 1e-5,
    "weight_decay": 1e-4
}, {
    'params': model_ft.parameters(),
    'lr': 1e-3,
    "weight_decay": 1e-4
}]

optimizer_ft = optim.Adam(plist)


def train_model(encoder, model, criterion, optimizer, num_epochs=50):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
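
The two dicts in plist give the pretrained encoder a much smaller learning rate (1e-5) than the freshly initialized decoder head (1e-3), a standard fine-tuning pattern; each dict becomes its own parameter group inside the optimizer. A quick way to confirm the groups:

for group in optimizer_ft.param_groups:
    print(len(group['params']), group['lr'], group['weight_decay'])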
Code Example #13
def train(args, logger):
    task_time = time.strftime("%Y-%m-%d %H:%M", time.localtime())
    Path("./saved_models/").mkdir(parents=True, exist_ok=True)
    Path("./pretrained_models/").mkdir(parents=True, exist_ok=True)
    MODEL_SAVE_PATH = './saved_models/'
    Pretrained_MODEL_PATH = './pretrained_models/'
    get_model_name = lambda part: f'{part}-{args.data}-{args.tasks}-{args.prefix}.pth'
    get_pretrain_model_name = lambda part: f'{part}-{args.data}-LP-{args.prefix}.pth'
    device_string = 'cuda:{}'.format(args.gpu) if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'
    print('Model training with ' + device_string)
    device = torch.device(device_string)
    


    g = load_graphs(f"./data/{args.data}.dgl")[0][0]
    
    efeat_dim = g.edata['feat'].shape[1]
    nfeat_dim = efeat_dim


    train_loader, val_loader, test_loader, num_val_samples, num_test_samples = dataloader(args, g)


    encoder = Encoder(args, nfeat_dim, n_head=args.n_head, dropout=args.dropout).to(device)
    decoder = Decoder(args, nfeat_dim).to(device)
    msg2mail = Msg2Mail(args, nfeat_dim)
    fraud_sampler = frauder_sampler(g)

    optimizer = torch.optim.Adam(list(encoder.parameters()) + list(decoder.parameters()), lr=args.lr, weight_decay=args.weight_decay)
    scheduler_lr = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=40)
    if args.warmup:
        scheduler_warmup = GradualWarmupScheduler(optimizer, multiplier=1, total_epoch=3, after_scheduler=scheduler_lr)
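        # A dummy zero_grad/step pair, presumably so the warmup scheduler's
        # first .step() does not trigger PyTorch's "scheduler stepped before
        # optimizer" warning (PyTorch >= 1.1).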
        optimizer.zero_grad()
        optimizer.step()
    loss_fcn = torch.nn.BCEWithLogitsLoss()

    loss_fcn = loss_fcn.to(device)

    early_stopper = EarlyStopMonitor(logger=logger, max_round=args.patience, higher_better=True)

    if args.pretrain:
        logger.info(f'Loading the linkpred pretrained attention based encoder model')
        encoder.load_state_dict(torch.load(Pretrained_MODEL_PATH+get_pretrain_model_name('Encoder')))

    for epoch in range(args.n_epoch):
        # reset node state
        g.ndata['mail'] = torch.zeros((g.num_nodes(), args.n_mail, nfeat_dim+2), dtype=torch.float32) 
        g.ndata['feat'] = torch.zeros((g.num_nodes(), nfeat_dim), dtype=torch.float32) # init as zero, people can init it using others.
        g.ndata['last_update'] = torch.zeros((g.num_nodes()), dtype=torch.float32) 
        encoder.train()
        decoder.train()
        start_epoch = time.time()
        m_loss = []
        logger.info('start {} epoch, current optim lr is {}'.format(epoch, optimizer.param_groups[0]['lr']))
        for batch_idx, (input_nodes, pos_graph, neg_graph, blocks, frontier, current_ts) in enumerate(train_loader):
            

            pos_graph = pos_graph.to(device)
            neg_graph = neg_graph.to(device) if neg_graph is not None else None
            

            if not args.no_time or not args.no_pos:
                current_ts, pos_ts, num_pos_nodes = get_current_ts(args, pos_graph, neg_graph)
                pos_graph.ndata['ts'] = current_ts
            else:
                current_ts, pos_ts, num_pos_nodes = None, None, None
            
            _ = dgl.add_reverse_edges(neg_graph) if neg_graph is not None else None
            emb, _ = encoder(dgl.add_reverse_edges(pos_graph), _, num_pos_nodes)
            if batch_idx != 0:
                if 'LP' not in args.tasks and args.balance:
                    neg_graph = fraud_sampler.sample_fraud_event(g, args.bs//5, current_ts.max().cpu()).to(device)
                logits, labels = decoder(emb, pos_graph, neg_graph)

                loss = loss_fcn(logits, labels)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                m_loss.append(loss.item())


            # MSG Passing
            with torch.no_grad():
                mail = msg2mail.gen_mail(args, emb, input_nodes, pos_graph, frontier, 'train')

                if not args.no_time:
                    g.ndata['last_update'][pos_graph.ndata[dgl.NID][:num_pos_nodes]] = pos_ts.to('cpu')
                g.ndata['feat'][pos_graph.ndata[dgl.NID]] = emb.to('cpu')
                g.ndata['mail'][input_nodes] = mail
            if batch_idx % 100 == 1:
                gpu_mem = torch.cuda.max_memory_allocated() / 1.074e9 if torch.cuda.is_available() and args.gpu >= 0 else 0
                torch.cuda.empty_cache()
                mem_perc = psutil.virtual_memory().percent
                cpu_perc = psutil.cpu_percent(interval=None)
                output_string = f'Epoch {epoch} | Step {batch_idx}/{len(train_loader)} | CPU {cpu_perc:.1f}% | Sys Mem {mem_perc:.1f}% | GPU Mem {gpu_mem:.4f}GB '
                
                output_string += f'| {args.tasks} Loss {np.mean(m_loss):.4f}'

                logger.info(output_string)

        total_epoch_time = time.time() - start_epoch
        logger.info(' training epoch: {} took {:.4f}s'.format(epoch, total_epoch_time))
        val_ap, val_auc, val_acc, val_loss = eval_epoch(args, logger, g, val_loader, encoder, decoder, msg2mail, loss_fcn, device, num_val_samples)
        logger.info('Val {} Task | ap: {:.4f} | auc: {:.4f} | acc: {:.4f} | Loss: {:.4f}'.format(args.tasks, val_ap, val_auc, val_acc, val_loss))

        if args.warmup:
            scheduler_warmup.step(epoch)
        else:
            scheduler_lr.step()

        early_stopper_metric = val_ap if 'LP' in args.tasks else val_auc

        if early_stopper.early_stop_check(early_stopper_metric):
            logger.info('No improvement over {} epochs, stop training'.format(early_stopper.max_round))
            logger.info(f'Loading the best model at epoch {early_stopper.best_epoch}')
            encoder.load_state_dict(torch.load(MODEL_SAVE_PATH+get_model_name('Encoder')))
            decoder.load_state_dict(torch.load(MODEL_SAVE_PATH+get_model_name('Decoder')))

            test_result = [early_stopper.best_ap, early_stopper.best_auc, early_stopper.best_acc, early_stopper.best_loss]
            break

        test_ap, test_auc, test_acc, test_loss = eval_epoch(args, logger, g, test_loader, encoder, decoder, msg2mail, loss_fcn, device, num_test_samples)
        logger.info('Test {} Task | ap: {:.4f} | auc: {:.4f} | acc: {:.4f} | Loss: {:.4f}'.format(args.tasks, test_ap, test_auc, test_acc, test_loss))
        test_result = [test_ap, test_auc, test_acc, test_loss]

        if early_stopper.best_epoch == epoch: 
            early_stopper.best_ap = test_ap
            early_stopper.best_auc = test_auc
            early_stopper.best_acc = test_acc
            early_stopper.best_loss = test_loss
            logger.info(f'Saving the best model at epoch {early_stopper.best_epoch}')
            torch.save(encoder.state_dict(), MODEL_SAVE_PATH+get_model_name('Encoder'))
            torch.save(decoder.state_dict(), MODEL_SAVE_PATH+get_model_name('Decoder'))
Code Example #14
File: train.py Project: 3secondz-lab/asurada
def main():
    global epochs_since_improvement, best_loss_tr

    encoder = Encoder()
    decoder = DecoderWithAttention(encoder_dim, lstm_input_dim, decoder_dim,
                                   attention_dim, output_dim)

    encoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, encoder.parameters()),
                                         lr=encoder_lr)
    decoder_optimizer = torch.optim.Adam(params=filter(
        lambda p: p.requires_grad, decoder.parameters()),
                                         lr=decoder_lr)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    trainLoader = torch.utils.data.DataLoader(Dataset(driver, circuit_tr,
                                                      curvatureLength,
                                                      historyLength,
                                                      predLength),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=workers,
                                              pin_memory=True)

    cMean_tr = trainLoader.dataset.cMean
    cStd_tr = trainLoader.dataset.cStd
    vMean_tr = trainLoader.dataset.vMean
    vStd_tr = trainLoader.dataset.vStd
    aMean_tr = trainLoader.dataset.aMean
    aStd_tr = trainLoader.dataset.aStd

    validLoader = torch.utils.data.DataLoader(Dataset(driver,
                                                      circuit_vl,
                                                      curvatureLength,
                                                      historyLength,
                                                      predLength,
                                                      cMean=cMean_tr,
                                                      cStd=cStd_tr,
                                                      vMean=vMean_tr,
                                                      vStd=vStd_tr,
                                                      aMean=aMean_tr,
                                                      aStd=aStd_tr),
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=workers,
                                              pin_memory=True)

    print('Training version.{} (A->V)'.format(vNumber))
    print('Training data ({} - {})'.format(driver, circuit_tr))
    print('Validation data ({} - {})'.format(driver, circuit_vl))
    print('curvature len {}'.format(curvatureLength))
    print('history len {}'.format(historyLength))
    print('pred len {}'.format(predLength))
    print('hiddenDimension {}'.format(hiddenDimension))

    print('\nTraining...\n')

    for epoch in tqdm(range(start_epoch, epochs)):

        loss, vMape, vRmse, vCorr, aCorr = train(
            trainLoader=trainLoader,
            encoder=encoder,
            decoder=decoder,
            criterion=criterion,
            encoder_optimizer=encoder_optimizer,
            decoder_optimizer=decoder_optimizer,
            epoch=epoch)

        writer.add_scalars('Loss', {'tr': loss}, epoch)
        writer.add_scalars('MAPE', {'tr': vMape}, epoch)
        writer.add_scalars('RMSE', {'tr': vRmse}, epoch)
        writer.add_scalars('vCorr', {'tr': vCorr}, epoch)
        writer.add_scalars('aCorr', {'tr': aCorr}, epoch)

        is_best = loss < best_loss_tr
        best_loss_tr = min(loss, best_loss_tr)
        if not is_best:
            epochs_since_improvement += 1
            print('\nEpoch {}: epochs since last improvement: {}\n'.format(
                epoch, epochs_since_improvement))
        else:
            epochs_since_improvement = 0

        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(epoch, encoder_optimizer, 0.8)
            adjust_learning_rate(epoch, decoder_optimizer, 0.8)

        if epoch % 5 == 0:
            loss_vl, vMape_vl, vRmse_vl, vCorr_vl, aCorr_vl = validate(
                validLoader=validLoader,
                encoder=encoder,
                decoder=decoder,
                criterion=criterion)
            writer.add_scalars('Loss', {'vl': loss_vl}, epoch)
            writer.add_scalars('MAPE', {'vl': vMape_vl}, epoch)
            writer.add_scalars('RMSE', {'vl': vRmse_vl}, epoch)
            writer.add_scalars('vCorr', {'vl': vCorr_vl}, epoch)
            writer.add_scalars('aCorr', {'vl': aCorr_vl}, epoch)

        if epoch % 10 == 0:
            save_checkpoint(chptFolderPath, encoder, decoder, epoch, cMean_tr,
                            cStd_tr, vMean_tr, vStd_tr, aMean_tr, aStd_tr,
                            curvatureLength, historyLength)
    writer.close()
Code Example #15
def train(config, encoder_in = None, decoder_in = None):
    
    train_data, word2index, tag2index, intent2index = preprocessing(config.file_path,config.max_length)
    
    if train_data is None:
        print("Please check your data or its path")
        return
    if encoder_in is not None:
        encoder = encoder_in
        decoder = decoder_in
    else:
        encoder = Encoder(len(word2index),config.embedding_size,config.hidden_size)
        decoder = Decoder(len(tag2index),len(intent2index),len(tag2index)//3,config.hidden_size*2)
        if USE_CUDA:
            encoder = encoder.cuda()
            decoder = decoder.cuda()

        encoder.init_weights()
        decoder.init_weights()

    loss_function_1 = nn.CrossEntropyLoss(ignore_index=0)
    loss_function_2 = nn.CrossEntropyLoss()
    enc_optim= optim.Adam(encoder.parameters(), lr=config.learning_rate)
    dec_optim = optim.Adam(decoder.parameters(),lr=config.learning_rate)
    
    for step in range(config.step_size):
        losses=[]
        for i, batch in enumerate(getBatch(config.batch_size,train_data)):
            x,y_1,y_2 = zip(*batch) # sin,sout,intent
            x = torch.cat(x)
            tag_target = torch.cat(y_1)
            intent_target = torch.cat(y_2)
            def pad_mask(t):
                # mask is 1 where the token id is 0 (i.e. padding)
                m = Variable(torch.ByteTensor(tuple(map(lambda s: s == 0, t.data))))
                return m.cuda() if USE_CUDA else m

            x_mask = torch.cat([pad_mask(t) for t in x]).view(config.batch_size, -1)
            y_1_mask = torch.cat([pad_mask(t) for t in tag_target]).view(config.batch_size, -1)

            encoder.zero_grad()
            decoder.zero_grad()

            output, hidden_c = encoder(x,x_mask)
            sos = Variable(torch.LongTensor([[word2index['<SOS>']] * config.batch_size]))
            start_decode = (sos.cuda() if USE_CUDA else sos).transpose(1, 0)

            tag_score, intent_score = decoder(start_decode,hidden_c,output,x_mask)

            loss_1 = loss_function_1(tag_score,tag_target.view(-1))
            loss_2 = loss_function_2(intent_score,intent_target)

            loss = loss_1+loss_2
            losses.append(loss.data.cpu().numpy() if USE_CUDA else loss.data.numpy())
            loss.backward()

            torch.nn.utils.clip_grad_norm_(encoder.parameters(), 5.0)
            torch.nn.utils.clip_grad_norm_(decoder.parameters(), 5.0)

            enc_optim.step()
            dec_optim.step()

            if i % 100 == 0:
                print("Step", step, " batch", i, " : ", np.mean(losses))
                losses=[]

        t = Check()
        t.test(encoder,decoder)
        count = t.test_error_count
        rate = t.test_error_rate

        if not os.path.exists(config.model_dir):
            os.makedirs(config.model_dir)

        torch.save(decoder, os.path.join(config.model_dir, str(count)+'_'+str(rate)+'_'+'decoder.pkl'))
        torch.save(encoder, os.path.join(config.model_dir, str(count)+'_'+str(rate)+'_'+'encoder.pkl'))
    
    # if not os.path.exists(config.model_dir):
    #     os.makedirs(config.model_dir)

    # torch.save(decoder.state_dict(),os.path.join(config.model_dir,'jointnlu-decoder.pkl'))
    # torch.save(encoder.state_dict(),os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    # torch.save(decoder,os.path.join(config.model_dir,'jointnlu-decoder.pkl'))
    # torch.save(encoder,os.path.join(config.model_dir, 'jointnlu-encoder.pkl'))
    print("Train Complete!")
Code Example #16
File: train.py Project: vichu259/image_captioing
def main(args):
    """
    Training and validation.
    """

    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    with open(args.vocab_path, 'rb') as f:
        word_map = pickle.load(f)

    # Initialize / load checkpoint
    if checkpoint is None:
        decoder = DecoderWithAttention(attention_dim=attention_dim,
                                       embed_dim=emb_dim,
                                       decoder_dim=decoder_dim,
                                       vocab_size=len(word_map),
                                       dropout=dropout)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=decoder_lr)
        encoder = Encoder()
        encoder.fine_tune(fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=encoder_lr) if fine_tune_encoder else None

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if fine_tune_encoder is True and encoder_optimizer is None:
            encoder.fine_tune(fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=encoder_lr)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    train_loader = get_loader(args.train_image_dir,
                              args.caption_path,
                              word_map,
                              transform,
                              args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    val_loader = get_loader(args.val_image_dir,
                            args.caption_path,
                            word_map,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    for epoch in range(start_epoch, epochs):
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        save_checkpoint(data_name, epoch, epochs_since_improvement, encoder,
                        decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
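
adjust_learning_rate is the standard helper from the image-captioning tutorial this script follows: it scales every parameter group's learning rate by the given factor (0.8 above). A minimal sketch under that assumption:

def adjust_learning_rate(optimizer, shrink_factor):
    # Multiply each param group's learning rate by shrink_factor.
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print("New learning rate: %f" % optimizer.param_groups[0]['lr'])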
Code Example #17
def train_dynamics(env, args, writer=None):
    """
    Trains the Dynamics module. Supervised.

    Arguments:
    env: the initialized environment (rllab/gym)
    args: input arguments
    writer: initialized summary writer for tensorboard
    """
    args.action_space = env.action_space

    # Initialize models
    enc = Encoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    dec = Decoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    d_module = D_Module(env.action_space.shape[0], args.dim, args.discrete)

    if args.from_checkpoint is not None:
        results_dict = torch.load(args.from_checkpoint)
        enc.load_state_dict(results_dict['enc'])
        dec.load_state_dict(results_dict['dec'])
        d_module.load_state_dict(results_dict['d_module'])

    # Materialize the parameter list: a chain generator would be exhausted by
    # the optimizer constructor, making the later clip_grad_norm_ a no-op.
    all_params = list(chain(enc.parameters(), dec.parameters(),
                            d_module.parameters()))

    if args.transfer:
        for p in enc.parameters():
            p.requires_grad = False

        for p in dec.parameters():
            p.requires_grad = False
        all_params = list(d_module.parameters())

    optimizer = torch.optim.Adam(all_params,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.gpu:
        enc = enc.cuda()
        dec = dec.cuda()
        d_module = d_module.cuda()

    # Initialize datasets
    val_loader = None
    train_dataset = DynamicsDataset(args.train_set,
                                    args.train_size,
                                    batch=args.train_batch,
                                    rollout=args.rollout)
    val_dataset = DynamicsDataset(args.test_set,
                                  5000,
                                  batch=args.test_batch,
                                  rollout=args.rollout)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)

    results_dict = {
        'dec_losses': [],
        'forward_losses': [],
        'inverse_losses': [],
        'total_losses': [],
        'enc': None,
        'dec': None,
        'd_module': None,
        'd_init': None,
        'args': args
    }

    total_action_taken = 0
    correct_predicted_a_hat = 0

    # create the mask here for re-weighting
    dec_mask = None
    if args.dec_mask is not None:
        dec_mask = torch.ones(9)
        game_vocab = dict([
            (b, a)
            for a, b in enumerate(sorted(env.game.all_possible_features()))
        ])
        dec_mask[game_vocab['Agent']] = args.dec_mask
        dec_mask[game_vocab['Goal']] = args.dec_mask
        dec_mask = dec_mask.expand(args.batch_size, args.maze_length,
                                   args.maze_length, 9).contiguous().view(-1)
        dec_mask = Variable(dec_mask, requires_grad=False)
        if args.gpu:
            dec_mask = dec_mask.cuda()

    for epoch in range(1, args.num_epochs + 1):
        enc.train()
        dec.train()
        d_module.train()

        if args.framework == "mazebase":
            d_init.train()

        # for measuring the accuracy
        train_acc = 0
        current_epoch_actions = 0
        current_epoch_predicted_a_hat = 0

        start = time.time()
        for i, (states, target_actions) in enumerate(train_loader):

            optimizer.zero_grad()

            if args.framework != "mazebase":
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
            else:
                forward_loss, inv_loss, dec_loss, recon_loss, model_loss, current_epoch_predicted_a_hat, current_epoch_actions = multiple_forward(
                    i, states, target_actions, enc, dec, d_module, args,
                    d_init, dec_mask)

            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                        args.dec_loss_coef * dec_loss

            if i % args.log_interval == 0:
                log(
                    'Epoch [{}/{}]\tIter [{}/{}]\t'.format(
                        epoch, args.num_epochs, i + 1,
                        len(train_dataset) // args.batch_size) + \
                    'Time: {:.2f}\t'.format(time.time() - start) + \
                    'Decoder Loss: {:.2f}\t'.format(dec_loss.item()) + \
                    'Forward Loss: {:.2f}\t'.format(forward_loss.item()) + \
                    'Inverse Loss: {:.2f}\t'.format(inv_loss.item()) + \
                    'Loss: {:.2f}\t'.format(loss.item()))

                results_dict['dec_losses'].append(dec_loss.item())
                results_dict['forward_losses'].append(forward_loss.item())
                results_dict['inverse_losses'].append(inv_loss.item())
                results_dict['total_losses'].append(loss.item())

                # write the summaries here
                if writer:
                    writer.add_scalar('dynamics/total_loss', loss.item(),
                                      epoch)
                    writer.add_scalar('dynamics/decoder', dec_loss.item(),
                                      epoch)
                    writer.add_scalar('dynamics/reconstruction_loss',
                                      recon_loss.item(), epoch)
                    writer.add_scalar('dynamics/next_state_prediction_loss',
                                      model_loss.item(), epoch)
                    writer.add_scalar('dynamics/inv_loss', inv_loss.item(),
                                      epoch)
                    writer.add_scalar('dynamics/forward_loss',
                                      forward_loss.item(), epoch)

                    writer.add_scalars(
                        'dynamics/all_losses', {
                            "total_loss": loss.item(),
                            "reconstruction_loss": recon_loss.item(),
                            "next_state_prediction_loss": model_loss.item(),
                            "decoder_loss": dec_loss.item(),
                            "inv_loss": inv_loss.item(),
                            "forward_loss": forward_loss.item(),
                        }, epoch)

            loss.backward()

            correct_predicted_a_hat += current_epoch_predicted_a_hat
            total_action_taken += current_epoch_actions

            # does it not work at all without grad clipping ?
            torch.nn.utils.clip_grad_norm_(all_params, args.max_grad_norm)
            optimizer.step()

            # maybe add the generated image to add the logs
            # writer.add_image()

        # Run validation
        if val_loader is not None:
            enc.eval()
            dec.eval()
            d_module.eval()
            forward_loss, inv_loss, dec_loss = 0, 0, 0
            for i, (states, target_actions) in enumerate(val_loader):
                f_loss, i_loss, d_loss, _, _, _, _ = forward_planning(
                    i, states, target_actions, enc, dec, d_module, args)
                forward_loss += f_loss
                inv_loss += i_loss
                dec_loss += d_loss
            loss = forward_loss + args.inv_loss_coef * inv_loss + \
                    args.dec_loss_coef * dec_loss
            if writer:
                writer.add_scalar('val/forward_loss', forward_loss.item() / i,
                                  epoch)
                writer.add_scalar('val/inverse_loss', inv_loss.item() / i,
                                  epoch)
                writer.add_scalar('val/decoder_loss', dec_loss.item() / i,
                                  epoch)
            log(
                '[Validation]\t' + \
                'Decoder Loss: {:.2f}\t'.format(dec_loss.item() / i) + \
                'Forward Loss: {:.2f}\t'.format(forward_loss.item() / i) + \
                'Inverse Loss: {:.2f}\t'.format(inv_loss.item() / i) + \
                'Loss: {:.2f}\t'.format(loss.item() / i))
        if epoch % args.checkpoint == 0:
            results_dict['enc'] = enc.state_dict()
            results_dict['dec'] = dec.state_dict()
            results_dict['d_module'] = d_module.state_dict()
            if args.framework == "mazebase":
                results_dict['d_init'] = d_init.state_dict()
            torch.save(
                results_dict,
                os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
            log('Saved model %s' % epoch)

    results_dict['enc'] = enc.state_dict()
    results_dict['dec'] = dec.state_dict()
    results_dict['d_module'] = d_module.state_dict()
    torch.save(results_dict,
               os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
    print(os.path.join(args.out, 'dynamics_module_epoch%s.pt' % epoch))
Code Example #18
def eval_reward(args, shared_model, writer_dir=None):
    """
	For evaluation

	Arguments:
	- writer: the tensorboard summary writer directory (note: can't get it working directly with the SummaryWriter object)
	"""
    writer = SummaryWriter(log_dir=os.path.join(
        writer_dir, 'eval')) if writer_dir is not None else None

    # current episode stats
    episode_reward = episode_value_mse = episode_td_error = episode_pg_loss = episode_length = 0

    # global stats
    i_episode = 0
    total_episode = total_steps = 0
    num_goals_achieved = 0

    # intilialize the env and models
    torch.manual_seed(args.seed)
    env = create_env(args.env_name, framework=args.framework, args=args)
    set_seed(args.seed, env, args.framework)

    shared_enc, shared_dec, shared_d_module, shared_r_module = shared_model

    enc = Encoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    dec = Decoder(env.observation_space.shape[0],
                  args.dim,
                  use_conv=args.use_conv)
    d_module = D_Module(env.action_space.shape[0], args.dim, args.discrete)
    r_module = R_Module(env.action_space.shape[0],
                        args.dim,
                        discrete=args.discrete,
                        baseline=False,
                        state_space=env.observation_space.shape[0])

    all_params = chain(enc.parameters(), dec.parameters(),
                       d_module.parameters(), r_module.parameters())

    if args.from_checkpoint is not None:
        model_state, _ = torch.load(args.from_checkpoint)
        model.load_state_dict(model_state)

    # set the model to evaluation mode
    enc.eval()
    dec.eval()
    d_module.eval()
    r_module.eval()

    # reset the state
    state = env.reset()
    state = Variable(torch.from_numpy(state).float())

    start = time.time()

    while total_episode < args.num_episodes:

        # Sync with the shared model
        r_module.load_state_dict(shared_r_module.state_dict())
        d_module.load_state_dict(shared_d_module.state_dict())
        enc.load_state_dict(shared_enc.state_dict())
        dec.load_state_dict(shared_dec.state_dict())

        # reset stuff
        cd_p = Variable(torch.zeros(1, args.lstm_dim))
        hd_p = Variable(torch.zeros(1, args.lstm_dim))

        # for the reward
        cr_p = Variable(torch.zeros(1, args.lstm_dim))
        hr_p = Variable(torch.zeros(1, args.lstm_dim))

        i_episode += 1
        episode_length = 0
        episode_reward = 0
        args.local = True
        args.d = 0
        succ, _, episode_reward, episode_length = test(1, args, args, args,
                                                       d_module, r_module, enc)
        log("Eval: succ {:.2f}, reward {:.2f}, length {:.2f}".format(
            succ, episode_reward, episode_length))
        # Episode has ended, write the summaries here
        if writer_dir is not None:
            # current episode stats
            writer.add_scalar('eval/episode_reward', episode_reward, i_episode)
            writer.add_scalar('eval/episode_length', episode_length, i_episode)
            writer.add_scalar('eval/success', succ, i_episode)

        time.sleep(args.eval_every)
        print("sleep")
Code Example #19
def main(args):

    # ==============================
    # Create some folders or files for saving
    # ==============================

    if not os.path.exists(args.root_folder):
        os.mkdir(args.root_folder)

    loss_path = args.loss_path
    mertics_path = args.mertics_path
    epoch_model_path = args.epoch_model_path
    best_model_path = args.best_model_path
    generated_captions_path = args.generated_captions_folder_path
    sentences_show_path = args.sentences_show_path

    # Transform the format of images
    # This function in utils.general_tools.py
    train_transform = get_train_transform()
    val_transform = get_val_trainsform()

    # Load vocabulary
    print("*** Load Vocabulary ***")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Create data sets
    # This function in data_load.py
    train_data = train_load(root=args.train_image_dir,
                            json=args.train_caption_path,
                            vocab=vocab,
                            transform=train_transform,
                            batch_size=args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    val_data = val_load(root=args.val_image_dir,
                        json=args.val_caption_path,
                        transform=val_transform,
                        batch_size=1,
                        shuffle=False,
                        num_workers=args.num_workers)

    # Build model
    encoder = Encoder(args.hidden_dim, args.fine_tuning).to(device)
    decoder = Decoder(args.embedding_dim, args.hidden_dim, vocab, len(vocab),
                      args.max_seq_length).to(device)

    # Select loss function
    criterion = nn.CrossEntropyLoss().to(device)

    if args.fine_tuning == True:
        params = list(decoder.parameters()) + list(encoder.parameters())
        optimizer = torch.optim.Adam(params, lr=args.fine_tuning_lr)
    else:
        params = decoder.parameters()
        optimizer = torch.optim.Adam(params, lr=args.fine_tuning_lr)

    # Load pretrained model
    if args.resume == True:
        checkpoint = torch.load(best_model_path)
        encoder.load_state_dict(checkpoint['encoder'])
        decoder.load_state_dict(checkpoint['decoder'])
        if args.fine_tuning == False:
            optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch'] + 1
        best_score = checkpoint['best_score']
        best_epoch = checkpoint['best_epoch']

    # New epoch and score
    else:
        start_epoch = 1
        best_score = 0
        best_epoch = 0

    for epoch in range(start_epoch, 10000):

        print("-" * 20)
        print("epoch:{}".format(epoch))

        # Shrink the learning rate whenever the gap between the current epoch
        # and the best epoch is a positive multiple of 4
        if (epoch - best_epoch) > 0 and (epoch - best_epoch) % 4 == 0:
            # This function in utils.general_tools.py
            adjust_lr(optimizer, args.shrink_factor)
        if (epoch - best_epoch) > 10:
            print("*** Training complete ***")
            break

        # =============
        # Training
        # =============

        print(" *** Training ***")
        decoder.train()
        encoder.train()
        total_step = len(train_data)
        epoch_loss = 0
        for (images, captions, lengths, img_ids) in tqdm(train_data):
            images = images.to(device)
            captions = captions.to(device)
            # Trim lengths by 1 and slice captions from index 1 because the
            # leading <start> symbol is fed to the decoder but never predicted.
            lengths = list(np.array(lengths) - 1)

            targets = pack_padded_sequence(captions[:, 1:],
                                           lengths,
                                           batch_first=True)[0]
            features = encoder(images)
            predictions = decoder(features, captions, lengths)
            predictions = pack_padded_sequence(predictions,
                                               lengths,
                                               batch_first=True)[0]

            loss = criterion(predictions, targets)
            epoch_loss += loss.item()
            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

        # Save loss information
        # This function is in utils.save_tools.py
        save_loss(round(epoch_loss / total_step, 3), epoch, loss_path)

        # =============
        # Evaluating
        # =============

        print("*** Evaluating ***")
        encoder.eval()
        decoder.eval()
        generated_captions = []
        for image, img_id in tqdm(val_data):

            image = image.to(device)
            img_id = img_id[0]

            features = encoder(image)
            sentence = decoder.generate(features)
            sentence = ' '.join(sentence)
            item = {'image_id': int(img_id), 'caption': sentence}
            generated_captions.append(item)

        print('*** Computing metrics ***')

        # Save the current generated captions
        # This function is in utils.save_tools.py
        captions_json_path = save_generated_captions(generated_captions, epoch,
                                                     generated_captions_path,
                                                     args.fine_tuning)

        # Compute metric scores
        # This function is in utils.general_tools.py
        results = coco_metrics(args.val_caption_path, captions_json_path,
                               epoch, sentences_show_path)

        # Save metric results
        # This function is in utils.save_tools.py
        epoch_score = save_metrics(results, epoch, metrics_path)

        # Update the best score
        if best_score < epoch_score:

            best_score = epoch_score
            best_epoch = epoch

            save_best_model(encoder, decoder, optimizer, epoch, best_score,
                            best_epoch, best_model_path)

        print("*** Best score:{} Best epoch:{} ***".format(
            best_score, best_epoch))
        # Save every epoch model
        save_epoch_model(encoder, decoder, optimizer, epoch, best_score,
                         best_epoch, epoch_model_path, args.fine_tuning)
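The lengths-minus-one bookkeeping in the training loop above is easy to get wrong, so here is a minimal, self-contained sketch (toy tensors only, nothing from the code above) of how dropping <start> keeps the packed predictions and targets aligned:

import torch
from torch.nn.utils.rnn import pack_padded_sequence

# Two padded captions: <start>=1, <end>=2, <pad>=0
captions = torch.tensor([[1, 5, 6, 2],    # true length 4
                         [1, 7, 2, 0]])   # true length 3
lengths = [4, 3]

# Drop <start>: the decoder never predicts it, so targets and lengths
# both shift down by one
decode_lengths = [l - 1 for l in lengths]            # [3, 2]
targets = pack_padded_sequence(captions[:, 1:], decode_lengths,
                               batch_first=True)[0]
print(targets)  # tensor([5, 7, 6, 2, 2]) -- time-major, padding removed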
コード例 #20
0
def main(args):

    #create a writer
    writer = SummaryWriter('loss_plot_' + args.mode, comment='test')
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Image preprocessing, normalization for the pretrained resnet
    transform = T.Compose([
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    val_length = len(os.listdir(args.image_dir_val))

    # Build data loader
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    data_loader_val = get_loader(args.image_dir_val,
                                 args.caption_path_val,
                                 vocab,
                                 transform,
                                 args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers)

    # Build the model
    # if no-attention model is chosen:
    if args.model_type == 'no_attention':
        encoder = Encoder(args.embed_size).to(device)
        decoder = Decoder(args.embed_size, args.hidden_size, len(vocab),
                          args.num_layers).to(device)
        criterion = nn.CrossEntropyLoss()

    # if attention model is chosen:
    elif args.model_type == 'attention':
        encoder = EncoderAtt(encoded_image_size=9).to(device)
        decoder = DecoderAtt(vocab, args.encoder_dim, args.hidden_size,
                             args.attention_dim, args.embed_size,
                             args.dropout_ratio, args.alpha_c).to(device)

    # if transformer model is chosen:
    elif args.model_type == 'transformer':
        model = Transformer(len(vocab), args.embed_size,
                            args.transformer_layers, 8,
                            args.dropout_ratio).to(device)

        encoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, model.encoder.parameters()),
                                             lr=args.learning_rate_enc)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, model.decoder.parameters()),
                                             lr=args.learning_rate_dec)
        criterion = nn.CrossEntropyLoss(ignore_index=vocab.word2idx['<pad>'])

    else:
        raise ValueError(
            'model_type must be attention, no_attention or transformer')

    # For non-transformer models, build the optimizers here; encoder.fine_tune
    # freezes the lower ResNet layers unless args.fine_tune is set
    if args.model_type != 'transformer':
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=args.learning_rate_dec)
        encoder.fine_tune(args.fine_tune)
        encoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, encoder.parameters()),
                                             lr=args.learning_rate_enc)

    # initialize lists to store results:
    loss_train = []
    loss_val = []
    loss_val_epoch = []
    loss_train_epoch = []

    bleu_res_list = []
    cider_res_list = []
    rouge_res_list = []

    results = {}

    # calculate total steps for train and validation
    total_step = len(data_loader)
    total_step_val = len(data_loader_val)

    #For each epoch
    for epoch in tqdm(range(args.num_epochs)):

        loss_val_iter = []
        loss_train_iter = []

        # set model to train mode
        if args.model_type != 'transformer':
            encoder.train()
            decoder.train()
        else:
            model.train()

        # for each entry in data_loader
        for i, (images, captions, lengths) in tqdm(enumerate(data_loader)):
            # load images and captions to device
            images = images.to(device)
            captions = captions.to(device)
            # Forward, backward and optimize.
            # The forward and backward passes differ by model type:
            if args.model_type == 'no_attention':
                # get features from encoder
                features = encoder(images)
                # pack the padded targets
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]
                # get output from decoder
                outputs = decoder(features, captions, lengths)
                # calculate loss
                loss = criterion(outputs, targets)

                # optimizer and backward step
                decoder_optimizer.zero_grad()
                encoder_optimizer.zero_grad()
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()

            elif args.model_type == 'attention':

                # get features from encoder
                features = encoder(images)

                # Targets start from the 2nd word of each caption: the model
                # is not sequential and predicts all positions in parallel,
                # so the first word (<start>) never needs to be predicted

                targets = captions[:, 1:]
                # decode length = length-1 for each caption
                decode_lengths = [length - 1 for length in lengths]
                #flatten targets
                targets = targets.reshape(targets.shape[0] * targets.shape[1])

                # get scores and alphas from decoder
                scores, alphas = decoder(features, captions, decode_lengths)

                scores = scores.view(-1, scores.shape[-1])

                #predicted = prediction with maximum score
                _, predicted = torch.max(scores, dim=1)

                # calculate loss
                loss = decoder.loss(scores, targets, alphas)

                # optimizer and backward step
                decoder_optimizer.zero_grad()
                encoder_optimizer.zero_grad()
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()

            elif args.model_type == 'transformer':

                # input is captions without last word
                trg_input = captions[:, :-1]
                # create mask
                trg_mask = create_masks(trg_input)

                # get scores from model
                scores = model(images, trg_input, trg_mask)
                scores = scores.view(-1, scores.shape[-1])

                # targets start from the 2nd word of each caption
                targets = captions[:, 1:]

                #predicted = prediction with maximum score
                _, predicted = torch.max(scores, dim=1)

                # calculate loss
                loss = criterion(
                    scores,
                    targets.reshape(targets.shape[0] * targets.shape[1]))

                # backward and optimize
                decoder_optimizer.zero_grad()
                encoder_optimizer.zero_grad()
                loss.backward()
                decoder_optimizer.step()
                encoder_optimizer.step()

            else:
                print('model_type must be attention, no_attention or transformer')

            # append results to loss lists and writer
            loss_train_iter.append(loss.item())
            loss_train.append(loss.item())
            writer.add_scalar('Loss/train/iterations', loss.item(), i + 1)

            # Print log info
            if i % args.log_step == 0:
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args.num_epochs, i, total_step, loss.item(),
                            np.exp(loss.item())))

        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'.
              format(epoch, args.num_epochs, i, total_step, loss.item(),
                     np.exp(loss.item())))

        #append mean of last 10 batches as approximate epoch loss
        loss_train_epoch.append(np.mean(loss_train_iter[-10:]))

        writer.add_scalar('Loss/train/epoch', np.mean(loss_train_iter[-10:]),
                          epoch + 1)

        #save model
        if args.model_type != 'transformer':
            torch.save(
                decoder.state_dict(),
                os.path.join(
                    args.model_path,
                    'decoder_' + args.mode + '_{}.ckpt'.format(epoch + 1)))
            torch.save(
                encoder.state_dict(),
                os.path.join(
                    args.model_path,
                    'encoder_' + args.mode + '_{}.ckpt'.format(epoch + 1)))

        else:
            torch.save(
                model.state_dict(),
                os.path.join(
                    args.model_path,
                    'model_' + args.mode + '_{}.ckpt'.format(epoch + 1)))
        np.save(
            os.path.join(args.predict_json,
                         'loss_train_temp_' + args.mode + '.npy'), loss_train)

        #validate model:
        # set model to eval mode:
        if args.model_type != 'transformer':
            encoder.eval()
            decoder.eval()
        else:
            model.eval()
        total_step = len(data_loader_val)

        # set no_grad mode:
        with torch.no_grad():
            # for each entry in data_loader
            for i, (images, captions,
                    lengths) in tqdm(enumerate(data_loader_val)):
                images = images.to(device)
                captions = captions.to(device)
                # build targets after moving captions to the same device
                # as the model outputs
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                # the forward pass differs by model type:
                if args.model_type == 'no_attention':
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)
                    loss = criterion(outputs, targets)

                elif args.model_type == 'attention':

                    features = encoder(images)
                    targets = captions[:, 1:]
                    decode_lengths = [length - 1 for length in lengths]
                    targets = targets.reshape(targets.shape[0] *
                                              targets.shape[1])

                    scores, alphas = decoder(features, captions,
                                             decode_lengths)

                    _, predicted = torch.max(scores, dim=1)

                    scores = scores.view(-1, scores.shape[-1])

                    loss = decoder.loss(scores, targets, alphas)

                elif args.model_type == 'transformer':

                    trg_input = captions[:, :-1]
                    trg_mask = create_masks(trg_input)
                    scores = model(images, trg_input, trg_mask)
                    scores = scores.view(-1, scores.shape[-1])
                    targets = captions[:, 1:]

                    _, predicted = torch.max(scores, dim=1)

                    loss = criterion(
                        scores,
                        targets.reshape(targets.shape[0] * targets.shape[1]))

                #display results
                if i % args.log_step == 0:
                    print(
                        'Epoch [{}/{}], Step [{}/{}], Validation Loss: {:.4f}, Validation Perplexity: {:5.4f}'
                        .format(epoch, args.num_epochs, i, total_step_val,
                                loss.item(), np.exp(loss.item())))

                # append results to loss lists and writer
                loss_val.append(loss.item())
                loss_val_iter.append(loss.item())

                writer.add_scalar('Loss/validation/iterations', loss.item(),
                                  i + 1)

        np.save(
            os.path.join(args.predict_json, 'loss_val_' + args.mode + '.npy'),
            loss_val)

        print(
            'Epoch [{}/{}], Step [{}/{}], Validation Loss: {:.4f}, Validation Perplexity: {:5.4f}'
            .format(epoch, args.num_epochs, i, total_step_val, loss.item(),
                    np.exp(loss.item())))

        # record the epoch validation loss
        loss_val_epoch.append(np.mean(loss_val_iter))
        writer.add_scalar('Loss/validation/epoch', np.mean(loss_val_iter),
                          epoch + 1)

        #predict captions:
        filenames = os.listdir(args.image_dir_val)

        predicted = {}

        for file in tqdm(filenames):
            if file == '.DS_Store':
                continue
            # Prepare an image
            image = load_image(os.path.join(args.image_dir_val, file),
                               transform)
            image_tensor = image.to(device)

            # Generate caption starting with <start> word

            # procedure is different for each model type
            if args.model_type == 'attention':

                features = encoder(image_tensor)
                sampled_ids, _ = decoder.sample(features)
                sampled_ids = sampled_ids[0].cpu().numpy()
                #start sampled_caption with <start>
                sampled_caption = ['<start>']

            elif args.model_type == 'no_attention':
                features = encoder(image_tensor)
                sampled_ids = decoder.sample(features)
                sampled_ids = sampled_ids[0].cpu().numpy()
                sampled_caption = ['<start>']

            elif args.model_type == 'transformer':

                e_outputs = model.encoder(image_tensor)
                max_seq_length = 20
                sampled_ids = torch.zeros(max_seq_length,
                                          dtype=torch.long,
                                          device=device)
                sampled_ids[0] = vocab.word2idx['<start>']

                for i in range(1, max_seq_length):

                    trg_mask = np.triu(np.ones((1, i, i)), k=1).astype('uint8')
                    # Variable is deprecated; tensors work directly here
                    trg_mask = (torch.from_numpy(trg_mask) == 0).to(device)

                    out = model.decoder(sampled_ids[:i].unsqueeze(0),
                                        e_outputs, trg_mask)

                    out = model.out(out)
                    out = F.softmax(out, dim=-1)
                    val, ix = out[:, -1].data.topk(1)
                    sampled_ids[i] = ix[0][0]

                sampled_ids = sampled_ids.cpu().numpy()
                sampled_caption = []

            # Convert word_ids to words
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                sampled_caption.append(word)
                # break at <end> of the sentence
                if word == '<end>':
                    break
            sentence = ' '.join(sampled_caption)

            predicted[file] = sentence

        # save predictions to json file:
        json.dump(
            predicted,
            open(
                os.path.join(
                    args.predict_json,
                    'predicted_' + args.mode + '_' + str(epoch) + '.json'),
                'w'))

        # score the generated captions against the ground-truth annotations
        with open(args.caption_path_val, 'r') as file:
            captions = json.load(file)

        res = {}
        for r in predicted:
            # str.strip removes a character set, not a prefix/suffix,
            # so use replace to drop the special tokens
            res[r] = [predicted[r].replace('<start> ', '').replace(' <end>', '')]

        images = captions['images']
        caps = captions['annotations']
        gts = {}
        for image in images:
            image_id = image['id']
            file_name = image['file_name']
            list_cap = []
            for cap in caps:
                if cap['image_id'] == image_id:
                    list_cap.append(cap['caption'])
            gts[file_name] = list_cap

        # calculate BLEU, CIDEr and ROUGE metrics from the reference and generated captions
        bleu_res = bleu(gts, res)
        cider_res = cider(gts, res)
        rouge_res = rouge(gts, res)

        # append results to the result lists
        bleu_res_list.append(bleu_res)
        cider_res_list.append(cider_res)
        rouge_res_list.append(rouge_res)

        # write results to writer
        writer.add_scalar('BLEU1/validation/epoch', bleu_res[0], epoch + 1)
        writer.add_scalar('BLEU2/validation/epoch', bleu_res[1], epoch + 1)
        writer.add_scalar('BLEU3/validation/epoch', bleu_res[2], epoch + 1)
        writer.add_scalar('BLEU4/validation/epoch', bleu_res[3], epoch + 1)
        writer.add_scalar('CIDEr/validation/epoch', cider_res, epoch + 1)
        writer.add_scalar('ROUGE/validation/epoch', rouge_res, epoch + 1)

    results['bleu'] = bleu_res_list
    results['cider'] = cider_res_list
    results['rouge'] = rouge_res_list

    json.dump(
        results,
        open(os.path.join(args.predict_json, 'results_' + args.mode + '.json'),
             'w'))
    np.save(
        os.path.join(args.predict_json, 'loss_train_' + args.mode + '.npy'),
        loss_train)
    np.save(os.path.join(args.predict_json, 'loss_val_' + args.mode + '.npy'),
            loss_val)
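create_masks is a helper defined elsewhere in this project; judging from the sampling loop above, its core ingredient is a causal (no-peek) mask over the target sequence. A minimal sketch of that part alone, using the same np.triu idiom:

import numpy as np
import torch

def subsequent_mask(size):
    # Ones above the diagonal mark "future" positions; == 0 flips the
    # matrix into a keep-mask over positions a step may attend to
    mask = np.triu(np.ones((1, size, size)), k=1).astype('uint8')
    return torch.from_numpy(mask) == 0

print(subsequent_mask(3))
# tensor([[[ True, False, False],
#          [ True,  True, False],
#          [ True,  True,  True]]])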
コード例 #21
0
ファイル: main.py プロジェクト: yugenlgy/MultiRD
def main(epoch_num, batch_size, verbose, UNSEEN, SEEN, MODE):
    [
        hownet_file, sememe_file, word_index_file, word_vector_file,
        dictionary_file, word_cilinClass_file
    ] = [
        'hownet.json', 'sememe.json', 'word_index.json', 'word_vector.npy',
        'dictionary_sense.json', 'word_cilinClass.json'
    ]
    word2index, index2word, word2vec, sememe_num, label_size, label_size_chara, word_defi_idx_all = load_data(
        hownet_file, sememe_file, word_index_file, word_vector_file,
        dictionary_file, word_cilinClass_file)
    (word_defi_idx_TrainDev, word_defi_idx_seen, word_defi_idx_test2000,
     word_defi_idx_test200, word_defi_idx_test272) = word_defi_idx_all
    index2word = np.array(index2word)
    length = len(word_defi_idx_TrainDev)
    valid_dataset = MyDataset(word_defi_idx_TrainDev[int(0.9 * length):])
    test_dataset = MyDataset(word_defi_idx_test2000 + word_defi_idx_test200 +
                             word_defi_idx_test272)
    if SEEN:
        mode = 'S_' + MODE
        print('*METHOD: Seen defi.')
        print('*TRAIN: [Train + allSeen(2000+200+272)]')
        print('*TEST: [2000rand1 + 200desc + 272desc]')
        train_dataset = MyDataset(word_defi_idx_TrainDev[:int(0.9 * length)] +
                                  word_defi_idx_seen)
    elif UNSEEN:
        mode = 'U_' + MODE
        print('*METHOD: Unseen All words and defi.')
        print('*TRAIN: [Train]')
        print('*TEST: [2000rand1 + 200desc + 272desc]')
        train_dataset = MyDataset(word_defi_idx_TrainDev[:int(0.9 * length)])
    print('*MODE: [%s]' % mode)

    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   collate_fn=my_collate_fn)
    valid_dataloader = torch.utils.data.DataLoader(valid_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   collate_fn=my_collate_fn)
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=my_collate_fn_test)

    print('Train dataset: ', len(train_dataset))
    print('Valid dataset: ', len(valid_dataset))
    print('Test dataset: ', len(test_dataset))
    word_defi_idx = word_defi_idx_TrainDev + word_defi_idx_seen

    wd2sem = word2sememe(word_defi_idx, len(word2index), sememe_num)
    wd_sems = label_multihot(wd2sem, sememe_num)
    wd_sems = torch.from_numpy(np.array(wd_sems[:label_size])).to(device)
    wd_POSs = label_multihot(word2POS(word_defi_idx, len(word2index), 13), 13)
    wd_POSs = torch.from_numpy(np.array(wd_POSs[:label_size])).to(device)
    wd_charas = label_multihot(
        word2chara(word_defi_idx, len(word2index), label_size_chara),
        label_size_chara)
    wd_charas = torch.from_numpy(np.array(wd_charas[:label_size])).to(device)
    wd2Cilin1 = word2Cn(word_defi_idx, len(word2index), 'C1', 13)
    wd_C1 = label_multihot(wd2Cilin1, 13)  # Cilin class counts per level: 13, 96, 1426, 4098
    wd_C1 = torch.from_numpy(np.array(wd_C1[:label_size])).to(device)
    wd_C2 = label_multihot(word2Cn(word_defi_idx, len(word2index), 'C2', 96),
                           96)
    wd_C2 = torch.from_numpy(np.array(wd_C2[:label_size])).to(device)
    wd_C3 = label_multihot(word2Cn(word_defi_idx, len(word2index), 'C3', 1426),
                           1426)
    wd_C3 = torch.from_numpy(np.array(wd_C3[:label_size])).to(device)
    wd_C4 = label_multihot(word2Cn(word_defi_idx, len(word2index), 'C4', 4098),
                           4098)
    wd_C4 = torch.from_numpy(np.array(wd_C4[:label_size])).to(device)
    '''wd2Cilin = word2Cn(word_defi_idx, len(word2index), 'C', 5633)
    wd_C0 = label_multihot(wd2Cilin, 5633) 
    wd_C0 = torch.from_numpy(np.array(wd_C0[:label_size])).to(device)
    wd_C = [wd_C1, wd_C2, wd_C3, wd_C4, wd_C0]
    '''
    wd_C = [wd_C1, wd_C2, wd_C3, wd_C4]
    # ---------- mask for words that have no sememes
    print('calculating mask of words with no sememes...')
    mask_s = torch.zeros(label_size, dtype=torch.float32, device=device)
    for i in range(label_size):
        sems = set(wd2sem[i].detach().cpu().numpy().tolist()) - set(
            [sememe_num])
        if len(sems) == 0:
            mask_s[i] = 1

    mask_c = torch.zeros(label_size, dtype=torch.float32, device=device)
    for i in range(label_size):
        cc = set(wd2Cilin1[i].detach().cpu().numpy().tolist()) - set([13])
        if len(cc) == 0:
            mask_c[i] = 1

    model = Encoder(vocab_size=len(word2index),
                    embed_dim=word2vec.shape[1],
                    hidden_dim=200,
                    layers=1,
                    class_num=label_size,
                    sememe_num=sememe_num,
                    chara_num=label_size_chara)
    model.embedding.weight.data = torch.from_numpy(word2vec)
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam
    best_valid_accu = 0
    DEF_UPDATE = True
    for epoch in range(epoch_num):
        print('epoch: ', epoch)
        model.train()
        train_loss = 0
        label_list = list()
        pred_list = list()
        for words_t, sememes_t, definition_words_t, POS_t, sememes, POSs, charas_t, C, C_t in tqdm(
                train_dataloader, disable=verbose):
            optimizer.zero_grad()
            loss, _, indices = model('train',
                                     x=definition_words_t,
                                     w=words_t,
                                     ws=wd_sems,
                                     wP=wd_POSs,
                                     wc=wd_charas,
                                     wC=wd_C,
                                     msk_s=mask_s,
                                     msk_c=mask_c,
                                     mode=MODE)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
            predicted = indices[:, :100].detach().cpu().numpy().tolist()
            train_loss += loss.item()
            label_list.extend(words_t.detach().cpu().numpy())
            pred_list.extend(predicted)
        train_accu_1, train_accu_10, train_accu_100 = evaluate(
            label_list, pred_list)
        del label_list
        del pred_list
        gc.collect()
        print('train_loss: ', train_loss / len(train_dataset))
        print('train_accu(1/10/100): %.2f %.2f %.2f' %
              (train_accu_1, train_accu_10, train_accu_100))
        model.eval()
        with torch.no_grad():
            valid_loss = 0
            label_list = []
            pred_list = []
            for words_t, sememes_t, definition_words_t, POS_t, sememes, POSs, charas_t, C, C_t in tqdm(
                    valid_dataloader, disable=verbose):
                loss, _, indices = model('train',
                                         x=definition_words_t,
                                         w=words_t,
                                         ws=wd_sems,
                                         wP=wd_POSs,
                                         wc=wd_charas,
                                         wC=wd_C,
                                         msk_s=mask_s,
                                         msk_c=mask_c,
                                         mode=MODE)
                predicted = indices[:, :100].detach().cpu().numpy().tolist()
                valid_loss += loss.item()
                label_list.extend(words_t.detach().cpu().numpy())
                pred_list.extend(predicted)
            valid_accu_1, valid_accu_10, valid_accu_100 = evaluate(
                label_list, pred_list)
            print('valid_loss: ', valid_loss / len(valid_dataset))
            print('valid_accu(1/10/100): %.2f %.2f %.2f' %
                  (valid_accu_1, valid_accu_10, valid_accu_100))
            del label_list
            del pred_list
            gc.collect()

            if valid_accu_10 > best_valid_accu:
                best_valid_accu = valid_accu_10
                print('-----best_valid_accu-----')
                #torch.save(model, 'saved.model')
                label_list = []
                pred_list = []
                for words_t, definition_words_t in tqdm(test_dataloader,
                                                        disable=verbose):
                    indices = model('test',
                                    x=definition_words_t,
                                    w=words_t,
                                    ws=wd_sems,
                                    wP=wd_POSs,
                                    wc=wd_charas,
                                    wC=wd_C,
                                    msk_s=mask_s,
                                    msk_c=mask_c,
                                    mode=MODE)
                    predicted = indices[:, :1000].detach().cpu().numpy(
                    ).tolist()
                    label_list.extend(words_t.detach().cpu().numpy())
                    pred_list.extend(predicted)
                test_accu_1, test_accu_10, test_accu_100, median, variance = evaluate_test(
                    label_list, pred_list)
                print('test_accu(1/10/100): %.2f %.2f %.2f %.1f %.2f' %
                      (test_accu_1, test_accu_10, test_accu_100, median,
                       variance))
                if epoch > 10:
                    json.dump((index2word[label_list]).tolist(),
                              open(mode + '_label_list.json', 'w'))
                    json.dump((index2word[np.array(pred_list)]).tolist(),
                              open(mode + '_pred_list.json', 'w'))
                del label_list
                del pred_list
                gc.collect()
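label_multihot, word2sememe and the other helpers above come from this project. From the call sites (index lists in, fixed-width vectors out, with the last index reserved as a "none" slot) a plausible minimal version of label_multihot looks like this — an illustrative sketch, not the repository's code:

import numpy as np

def label_multihot(labels, num_classes):
    # labels: one list of class indices per word; an index equal to
    # num_classes is treated as the "no label" padding slot and skipped
    out = np.zeros((len(labels), num_classes), dtype=np.float32)
    for row, idxs in enumerate(labels):
        for idx in idxs:
            if idx < num_classes:
                out[row, idx] = 1.0
    return out

print(label_multihot([[0, 2], [3]], 4))
# [[1. 0. 1. 0.]
#  [0. 0. 0. 1.]]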
コード例 #22
0
ファイル: train.py プロジェクト: v-juma1/textent
def train(description_db, entity_db, word_vocab, entity_vocab,
          target_entity_vocab, out_file, embeddings, dim_size, batch_size,
          negative, epoch, optimizer, max_text_len, max_entity_len, pool_size,
          seed, save, **model_params):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    word_matrix = np.random.uniform(low=-0.05,
                                    high=0.05,
                                    size=(word_vocab.size, dim_size))
    word_matrix = np.vstack([np.zeros(dim_size),
                             word_matrix]).astype('float32')

    entity_matrix = np.random.uniform(low=-0.05,
                                      high=0.05,
                                      size=(entity_vocab.size, dim_size))
    entity_matrix = np.vstack([np.zeros(dim_size),
                               entity_matrix]).astype('float32')

    target_entity_matrix = np.random.uniform(low=-0.05,
                                             high=0.05,
                                             size=(target_entity_vocab.size,
                                                   dim_size))
    target_entity_matrix = np.vstack(
        [np.zeros(dim_size), target_entity_matrix]).astype('float32')

    for embedding in embeddings:
        for word in word_vocab:
            vec = embedding.get_word_vector(word)
            if vec is not None:
                word_matrix[word_vocab.get_index(word)] = vec

        for title in entity_vocab:
            vec = embedding.get_entity_vector(title)
            if vec is not None:
                entity_matrix[entity_vocab.get_index(title)] = vec

        for title in target_entity_vocab:
            vec = embedding.get_entity_vector(title)
            if vec is not None:
                target_entity_matrix[target_entity_vocab.get_index(
                    title)] = vec

    entity_negatives = np.arange(1, target_entity_matrix.shape[0])

    model_params.update(dict(dim_size=dim_size))
    model = Encoder(word_embedding=word_matrix,
                    entity_embedding=entity_matrix,
                    target_entity_embedding=target_entity_matrix,
                    word_vocab=word_vocab,
                    entity_vocab=entity_vocab,
                    target_entity_vocab=target_entity_vocab,
                    **model_params)

    del word_matrix
    del entity_matrix
    del target_entity_matrix

    model = model.cuda()

    model.train()
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer_ins = getattr(optim, optimizer)(parameters)

    n_correct = 0
    n_total = 0
    cur_correct = 0
    cur_total = 0
    cur_loss = 0.0

    batch_idx = 0

    joblib.dump(
        dict(model_params=model_params,
             word_vocab=word_vocab.serialize(),
             entity_vocab=entity_vocab.serialize(),
             target_entity_vocab=target_entity_vocab.serialize()),
        out_file + '.pkl')

    if not save or 0 in save:
        state_dict = model.state_dict()
        torch.save(state_dict, out_file + '_epoch0.bin')

    for n_epoch in range(1, epoch + 1):
        logger.info('Epoch: %d', n_epoch)

        for (batch_idx, (args, target)) in enumerate(
                generate_data(description_db, word_vocab, entity_vocab,
                              target_entity_vocab, entity_negatives,
                              batch_size, negative, max_text_len,
                              max_entity_len, pool_size), batch_idx):
            args = tuple(o.cuda(non_blocking=True) for o in args)  # 'async' is a reserved word since Python 3.7
            target = target.cuda()

            optimizer_ins.zero_grad()
            output = model(args)
            loss = F.cross_entropy(output, target)
            loss.backward()

            optimizer_ins.step()

            cur_correct += (torch.max(output, 1)[1].view(
                target.size()) == target).sum().item()
            cur_total += len(target)
            cur_loss += loss.item()
            if batch_idx != 0 and batch_idx % 1000 == 0:
                n_correct += cur_correct
                n_total += cur_total
                logger.info(
                    'Processed %d batches (epoch: %d, loss: %.4f acc: %.4f total acc: %.4f)'
                    % (batch_idx, n_epoch, cur_loss / cur_total, 100. *
                       cur_correct / cur_total, 100. * n_correct / n_total))
                cur_correct = 0
                cur_total = 0
                cur_loss = 0.0
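The embedding matrices above follow a common pattern: uniform random initialization, a prepended all-zero row for the padding index, then overwriting rows with pretrained vectors where available. A compact sketch of the same pattern with a toy vocabulary and lookup (both hypothetical):

import numpy as np

dim_size = 4
vocab = ['apple', 'banana']                    # toy vocabulary
pretrained = {'apple': np.ones(dim_size)}      # toy pretrained vectors

matrix = np.random.uniform(-0.05, 0.05, size=(len(vocab), dim_size))
matrix = np.vstack([np.zeros(dim_size), matrix]).astype('float32')  # row 0 = padding

for i, word in enumerate(vocab, start=1):      # offset by the padding row
    vec = pretrained.get(word)
    if vec is not None:
        matrix[i] = vec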
コード例 #23
0
    iterations = 0
    encoder = Encoder(encoder_weights=args.encoder_weights)
    decoder = Decoder(args.hidden_size, args.embed_size, args.attention_size,
                      args.dropout)
    encoder = encoder.to('cuda')
    decoder = decoder.to('cuda')

    snapshot = args.snapshot
    test_model = args.test_model
    train_from_scratch = args.train_from_scratch
    swa_params = eval(args.swa_params)  # note: ast.literal_eval would be safer here
    finetune_encoder = args.finetune_encoder

    if not test_model:
        if finetune_encoder:
            encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                                 lr=args.encoder_lr)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                             lr=args.decoder_lr)
    else:
        print("Testing the model")

    checkpoint = None
    if snapshot:
        checkpoint = torch.load(snapshot,
                                map_location=lambda storage, loc: storage)
        if (train_from_scratch
                or test_model) and 'decoder_swa_state_dict' in checkpoint:
            print("Loading the SWA weights.")
            decoder.load_state_dict(
                convert_weights(checkpoint['decoder_swa_state_dict']))
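The 'decoder_swa_state_dict' key suggests stochastic weight averaging (SWA): a running average of weights collected along the training trajectory, stored alongside the regular weights. A minimal illustration of averaging state dicts — an assumption about the idea, not this repository's implementation:

import torch

def average_state_dicts(state_dicts):
    # element-wise mean over checkpoints that share identical keys/shapes
    avg = {}
    for key in state_dicts[0]:
        stacked = torch.stack([sd[key].float() for sd in state_dicts])
        avg[key] = stacked.mean(dim=0)
    return avg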
コード例 #24
0
    from data import language_DataLoader
    train_dataloader = language_DataLoader((fr_source, en_source),
                                           (fr_config, en_config),
                                           opt,
                                           train=True)
    test_dataloader = language_DataLoader((fr_source, en_source),
                                          (fr_config, en_config),
                                          opt,
                                          train=False)

    encoder = Encoder(fr_config,
                      embedding_dimension=opt.embedding_dim,
                      hidden_size=opt.rnn_hidden,
                      num_layer=opt.num_layers)
    optimizer1 = torch.optim.Adam(encoder.parameters(), lr=opt.lr)
    decoder = BahdanauAttnDecoderRNN(opt.rnn_hidden,
                                     opt.embedding_dim,
                                     len(en_config.word2ix),
                                     n_layers=2,
                                     dropout_p=0.1)
    optimizer2 = torch.optim.Adam(decoder.parameters(), lr=opt.lr)
    if opt.save_path:
        encoder.load_state_dict(torch.load(opt.save_path + 'encoder.pth'))
        decoder.load_state_dict(torch.load(opt.save_path + 'decoder.pth'))
        print('Loaded saved model weights')
    encoder.to(device)
    decoder.to(device)
    loss_meter = AverageValueMeter()
    '''
コード例 #25
0
                            transform=data_transform)
s_testset = datasets.MNIST('tmp', train=False, transform=data_transform)
s_trainloader = DataLoader(s_trainset, batch_size=batch_size, shuffle=True)
s_testloader = DataLoader(s_testset, batch_size=batch_size, shuffle=True)
t_trainset, t_testset = load_usps(data_per_class)  # passing a transform here is not allowed
t_trainloader = DataLoader(t_trainset, batch_size=batch_size, shuffle=True)
t_testloader = DataLoader(t_testset, batch_size=64, shuffle=True)

net_g = Encoder()
net_h = classifier()
net_DCD = DCD()
loss_func = torch.nn.CrossEntropyLoss()  # the loss function is shared across stages

# train g and h on the source domain
print("part 1 : initial training for g and h")
optimizer = torch.optim.Adam(list(net_g.parameters()) +
                             list(net_h.parameters()),
                             lr=0.001)  # a single optimizer updates both networks
net_g = net_g.to(device)
net_h = net_h.to(device)
net_DCD = net_DCD.to(device)
if not device == "cpu":
    net_g = nn.DataParallel(net_g)
    net_h = nn.DataParallel(net_h)
    net_DCD = nn.DataParallel(net_DCD)

for epoch in range(num_ep_init_gh):
    for data, label in s_trainloader:
        data, label = data.to(device), label.to(device)
        optimizer.zero_grad()
        pred = net_h(net_g(data))
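        # --- hypothetical completion: the snippet is truncated here; a
        # standard supervised step would close the loop like this ---
        loss = loss_func(pred, label)
        loss.backward()
        optimizer.step()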
コード例 #26
0
ファイル: main_train.py プロジェクト: yqGANs/GroupDNet
def train(save_path, checkpoint, data_root, batch_size, dataset):
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    transform = transforms.Compose(
        [transforms.Resize((128, 128)),
         transforms.ToTensor()])
    target_transform = transforms.Compose(
        [transforms.Resize((128, 128)),
         ToTensor()])
    if dataset == 'cityscapes':
        train_data = Cityscapes(str(data_root),
                                split='train',
                                mode='fine',
                                target_type='semantic',
                                transform=transform,
                                target_transform=transform)
        eG = 35
        dG = [35, 35, 20, 14, 10, 4, 1]
        eC = 8
        dC = 280
        n_classes = len(Cityscapes.classes)
        update_lr = update_lr_default
        epoch = 200
    else:
        train_data = Deepfashion(str(data_root),
                                 split='train',
                                 transform=transform,
                                 target_transform=transform)
        n_classes = len(Deepfashion.eclasses)
        eG = 8
        eC = 64
        dG = [8, 8, 4, 4, 2, 2, 1]
        dC = 160
        update_lr = update_lr_deepfashion
        epoch = 100
    data_loader = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch_size,
                                              num_workers=1)

    os.makedirs(save_path, exist_ok=True)

    n_channels = 3
    encoder = Encoder(n_classes * n_channels, C=eC, G=eG)
    decoder = Decoder(8 * eG, n_channels, n_classes, C=dC, Gs=dG)
    discriminator = Discriminator(n_classes + n_channels)
    vgg = Vgg19().eval()

    encoder = torch.nn.DataParallel(encoder)
    decoder = torch.nn.DataParallel(decoder)
    discriminator = torch.nn.DataParallel(discriminator)
    vgg = torch.nn.DataParallel(vgg)

    gen_opt = optim.Adam(list(encoder.parameters()) +
                         list(decoder.parameters()),
                         lr=0.0001,
                         betas=(0, 0.9))
    dis_opt = optim.Adam(discriminator.parameters(), lr=0.0004, betas=(0, 0.9))
    gen_scheduler = optim.lr_scheduler.LambdaLR(gen_opt, update_lr)
    dis_scheduler = optim.lr_scheduler.LambdaLR(dis_opt, update_lr)
    params = [
        'encoder', 'decoder', 'discriminator', 'gen_opt', 'dis_opt',
        'gen_scheduler', 'dis_scheduler'
    ]

    if os.path.exists(checkpoint):
        cp = torch.load(checkpoint)
        print(f'Load checkpoint: {checkpoint}')
        for param in params:
            eval(param).load_state_dict(cp[param])
        # encoder.load_state_dict(cp['encoder'])
        # decoder.load_state_dict(cp['decoder'])
        # discriminator.load_state_dict(cp['discriminator'])
        # gen_opt.load_state_dict(cp['gen_opt'])
        # dis_opt.load_state_dict(cp['dis_opt'])
        # gen_scheduler.load_state_dict(cp['gen_scheduler'])
        # dis_scheduler.load_state_dict(cp['dis_scheduler'])

    def to_device_optimizer(opt):
        for state in opt.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

    to_device_optimizer(gen_opt)
    to_device_optimizer(dis_opt)

    encoder = encoder.to(device)
    decoder = decoder.to(device)
    discriminator = discriminator.to(device)
    vgg = vgg.to(device)
    print(len(data_loader))
    for epoch in range(epoch):
        e_g_loss = []
        e_d_loss = []
        for i, batch in tqdm(enumerate(data_loader)):
            x, sem = batch
            x = x.to(device)
            sem = sem.to(device)
            sem = sem * 255.0
            sem = sem.long()
            s = split_class(x, sem, n_classes)
            sem_target = sem.clone()
            del sem
            sem = torch.zeros(x.size()[0],
                              n_classes,
                              sem_target.size()[2],
                              sem_target.size()[3],
                              device=x.device)
            sem.scatter_(1, sem_target, 1)
            s = s.detach()
            s = s.to(device)
            mu, sigma = encoder(s)
            # reparameterization trick: eps must come from a standard normal
            z = mu + torch.exp(0.5 * sigma) * torch.randn(mu.size(),
                                                          device=mu.device)
            gen = decoder(z, sem)
            d_fake = discriminator(gen, sem)
            d_real = discriminator(x, sem)
            l1loss = nn.L1Loss()
            gen_opt.zero_grad()
            loss_gen = 0.5 * d_fake[0][-1].mean() + 0.5 * d_fake[1][-1].mean()
            loss_fm = sum([
                sum([l1loss(f, g) for f, g in zip(fs, rs)])
                for fs, rs in zip(d_fake, d_real)
            ]).mean()

            f_fake = vgg(gen)
            f_real = vgg(x)
            # loss_p = 1.0 / 32 * l1loss(f_fake.relu1_2, f_real.relu1_2) + \
            #     1.0 / 16 * l1loss(f_fake.relu2_2, f_real.relu2_2) + \
            #     1.0 / 8 * l1loss(f_fake.relu3_3, f_real.relu3_3) + \
            #     1.0 / 4 * l1loss(f_fake.relu4_3, f_real.relu4_3) + \
            #     l1loss(f_fake.relu5_3, f_real.relu5_3)
            loss_p = 1.0 / 32 * l1loss(f_fake[0], f_real[0]) + \
                1.0 / 16 * l1loss(f_fake[1], f_real[1]) + \
                1.0 / 8 * l1loss(f_fake[2], f_real[2]) + \
                1.0 / 4 * l1loss(f_fake[3], f_real[3]) + \
                l1loss(f_fake[4], f_real[4])
            loss_kl = -0.5 * torch.sum(1 + sigma - mu * mu - torch.exp(sigma))
            loss = loss_gen + 10.0 * loss_fm + 10.0 * loss_p + 0.05 * loss_kl
            loss.backward(retain_graph=True)
            gen_opt.step()

            dis_opt.zero_grad()
            loss_dis = torch.mean(-torch.mean(torch.min(d_real[0][-1] - 1, torch.zeros_like(d_real[0][-1]))) +
                                  -torch.mean(torch.min(-d_fake[0][-1] - 1, torch.zeros_like(d_fake[0][-1])))) + \
                                  torch.mean(-torch.mean(torch.min(d_real[1][-1] - 1, torch.zeros_like(d_real[1][-1]))) +
                                  -torch.mean(torch.min(-d_fake[1][-1] - 1, torch.zeros_like(d_fake[1][-1]))))
            loss_dis.backward()
            dis_opt.step()

            e_g_loss.append(loss.item())
            e_d_loss.append(loss_dis.item())
            #plt.imshow((gen.detach().cpu().numpy()[0]).transpose(1, 2, 0))
            #plt.pause(.01)
            #print(i, 'g_loss', e_g_loss[-1], 'd_loss', e_d_loss[-1])
            os.makedirs(save_path / str(epoch), exist_ok=True)

            Image.fromarray((gen.detach().cpu().numpy()[0].transpose(1, 2, 0) *
                             255.0).astype(np.uint8)).save(
                                 save_path / str(epoch) / f'{i}.png')
        print('g_loss', np.mean(e_g_loss), 'd_loss', np.mean(e_d_loss))

        # save
        cp = {}
        for param in params:
            cp[param] = eval(param).state_dict()
        torch.save(cp, save_path / 'latest.pth')
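Since the encoder returns mu and a log-variance (see the KL term above), the sampling step is the standard VAE reparameterization trick. A self-contained sketch of the sampling and the matching KL divergence:

import torch

mu = torch.zeros(2, 3)        # toy encoder outputs
log_var = torch.zeros(2, 3)   # log(sigma^2)

eps = torch.randn_like(mu)                 # eps ~ N(0, I)
z = mu + torch.exp(0.5 * log_var) * eps    # z ~ N(mu, sigma^2)

# KL(q(z|x) || N(0, I)) for a diagonal Gaussian
kl = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())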
コード例 #27
0
ファイル: train.py プロジェクト: shawnxiaow1118/Image_Caption
dataloader = get_loader("../data/resized/",
                        "../data/annotations/captions_train2014.json",
                        vocab,
                        trans,
                        128,
                        shuffle=True)

encoder = Encoder(256)
decoder = Decoder(256, 512, len(vocab), 1)

if torch.cuda.is_available():
    encoder.cuda()
    decoder.cuda()

# Keep the training setup outside the CUDA check so CPU-only runs still train
criterion = nn.CrossEntropyLoss()
# encoder.parameters() already includes encoder.bn, so list it only once
params = list(decoder.parameters()) + list(encoder.parameters())
optimizer = torch.optim.Adam(params, lr=0.001)

total_step = len(dataloader)
for epoch in range(5):
    for i, (images, captions, lengths) in enumerate(dataloader):
        # volatile is deprecated and would block gradients during training
        images = to_var(images)
        captions = to_var(captions)
        targets = pack_padded_sequence(captions, lengths,
                                       batch_first=True)[0]
        decoder.zero_grad()
        encoder.zero_grad()
        features = encoder(images)

        outputs = decoder(features, captions, lengths)
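        # --- hypothetical continuation: the snippet is truncated after the
        # forward pass; a standard loop would close like this ---
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print('Epoch %d, step %d/%d, loss %.4f'
                  % (epoch, i, total_step, loss.item()))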
コード例 #28
0
def train():
    # 1. Prepare the dataset
    data = json.load(open(Config.train_data_path, 'r'))

    input_data = data['input_data']
    input_len = data['input_len']
    output_data = data['output_data']
    mask_data = data['mask']
    output_len = data['output_len']

    total_len = len(input_data)
    step = total_len // Config.batch_size

    # Word-embedding layer
    embedding = nn.Embedding(Config.vocab_size,
                             Config.hidden_size,
                             padding_idx=Config.PAD)

    # 2. Build the models
    encoder = Encoder(embedding)
    attn_model = 'dot'
    decoder = Decoder(
        attn_model,
        embedding,
    )

    encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                         lr=Config.learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=Config.learning_rate)

    for epoch in range(Config.num_epochs):
        for i in range(step - 1):
            start_time = time.time()
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()

            input_ids = torch.LongTensor(
                input_data[i * Config.batch_size:(i + 1) *
                           Config.batch_size]).to(Config.device)
            inp_len = torch.LongTensor(
                input_len[i * Config.batch_size:(i + 1) *
                          Config.batch_size]).to(Config.device)
            output_ids = torch.LongTensor(
                output_data[i * Config.batch_size:(i + 1) *
                            Config.batch_size]).to(Config.device)
            mask = torch.BoolTensor(mask_data[i * Config.batch_size:(i + 1) *
                                              Config.batch_size]).to(
                                                  Config.device)
            out_len = output_len[i * Config.batch_size:(i + 1) *
                                 Config.batch_size]

            max_ans_len = max(out_len)

            mask = mask.permute(1, 0)
            output_ids = output_ids.permute(1, 0)
            encoder_outputs, hidden = encoder(input_ids, inp_len)
            encoder_outputs = encoder_outputs.permute(1, 0, 2)
            decoder_hidden = hidden.unsqueeze(0)

            # Create the initial decoder input (an SOS token for every sequence in the batch)
            decoder_input = torch.LongTensor(
                [[Config.SOS for _ in range(Config.batch_size)]])
            decoder_input = decoder_input.to(Config.device)

            # Determine if we are using teacher forcing this iteration
            teacher_forcing_ratio = 0.3
            use_teacher_forcing = random.random() < teacher_forcing_ratio

            loss = 0
            print_losses = []
            n_totals = 0
            if use_teacher_forcing:
                # Teacher forcing: feed the ground-truth token from the previous step
                for t in range(max_ans_len):
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    # print(decoder_output.size())  # torch.Size([2, 2672])
                    # print(decoder_hidden.size())   # torch.Size([1, 2, 512])

                    decoder_input = output_ids[t].view(1, -1)
                    # Compute the masked loss
                    mask_loss, nTotal = maskNLLLoss(decoder_output,
                                                    output_ids[t], mask[t])
                    # print('1', mask_loss)
                    loss += mask_loss
                    print_losses.append(mask_loss.item() * nTotal)
                    n_totals += nTotal
            else:
                # No teacher forcing: feed the model's own previous prediction
                for t in range(max_ans_len):
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)

                    _, topi = decoder_output.topk(1)
                    decoder_input = torch.LongTensor(
                        [[topi[i][0] for i in range(Config.batch_size)]])
                    decoder_input = decoder_input.to(Config.device)
                    # Calculate and accumulate loss
                    mask_loss, nTotal = maskNLLLoss(decoder_output,
                                                    output_ids[t], mask[t])
                    # print('2', mask_loss)
                    loss += mask_loss
                    print_losses.append(mask_loss.item() * nTotal)
                    n_totals += nTotal

            # Perform backpropagation
            loss.backward()

            # Gradient clipping
            _ = nn.utils.clip_grad_norm_(encoder.parameters(), Config.clip)
            _ = nn.utils.clip_grad_norm_(decoder.parameters(), Config.clip)

            # Adjust model weights
            encoder_optimizer.step()
            decoder_optimizer.step()
            avg_loss = sum(print_losses) / n_totals

            time_str = datetime.datetime.now().isoformat()
            log_str = 'time:{}, epoch:{}, step:{}, loss:{:5f}, spend_time:{:6f}'.format(
                time_str, epoch, i, avg_loss,
                time.time() - start_time)
            rainbow(log_str)

        # save a checkpoint every epoch
        if epoch % 1 == 0:
            save_path = './save_model/'
            if not os.path.exists(save_path):
                os.makedirs(save_path)

            torch.save(
                {
                    'epoch': epoch,
                    'encoder': encoder.state_dict(),
                    'decoder': decoder.state_dict(),
                    'en_opt': encoder_optimizer.state_dict(),
                    'de_opt': decoder_optimizer.state_dict(),
                    'loss': avg_loss,
                    'embedding': embedding.state_dict()
                },
                os.path.join(
                    save_path,
                    'epoch{}_{}_model.tar'.format(epoch, 'checkpoint')))
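maskNLLLoss is defined elsewhere in this project; the pattern (familiar from the PyTorch chatbot tutorial this code resembles) is cross-entropy computed only over non-padding positions. An illustrative sketch under that assumption:

import torch

def maskNLLLoss(decoder_output, target, mask):
    # decoder_output: (batch, vocab) probabilities for one time step
    # target: (batch,) token ids; mask: (batch,) bool, True on real tokens
    n_total = mask.sum()
    gathered = torch.gather(decoder_output, 1, target.view(-1, 1)).squeeze(1)
    cross_entropy = -torch.log(gathered)
    loss = cross_entropy.masked_select(mask).mean()
    return loss, n_total.item()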
コード例 #29
0
class Model:
    def __init__(self, seq_len=20, learning_rate=3e-4):
        device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.device = device
        self.seq_len = seq_len
        time_stamp = time.strftime("%m-%d-%Y_%H:%M:%S", time.localtime())
        print("run on device", device, ",current time:", time_stamp)
        self.writer = SummaryWriter('runs/emb_graph' + time_stamp)

        # define layers
        self.categ_embedding = CategoricalEmbedding().to(device)
        self.r2s_embedding = Route2Stop(vertex_feature=105,
                                        edge_feature=112).to(device)
        self.encoder = Encoder(input_size=100, seq_len=seq_len).to(device)
        self.fcn = FCN(input_size=100).to(device)
        self.similarity = Similarity(input_size=30, device=device).to(device)

        # define training parameters
        self.criterion = nn.BCELoss()
        self.optimizer = optim.Adam(
            [{
                'params': self.categ_embedding.parameters()
            }, {
                'params': self.r2s_embedding.parameters()
            }, {
                'params': self.encoder.parameters()
            }, {
                'params': self.fcn.parameters()
            }, {
                'params': self.similarity.parameters()
            }],
            lr=learning_rate)

    def forward(self, old, real, fake, numer_list, categ_list):

        old = self.categ_embedding(old, numer_list, categ_list, self.device)
        real = self.categ_embedding(real, numer_list, categ_list, self.device)
        fake = self.categ_embedding(fake, numer_list, categ_list, self.device)

        old = self.r2s_embedding(old)
        real = self.r2s_embedding(real)
        fake = self.r2s_embedding(fake)

        old = self.encoder(old)
        real = self.fcn(real)
        fake = self.fcn(fake)

        score_real = self.similarity(old, real)
        score_fake = self.similarity(old, fake)
        return score_real, score_fake

    def metrics(self, score_real, score_fake, label_real_test,
                label_fake_test):
        y_true = np.concatenate(
            [label_real_test.cpu().numpy(),
             label_fake_test.cpu().numpy()],
            axis=0)
        y_pred = torch.cat([
            torch.argmax(score_real, dim=1, keepdim=True),
            torch.argmax(score_fake, dim=1, keepdim=True)
        ],
                           dim=0).cpu().numpy()
        acc = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)
        return acc, precision, recall, f1

    def train_and_test(self, data, batch_size=64, num_epoch=50):
        #initialize labels before training
        label_real = torch.cat(
            [torch.zeros([batch_size, 1]),
             torch.ones([batch_size, 1])], dim=1).to(self.device)
        label_fake = torch.cat(
            [torch.ones([batch_size, 1]),
             torch.zeros([batch_size, 1])], dim=1).to(self.device)

        old_test, real_test, fake_test = data.test
        test_size = real_test.shape[0]
        label_real_test = torch.ones([test_size,
                                      1]).type(torch.long).to(self.device)
        label_fake_test = torch.zeros([test_size,
                                       1]).type(torch.long).to(self.device)

        for epoch in range(num_epoch):
            total_loss = [0] * len(data)
            total_loss_real = [0] * len(data)
            # training first
            for i, chunk in enumerate(data.train):
                old_chunk, real_chunk, fake_chunk = chunk
                num_batch = real_chunk.shape[0] // batch_size
                for batch in range(num_batch):
                    # get a batch of data pair: (old, real, fake)
                    old_batch = old_chunk.iloc[batch * self.seq_len *
                                               batch_size:(batch + 1) *
                                               self.seq_len * batch_size, :]
                    real_batch = real_chunk.iloc[batch *
                                                 batch_size:(batch + 1) *
                                                 batch_size, :]
                    fake_batch = fake_chunk.iloc[batch *
                                                 batch_size:(batch + 1) *
                                                 batch_size, :]

                    score_real, score_fake = self.forward(
                        old_batch, real_batch, fake_batch, data.numer_list,
                        data.categ_list)

                    loss_real = self.criterion(score_real, label_real)
                    loss_fake = self.criterion(score_fake, label_fake)
                    loss = loss_real + loss_fake

                    # accumulate scalars with .item() (avoids the deprecated .data)
                    total_loss[i] += loss.item()
                    total_loss_real[i] += loss_real.item()
                    self.optimizer.zero_grad()

                    loss.backward()
                    self.optimizer.step()

                    if (batch + 1) % 100 == 0:
                        print(
                            "epoch: %d, chunk: %d, batch: %d, loss: %.3f, real: %.3f, fake: %.3f"
                            % (epoch, i, batch + 1, loss.item(),
                               loss_real.item(), loss_fake.item()))
                # average over the number of batches, not the last batch index
                total_loss[i] = total_loss[i] / num_batch
                total_loss_real[i] = total_loss_real[i] / num_batch

            # testing
            score_real, score_fake = self.forward(old_test, real_test,
                                                  fake_test, data.numer_list,
                                                  data.categ_list)
            acc, precision, recall, f1 = self.metrics(score_real, score_fake,
                                                      label_real_test,
                                                      label_fake_test)
            print("test acc: %.4f" % acc)
            self.writer.add_scalar('testing accuracy', acc, epoch)
            # print result and save loss in tensorboard
            print("epoch: %d, average loss: %.4f" %
                  (epoch, np.mean(total_loss)))
            self.writer.add_scalars('training loss', {
                'overall': np.mean(total_loss),
                'good': np.mean(total_loss_real)
            }, epoch)
            # flush (rather than close) so the writer stays usable next epoch
            self.writer.flush()

        # close once after training and report the final epoch's metrics
        # (the original returned inside the loop, so only one epoch ever ran)
        self.writer.close()
        return acc, precision, recall, f1
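
# Usage sketch (added): the class name and the `data` interface here are
# assumptions inferred from the methods above; neither is shown in the
# original example.
#
#     model = RouteSimilarityModel(seq_len=10, learning_rate=1e-3,
#                                  device=torch.device('cuda'))
#     acc, precision, recall, f1 = model.train_and_test(data, batch_size=64,
#                                                       num_epoch=50)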
Code Example #30
0
def main():
    epoch = 1000
    batch_size = 64
    hidden_dim = 300
    use_cuda = True

    encoder = Encoder(num_words, hidden_dim)
    if args.attn:
        attn_model = 'dot'
        decoder = LuongAttnDecoderRNN(attn_model, hidden_dim, num_words)
    else:
        decoder = DecoderRhyme(hidden_dim, num_words, num_target_lengths,
                               num_rhymes)

    if args.train:
        weight = torch.ones(num_words)
        weight[word2idx_mapping[PAD_TOKEN]] = 0
        if use_cuda:
            encoder = encoder.cuda()
            decoder = decoder.cuda()
            weight = weight.cuda()
        encoder_optimizer = Adam(encoder.parameters(), lr=0.001)
        decoder_optimizer = Adam(decoder.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss(weight=weight)
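        # Note (added): zeroing the PAD class weight makes CrossEntropyLoss
        # drop padded target positions from the loss; an equivalent
        # formulation is
        # nn.CrossEntropyLoss(ignore_index=word2idx_mapping[PAD_TOKEN]).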

        np.random.seed(1124)
        order = np.arange(len(train_data))

        best_loss = 1e10
        best_epoch = 0

        for e in range(epoch):
            #if e - best_epoch > 20: break

            np.random.shuffle(order)
            shuffled_train_data = train_data[order]
            shuffled_x_lengths = input_lengths[order]
            shuffled_y_lengths = target_lengths[order]
            shuffled_y_rhyme = target_rhymes[order]
            train_loss = 0
            valid_loss = 0  # the validation loop below is commented out, so this stays 0
            for b in tqdm(range(len(order) // batch_size)):
                lo, hi = b * batch_size, (b + 1) * batch_size
                batch_x = torch.LongTensor(
                    shuffled_train_data[lo:hi][:, 0].tolist()).t()
                batch_y = torch.LongTensor(
                    shuffled_train_data[lo:hi][:, 1].tolist()).t()
                batch_x_lengths = shuffled_x_lengths[lo:hi]
                batch_y_lengths = shuffled_y_lengths[lo:hi]
                batch_y_rhyme = shuffled_y_rhyme[lo:hi]

                if use_cuda:
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

                train_loss += train(batch_x, batch_y, batch_y_lengths,
                                    max(batch_y_lengths), batch_y_rhyme,
                                    encoder, decoder, encoder_optimizer,
                                    decoder_optimizer, criterion, use_cuda,
                                    False)

            train_loss /= (b + 1)  # b is the last batch index, so b + 1 batches ran
            '''
            for b in range(len(valid_data) // batch_size):
                batch_x = torch.LongTensor(valid_data[b*batch_size: (b+1)*batch_size][:, 0].tolist()).t()
                batch_y = torch.LongTensor(valid_data[b*batch_size: (b+1)*batch_size][:, 1].tolist()).t()
                if use_cuda:
                    batch_x, batch_y = batch_x.cuda(), batch_y.cuda()

                valid_loss += train(batch_x, batch_y, max_seqlen, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, use_cuda, True)
            valid_loss /= b
            '''
            print(
                "epoch {}, train_loss {:.4f}, valid_loss {:.4f}, best_epoch {}, best_loss {:.4f}"
                .format(e, train_loss, valid_loss, best_epoch, best_loss))
            '''
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_epoch = e
                torch.save(encoder.state_dict(), args.encoder_path + '.best')
                torch.save(decoder.state_dict(), args.decoder_path + '.best')
            '''
            # checkpoint every epoch (the best-model logic above is disabled)
            torch.save(encoder.state_dict(), args.encoder_path)
            torch.save(decoder.state_dict(), args.decoder_path)
        print(encoder)
        print(decoder)
        print("==============")

    else:
        encoder.load_state_dict(torch.load(
            args.encoder_path))  #, map_location=torch.device('cpu')))
        decoder.load_state_dict(torch.load(
            args.decoder_path))  #, map_location=torch.device('cpu')))
        print(encoder)
        print(decoder)

    predict(encoder, decoder)
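
# Entry-point sketch (added): the original argument parsing is not shown, but
# main() reads args.train, args.attn, args.encoder_path and args.decoder_path,
# so something like the following is implied. All flag names and defaults
# below are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true',
                        help='train a new model instead of loading one')
    parser.add_argument('--attn', action='store_true',
                        help='use the Luong attention decoder')
    parser.add_argument('--encoder_path', default='encoder.pt')
    parser.add_argument('--decoder_path', default='decoder.pt')
    args = parser.parse_args()
    main()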