Example #1
def train(opt, th):
    ''' Train the model
    Args:
        opt -- options / hyperparameters
        th -- TextConverter object
    Returns:
        None
    '''
    # 1. Training data
    data_set = TextDataset(opt.train_data_path, th)
    train_data = DataLoader(data_set,
                            opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers)
    # 2. Initialize the model
    model = CharRNN(th.vocab_size, opt.embed_size, opt.hidden_size,
                    opt.n_layers, opt.dropout_p, opt.bidir)
    if USE_CUDA:
        model = model.cuda(DEVICE_ID)

    # 3. Optimization setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.learning_rate)

    # 4. Training loop
    for e in range(opt.max_epochs):
        epoch_loss = 0
        hidden = None
        for input_seqs, labels in train_data:
            # Both tensors are [b, seq_len]; the last batch may hold fewer than b samples
            # Prepare the input and hidden state
            b = input_seqs.shape[0]
            if hidden is not None:
                hidden = hidden[:, :b, :]
            labels = labels.long().view(-1)
            input_seqs, labels = get_variable(input_seqs), get_variable(labels)

            # Forward pass
            probs, hidden = model(input_seqs, hidden)
            probs = probs.view(-1, th.vocab_size)

            # Loss and backward pass
            loss = criterion(probs, labels)
            optimizer.zero_grad()
            # retain_graph is needed because the hidden state is reused across
            # batches without being detached from the previous computation graph
            loss.backward(retain_graph=True)

            # Clip gradients and update parameters
            # (clip_grad_norm was renamed clip_grad_norm_ in PyTorch 0.4)
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

            epoch_loss += loss.item()  # loss.data[0] is deprecated; item() returns the Python scalar
        # Mean cross-entropy over the epoch
        entropy_loss = epoch_loss / len(train_data)
        perplexity = np.exp(entropy_loss)
        info = "epoch: {}, perp: {:.3f}".format(e + 1, perplexity)
        print(info)
        if perplexity <= opt.min_perplexity or e == opt.max_epochs - 1:
            print("best model")
            torch.save(model, opt.model_path)
            break
Example #2
def main():
    ds = Dataset('imdb')
    params = {
        'batch_size': 67,
        'shuffle': True,
        'num_workers': 8,
        'collate_fn': collate_fn
    }
    epochs = 4
    lr = 0.01
    tbptt_steps = 256
    training_generator = data.DataLoader(ds, **params)
    model = CharRNN(input_size=ds.encoder.get_vocab_size(),
                    embedding_size=8,
                    hidden_size=128,
                    output_size=ds.encoder.get_vocab_size(),
                    no_sentiments=3,
                    dense_size=32,
                    padding_idx=ds.encoder.get_id(PADDING_TOKEN),
                    n_layers=1)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    step_no = 0
    for epoch in range(epochs):
        print('Epoch: ', epoch)
        for x_i, y_i, l_i in training_generator:
            model.reset_intermediate_vars()
            step_no += 1
            print(x_i.size())
            batch_loss = 0
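            # Number of TBPTT chunks: floor(seq_len / tbptt_steps), plus one more
            # chunk if the sequence length is not an exact multiple of tbptt_steps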
            for step in range(l_i[0] // tbptt_steps +
                              (l_i[0] % tbptt_steps != 0)):
                von = tbptt_steps * step
                bis = min(tbptt_steps * (step + 1), l_i[0])
                out = model(x_i[:, von:bis])
                if step % 25 == 0:
                    print(model.attn[0].detach().numpy(),
                          model.attn[-1].detach().numpy())
                loss = model.loss(out, y_i, l_i, von, bis)
                batch_loss += loss
                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), 1.5)
                # Note: this manual SGD update is applied in addition to optimizer.step() below
                for p in model.parameters():
                    p.data.add_(p.grad.data, alpha=-lr)
                optimizer.step()

                model.detach_intermediate_vars()
            print('Total loss for this batch: ', batch_loss.item())
            if step_no % 30 == 1:
                gen_sample, sentis = model.generate_text(
                    ds.encoder, 'T', 200, 0.7)
                print_colored_text(gen_sample, sentis, ds.encoder)
                # Print an example with sentiments
                print_colored_text(x_i[-1].data.numpy(),
                                   get_sentiments(model, x_i[-1], 0.7),
                                   ds.encoder)
Example #3
def main():
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])

    #coloredlogs.install(level='DEBUG')
    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        generate_sample(model, token_to_idx, idx_to_token, n_tokens=20)
    except KeyboardInterrupt:
        print('Aborted!')
Example #4
def run_training(model: CharRNN, dataset, config: dict, validation: bool,
                 valid_dataset):
    optimizer = torch.optim.Adam(model.parameters(), lr=config['initial_lr'])
    epoch = load_checkpoint(optimizer, model, config['filename'])
    if not epoch:
        epoch = 0
    epoch += 1
    params = {
        'batch_size': config['batch_size'],
        'shuffle': False,
        'num_workers': 0 if os.name == 'nt' else 8
    }
    data_generator = data.DataLoader(dataset, **params)
    while epoch < config['epochs'] + 1:
        model.reset_intermediate_vars()
        for step, (x_i, y_i, l_i) in enumerate(data_generator):
            loss = run_forward_pass_and_get_loss(model, x_i, y_i, l_i)
            # Gradient descent step
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.5)
            optimizer.step()

            model.detach_intermediate_vars()
            if step % 100 == 0:
                print('Epoch: {} Loss for step {} : {}'.format(
                    epoch, step, round(loss.item(), 4)))
            if step % 1000 == 1:
                gen_sample = model.generate_text(dataset.encoder, 't', 200)
                print_tokens(dataset.encoder.map_ids_to_tokens(gen_sample),
                             config['is_bytes'])
        save_checkpoint(optimizer, model, epoch, config['filename'])
        if validation and epoch % 2:
            bpc = validate(valid_dataset, model)
            print('BPC on validation set: ', bpc)
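        # When the epoch appears in the schedule, rebuild the optimizer with the new learning rate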
        if epoch in config['lr_schedule']:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=config['lr_schedule'][epoch])
        epoch += 1
Example #5
def main():

    logging.root.setLevel(logging.NOTSET)

    inputs, token_to_idx, idx_to_token = load_dataset(file_name=sys.argv[2])

    #coloredlogs.install(level='DEBUG')
    num_layers = 2
    rnn_type = 'lstm'
    dropout = 0.5
    emb_size = 50
    hidden_size = 256
    learning_rate = 0.001
    n_tokens = len(idx_to_token)

    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    optimiser = optim.Adam(model.parameters(), lr=learning_rate)

    s1 = "bababac bababa bacc bac bacc"
    s2 = "bababac baba bac bacc bac"
    s3 = "baba"
    s4 = "ccab cab ccab ababab cababab"

    try:
        model, optimiser, epoch, valid_loss_min = load_ckp(
            checkpoint_fpath=sys.argv[1], model=model, optimiser=optimiser)
        score(model, token_to_idx, idx_to_token, seed_phrase=s1)
        score(model, token_to_idx, idx_to_token, seed_phrase=s2)
        score(model, token_to_idx, idx_to_token, seed_phrase=s3)
        score(model, token_to_idx, idx_to_token, seed_phrase=s4)
    except KeyboardInterrupt:
        print('Aborted!')
Example #6
def main():
    """ Main function

        Here, you should instantiate
        1) DataLoaders for training and validation.
           Try SubsetRandomSampler to create these DataLoaders.
        2) model
        3) optimizer
        4) cost function: use torch.nn.CrossEntropyLoss

    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--val_ratio',
                        type=float,
                        default=.5,
                        help='The ratio for valid set')
    parser.add_argument('--n_layers',
                        type=int,
                        default=4,
                        help='Number of stacked RNN layers')
    parser.add_argument('--n_hidden',
                        type=int,
                        default=512,
                        help='Number of hidden neurons of RNN cells')
    parser.add_argument('--drop_prob',
                        type=float,
                        default=0.1,
                        help='Dropout probability')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=100,
                        help='The number of epochs')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='Learning rate')
    parser.add_argument('--device',
                        type=str,
                        default='gpu',
                        help='For cpu: \'cpu\', for gpu: \'gpu\'')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='Size of batches for training')
    parser.add_argument('--model_save_dir',
                        type=str,
                        default='../model',
                        help='Directory for saving model.')
    parser.add_argument('--results_save_dir',
                        type=str,
                        default='../results',
                        help='Directory for saving results.')
    parser.add_argument('--rnn',
                        type=bool,
                        default=True,
                        help='Train vanilla rnn model')
    parser.add_argument('--lstm',
                        type=bool,
                        default=True,
                        help='Train lstm model')
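    # Note: argparse's type=bool treats any non-empty string as True, so passing
    # "--rnn False" or "--lstm False" on the command line does not disable training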
    parser.add_argument('--chunk_size',
                        type=int,
                        default=30,
                        help='Chunk size(sequence length)')
    parser.add_argument('--s_step', type=int, default=3, help='Sequence step')

    args = parser.parse_args()

    n_cpu = multiprocessing.cpu_count()

    if args.device == 'gpu':
        args.device = 'cuda'
    device = torch.device(args.device)

    chunk_size = args.chunk_size
    s_step = args.s_step
    num_epochs = args.num_epochs
    batch_size = args.batch_size
    val_ratio = args.val_ratio
    shuffle_dataset = True
    random_seed = 42

    datasets = dataset.Shakespeare('shakespeare_train.txt', chunk_size, s_step)

    dataset_size = len(datasets)
    indices = list(range(dataset_size))
    split = int(np.floor(val_ratio * dataset_size))

    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_indices, val_indices = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    trn_loader = DataLoader(datasets,
                            batch_size=batch_size,
                            sampler=train_sampler,
                            num_workers=n_cpu)
    val_loader = DataLoader(datasets,
                            batch_size=batch_size,
                            sampler=valid_sampler,
                            num_workers=n_cpu)

    chars = datasets.chars

    print('-----Train Vanilla RNN Model-----')

    if args.rnn:

        model = CharRNN(chars, args).to(device)
        optimizer = Adam(model.parameters(), lr=args.lr)
        criterion = nn.CrossEntropyLoss()

        rnn_trn_loss, rnn_val_loss = [], []
        best_val_loss = np.inf

        for epoch in range(args.num_epochs):

            epoch_time = time.time()

            trn_loss = train(model, trn_loader, device, criterion, optimizer)
            val_loss = validate(model, val_loader, device, criterion)

            rnn_trn_loss.append(trn_loss)
            rnn_val_loss.append(val_loss)

            print('Epoch: %3s/%s...' % (epoch + 1, num_epochs),
                  'Train Loss: %.4f...' % trn_loss,
                  'Val Loss: %.4f...' % val_loss,
                  'Time: %.4f' % (time.time() - epoch_time))

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(),
                           '%s/rnn.pt' % args.model_save_dir)

        value, idx = np.array(rnn_val_loss).min(), np.array(
            rnn_val_loss).argmin()
        plt.figure(figsize=(8, 6))
        plt.title('Vanilla RNN Model training and validation loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 rnn_trn_loss,
                 'g',
                 label='Train Loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 rnn_val_loss,
                 'r',
                 label='Val Loss')
        plt.grid(True)
        plt.legend(loc='upper right')
        plt.annotate('min epoch: %s \n\
                     min valid loss: %.5f' % (idx, value), (idx, value),
                     xytext=(-60, 20),
                     textcoords='offset points',
                     arrowprops={'arrowstyle': '->'})
        plt.savefig('%s/rnn_loss.png' % args.results_save_dir, dpi=300)

    print('-----Train LSTM Model-----')

    if args.lstm:

        model = CharLSTM(chars, args).to(device)
        optimizer = Adam(model.parameters(), lr=args.lr)
        criterion = nn.CrossEntropyLoss()

        lstm_trn_loss, lstm_val_loss = [], []
        best_val_loss = np.inf

        for epoch in range(args.num_epochs):

            epoch_time = time.time()

            trn_loss = train(model, trn_loader, device, criterion, optimizer)
            val_loss = validate(model, val_loader, device, criterion)

            lstm_trn_loss.append(trn_loss)
            lstm_val_loss.append(val_loss)

            print('Epoch: %3s/%s...' % (epoch + 1, num_epochs),
                  'Train Loss: %.4f...' % trn_loss,
                  'Val Loss: %.4f...' % val_loss,
                  'Time: %.4f' % (time.time() - epoch_time))

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(),
                           '%s/lstm.pt' % args.model_save_dir)

        value, idx = np.array(lstm_val_loss).min(), np.array(
            lstm_val_loss).argmin()
        plt.figure(figsize=(8, 6))
        plt.title('LSTM Model training and validation loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 lstm_trn_loss,
                 'g',
                 label='Train Loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 lstm_val_loss,
                 'r',
                 label='Val Loss')
        plt.grid(True)
        plt.legend(loc='upper right')
        plt.annotate('min epoch: %s \n\
                     min valid loss: %.5f' % (idx, value), (idx, value),
                     xytext=(-60, 20),
                     textcoords='offset points',
                     arrowprops={'arrowstyle': '->'})
        plt.savefig('%s/lstm_loss.png' % args.results_save_dir, dpi=300)
Example #7
def main():

    # Parse command line arguments
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--train_set', type=str, required=True)
    argparser.add_argument('--valid_set', type=str, required=True)
    argparser.add_argument('--model', type=str, default="gru")
    argparser.add_argument('--model_file', type=str, default='None')
    argparser.add_argument('--n_epochs', type=int, default=30)
    argparser.add_argument('--hidden_size', type=int, default=200)
    argparser.add_argument('--n_layers', type=int, default=3)
    argparser.add_argument('--learning_rate', type=float, default=0.01)
    argparser.add_argument('--chunk_len', type=int, default=200)
    argparser.add_argument('--batch_size', type=int, default=300)
    argparser.add_argument('--num_workers', type=int, default=8)
    argparser.add_argument('--cuda', action='store_true')
    argparser.add_argument('--cpu', action='store_true')
    args = argparser.parse_args()

    # Initialize models and start training

    if args.model_file == 'None':
        decoder = CharRNN(
            n_characters,
            args.hidden_size,
            n_characters,
            model=args.model,
            n_layers=args.n_layers,
        )
        epoch_from = 1
        prev_valid_loss = sys.maxsize
        old_filename = None
    else:
        if args.cpu:
            decoder = torch.load(args.model_file,
                                 map_location=lambda storage, loc: storage)
        else:
            decoder = torch.load(args.model_file)
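        # Recover hyperparameters encoded in the checkpoint file name
        # (model type, last epoch, layer count, hidden size, previous validation loss)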
        info = args.model_file.split('_')
        args.model = info[0]
        epoch_from = int(info[1][5:]) + 1
        args.n_layers = int(info[2][7:])
        args.hidden_size = int(info[5][2:])
        prev_valid_loss = float(info[7][4:-3])
        old_filename = args.model_file

        print(
            "successfully loaded model! Continuing from epoch {0} with valid loss {1}"
            .format(epoch_from, prev_valid_loss))

    optimizer = torch.optim.Adam(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    if args.cuda:
        decoder.cuda()

    start = time.time()

    train_dataset = WordDataset(args.train_set, args.chunk_len)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)

    valid_dataset = WordDataset(args.valid_set, args.chunk_len)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)

    try:

        print('Training for maximum {} epochs...'.format(args.n_epochs))
        for epoch in range(epoch_from, args.n_epochs + 1):

            train_loss, num_samples = 0, 0
            for s in tqdm(train_dataloader):
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                train_loss += train(decoder, optimizer, criterion, input_,
                                    target, args.batch_size, args.chunk_len,
                                    args.cuda)
                num_samples += 1
            train_loss /= num_samples

            valid_loss, num_samples = 0, 0
            for s in valid_dataloader:
                input_, target = prep_data(s['input'], s['target'], args.cuda)
                valid_loss += evaluate(decoder, criterion, input_, target,
                                       args.batch_size, args.chunk_len,
                                       args.cuda)
                num_samples += 1
            valid_loss /= num_samples

            elapsed = time_since(start)
            pcnt = epoch / args.n_epochs * 100
            log = (
                '{} elapsed - epoch #{} ({:.1f}%) - training loss (BPC) {:.2f} '
                '- validation loss (BPC) {:.2f}')
            print(log.format(elapsed, epoch, pcnt, train_loss, valid_loss))

            if valid_loss > prev_valid_loss:
                print('No longer learning, just overfitting, stopping here.')
                break
            else:
                filename = model_file_name(decoder, epoch, train_loss,
                                           valid_loss)
                torch.save(decoder, filename)
                print('Saved as {}'.format(filename))
                if old_filename:
                    os.remove(old_filename)
                old_filename = filename

            prev_valid_loss = valid_loss

    except KeyboardInterrupt:
        print("Saving before quit...")
        try:
            valid_loss
        except NameError:
            valid_loss = 'no_val'
        filename = model_file_name(decoder, epoch, train_loss, valid_loss)
        torch.save(decoder, filename)
        print('Saved as {}'.format(filename))
Example #8
class Trainer(object):
    def __init__(self, args):
        self.args = args
        self.device = torch.device('cuda' if self.args.cuda else 'cpu')

        self.convert = None
        self.model = None
        self.optimizer = None
        self.criterion = self.get_loss
        self.meter = AverageValueMeter()
        self.train_loader = None

        self.get_data()
        self.get_model()
        self.get_optimizer()

    def get_data(self):
        self.convert = TextConverter(self.args.txt,
                                     max_vocab=self.args.max_vocab)
        dataset = TextDataset(self.args.txt, self.args.len,
                              self.convert.text_to_arr)
        self.train_loader = DataLoader(dataset,
                                       self.args.batch_size,
                                       shuffle=True,
                                       num_workers=self.args.num_workers)

    def get_model(self):
        self.model = CharRNN(self.convert.vocab_size, self.args.embed_dim,
                             self.args.hidden_size, self.args.num_layers,
                             self.args.dropout, self.args.cuda).to(self.device)
        if self.args.cuda:
            cudnn.benchmark = True

    def get_optimizer(self):
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.args.lr)
        self.optimizer = ScheduledOptim(optimizer)

    @staticmethod
    def get_loss(score, label):
        return nn.CrossEntropyLoss()(score, label.view(-1))

    def save_checkpoint(self, epoch):
        if (epoch + 1) % self.args.save_interval == 0:
            model_out_path = self.args.save_file + "epoch_{}_model.pth".format(
                epoch + 1)
            torch.save(self.model, model_out_path)
            print("Checkpoint saved to {}".format(model_out_path))

    def save(self):
        model_out_path = self.args.save_file + "final_model.pth"
        torch.save(self.model, model_out_path)
        print("Final model saved to {}".format(model_out_path))

    @staticmethod
    def pick_top_n(predictions, top_n=5):
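        # Keep the top_n most probable characters, renormalize their probabilities,
        # and sample one character index from that restricted distribution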
        top_predict_prob, top_predict_label = torch.topk(predictions, top_n, 1)
        top_predict_prob /= torch.sum(top_predict_prob)
        top_predict_prob = top_predict_prob.squeeze(0).cpu().numpy()
        top_predict_label = top_predict_label.squeeze(0).cpu().numpy()
        c = np.random.choice(top_predict_label, size=1, p=top_predict_prob)
        return c

    def train(self):
        self.meter.reset()
        self.model.train()
        for x, y in tqdm(self.train_loader):
            y = y.long()
            x, y = x.to(self.device), y.to(self.device)

            # Forward.
            score, _ = self.model(x)
            loss = self.criterion(score, y)

            # Backward.
            self.optimizer.zero_grad()
            loss.backward()

            # Clip gradient.
            nn.utils.clip_grad_norm_(self.model.parameters(), 5)
            self.optimizer.step()

            self.meter.add(loss.item())

        print('perplexity: {}'.format(np.exp(self.meter.value()[0])))

    def test(self):
        self.model.eval()
        begin = np.array([i for i in self.args.begin])
        begin = np.random.choice(begin, size=1)
        text_len = self.args.predict_len
        samples = [self.convert.word_to_int(c) for c in begin]
        input_txt = torch.LongTensor(samples)[None]

        input_txt = input_txt.to(self.device)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]

        with torch.no_grad():
            for i in range(text_len):
                out, init_state = self.model(model_input, init_state)
                prediction = self.pick_top_n(out.data)
                model_input = torch.LongTensor(prediction)[None].to(
                    self.device)
                result.append(prediction[0])

        print(self.convert.arr_to_text(result))

    def predict(self):
        self.model.eval()
        samples = [self.convert.word_to_int(c) for c in self.args.begin]
        input_txt = torch.LongTensor(samples)[None].to(self.device)
        _, init_state = self.model(input_txt)
        result = samples
        model_input = input_txt[:, -1][:, None]

        with torch.no_grad():
            for i in range(self.args.predict_len):
                out, init_state = self.model(model_input, init_state)
                prediction = self.pick_top_n(out.data)
                model_input = torch.LongTensor(prediction)[None].to(
                    self.device)
                result.append(prediction[0])

        print(self.convert.arr_to_text(result))

    def run(self):
        for e in range(self.args.max_epoch):
            print('===> EPOCH: {}/{}'.format(e + 1, self.args.max_epoch))
            self.train()
            self.test()
            self.save_checkpoint(e)
        self.save()
Example #9
def train(opt, x_train, x_val, dictionary_len):
    ''' Trains a character-level RNN for text prediction.

        Arguments
        ---------

        opt: parsed command-line options (batch_size, seq_length, lr, dropout,
             hidden_size, layers, scheduler, onlytrain, nologs, ...)
        x_train: training text
        x_val: validation text (ignored when opt.onlytrain is set)
        dictionary_len: size of the character vocabulary

    '''
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    # Declaring the hyperparameters
    batch_size = opt.batch_size
    seq_length = int(opt.seq_length)
    epochs = 50

    if torch.cuda.is_available():
        device = "cuda"
        torch.cuda.manual_seed_all(0)
    else:
        device = "cpu"
    print(device)

    date = datetime.now().strftime('%y%m%d%H%M%S')
    if opt.nologs:
        writer = SummaryWriter(log_dir=f'logs/nologs/')
    else:
        writer = SummaryWriter(log_dir=f'logs/logs_{date}/')

    y_train = get_labels_text_prediction(x_train)

    train_dataset = TextDataset(x_train, y_train, max_len=seq_length)

    if not opt.onlytrain:
        y_val = get_labels_text_prediction(x_val)
        val_dataset = TextDataset(x_val, y_val, max_len=seq_length)
        val_loader = DataLoader(dataset=val_dataset,
                                pin_memory=device == 'cuda',
                                batch_size=batch_size,
                                shuffle=False)

    train_loader = DataLoader(dataset=train_dataset,
                              pin_memory=device == 'cuda',
                              batch_size=batch_size,
                              shuffle=True)

    model_params = {
        'dictionary_len': dictionary_len,
        'dropout': opt.dropout,
        'hidden_size': opt.hidden_size,
        'layers': opt.layers,
        'embedding_len': 32,
        'device': device,
        'lr': opt.lr
    }

    model = CharRNN(**model_params).to(device)
    print(model)
    # embed()
    # summary(model, input_size=(channels, H, W))
    # summary(model, input_size=(dictionary_len, 28, 28))

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.scheduler:
        scheduler = ReduceLROnPlateau(optimizer,
                                      'min',
                                      cooldown=3,
                                      factor=0.5,
                                      patience=10)

    global_step = 0
    for j in trange(epochs, desc='Training LSTM...'):

        for i, (x, y) in enumerate(train_loader):
            if i == len(train_loader) - 1:
                print("TODO: pad the last incomplete batch - not handled yet")
                continue
            model.train()

            x = x.to(device)
            y = y.to(device)

            # state_h, state_c = model.zero_state(opt.batch_size)
            # # Transfer data to GPU
            # state_h = state_h.to(device)
            # state_c = state_c.to(device)

            # DELETE PAST GRADIENTS
            optimizer.zero_grad()
            # FORWARD PASS --> last state, (all states)  [ state_h[-1] == pred ]
            pred, (state_h, state_c) = model(x)
            # pred, (state_h, state_c) = model(x, (state_h, state_c))
            # CALCULATE LOSS
            # pred = pred.transpose(1, 2)
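            # Flatten predictions to [batch * seq_len, dictionary_len] and targets
            # to [batch * seq_len] to match what nn.CrossEntropyLoss expects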
            pred2 = pred.view(-1, dictionary_len)
            y2 = y.view(-1)
            loss = criterion(pred2, y2)
            loss_value = loss.item()

            # BACKWARD PASS
            loss.backward()
            # MINIMIZE LOSS
            optimizer.step()
            global_step += 1
            if i % 100 == 0:
                writer.add_scalar('train/loss', loss_value, global_step)
                print('[Training epoch {}: {}/{}] Loss: {}'.format(
                    j, i, len(train_loader), loss_value))

        if not opt.onlytrain:
            val_loss = []

            for i, (x, y) in enumerate(val_loader):

                if i == len(val_loader) - 1:
                    # print("TODO: pad the last incomplete batch - not handled yet")
                    continue

                model.eval()

                x = x.to(device)
                y = y.to(device)

                # state_h, state_c = model.zero_state(opt.batch_size)
                # state_h = state_h.to(device)
                # state_c = state_c.to(device)

                # NO BACKPROPAGATION
                # FORWARD PASS
                # pred, (state_h, state_c) = model(x, (state_h, state_c))
                pred, (state_h, state_c) = model(x)
                # CALCULATE LOSS
                # pred = pred.transpose(1, 2)

                # pred = [batch x 40 x dictionary_len]
                # y =  [batch x 40]
                pred2 = pred.view(-1, dictionary_len)
                y2 = y.view(-1)
                loss = criterion(pred2, y2)
                # loss = criterion(pred, y)

                val_loss.append(loss.item())

                if i % 50 == 0:
                    print('[Validation epoch {}: {}/{}] Loss: {}'.format(
                        j, i, len(val_loader), loss.item()))

            writer.add_scalar('val/loss', np.mean(val_loss), j)

            if opt.scheduler:
                scheduler.step(np.mean(val_loss))
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], j)

            predicted_words = inference_prediction(model, device, 500)
            # output = pred[0].unsqueeze(0)  # [1, dictionary_len, 40]
            # predicted_words = do_inference_test(output, model, device)
            print(predicted_words)
            writer.add_text('val/Generated_Samples', predicted_words, j)

        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }

        # if j % 5 == 0:
        os.makedirs("weights/{}".format(date), exist_ok=True)
        torch.save(checkpoint, "weights/{}/checkpoint_{}.pt".format(date, j))
Example #10
def train(filename, rnn_type, num_layers, dropout, emb_size, hidden_size,
          num_epochs, batch_size, learning_rate, num_samples, seed_phrase,
          sample_every, checkpoint_path):
    """ Trains a character-level Recurrent Neural Network in PyTorch.

    Args: optional arguments [python train.py --help]
    """
    logging.info('reading `{}` for character sequences'.format(filename))
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=filename)

    # Pin '~' to index 0 and '#' to the last index, then rebuild the token-to-index map
    idx_to_token.remove('~')
    idx_to_token.remove('#')
    idx_to_token = ['~'] + idx_to_token + ['#']
    token_to_idx = {token: idx_to_token.index(token) for token in idx_to_token}

    logging.info(idx_to_token)
    logging.info(token_to_idx)

    n_tokens = len(idx_to_token)
    max_length = inputs.size(1)

    logging.debug('creating char-level RNN model')
    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    logging.debug('defining model training operations')
    # define training procedures and operations for training the model
    criterion = nn.NLLLoss(reduction='mean')
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     min_lr=1e-6,
                                                     factor=0.1,
                                                     patience=7,
                                                     verbose=True)

    # train-val-test split of the dataset
    split_index = int(0.9 * inputs.size(0))
    train_tensors, inputs = inputs[:split_index], inputs[split_index:]
    split_index = int(0.5 * inputs.size(0))
    val_tensors, test_tensors = inputs[:split_index], inputs[split_index:]
    del inputs
    logging.info('train tensors: {}'.format(train_tensors.size()))
    logging.info('val tensors: {}'.format(val_tensors.size()))
    logging.info('test tensors: {}'.format(test_tensors.size()))

    logging.debug('training char-level RNN model')
    # loop over epochs
    for epoch in range(1, num_epochs + 1):
        epoch_loss, n_iter = 0.0, 0
        # loop over batches
        for tensors in tqdm(iterate_minibatches(train_tensors,
                                                batchsize=batch_size),
                            desc='Epoch[{}/{}]'.format(epoch, num_epochs),
                            leave=False,
                            total=train_tensors.size(0) // batch_size):
            # optimize model parameters
            epoch_loss += optimize(model, tensors, max_length, n_tokens,
                                   criterion, optimizer)
            n_iter += 1
        # evaluate model after every epoch
        val_loss = evaluate(model, val_tensors, max_length, n_tokens,
                            criterion)
        # lr_scheduler decreases lr when stuck at local minima
        scheduler.step(val_loss)
        # log epoch status info
        logging.info(
            'Epoch[{}/{}]: train_loss - {:.4f}   val_loss - {:.4f}'.format(
                epoch, num_epochs, epoch_loss / n_iter, val_loss))

        # sample from the model every few epochs
        if epoch % sample_every == 0:
            print(
                'Epoch[{}/{}]: train_loss - {:.4f}   val_loss - {:.4f}'.format(
                    epoch, num_epochs, epoch_loss / n_iter, val_loss))
            for _ in range(num_samples):
                sample = generate_sample(model,
                                         token_to_idx,
                                         idx_to_token,
                                         max_length,
                                         n_tokens,
                                         seed_phrase=seed_phrase)
                logging.debug(sample)

        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        best_model_path = checkpoint_path
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)
Example #11
    with open('./data/names.txt') as f:
        names = f.read().split('\n')

    vocab = build_vocab_from_list(names)

    names_dataset = NamesDataset(vocab, names)
    dataloader = DataLoader(names_dataset,
                            batch_size=32,
                            shuffle=True,
                            collate_fn=collate)

    model = CharRNN(len(vocab), 256, 2)
    model.to(config.device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    n_epoch = 8

    total_loss_item = 0
    total_loss = 0

    total_mini_batch = len(dataloader)

    for e in range(n_epoch):
        for i, example in enumerate(dataloader):
            for j in range(len(example[0])):
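                # Train on one (input, target) pair at a time within the collated batch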
                input, target = example[0][j], example[1][j]
                output, loss = train(model, optimizer, criterion, input,
                                     target)
                total_loss += loss
Example #12
def train(filename="poets.txt", hidden_size=128, n_layers=2,
          learning_rate=0.01, n_epochs=10000, chunk_len=20, batch_size=1024,
          print_every=100):
    #%% Global Configuration
    file, file_len, all_characters, n_characters = helpers.read_file( filename )
    
    sentences = file.split("\n")
    
    print("There are %d unique characters in the dataset" % n_characters)
    print("There are %d sentences in the dataset with a total of %d characters" % (len(sentences), len(file)))
    
    #%% Model Saving and Loading
    model_filename = helpers.pt_name

    if os.path.exists( model_filename ):
        decoder = load( model_filename )
    else:
        decoder = CharRNN(
            n_characters,
            hidden_size,
            n_characters,
            model = helpers.mcell,
            n_layers=n_layers,
        )
        
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    
    if helpers.USE_CUDA: decoder.cuda()
    
    start = time.time()
    all_losses = []
    
    try:
        print("Training for %d epochs..." % n_epochs)
        for epoch in range(n_epochs):
            
            # Halve the learning rate every 1000 epochs by recreating the optimizer
            if epoch != 0 and epoch % 1000 == 0:
                learning_rate /= 2
                decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=learning_rate)

            inp, target = random_training_set( sentences, chunk_len, batch_size )
            
            loss = train_one_entry(decoder, decoder_optimizer, criterion, 
                                        inp, target, chunk_len, batch_size )
            
            all_losses.append( loss )
    
            if epoch != 0 and epoch % print_every == 0:
                print('%s: [%s (%d %d%%) %.4f]' % ( time.ctime(), helpers.time_since(start), epoch, epoch / n_epochs * 100, loss))
                print(generate(decoder, '新年', 100, cuda= helpers.USE_CUDA), '\n')
                    
                save( decoder, model_filename )
    
    except KeyboardInterrupt:
        save( decoder, model_filename )
        
        
    import matplotlib.pyplot as plt
    plt.plot( all_losses )
    plt.xlabel( "iteration" )
    plt.ylabel( "train loss" )
Example #13
def main(mode="RNN"):
    """
     Main function

        Here, you should instantiate
        1) DataLoaders for training and validation.
            Try SubsetRandomSampler to create these DataLoaders.
        2) model
        3) optimizer
        4) cost function: use torch.nn.CrossEntropyLoss
    """

    # write your codes here
    start_time = time.time()
    data = "./shakespeare_train.txt"
    data_set = dataset.Shakespeare(data)
    all_characters = string.printable
    input_size = len(all_characters)

    if mode == "RNN":
        models_RNN = CharRNN(input_size, 512, input_size, 4).cuda()
        optim = torch.optim.Adam(models_RNN.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()
    elif mode == "LSTM":
        models_LSTM = CharLSTM(input_size, 512, input_size, 4).cuda()
        optim = torch.optim.Adam(models_LSTM.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()
    else:
        raise NotImplementedError

    all_losses = []
    loss_avg = 0
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    #sampler = SubsetRandomSampler()
    total_idx = list(range(len(data_set)))
    split_idx = int(len(data_set) * 0.7)
    trn_idx = total_idx[:split_idx]
    val_idx = total_idx[split_idx:]
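    # Sequential 70/30 split of indices; SubsetRandomSampler shuffles within each subset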

    trn_loader = da.DataLoader(data_set,
                               batch_size=64,
                               sampler=SubsetRandomSampler(trn_idx))
    val_loader = da.DataLoader(data_set,
                               batch_size=64,
                               sampler=SubsetRandomSampler(val_idx))
    losses = []
    val_losses = []
    for epoch in range(1, 51):
        if mode == "RNN":
            loss = train(models_RNN,
                         trn_loader,
                         device,
                         criterion,
                         optim,
                         epoch,
                         input_size,
                         mode="RNN")
            val_loss = validate(models_RNN, val_loader, device, criterion,
                                epoch, input_size)
        elif mode == "LSTM":
            loss = train(models_LSTM,
                         trn_loader,
                         device,
                         criterion,
                         optim,
                         epoch,
                         input_size,
                         mode="LSTM")
            val_loss = validate(models_LSTM, val_loader, device, criterion,
                                epoch, input_size)
        losses.append(loss)
        val_losses.append(val_loss)

    return losses, val_losses
Example #14
    if args.cuda:
        print("Using CUDA")

    file, file_len = read_file(args.filename)

    n_characters = len(string.printable)

    decoder = CharRNN(
        n_characters,
        args.hidden_size,
        n_characters,
        model=args.model,
        n_layers=args.n_layers,
    )

    decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                         lr=args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    if args.cuda:
        decoder.cuda()

    start = time.time()
    all_losses = []
    loss_avg = 0

    try:
        print("Training for %d epochs..." % args.n_epochs)
        for epoch in tqdm(range(1, args.n_epochs + 1)):
            loss = train(*random_training_set(args.chunk_len, args.batch_size))
            loss_avg += loss
Example #15
# Build the character-to-index vocabulary
vo={}
for sentence in data:
    for word in sentence:
        if word not in vo:
            vo[word]=len(vo)
vo['<EOP>']=len(vo)
vo['<START>']=len(vo)

for i in range(len(data)):
    data[i]=toList(data[i])
    data[i].append("<EOP>")  # append an end-of-poem token to the end of each poem

model=CharRNN(len(vo),256,256)
optimizer=opt.RMSprop(model.parameters(),lr=0.01,weight_decay=0.0001)  # RMSprop optimizer: lr = learning rate, weight_decay = L2 penalty
criterion=nn.NLLLoss()  # negative log-likelihood loss

one_hot_var_target={}
for a in vo:
    one_hot_var_target.setdefault(a,make_one_hot_vec_target(a,vo))
# setdefault only sets the value if the key is not already present;
# this appears to map each character to its one-hot target tensor

epoch=10
batch=10
Train_size=len(data)

def test():
    loss=0
    counts=0
    v=int(Train_size/batch)