Example #1
    def __init__(self, args):
        super(LSTNetModel, self).__init__()

        # Parameter Assignment
        self.train_set = Data(args, mode='train')
        self.valid_set = Data(args, mode='valid', scaler=self.train_set.scaler)
        self.epoch = 0
        self.n_epoch = args.epochs
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.input_dim = self.train_set.raw_dat.shape[1]
        self.criterion = nn.L1Loss(
            reduction='sum') if args.L1Loss else nn.MSELoss(
                reduction='sum')
        self.model = LSTNet(args, self.input_dim).to(args.device)
        self.optimizer = Optim(self.model.parameters(), args.optim, args.lr,
                               args.clip)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size, gamma)

        # Per-epoch training loss plus sklearn regression metrics
        self.eval = {
            'loss': [],
            'mean_absolute_error': [],
            'explained_variance_score': [],
            'mean_squared_error': [],
            'median_absolute_error': [],
            'r2_score': [],
        }
Example #2
class LSTMModel(BaseEstimator, RegressorMixin):
    def __init__(self, args):

        super(LSTMModel, self).__init__()

        self.train_set = Data(args, mode='train')
        self.valid_set = Data(args, mode='valid', scaler=self.train_set.scaler)
        self.n_epoch = args.epochs
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.loss_hist = []
        self.criterion = nn.L1Loss(
            reduction='sum') if args.L1Loss else nn.MSELoss(
                reduction='sum')
        self.model = LSTM(args, self.train_set.m - 1).to(
            args.device)  # Note: only needed when predicting Y from X!
        self.optimizer = Optim(self.model.parameters(), args.optim, args.lr,
                               args.clip)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size, gamma)

    def fit(self):

        self.model.train()
        for i in range(self.n_epoch):

            self.loss_hist.append(0)
            train_loader = DataLoader(dataset=self.train_set,
                                      batch_size=self.batch_size,
                                      shuffle=True)
            n_batches = 0

            for batch_X, batch_y in train_loader:

                self.model.zero_grad()
                output = self.model(batch_X)
                loss = self.criterion(output, batch_y.reshape((-1, 1)))
                self.loss_hist[-1] += loss.item()
                loss.backward()
                self.optimizer.step()

                n_batches += 1

            # self.scheduler.step()

            print('Epoch: {}, Loss: {}'.format(
                i + 1, self.loss_hist[-1] / n_batches / self.batch_size))

        print('Optimization finished!')

        return self
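
A minimal usage sketch for the estimator above. The argparse.Namespace fields and their values are illustrative assumptions (the class only requires that these attributes exist), and Data, LSTM, and Optim come from the project's own modules.

import argparse

# Hypothetical hyperparameters; the field names mirror what __init__ and fit() read.
args = argparse.Namespace(epochs=50, batch_size=32, lr=1e-3, L1Loss=False,
                          optim='adam', clip=10.0, device='cpu')

model = LSTMModel(args).fit()  # fit() returns self, sklearn-style
print(model.loss_hist[-1])     # summed training loss of the final epoch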
Example #3
    def __init__(self, args):

        super(LSTMModel, self).__init__()

        self.train_set = Data(args, mode='train')
        self.valid_set = Data(args, mode='valid', scaler=self.train_set.scaler)
        self.n_epoch = args.epochs
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.loss_hist = []
        self.criterion = nn.L1Loss(
            reduction='sum') if args.L1Loss else nn.MSELoss(
                reduction='sum')
        self.model = LSTM(args, self.train_set.m - 1).to(
            args.device)  # Note: only needed when predicting Y from X!
        self.optimizer = Optim(self.model.parameters(), args.optim, args.lr,
                               args.clip)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size, gamma)
Example #4
def main():

    print("Loading data from '%s'" % opt.data)

    dataset = torch.load(opt.data)

    dict_checkpoint = opt.train_from if opt.train_from else opt.train_from_state_dict
    if dict_checkpoint:
        print('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        dataset['dicts'] = checkpoint['dicts']

    trainData = Dataset(dataset['train']['src'], dataset['train']['tgt'],
                        opt.batch_size, opt.gpus)
    validData = Dataset(dataset['valid']['src'],
                        dataset['valid']['tgt'],
                        opt.batch_size,
                        opt.gpus,
                        volatile=True)

    dicts = dataset['dicts']
    print(' * vocabulary size. source = %d; target = %d' %
          (len(dicts["word2index"]['src']), len(dicts["word2index"]['tgt'])))
    print(' * number of training sentences. %d' % len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')

    encoder = Model.Encoder(opt, len(dicts["word2index"]['src']))
    decoder = Model.Decoder(opt, len(dicts["word2index"]['tgt']))

    generator = nn.Sequential(
        nn.Linear(opt.hidden_size * 2, len(dicts["word2index"]['tgt'])),
        nn.LogSoftmax(dim=-1))

    model = Model.NMTModel(encoder, decoder)

    if opt.train_from:
        print('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        model_state_dict = {
            k: v
            for k, v in chk_model.state_dict().items() if 'generator' not in k
        }
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        print('Loading model from checkpoint at %s' %
              opt.train_from_state_dict)
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        opt.start_epoch = checkpoint['epoch'] + 1

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
        generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)

    model.generator = generator

    if not opt.train_from_state_dict and not opt.train_from:
        for p in model.parameters():
            p.data.uniform_(-opt.param_init, opt.param_init)

        encoder.load_pretrained_vectors(opt)
        decoder.load_pretrained_vectors(opt)

        optim = Optim(opt.optim,
                      opt.learning_rate,
                      opt.max_grad_norm,
                      lr_decay=opt.learning_rate_decay,
                      start_decay_at=opt.start_decay_at)
    else:
        print('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        print(optim)

    optim.set_parameters(model.parameters())

    if opt.train_from or opt.train_from_state_dict:
        optim.optimizer.load_state_dict(
            checkpoint['optim'].optimizer.state_dict())

    nParams = sum([p.nelement() for p in model.parameters()])
    print('* number of parameters: %d' % nParams)

    criterion = NMTCriterion(len(dicts["word2index"]['tgt']))

    trainModel(model, trainData, validData, dataset, optim, criterion)
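
All of these examples route parameter updates through an Optim wrapper instead of a bare torch.optim optimizer. The sketch below reconstructs the kind of interface the calls above imply (the constructor arguments, set_parameters, step, and a learning-rate decay hook); the internals are an assumption inferred from usage, not any of these projects' actual implementation.

import torch

class Optim(object):
    def __init__(self, method, lr, max_grad_norm=None,
                 lr_decay=1.0, start_decay_at=None):
        self.method = method
        self.lr = lr
        self.max_grad_norm = max_grad_norm
        self.lr_decay = lr_decay
        self.start_decay_at = start_decay_at

    def set_parameters(self, params):
        # Keep only trainable parameters and build the wrapped optimizer.
        self.params = [p for p in params if p.requires_grad]
        if self.method == 'sgd':
            self.optimizer = torch.optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = torch.optim.Adam(self.params, lr=self.lr)
        else:
            raise ValueError('unsupported method: %s' % self.method)

    def step(self):
        # Clip the global gradient norm before updating, if configured.
        if self.max_grad_norm:
            torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm)
        self.optimizer.step()

    def update_learning_rate(self, epoch):
        # Assumed policy: decay lr once per epoch after start_decay_at.
        if self.start_decay_at is not None and epoch >= self.start_decay_at:
            self.lr *= self.lr_decay
            for group in self.optimizer.param_groups:
                group['lr'] = self.lr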
Example #5
    # dis = discriminator.Discriminator(emb_src, emb_tgt, emb_ans, gpu=CUDA)
    # dis = discriminator.PQANet(emb_src, emb_tgt)
    # dis = discriminator.TransormerNet(emb_src, emb_tgt)
    dis = discriminator.BiLSTM(emb_src, emb_tgt)

    print(dis)

    if CUDA:
        enc = enc.to(device)
        dec = dec.to(device)
        gen = gen.to(device)
        dis = dis.to(device)

    # GENERATOR MLE TRAINING
    # print('Starting Generator MLE Training...')
    gen_optimizer = Optim('myadam', 1e-3, lr_decay=0.5, start_decay_at=8, max_grad_norm=5)
    gen_optimizer.set_parameters(gen.parameters())
    # train_generator_MLE(gen, gen_optimizer, train_iter, MLE_TRAIN_EPOCHS)

    print('load the best metric model')
    gen.load_state_dict(torch.load('./model/params.pkl'))
    print('evaluating the best model')
    gen.eval()
    # print("Set gen to {0} mode".format('train' if model.decoder.dropout.training else 'eval'))
    # valid_bleu = evaluation.evalModel(gen, val_iter, 100, rev, src_special, tgt_special, tgt_ref, src_rev)
    # print('Validation bleu-4 of the best model= %g' % (valid_bleu * 100))

    emb_ans.weight.requires_grad = False
    # torch.save(gen.state_dict(), pretrained_gen_path)
    # gen.load_state_dict(torch.load(pretrained_gen_path))

    # PRETRAIN DISCRIMINATOR
    print('\nStarting Discriminator Training...')
    # dis_optimizer = optim.Adam(dis.parameters(), lr=1e-3)
    dis_optimizer = Optim('adam', 1e-3, lr_decay=0.5, max_weight_value=1.0)
    dis_optimizer.set_parameters(dis.parameters())
    train_discriminator(dis, dis_optimizer, train_iter, gen, pretrain_acc,
                        PRETRAIN_DISC_EPOCHS)

    # torch.save(dis.state_dict(), pretrained_dis_path)
    # dis.load_state_dict(torch.load(pretrained_dis_path))
    # ADVERSARIAL TRAINING
    pg_count = 10000
    best_advbleu = 0

    pg_optimizer = Optim('myadam', 1e-3, max_grad_norm=5)
    pg_optimizer.set_parameters(gen.parameters())
    gen_optimizer.reset_learningrate(1e-3)
    dis_optimizer.reset_learningrate(1e-3)
Example #6
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed(args.seed)
cudnn.benchmark = True

Data = Data_utility(args)

# setup model and optimizer
# args.model holds the name of an already-imported module exposing a Model class
model = eval(args.model).Model(args, Data)
model.cuda()

loss = nn.CrossEntropyLoss()
optimizer = Optim(model.parameters(), 'sgd', lr=0.1, weight_decay=1e-4)
nParams = sum([p.nelement() for p in model.parameters()])
print('* number of parameters: %d' % nParams)

total_time = 0.0
best_loss = 0.0
state = None
for iteration in range(max_iter + 1):
    # training
    model.train()
    train_res = 0.0
    time_st = timeit.default_timer()
    for data in Data.get_batches([Data.x_train, Data.y_train],
                                 batch_size,
                                 shuffle=True):
        inputs = data[0]
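        # The listing is truncated here. A plausible continuation of the batch
        # loop, mirroring the pattern in the other examples (an illustrative
        # sketch, not this project's actual code):
        targets = data[1]
        model.zero_grad()
        outputs = model(inputs)
        batch_loss = loss(outputs, targets)  # the nn.CrossEntropyLoss defined above
        batch_loss.backward()
        optimizer.step()
        train_res += batch_loss.item()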
Example #7
class LSTNetModel(BaseEstimator, RegressorMixin):

    # Initialization
    def __init__(self, args):
        super(LSTNetModel, self).__init__()

        # Parameter Assignment
        self.train_set = Data(args, mode='train')
        self.valid_set = Data(args, mode='valid', scaler=self.train_set.scaler)
        self.epoch = 0
        self.n_epoch = args.epochs
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.input_dim = self.train_set.raw_dat.shape[1]
        self.criterion = nn.L1Loss(
            reduction='sum') if args.L1Loss else nn.MSELoss(
                reduction='sum')
        self.model = LSTNet(args, self.input_dim).to(args.device)
        self.optimizer = Optim(self.model.parameters(), args.optim, args.lr,
                               args.clip)
        # self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size, gamma)

        # Per-epoch training loss plus sklearn regression metrics
        self.eval = {
            'loss': [],
            'mean_absolute_error': [],
            'explained_variance_score': [],
            'mean_squared_error': [],
            'median_absolute_error': [],
            'r2_score': [],
        }

    def fit(self):

        self.model.train()
        for i in range(self.n_epoch):

            self.eval['loss'].append(0)
            train_loader = DataLoader(dataset=self.train_set,
                                      batch_size=self.batch_size,
                                      shuffle=True)

            idx = 0
            for batch_X, batch_y in train_loader:

                self.model.zero_grad()
                output = self.model(batch_X)
                loss = self.criterion(output, batch_y)  # output and batch_y share the shape (batch, input_dim)
                self.eval['loss'][-1] += loss.item()
                loss.backward()
                self.optimizer.step()
                idx += 1

            # self.scheduler.step()
            self.epoch += 1
            print('Epoch: {}, Loss: {}'.format(
                self.epoch, self.eval['loss'][-1] / idx / self.batch_size /
                self.input_dim))

        print('Optimization finished!')

    def valid(self):

        with torch.no_grad():
            self.model.eval()

            batchy_list = torch.tensor([],
                                       dtype=torch.float32,
                                       device=args.device)
            output_list = torch.tensor([],
                                       dtype=torch.float32,
                                       device=args.device)
            valid_loader = DataLoader(dataset=self.valid_set,
                                      batch_size=self.batch_size,
                                      shuffle=False)
            loss_valid = 0
            idx = 0

            for batch_X, batch_y in valid_loader:

                output = self.model(batch_X)
                loss = self.criterion(output, batch_y)  # shape
                loss_valid += loss.item()

                batchy_list = torch.cat((batchy_list, batch_y), axis=0)
                output_list = torch.cat((output_list, output), axis=0)
                idx += 1

            batchy_list = batchy_list.cpu()
            output_list = output_list.cpu()
            self.eval['mean_squared_error'].append(
                mean_squared_error(batchy_list,
                                   output_list,
                                   multioutput='raw_values'))
            self.eval['mean_absolute_error'].append(
                mean_absolute_error(batchy_list,
                                    output_list,
                                    multioutput='raw_values'))
            self.eval['median_absolute_error'].append(
                median_absolute_error(batchy_list,
                                      output_list,
                                      multioutput='raw_values'))
            self.eval['explained_variance_score'].append(
                explained_variance_score(batchy_list,
                                         output_list,
                                         multioutput='raw_values'))
            self.eval['r2_score'].append(
                r2_score(batchy_list, output_list, multioutput='raw_values'))

        print('Epoch: {}, Valid Loss: {}'.format(
            self.epoch,
            self.eval['mean_squared_error'][-1].mean()))
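
A short driver for the estimator above, under the same assumption: args is an argparse.Namespace exposing at least the fields the class reads, with illustrative values.

import argparse

args = argparse.Namespace(epochs=30, batch_size=64, lr=1e-3, L1Loss=False,
                          optim='adam', clip=10.0, device='cpu')

est = LSTNetModel(args)
est.fit()    # trains for args.epochs epochs, logging the normalized loss
est.valid()  # one validation pass; appends per-output sklearn metrics to est.eval
print(est.eval['r2_score'][-1])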
Example #8
if args.L1Loss:
    criterion = nn.L1Loss(reduction='sum')
else:
    criterion = nn.MSELoss(reduction='sum')
evaluateL2 = nn.MSELoss(reduction='sum')
evaluateL1 = nn.L1Loss(reduction='sum')
if args.cuda:
    criterion = criterion.cuda()
    evaluateL1 = evaluateL1.cuda()
    evaluateL2 = evaluateL2.cuda()

best_val = 10000000
optim = Optim(
    model.parameters(),
    args.optim,
    args.lr,
    args.clip,
)

# At any point you can hit Ctrl + C to break out of training early.
try:
    print('Start training....')
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train_loss = train(Data, Data.train[0], Data.train[1], model,
                           criterion, optim, args.batch_size)
        val_loss, val_rae, val_corr = evaluate(Data, Data.valid[0],
                                               Data.valid[1], model,
                                               evaluateL2, evaluateL1,
                                               args.batch_size)
        print(
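
# The listing above is cut off inside the epoch loop. Per the comment before
# the try block, such scripts conventionally close with a KeyboardInterrupt
# handler along these lines (a sketch, not this project's exact code):
except KeyboardInterrupt:
    print('-' * 80)
    print('Exiting from training early')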
Example #9
                        self.linear2, \
                        self.linear3, \
                        self.linear4]

    def forward(self, inputs):
        l0 = inputs[0]
        l1 = fn.sigmoid(self.linear1(l0))
        l2 = fn.sigmoid(self.linear2(l1))
        l3 = fn.sigmoid(self.linear3(l2))
        l4 = self.linear4(l3)
        return l4


net = Net()
batch_size = 100
optim = Optim(net, 0.001 / batch_size)

for _ in range(1000):
    optim.zero_grad()
    loss = Var(np.array([0.0]))
    for i in range(batch_size):
        x = np.array([np.random.uniform(-1, 1)])
        y = x**2
        var_x, var_y = Var(x), Var(y)
        output = net([var_x])
        loss_ = fn.mse_loss(output, var_y)
        loss = fn.add(loss, loss_)
    print(loss.data / batch_size)
    loss.backward()
    optim.step()