def initialize_model(gpu, vocab_size, v_vec, emb_requires_grad, args):
    emb_dim = args.emb_dim
    h_dim = None
    class_num = 2
    is_gpu = (gpu != -1)
    if args.emb_type == 'ELMo' or args.emb_type == 'ELMoForManyLangs':
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type,
                        elmo_model_dir=args.emb_path)
    else:  # 'None' and any other embedding type build the model the same way here
        bilstm = BiLSTM(emb_dim,
                        h_dim,
                        class_num,
                        vocab_size,
                        is_gpu,
                        v_vec,
                        emb_type=args.emb_type)
    if is_gpu:
        bilstm = bilstm.cuda()

    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)

    if args.emb_type not in ('ELMo', 'ELMoForManyLangs', 'None'):
        for param in bilstm.word_embed.parameters():
            param.requires_grad = emb_requires_grad

    return bilstm
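A minimal usage sketch for this helper, assuming an argparse-style args namespace; the argument values below are illustrative only and do not come from the original project:

# Hypothetical call; values are placeholders.
from argparse import Namespace

args = Namespace(emb_dim=200, emb_type='Word2Vec', emb_path=None)
bilstm = initialize_model(gpu=0,                       # -1 keeps the model on CPU
                          vocab_size=30000,
                          v_vec=None,                  # or a pretrained embedding matrix
                          emb_requires_grad=False,     # freeze the pretrained embeddings
                          args=args)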
Example 2
def initialize_model(gpu, vocab_size, v_vec, dropout_ratio, n_layers, model,
                     statistics_of_each_case_type):
    is_gpu = (gpu != -1)
    if model in ('Base', 'FT'):
        bilstm = BiLSTM(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'OneH':
        bilstm = OneHot(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'FA':
        bilstm = FeatureAugmentation(vocab_size,
                                     v_vec,
                                     dropout_ratio,
                                     n_layers,
                                     gpu=is_gpu)
    elif model == 'CPS':
        bilstm = ClassProbabilityShift(
            vocab_size,
            v_vec,
            dropout_ratio,
            n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    elif model == 'MIX':
        bilstm = Mixture(
            vocab_size,
            v_vec,
            dropout_ratio,
            n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    else:
        raise ValueError('unknown model type: {}'.format(model))
    if is_gpu:
        bilstm = bilstm.cuda()

    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)

    return bilstm
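A sketch of how this dispatcher might be called; the argument values and the statistics object are assumptions, not taken from the original project:

# 'CPS' and 'MIX' are the only model types that consume the case-type statistics.
stats = None  # in the real project: per-case-type label statistics
bilstm = initialize_model(gpu=-1,            # run on CPU
                          vocab_size=30000,
                          v_vec=None,
                          dropout_ratio=0.2,
                          n_layers=1,
                          model='FA',
                          statistics_of_each_case_type=stats)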
Example 3
    embeddings=embeddings,
    hidden_size=hidden_size,
    num_labels=len(vocab),  #num_labels,
    bidirectional=bidirectional,
    num_layers=num_layers,
    color_representation_size=54)

model_id = str(int(time.time())) + "w_fourier"
save_path = os.path.join(output_path, model_id)
if not os.path.isdir(save_path):
    os.makedirs(save_path)

writer = SummaryWriter(save_path)

if cuda:
    model.cuda()

print(model)

print(str(datetime.now()), 'Generating batches')
train_batches = BatchIterator(train_colors, vocab, batch_size, cuda=cuda)
test_batches = BatchIterator(test_colors, vocab, batch_size, cuda=cuda)

#optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
optimizer = torch.optim.Adagrad(model.parameters(), lr=0.5)

pbar = tqdm.trange(epochs, desc='Training...')

delta = 0
delta_test = 0
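The snippet ends just before the training loop; below is a sketch of how these pieces are typically wired together. The batch format yielded by BatchIterator and the model's loss interface are assumptions here:

for epoch in pbar:
    epoch_loss = 0.0
    for batch in train_batches:              # assumed to yield ready-to-use tensors
        optimizer.zero_grad()
        loss = model(*batch)                 # assumed: the model returns its training loss
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    writer.add_scalar('loss/train', epoch_loss, epoch)   # TensorBoard logging
    pbar.set_description('Training... loss {:.4f}'.format(epoch_loss))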
Example 4
class Seq_MNIST_Trainer():

    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params
        
        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
            torch.cuda.manual_seed_all(trainer_params.random_seed)

        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}    
        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params) 
        self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size, shuffle=True, **kwargs)
        self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size, shuffle=True, **kwargs)
        self.starting_epoch = 1
        self.prev_loss = 10000
    
        self.model = BiLSTM(trainer_params) 
        self.criterion = wp.CTCLoss(size_average=True)
        self.labels = list(range(trainer_params.num_classes - 1))
        self.decoder = seq_mnist_decoder(labels=self.labels)

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.save_path))
            package = torch.load(args.save_path, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()

        self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)

        if args.resume:
            self.optimizer.load_state_dict(package['optim_dict']) 
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")

    def serialize(self, model, trainer_params, optimizer, starting_epoch, prev_loss):
        package = {'state_dict': model.state_dict(),
            'trainer_params': trainer_params,
            'optim_dict' : optimizer.state_dict(),
            'starting_epoch' : starting_epoch,
            'prev_loss': prev_loss
        }
        return package

    def save_model(self, epoch, loss_value):
        print("Model saved at: {}\n".format(self.args.save_path))
        self.prev_loss = loss_value
        torch.save(self.serialize(model=self.model, trainer_params=self.trainer_params, 
            optimizer=self.optimizer, starting_epoch=epoch + 1, prev_loss=self.prev_loss), self.args.save_path)


    def train(self, epoch):
        self.model.train()
        for i, item in enumerate(self.train_loader):
            data, labels, output_len, lab_len = item
            
            data = Variable(data.transpose(1,0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            
            if self.args.cuda:
                data = data.cuda()
 
            output = self.model(data)

            # print("Input = ", data.shape)
            # print("model output (x) = ", output)
            # print("GTs (y) = ", labels.type())
            # print("model output len (xs) = ", output_len.type())
            # print("GTs len (ys) = ", lab_len.type())
            # exit(0)

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value = loss.data[0]
            print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}".format(epoch, 
                self.trainer_params.epochs, (i+1)*self.trainer_params.batch_size, len(self.train_data) , loss_value))
            
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            if self.args.cuda:
                torch.cuda.synchronize()                   

    def test(self, epoch=0, save_model_flag=False):
        self.model.eval()
        loss_value = 0
        for i, item in enumerate(self.val_loader):
            data, labels, output_len, lab_len = item
            
            data = Variable(data.transpose(1,0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            
            if self.args.cuda:
                data = data.cuda()

            output = self.model(data)
            
            # print("Input = ", data)
            # print("model output (x) = ", output.shape)
            # print("model output (x) = ", output)        
            # print("Label = ", labels)
            # print("model output len (xs) = ", output_len)
            # print("GTs len (ys) = ", lab_len)
            
            index = random.randint(0,self.trainer_params.test_batch_size-1)      
            label = labels[index*self.trainer_params.word_size:(index+1)*self.trainer_params.word_size].data.numpy()
            label = label-1
            prediction = self.decoder.decode(output[:,index,:], output_len[index], lab_len[index])
            accuracy = self.decoder.hit(prediction, label)

            print("Sample Label      = {}".format(self.decoder.to_string(label))) 
            print("Sample Prediction = {}".format(self.decoder.to_string(prediction)))
            print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value += loss.data.numpy()

        loss_value /= (len(self.val_data)//self.trainer_params.test_batch_size)
        print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
        
        if loss_value < self.prev_loss and save_model_flag:
            self.save_model(epoch, loss_value)

    def eval_model(self):
        self.test()


    def train_model(self):
        for epoch in range(self.starting_epoch, self.trainer_params.epochs + 1):
            self.train(epoch)
            self.test(epoch=epoch, save_model_flag=True)
            if epoch % 20 == 0:
                self.optimizer.param_groups[0]['lr'] *= 0.98

    def export_model(self, simd_factor, pe):
        self.model.eval()
        self.model.export('r_model_fw_bw.hpp', simd_factor, pe)

    def export_image(self, idx=100):
        img, label = self.val_data.images[:,idx,:], self.val_data.labels[0][idx]
        img = img.transpose(1, 0)
        label -= 1
        label = self.decoder.to_string(label)
        
        from PIL import Image
        from matplotlib import cm

        im = Image.fromarray(np.uint8(cm.gist_earth(img)*255))
        im.save('test_image.png')
        img = img.transpose(1, 0)

        img = np.reshape(img, (-1, 1))
        np.savetxt("test_image.txt", img, fmt='%.10f')
        f = open('test_image_gt.txt','w')
        f.write(label)
        f.close()
        print("Exported image with label = {}".format(label))
Example 5
def main(options):

    use_cuda = (len(options.gpuid) >= 1)
    if options.gpuid:
        cuda.set_device(options.gpuid[0])

    train, dev, test, vocab = torch.load(open(options.data_file, 'rb'),
                                         pickle_module=dill)

    batched_train, batched_train_mask, _ = utils.tensor.advanced_batchize(
        train, options.batch_size, vocab.stoi["<pad>"])
    batched_dev, batched_dev_mask, _ = utils.tensor.advanced_batchize(
        dev, options.batch_size, vocab.stoi["<pad>"])

    vocab_size = len(vocab)

    if options.load_file:
        rnnlm = torch.load(options.load_file)
    else:
        rnnlm = BiLSTM(vocab_size)
    if use_cuda:
        rnnlm.cuda()
    else:
        rnnlm.cpu()

    criterion = torch.nn.NLLLoss()
    optimizer = eval("torch.optim." + options.optimizer)(rnnlm.parameters(),
                                                         options.learning_rate)

    # main training loop
    last_dev_avg_loss = float("inf")
    rnnlm.train()
    for epoch_i in range(options.epochs):
        logging.info("At {0}-th epoch.".format(epoch_i))
        # srange generates a lazy sequence over a shuffled range
        for i, batch_i in enumerate(utils.rand.srange(len(batched_train))):

            train_batch = Variable(
                batched_train[batch_i])  # of size (seq_len, batch_size)
            train_mask = Variable(batched_train_mask[batch_i])
            if use_cuda:
                train_batch = train_batch.cuda()
                train_mask = train_mask.cuda()

            sys_out_batch = rnnlm(
                train_batch
            )  # (seq_len, batch_size, vocab_size) # TODO: substitute this with your module
            train_in_mask = train_mask.view(-1)
            train_in_mask = train_in_mask.unsqueeze(1).expand(
                len(train_in_mask), vocab_size)
            train_out_mask = train_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            train_out_batch = train_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(train_in_mask).view(
                -1, vocab_size)
            train_out_batch = train_out_batch.masked_select(train_out_mask)
            loss = criterion(sys_out_batch, train_out_batch)
            logging.debug("loss at batch {0}: {1}".format(i, loss.data[0]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # validation -- this is a crude estimation because there may be padding at the end
        dev_loss = 0.0
        rnnlm.eval()
        for batch_i in range(len(batched_dev)):
            dev_batch = Variable(batched_dev[batch_i], volatile=True)
            dev_mask = Variable(batched_dev_mask[batch_i], volatile=True)
            if use_cuda:
                dev_batch = dev_batch.cuda()
                dev_mask = dev_mask.cuda()

            sys_out_batch = rnnlm(dev_batch)
            dev_in_mask = dev_mask.view(-1)
            dev_in_mask = dev_in_mask.unsqueeze(1).expand(
                len(dev_in_mask), vocab_size)
            dev_out_mask = dev_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            dev_out_batch = dev_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(dev_in_mask).view(
                -1, vocab_size)
            dev_out_batch = dev_out_batch.masked_select(dev_out_mask)
            loss = criterion(sys_out_batch, dev_out_batch)
            dev_loss += loss
        dev_avg_loss = dev_loss / len(batched_dev)
        logging.info(
            "Average loss value per instance is {0} at the end of epoch {1}".
            format(dev_avg_loss.data[0], epoch_i))

        #if (last_dev_avg_loss - dev_avg_loss).data[0] < options.estop:
        #  logging.info("Early stopping triggered with threshold {0} (previous dev loss: {1}, current: {2})".format(epoch_i, last_dev_avg_loss.data[0], dev_avg_loss.data[0]))
        #  break
        torch.save(
            rnnlm,
            open(
                options.model_file +
                ".nll_{0:.2f}.epoch_{1}".format(dev_avg_loss.data[0], epoch_i),
                'wb'),
            pickle_module=dill)
        last_dev_avg_loss = dev_avg_loss
        rnnlm.train()  # switch back to training mode for the next epoch
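A minimal sketch of building the options object this main expects; the field names are taken from the code above, the defaults are placeholders:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_file', required=True)
parser.add_argument('--model_file', required=True)
parser.add_argument('--load_file', default=None)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--learning_rate', type=float, default=0.001)
parser.add_argument('--optimizer', default='Adam')
parser.add_argument('--estop', type=float, default=1e-3)
parser.add_argument('--gpuid', type=int, nargs='*', default=[])
options = parser.parse_args()
main(options)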
Example 6
                                         "./data/wsj0_train_merged_labels.npy",
                                         batch_size=batch_size,
                                         shuffle=True)

    test_dataloader = create_dataloader("./data/wsj0_test",
                                        None,
                                        batch_size=batch_size,
                                        test=True,
                                        shuffle=False)
    model = BiLSTM(40, 256, 47, 5, use_gpu=True)
    # model = Model(40, 47, 256)

    if checkpoint:
        model.load_state_dict(torch.load(checkpoint))

    model = model.cuda()
    ctc_loss = nn.CTCLoss()

    def criterion(out, label, data_len, label_len):
        loss = ctc_loss(out, label, data_len, label_len)
        # manual L2 penalty over all parameters, applied on top of the optimizer's weight_decay below
        reg_loss = 0
        for param in model.parameters():
            reg_loss += (param ** 2).sum()

        factor = 0.00001
        loss += factor * reg_loss
        return loss

    optimizer = Adam(model.parameters(), lr=1e-4, weight_decay=5e-5)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20, 30, 40, 50, 60, 70, 80], gamma=0.5)
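The snippet stops before the training loop; a sketch of a single CTC training step follows. The name train_dataloader and the batch layout are assumptions (the line that builds the training dataloader is truncated above):

model.train()
for data, label, data_len, label_len in train_dataloader:   # hypothetical loader name
    data = data.cuda()
    optimizer.zero_grad()
    out = model(data)                 # expected (T, N, C) log-probabilities for CTC
    loss = criterion(out, label, data_len, label_len)
    loss.backward()
    optimizer.step()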