Example #1
    def __init__(self, use_cuda, load_model, model_folder, train_directory, validation_directory, builder, args, multi_gpu=True):
        self.use_cuda = use_cuda
        self.load_model = load_model
        self.model_folder = model_folder
        self.validation_directory = validation_directory
        self.train_directory = train_directory
        self.args = args

        self.builder = builder
        self.logdir = join(model_folder, 'logs')
        self.writer = SummaryWriter(self.logdir)
        self.logger = Logger(self.args.log_file)
        self.itr = 0

        # Create Model
        self.model = self.create_model()
        if multi_gpu:
            self.model = torch.nn.DataParallel(self.model, device_ids=range(torch.cuda.device_count()))
        # Snapshot the training script and its key dependencies into the log dir.
        copy2(os.path.splitext(os.path.realpath(__file__))[0] + '.py', self.logdir)
        source_dir = os.path.dirname(os.path.realpath(__file__))
        copy2(os.path.join(source_dir, 'models/pose_predictor_euler.py'), self.logdir)
        copy2(os.path.join(source_dir, 'utils/builder_utils.py'), self.logdir)
        copy2(os.path.join(source_dir, 'utils/builders.py'), self.logdir)
        # Build validation set
        validation_builder = builder(self.args.n_views, validation_directory, IMAGE_SIZE, self.args, sample_size=SAMPLE_SIZE)
        validation_set = [validation_builder.build_set() for _ in range(6)]
        validation_set = ConcatDataset(validation_set)
        self.len_validation_set = len(validation_set)
        del validation_builder
        self.validation_loader = DataLoader(
            validation_set,
            batch_size=16,
            shuffle=False,
            pin_memory=self.use_cuda,
        )
        self.validation_calls = 0
        # Build Training Set
        self.triplet_builder = builder(self.args.n_views, train_directory,
                                       IMAGE_SIZE, self.args,
                                       sample_size=SAMPLE_SIZE)
        self.training_queue = multiprocessing.Queue(1)
        dataset_builder_process = multiprocessing.Process(target=self.build_set, args=(self.training_queue, self.triplet_builder, self.logger), daemon=True)
        dataset_builder_process.start()


        # Model specific setup
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.lr_start, momentum=0.9)
        # Reduce the learning rate when the validation loss plateaus; a
        # milestone scheduler (0.1, 0.01, 0.001) could be used instead.
        self.learning_rate_scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')
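
The background dataset builder above relies on a bounded multiprocessing queue: the worker blocks on `queue.put` until the trainer consumes the previous set, so at most one prebuilt dataset is ever held in memory. A minimal, self-contained sketch of that pattern, with a hypothetical `FakeBuilder` standing in for the real triplet builder:

import multiprocessing

class FakeBuilder:
    """Hypothetical stand-in for the real dataset builder."""
    def build_set(self):
        return list(range(10))  # placeholder dataset

def build_set(queue, builder):
    while True:
        # Blocks here whenever the queue already holds one dataset.
        queue.put(builder.build_set())

if __name__ == '__main__':
    training_queue = multiprocessing.Queue(1)  # maxsize=1, as in the Trainer
    worker = multiprocessing.Process(target=build_set,
                                     args=(training_queue, FakeBuilder()),
                                     daemon=True)
    worker.start()
    dataset = training_queue.get()  # consumer side, as in Trainer.train()
    print(len(dataset))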
Example #2
class Trainer(object):
    def __init__(self,
                 use_cuda,
                 load_model,
                 model_folder,
                 train_directory,
                 validation_directory,
                 builder,
                 loss_fn,
                 args,
                 multi_gpu=True):
        self.use_cuda = use_cuda
        self.load_model = load_model
        self.model_folder = model_folder
        self.validation_directory = validation_directory
        self.train_directory = train_directory
        self.args = args

        self.builder = builder
        self.loss_fn = loss_fn
        self.logdir = join(model_folder, 'logs')
        self.writer = SummaryWriter(self.logdir)
        self.logger = Logger(self.args.log_file)
        self.itr = 0

        # Create Model
        self.model = self.create_model()
        if multi_gpu:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=range(
                                                   torch.cuda.device_count()))

        # Build validation set
        validation_builder = builder(self.args.n_views,
                                     validation_directory,
                                     IMAGE_SIZE,
                                     self.args,
                                     sample_size=SAMPLE_SIZE)
        validation_set = [
            validation_builder.build_set() for _ in range(VAL_SEQS)
        ]
        validation_set = ConcatDataset(validation_set)
        self.len_validation_set = len(validation_set)
        del validation_builder
        self.validation_loader = DataLoader(
            validation_set,
            batch_size=8,
            shuffle=False,
            pin_memory=self.use_cuda,
        )
        self.validation_calls = 0
        # Build Training Set
        self.triplet_builder = builder(self.args.n_views, train_directory,
                                       IMAGE_SIZE, self.args,
                                       sample_size=SAMPLE_SIZE)
        self.training_queue = multiprocessing.Queue(1)
        dataset_builder_process = multiprocessing.Process(
            target=self.build_set,
            args=(self.training_queue, self.triplet_builder, self.logger),
            daemon=True)
        dataset_builder_process.start()


        # Model specific setup
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=self.args.lr_start,
                                   momentum=0.9)
        # Reduce the learning rate when the validation loss plateaus; a
        # milestone scheduler (0.1, 0.01, 0.001) could be used instead.
        self.learning_rate_scheduler = lr_scheduler.ReduceLROnPlateau(
            self.optimizer, 'min')
        # self.criterion = nn.CrossEntropyLoss()

    def train(self):

        trn_losses_ = []
        val_losses_ = []
        val_acc_ = []
        trn_acc_ = []

        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            print("=" * 20)
            self.logger.info("Starting epoch: {0} ".format(epoch))

            dataset = self.training_queue.get()
            data_loader = DataLoader(
                dataset=dataset,
                batch_size=self.args.minibatch_size,  # or batch_size(epoch, self.args.max_minibatch_size)
                shuffle=True,
                pin_memory=self.use_cuda,
            )

            train_embedding_features_buffer = []
            train_images_buffer = []

            correct = 0

            for _ in range(0, 1):
                losses = []

                for minibatch in data_loader:
                    anchor_frames = minibatch[0]
                    anchor_rots = minibatch[1]  # 3x3 rotation matrices
                    if self.use_cuda:
                        anchor_frames = anchor_frames.cuda()
                        anchor_rots = anchor_rots.cuda()
                    loss, a_pred = self.loss_fn(self.model, anchor_frames,
                                                anchor_rots)
                    losses.append(loss.data.cpu().numpy())
                    anchor_euler = euler_XYZ_to_reparam(
                        apply(rotationMatrixToEulerAngles, anchor_rots))
                    # Count predictions within 0.1 (L2 norm) of the target angles.
                    correct += (torch.norm(a_pred - anchor_euler, 2) <
                                0.1).data.cpu().numpy().sum()
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                    # Add embeddings
                    train_embedding_features_buffer.append(
                        apply(rotationMatrixToEulerAngles, anchor_rots))
                    train_images_buffer.append(anchor_frames)
            print("logging to {}".format(self.logdir))

            self.writer.add_scalar('data/train_loss', np.mean(losses),
                                   self.itr)
            self.writer.add_scalar('data/train_correct',
                                   correct / len(data_loader), self.itr)
            self.itr += 1
            trn_losses_.append(np.mean(losses))
            self.logger.info('train loss: {0}'.format(np.mean(losses)))
            self.logger.info(
                "Training score correct {correct}/{total}".format(
                    correct=correct, total=len(data_loader)))
            trn_acc_.append(correct)

            self.writer.add_image('frame_1', minibatch[0][0], self.itr)

            # Get embeddings
            features = torch.cat(
                train_embedding_features_buffer[:30]).squeeze_()
            # features = train_embedding_features_buffer.view(train_embedding_features_buffer.shape[0]*train_embedding_features_buffer.shape[1], -1)
            # label = torch.Tensor(np.asarray(label_buffer))
            images = torch.cat(train_images_buffer[:30]).squeeze_()
            self.writer.add_embedding(features,
                                      label_img=images,
                                      global_step=epoch)

            if epoch % 1 == 0:
                loss, correct = self.validate()
                self.learning_rate_scheduler.step(loss)
                val_losses_.append(loss)
                val_acc_.append(correct)

            if epoch % self.args.save_every == 0 and epoch != 0:
                self.logger.info('Saving model.')
                self.save_model(
                    self.model, self.model_filename(self.args.model_name,
                                                    epoch),
                    join(self.model_folder, 'weight_files'))
                print("logging to {}".format(self.logdir))

            plot_mean(trn_losses_, self.model_folder, 'train_loss')
            plot_mean(val_losses_, self.model_folder, 'validation_loss')
            plot_mean(trn_acc_, self.model_folder, 'train_acc')
            plot_mean(val_acc_, self.model_folder, 'validation_accuracy')
            # plot_mean(val_acc_no_margin_, self.model_folder, 'validation_accuracy_no_margin')

    def validate(self):
        # Run model on validation data and log results
        correct = 0
        losses = []
        for minibatch in self.validation_loader:
            anchor_frames = minibatch[0]
            anchor_rots = minibatch[1]  # 3x3 rotation matrices
            if self.use_cuda:
                anchor_frames = anchor_frames.cuda()
                anchor_rots = anchor_rots.cuda()
            loss, a_pred = self.loss_fn(self.model, anchor_frames, anchor_rots)
            losses.append(loss.data.cpu().numpy())
            anchor_euler = euler_XYZ_to_reparam(
                apply(rotationMatrixToEulerAngles, anchor_rots))
            correct += (torch.norm(a_pred - anchor_euler, 2) <
                        0.1).data.cpu().numpy().sum()

        self.writer.add_scalar('data/valid_loss', np.mean(losses),
                               self.validation_calls)
        self.writer.add_scalar('data/validation_correct',
                               correct / self.len_validation_set,
                               self.validation_calls)

        self.validation_calls += 1
        loss = np.mean(losses)
        self.logger.info("Validation score correct  {correct}/{total}".format(
            correct=correct, total=self.len_validation_set))
        self.logger.info('val loss: ', loss)
        return loss, correct

    def model_filename(self, model_name, epoch):
        return "{model_name}-epoch-{epoch}.pk".format(model_name=model_name,
                                                      epoch=epoch)

    def save_model(self, model, filename, model_folder):
        ensure_folder(model_folder)
        model_path = os.path.join(model_folder, filename)
        torch.save(model.state_dict(), model_path)

    def build_set(self, queue, triplet_builder, log):
        while True:
            datasets = []
            for i in range(TRAIN_SEQS_PER_EPOCH):
                dataset = triplet_builder.build_set()
                datasets.append(dataset)
            dataset = ConcatDataset(datasets)
            # log.info('Created {0} triplets'.format(len(dataset)))
            queue.put(dataset)

    def create_model(self):
        model = define_model(pretrained=True)
        # model = PosNet()
        if self.load_model:
            model_path = os.path.join(self.model_folder, self.load_model)
            # map_location allows us to load models trained on cuda to cpu.
            model.load_state_dict(
                torch.load(model_path,
                           map_location=lambda storage, loc: storage))

        if self.use_cuda:
            model = model.cuda()
        return model

    def batch_size(self, epoch, max_size):
        exponent = epoch // 100
        return min(max(2 ** exponent, 2), max_size)
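
For context, wiring the class together might look like the sketch below; `PoseBuilder` and `rotation_loss` are assumed placeholder names, not identifiers from the original source:

import torch

# Hypothetical usage of the Trainer above; builder and loss_fn are
# stand-in names, not part of the original code.
args = get_args()  # argparse namespace, as in Example #3
trainer = Trainer(use_cuda=torch.cuda.is_available(),
                  load_model=None,
                  model_folder='./out',
                  train_directory='./data/train',
                  validation_directory='./data/valid',
                  builder=PoseBuilder,      # assumed builder class
                  loss_fn=rotation_loss,    # assumed loss function
                  args=args,
                  multi_gpu=torch.cuda.device_count() > 1)
trainer.train()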
Example #3
    parser.add_argument('--num_layers',
                        type=int,
                        default=1,
                        help='number of layers in lstm')

    # parser.add_argument('--num_epochs', type=int, default=5)
    # parser.add_argument('--batch_size', type=int, default=128)
    # parser.add_argument('--num_workers', type=int, default=2)
    # parser.add_argument('--learning_rate', type=float, default=0.001)
    return parser.parse_args()


args = get_args()
print(args)

logger = Logger(args.log_file)


def batch_size(epoch, max_size):
    exponent = epoch // 100
    return min(max(2 ** exponent, 2), max_size)


validation_builder = builder(args.n_views,
                             args.validation_directory,
                             IMAGE_SIZE,
                             args,
                             sample_size=int(SAMPLE_SIZE / 2.0))
validation_set = [validation_builder.build_set() for _ in range(5)]
validation_set = ConcatDataset(validation_set)
del validation_builder
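
The `batch_size` helper implements a doubling schedule: the minibatch starts at 2 and doubles every 100 epochs until it hits `max_size`. A standalone check of the values it produces:

def batch_size(epoch, max_size):
    exponent = epoch // 100
    return min(max(2 ** exponent, 2), max_size)

# The floor of 2 masks the first two doublings (2**0 and 2**1).
for epoch in (0, 100, 200, 500, 700):
    print(epoch, batch_size(epoch, max_size=64))
# -> 0 2, 100 2, 200 4, 500 32, 700 64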
Example #4
class Trainer(object):
    def __init__(self, use_cuda, load_model, model_folder, train_directory, validation_directory, builder, args, multi_gpu=True):
        self.use_cuda = use_cuda
        self.load_model = load_model
        self.model_folder = model_folder
        self.validation_directory = validation_directory
        self.train_directory = train_directory
        self.args = args

        self.builder = builder
        self.logdir = join(model_folder, 'logs')
        self.writer = SummaryWriter(self.logdir)
        self.logger = Logger(self.args.log_file)
        self.itr = 0

        # Create Model
        self.model = self.create_model()
        if multi_gpu:
            self.model = torch.nn.DataParallel(self.model, device_ids=range(torch.cuda.device_count()))
        # Snapshot the training script and its key dependencies into the log dir.
        copy2(os.path.splitext(os.path.realpath(__file__))[0] + '.py', self.logdir)
        source_dir = os.path.dirname(os.path.realpath(__file__))
        copy2(os.path.join(source_dir, 'models/pose_predictor_euler.py'), self.logdir)
        copy2(os.path.join(source_dir, 'utils/builder_utils.py'), self.logdir)
        copy2(os.path.join(source_dir, 'utils/builders.py'), self.logdir)
        # Build validation set
        validation_builder = builder(self.args.n_views, validation_directory, IMAGE_SIZE, self.args, sample_size=SAMPLE_SIZE)
        validation_set = [validation_builder.build_set() for _ in range(6)]
        validation_set = ConcatDataset(validation_set)
        self.len_validation_set = len(validation_set)
        del validation_builder
        self.validation_loader = DataLoader(
            validation_set,
            batch_size=16,
            shuffle=False,
            pin_memory=self.use_cuda,
        )
        self.validation_calls = 0
        # Build Training Set
        self.triplet_builder = builder(self.args.n_views, train_directory,
                                       IMAGE_SIZE, self.args,
                                       sample_size=SAMPLE_SIZE)
        self.training_queue = multiprocessing.Queue(1)
        dataset_builder_process = multiprocessing.Process(target=self.build_set, args=(self.training_queue, self.triplet_builder, self.logger), daemon=True)
        dataset_builder_process.start()


        # Model specific setup
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.lr_start, momentum=0.9)
        # Reduce the learning rate when the validation loss plateaus; a
        # milestone scheduler (0.1, 0.01, 0.001) could be used instead.
        self.learning_rate_scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')
        # self.criterion = nn.CrossEntropyLoss()

    def train(self):

        trn_losses_ = []
        val_losses_ = []
        val_acc_margin_ = []
        val_acc_no_margin_ = []

        for epoch in range(self.args.start_epoch, self.args.start_epoch + self.args.epochs):
            print("=" * 20)
            self.logger.info("Starting epoch: {0} ".format(epoch))

            dataset = self.training_queue.get()
            data_loader = DataLoader(
                dataset=dataset,
                batch_size=self.args.minibatch_size, # batch_size(epoch, self.args.max_minibatch_size),
                shuffle=True,
                pin_memory=self.use_cuda,
            )
            
            train_embedding_features_buffer = []
            train_images_buffer = []
            
            for _ in range(0, ITERATE_OVER_TRIPLETS):
                losses = []

                for minibatch, _ in data_loader:
                    frames = minibatch.cuda() if self.use_cuda else minibatch
                    anchor_frames = frames[:, 0, :, :, :]
                    positive_frames = frames[:, 1, :, :, :]
                    negative_frames = frames[:, 2, :, :, :]
            
                    anchor_output, unnormalized, _ = self.model(anchor_frames)
                    positive_output, _, _ = self.model(positive_frames)
                    negative_output, _, _ = self.model(negative_frames)

                    d_positive = distance(anchor_output, positive_output)
                    d_negative = distance(anchor_output, negative_output)

                    loss_triplet = torch.clamp(self.args.margin + d_positive - d_negative, min=0.0).mean()
                    loss = loss_triplet
                    losses.append(loss.data.cpu().numpy())

                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                    # Add embeddings
                    train_embedding_features_buffer.append(anchor_output)
                    train_images_buffer.append(anchor_frames)
            print("logging to {}".format(self.logdir))

            self.writer.add_scalar('data/train_triplet_loss', np.mean(losses), self.itr)
            self.itr += 1  
            trn_losses_.append(np.mean(losses))
            self.logger.info('train loss: {0}'.format(np.mean(losses)))
            self.writer.add_image('frame_anchor', minibatch[0][0], 0) 
            self.writer.add_image('frame_positive', minibatch[0][1], 1) 
            self.writer.add_image('frame_negative', minibatch[0][2], 2) 

            # Get embeddings
            features = torch.cat(train_embedding_features_buffer).squeeze_()
            # features = train_embedding_features_buffer.view(train_embedding_features_buffer.shape[0]*train_embedding_features_buffer.shape[1], -1)
            # label = torch.Tensor(np.asarray(label_buffer))
            images = torch.cat(train_images_buffer).squeeze_()
            self.writer.add_embedding(features, label_img=images, global_step=epoch)
            
            if epoch % 1 == 0:
                acc_margin, acc_no_margin, loss = self.validate()
                self.learning_rate_scheduler.step(loss)
                val_losses_.append(loss)
                val_acc_margin_.append(acc_margin)
                val_acc_no_margin_.append(acc_no_margin)

            if epoch % self.args.save_every == 0 and epoch != 0:
                self.logger.info('Saving model.')
                self.save_model(self.model, self.model_filename(self.args.model_name, epoch), join(self.model_folder, 'weight_files'))
                print("logging to {}".format(self.logdir))

            plot_mean(trn_losses_, self.model_folder, 'train_loss')
            plot_mean(val_losses_, self.model_folder, 'validation_loss')
            # plot_mean(train_acc_, self.args.model_folder, 'train_acc')
            plot_mean(val_acc_margin_, self.model_folder, 'validation_accuracy_margin')
            plot_mean(val_acc_no_margin_, self.model_folder, 'validation_accuracy_no_margin')

    def validate(self):
        # Run model on validation data and log results
        correct_with_margin = 0
        correct_without_margin = 0
        losses = []
        for minibatch, _ in self.validation_loader:
            frames = minibatch.cuda() if self.use_cuda else minibatch

            anchor_frames = frames[:, 0, :, :, :]
            positive_frames = frames[:, 1, :, :, :]
            negative_frames = frames[:, 2, :, :, :]

            anchor_output, unnormalized, _ = self.model(anchor_frames)
            positive_output, _, _ = self.model(positive_frames)
            negative_output, _, _ = self.model(negative_frames)
            
            d_positive = distance(anchor_output, positive_output)
            d_negative = distance(anchor_output, negative_output)

            assert d_positive.size()[0] == minibatch.size()[0]

            correct_with_margin += ((d_positive + self.args.margin) < d_negative).data.cpu().numpy().sum()
            correct_without_margin += (d_positive < d_negative).data.cpu().numpy().sum()

            loss_triplet = torch.clamp(self.args.margin + d_positive - d_negative, min=0.0).mean()
            loss = loss_triplet
            losses.append(loss.data.cpu().numpy())
        self.writer.add_scalar('data/validation_loss', np.mean(losses), self.validation_calls) 
        self.writer.add_scalar('data/validation_correct_with_margin', correct_with_margin / self.len_validation_set, self.validation_calls)
        self.writer.add_scalar('data/validation_correct_without_margin', correct_without_margin / self.len_validation_set, self.validation_calls)
        self.validation_calls += 1
        loss = np.mean(losses)
        self.logger.info('val loss: {0}'.format(loss))

        message = "Validation score correct with margin {with_margin}/{total} and without margin {without_margin}/{total}".format(
            with_margin=correct_with_margin,
            without_margin=correct_without_margin,
            total=self.len_validation_set
        )
        self.logger.info(message)
        return correct_with_margin, correct_without_margin, loss

    def model_filename(self, model_name, epoch):
        return "{model_name}-epoch-{epoch}.pk".format(model_name=model_name, epoch=epoch)

    def save_model(self, model, filename, model_folder):
        ensure_folder(model_folder)
        model_path = os.path.join(model_folder, filename)
        torch.save(model.state_dict(), model_path)


    def build_set(self, queue, triplet_builder, log):
        while True:
            datasets = []
            for i in range(3):
                dataset = triplet_builder.build_set()
                datasets.append(dataset)
            dataset = ConcatDataset(datasets)
            # log.info('Created {0} triplets'.format(len(dataset)))
            queue.put(dataset)

    def create_model(self):
        model = define_model(pretrained=True)
        # model = PosNet()
        if self.load_model:
            model_path = os.path.join(
                self.model_folder,
                self.load_model
            )
            # map_location allows us to load models trained on cuda to cpu.
            model.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))

        if self.use_cuda:
            model = model.cuda()
        return model

    def batch_size(self, epoch, max_size):
        exponent = epoch // 100
        return min(max(2 ** exponent, 2), max_size)
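
The triplet margin loss in `train` and `validate` only needs a per-row distance; `distance` is defined elsewhere in the repository, so the squared Euclidean version below is an assumption used to make the sketch self-contained:

import torch

def distance(x1, x2):
    # Assumed per-row squared Euclidean distance; the real helper
    # lives elsewhere in the repository.
    return torch.sum((x1 - x2) ** 2, dim=1)

anchor = torch.randn(4, 32)
positive = anchor + 0.05 * torch.randn(4, 32)
negative = torch.randn(4, 32)

margin = 0.5
d_positive = distance(anchor, positive)
d_negative = distance(anchor, negative)
# Hinge on (margin + d_positive - d_negative), as in the training loop above.
loss = torch.clamp(margin + d_positive - d_negative, min=0.0).mean()
print(loss.item())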
Example #5
    def __init__(self,
                 use_cuda,
                 load_model,
                 model_folder,
                 train_directory,
                 validation_directory,
                 builder,
                 loss_fn,
                 args,
                 multi_gpu=True):
        self.use_cuda = use_cuda
        self.load_model = load_model
        self.model_folder = model_folder
        self.validation_directory = validation_directory
        self.train_directory = train_directory
        self.args = args

        self.builder = builder
        self.loss_fn = loss_fn
        self.logdir = join(model_folder, 'logs')
        self.writer = SummaryWriter(self.logdir)
        self.logger = Logger(self.args.log_file)
        self.itr = 0

        # Create Model
        self.model = self.create_model()
        if multi_gpu:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=range(
                                                   torch.cuda.device_count()))

        # Build validation set
        validation_builder = builder(self.args.n_views,
                                     validation_directory,
                                     IMAGE_SIZE,
                                     self.args,
                                     toRot=True,
                                     sample_size=SAMPLE_SIZE)
        validation_set = [
            validation_builder.build_set() for _ in range(VAL_SEQS)
        ]
        validation_set = ConcatDataset(validation_set)
        self.len_validation_set = len(validation_set)
        del validation_builder
        self.validation_loader = DataLoader(
            validation_set,
            batch_size=8,
            shuffle=False,
            pin_memory=self.use_cuda,
        )
        self.validation_calls = 0
        # Build Training Set
        self.triplet_builder = builder(self.args.n_views, train_directory,
                                       IMAGE_SIZE, self.args, toRot=True,
                                       sample_size=SAMPLE_SIZE)
        self.training_queue = multiprocessing.Queue(1)
        dataset_builder_process = multiprocessing.Process(
            target=self.build_set,
            args=(self.training_queue, self.triplet_builder, self.logger),
            daemon=True)
        dataset_builder_process.start()


        # Model specific setup
        # self.optimizer = optim.SGD(self.model.parameters(), lr=self.args.lr_start, momentum=0.9)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=0.001,
                                    betas=(0.9, 0.999),
                                    eps=1e-08)
        # Reduce the learning rate when the validation loss plateaus; a
        # milestone scheduler (0.1, 0.01, 0.001) could be used instead.
        self.learning_rate_scheduler = lr_scheduler.ReduceLROnPlateau(
            self.optimizer, 'min')
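
Unlike a milestone schedule, `ReduceLROnPlateau` reacts to the monitored metric itself; the trainer feeds it the validation loss each epoch. A self-contained sketch of that wiring, with a toy model and a stand-in loss value:

import torch
from torch import nn, optim
from torch.optim import lr_scheduler

model = nn.Linear(8, 3)
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08)
# Drops the LR by `factor` once the metric stalls for `patience` steps.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=10)

for epoch in range(3):
    val_loss = 1.0  # stand-in for the real validation loss
    scheduler.step(val_loss)  # as in Trainer.train()
    print(optimizer.param_groups[0]['lr'])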