class Demo(BaseDemo):
    """Training and evaluation driver for a flow-predicting ``Net``.

    ``Net`` is optimised with an L1 inverse-warping loss plus an L1 penalty on
    neighbouring flow differences.  ``GtNet`` is evaluated with ground-truth
    motion passed in as an extra input.  Configuration attributes (``data``,
    ``im_size``, ``im_channel``, ``batch_size``, ...) and ``validate`` are not
    defined in this class and are expected to be provided by ``BaseDemo``.
    """

    def __init__(self, args):
        """Build both networks and the visualizer from the parsed arguments."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``Net`` and ``GtNet``, move them to GPU and load weights.

        Args:
            m_kernel: motion kernel array; only ``m_kernel.shape[1]`` is used
                here, as a size argument of both networks.

        Returns:
            Tuple ``(model, model_gt)``; both are also stored on ``self``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel,
                         self.num_frame, m_kernel.shape[1], self.m_range,
                         self.net_depth)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel,
                              self.num_frame, m_kernel.shape[1], self.m_range)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Truthiness instead of `is not ''`: identity comparison with a string
        # literal is not a reliable emptiness test.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    # ------------------------------------------------------------------
    # Private helpers shared by train / test / test_gt
    # ------------------------------------------------------------------
    def _split_frames(self, im):
        """Split a 5-axis batch into stacked inputs and the last axis-1 entry.

        All but the last entry along axis 1 are reshaped to
        ``(batch_size, -1, im_size, im_size)``; the last entry is returned
        separately as the prediction target.
        """
        im_input = im[:, :-1, :, :, :].reshape(
            self.batch_size, -1, self.im_size, self.im_size)
        im_output = im[:, -1, :, :, :]
        return im_input, im_output

    @staticmethod
    def _to_variable(array, volatile=False):
        """Wrap a numpy array as a float ``Variable``, on GPU when available."""
        tensor = torch.from_numpy(array).float()
        # Only pass `volatile` when requested so the default construction path
        # is exactly the plain `Variable(tensor)` call.
        var = Variable(tensor, volatile=True) if volatile else Variable(tensor)
        if torch.cuda.is_available():
            var = var.cuda()
        return var

    def _warping_loss(self, im_pred, im_input, flow):
        """Return the L1 inverse-warping loss plus 0.1 * L1 flow smoothness."""
        # Inverse warping: the prediction is compared against the last
        # `im_channel` channels of the stacked input, not against the output.
        im_diff = im_pred - im_input[:, -self.im_channel:, :, :]
        loss = torch.abs(im_diff).sum()
        flow_diff1 = flow[:, :, 1:, :] - flow[:, :, :-1, :]
        flow_diff2 = flow[:, :, :, 1:] - flow[:, :, :, :-1]
        smoothness = torch.abs(flow_diff1).sum() + torch.abs(flow_diff2).sum()
        return loss + 0.1 * smoothness

    def _base_loss(self, im_input, im_output):
        """L1 distance between the last input channels and the target."""
        diff = im_input[:, -self.im_channel:, :, :] - im_output
        return torch.abs(diff).sum().data[0]

    @staticmethod
    def _endpoint_error(flow, gt_motion):
        """Mean Euclidean distance (norm over axis 1) between two flows."""
        epe = (flow - gt_motion) * (flow - gt_motion)
        epe = torch.sqrt(epe.sum(1))
        epe = epe.sum() / epe.numel()
        return epe.cpu().data[0]

    @staticmethod
    def _running_average(history, value, window=100):
        """Append ``value``, keep at most ``window`` entries, return the mean."""
        history.append(value)
        if len(history) > window:
            history.pop(0)
        return sum(history) / float(len(history))

    @staticmethod
    def _report(label, test_loss, base_loss, test_epe=None):
        """Log averaged losses (and EPE when given); return the improvement.

        The returned ``improve_percent`` is the ratio
        ``(base - test) / (base + 1e-5)``; it is not multiplied by 100.
        """
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average %s loss: %.2f, base loss: %.2f',
                     label, test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        if test_epe is not None:
            logging.info('average %s endpoint error: %.2f', label,
                         numpy.mean(numpy.asarray(test_epe)))
        return improve_percent

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------
    def train(self):
        """Run ``train_epoch`` Adam steps, validating every ``test_interval``.

        Exits the process via ``sys.exit()`` when ``self.data.name`` is not one
        of the supported dataset names.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            # The batch tuple arity differs per dataset family.
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, _, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in [
                    'robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128',
                    'viper64', 'viper128', 'robot128c'
            ]:
                im = self.data.get_next_batch(self.data.train_images)
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)

            im_pred, flow = self.model(im_input, im_output)
            # resize flow from [-1, 1] back to image scale
            flow = flow * self.im_size / 2
            loss = self._warping_loss(im_pred, im_input, flow)
            loss.backward()
            optimizer.step()

            ave_train_loss = self._running_average(train_loss, loss.data[0])
            ave_base_loss = self._running_average(
                base_loss, self._base_loss(im_input, im_output))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test(self):
        """Evaluate ``self.model`` for ``test_epoch`` batches.

        Endpoint error is only computed for datasets whose batches include
        ground-truth motion.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        motion = None
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, _ = self.data.get_next_batch(self.data.test_images)
            elif self.data.name in [
                    'robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128',
                    'viper64', 'viper128', 'robot128c'
            ]:
                im, motion = self.data.get_next_batch(
                    self.data.test_images), None
            elif self.data.name in ['mpii64_sample']:
                im, motion = self.data.get_next_batch(
                    self.data.test_images), None
                # Keep only the trailing `num_frame` entries along axis 1.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input, volatile=True)
            im_output = self._to_variable(im_output, volatile=True)

            im_pred, flow = self.model(im_input, im_output)
            # resize flow from [-1, 1] back to image scale
            flow = flow * self.im_size / 2
            loss = self._warping_loss(im_pred, im_input, flow)
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input, im_output))

            if motion is None:
                gt_motion = None
            else:
                gt_motion = self._to_variable(motion[:, -2, :, :, :])
                test_epe.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(
                    im_input, im_output, im_pred, flow, gt_motion,
                    'test_%d.png' % epoch)
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input, im_output, im_pred, flow, gt_motion,
                        'test_%d.png' % i, i)

        return self._report('test', test_loss, base_loss,
                            test_epe if motion is not None else None)

    def test_gt(self):
        """Evaluate ``self.model_gt`` fed with ground-truth motion.

        Only datasets that provide motion are accepted; any other dataset name
        logs an error and exits via ``sys.exit()``.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, _ = self.data.get_next_batch(self.data.test_images)
            else:
                logging.error('%s data not supported in test_gt',
                              self.data.name)
                sys.exit()

            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input, volatile=True)
            im_output = self._to_variable(im_output, volatile=True)
            gt_motion = self._to_variable(motion[:, -2, :, :, :])

            # Both supported dataset families use the same call signature.
            im_pred, flow = self.model_gt(im_input, im_output, gt_motion)
            # resize flow from [-1, 1] back to image scale
            flow = flow * self.im_size / 2
            loss = self._warping_loss(im_pred, im_input, flow)
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input, im_output))
            test_epe.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(
                    im_input, im_output, im_pred, flow, gt_motion,
                    'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input, im_output, im_pred, flow, gt_motion,
                        'test_gt_%d.png' % i, i)

        return self._report('ground truth test', test_loss, base_loss,
                            test_epe)
class Demo(BaseDemo):
    """Training and evaluation driver for a motion-mask / depth-mask ``Net``.

    The network returns a predicted image together with a motion mask and a
    depth mask; training minimises the L1 difference to the target frame.
    Configuration attributes (``data``, ``im_size``, ``im_channel``,
    ``batch_size``, ...), ``validate`` and ``motion2flow`` are not defined in
    this class and are expected to be provided by ``BaseDemo``.
    """

    def __init__(self, args):
        """Build both networks and the visualizer from the parsed arguments."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``Net`` and ``GtNet``, move them to GPU and load weights.

        Args:
            m_kernel: motion kernel array, passed to both networks together
                with its ``shape[1]``.

        Returns:
            Tuple ``(model, model_gt)``; both are also stored on ``self``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel,
                         self.num_frame - 1, m_kernel.shape[1], self.m_range,
                         m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel,
                              self.num_frame - 1, m_kernel.shape[1],
                              self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Truthiness instead of `is not ''`: identity comparison with a string
        # literal is not a reliable emptiness test.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    # ------------------------------------------------------------------
    # Private helpers shared by train / test / test_gt
    # ------------------------------------------------------------------
    def _split_frames(self, im):
        """Split a 5-axis batch into stacked inputs and the last axis-1 entry."""
        im_input = im[:, :-1, :, :, :].reshape(
            self.batch_size, -1, self.im_size, self.im_size)
        im_output = im[:, -1, :, :, :]
        return im_input, im_output

    @staticmethod
    def _to_variable(array, as_float=True):
        """Wrap a numpy array as a ``Variable``, on GPU when available.

        With ``as_float=False`` the tensor keeps the dtype of ``array``.
        """
        tensor = torch.from_numpy(array)
        if as_float:
            tensor = tensor.float()
        var = Variable(tensor)
        if torch.cuda.is_available():
            var = var.cuda()
        return var

    def _base_loss(self, im_input, im_output):
        """L1 distance between the last input channels and the target."""
        diff = im_input[:, -self.im_channel:, :, :] - im_output
        return torch.abs(diff).sum().data[0]

    @staticmethod
    def _endpoint_error(flow, gt_motion):
        """Mean Euclidean distance (norm over axis 1) between two flows."""
        epe = (flow - gt_motion) * (flow - gt_motion)
        epe = torch.sqrt(epe.sum(1))
        epe = epe.sum() / epe.numel()
        return epe.cpu().data[0]

    @staticmethod
    def _running_average(history, value, window=100):
        """Append ``value``, keep at most ``window`` entries, return the mean."""
        history.append(value)
        if len(history) > window:
            history.pop(0)
        return sum(history) / float(len(history))

    @staticmethod
    def _report(label, test_loss, base_loss, test_epe=None):
        """Log averaged losses (and EPE when given); return the improvement.

        The returned ``improve_percent`` is the ratio
        ``(base - test) / (base + 1e-5)``; it is not multiplied by 100.
        """
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average %s loss: %.2f, base loss: %.2f',
                     label, test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        if test_epe is not None:
            logging.info('average %s endpoint error: %.2f', label,
                         numpy.mean(numpy.asarray(test_epe)))
        return improve_percent

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------
    def train(self):
        """Run ``train_epoch`` Adam steps, validating every ``test_interval``.

        Exits the process via ``sys.exit()`` when ``self.data.name`` is not one
        of the supported dataset names.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            # The batch tuple arity (and the batch source) differs per dataset.
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, _, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['robot64', 'mpii64', 'nyuv2']:
                im = self.data.get_next_batch(self.data.train_meta)
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)

            im_pred, m_mask, d_mask = self.model(im_input)
            loss = torch.abs(im_pred - im_output).sum()
            loss.backward()
            optimizer.step()

            ave_train_loss = self._running_average(train_loss, loss.data[0])
            ave_base_loss = self._running_average(
                base_loss, self._base_loss(im_input, im_output))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test(self):
        """Evaluate ``self.model`` for ``test_epoch`` batches.

        Endpoint error is only computed for datasets whose batches include
        ground-truth motion.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        # Initialised up front so the summary below is well defined even when
        # the loop body never runs.
        gt_motion = None
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, _ = self.data.get_next_batch(self.data.test_images)
            elif self.data.name in ['robot64', 'mpii64', 'nyuv2']:
                im, motion = self.data.get_next_batch(
                    self.data.test_meta), None
            elif self.data.name in ['mpii64_sample']:
                im, motion = self.data.get_next_batch(
                    self.data.test_meta), None
                # Keep only the trailing `num_frame` entries along axis 1.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)

            im_pred, m_mask, d_mask = self.model(im_input)
            loss = torch.abs(im_pred - im_output).sum()
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input, im_output))

            flow = self.motion2flow(m_mask)
            depth = self.mask2depth(d_mask)

            if motion is None:
                gt_motion = None
            else:
                gt_motion = self._to_variable(motion[:, -2, :, :, :])
                test_epe.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(
                    im_input, im_output, im_pred, flow, gt_motion, depth,
                    'test_%d.png' % epoch)
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input, im_output, im_pred, flow, gt_motion, depth,
                        'test_%d.png' % i, i)

        return self._report('test', test_loss, base_loss,
                            test_epe if gt_motion is not None else None)

    def test_gt(self):
        """Evaluate ``self.model_gt`` fed with ground-truth motion and depth.

        Only datasets that provide motion and depth are accepted; any other
        dataset name logs an error and exits via ``sys.exit()``.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        for epoch in range(self.test_epoch):
            uses_labels = self.data.name in ['box', 'mnist', 'box_complex']
            if uses_labels:
                im, motion, motion_label, depth_batch = \
                    self.data.get_next_batch(self.data.test_images)
                # Labels keep their original dtype (no float conversion).
                gt_motion_label = self._to_variable(
                    motion_label[:, -2, :, :, :], as_float=False)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, depth_batch = self.data.get_next_batch(
                    self.data.test_images)
            else:
                logging.error('%s data not supported in test_gt',
                              self.data.name)
                sys.exit()

            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)
            gt_motion = self._to_variable(motion[:, -2, :, :, :])
            gt_depth = self._to_variable(depth_batch[:, -2, :, :, :])

            if uses_labels:
                im_pred, m_mask, d_mask = self.model_gt(
                    im_input, gt_motion_label, gt_depth, 'label')
            else:
                im_pred, m_mask, d_mask = self.model_gt(
                    im_input, gt_motion, gt_depth)

            loss = torch.abs(im_pred - im_output).sum()
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input, im_output))

            flow = self.motion2flow(m_mask)
            depth = self.mask2depth(d_mask)
            test_epe.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(
                    im_input, im_output, im_pred, flow, gt_motion, depth,
                    'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input, im_output, im_pred, flow, gt_motion, depth,
                        'test_gt_%d.png' % i, i)

        return self._report('ground truth test', test_loss, base_loss,
                            test_epe)

    def mask2depth(self, d_mask):
        """Collapse a 4-axis depth mask into a single-channel depth map.

        Each position receives the mask-weighted sum of the axis-1 indices
        ``0 .. num_depth - 1``.

        Args:
            d_mask: 4-axis ``Variable``; its axis 1 enumerates depth levels.

        Returns:
            ``Variable`` allocated with shape ``(batch_size, 1, height, width)``.
        """
        [batch_size, num_depth, height, width] = d_mask.size()
        depth_number = Variable(
            torch.zeros(batch_size, num_depth, height, width))
        if torch.cuda.is_available():
            depth_number = depth_number.cuda()
        # Channel i is filled with the constant i.
        for i in range(num_depth):
            depth_number[:, i, :, :] = i
        depth = Variable(torch.zeros(batch_size, 1, height, width))
        if torch.cuda.is_available():
            depth = depth.cuda()
        depth[:, 0, :, :] = (d_mask * depth_number).sum(1)
        return depth
class Demo(BaseDemo):
    """Training and evaluation driver for an occlusion-aware ``Net``.

    The network returns a predicted image, a motion mask and two maps named
    ``occlude`` / ``unocclude``; the L1 loss is weighted by ``unocclude``.
    Both networks are built with a hard-coded channel count of 3.
    Configuration attributes (``data``, ``im_size``, ``batch_size``, ...),
    ``validate`` and ``motion2flow`` are not defined in this class and are
    expected to be provided by ``BaseDemo``.
    """

    def __init__(self, args):
        """Build both networks and the visualizer from the parsed arguments."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``Net`` and ``GtNet``, move them to GPU and load weights.

        Args:
            m_kernel: motion kernel array, passed to both networks together
                with its ``shape[1]``.

        Returns:
            Tuple ``(model, model_gt)``; both are also stored on ``self``.
        """
        self.model = Net(self.im_size, self.im_size, 3, self.num_frame - 1,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, 3,
                              self.num_frame - 1, m_kernel.shape[1],
                              self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Truthiness instead of `is not ''`: identity comparison with a string
        # literal is not a reliable emptiness test.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    # ------------------------------------------------------------------
    # Private helpers shared by the train / test entry points
    # ------------------------------------------------------------------
    def _split_frames(self, im):
        """Split a 5-axis batch into stacked inputs and the last axis-1 entry."""
        im_input = im[:, :-1, :, :, :].reshape(
            self.batch_size, -1, self.im_size, self.im_size)
        im_output = im[:, -1, :, :, :]
        return im_input, im_output

    @staticmethod
    def _to_variable(array, as_float=True):
        """Wrap a numpy array as a ``Variable``, on GPU when available.

        With ``as_float=False`` the tensor keeps the dtype of ``array``.
        """
        tensor = torch.from_numpy(array)
        if as_float:
            tensor = tensor.float()
        var = Variable(tensor)
        if torch.cuda.is_available():
            var = var.cuda()
        return var

    @staticmethod
    def _masked_loss(im_pred, im_output, unocclude):
        """L1 loss weighted by ``unocclude`` and normalised by its sum.

        The weighted difference is divided by ``unocclude`` summed over axes
        3 and 2, then the total is rescaled by the two spatial sizes.
        """
        im_diff = unocclude.expand_as(im_output) * (im_pred - im_output)
        im_diff = im_diff / unocclude.sum(3).sum(2).expand_as(im_diff)
        return torch.abs(im_diff).sum() * im_diff.size(2) * im_diff.size(3)

    @staticmethod
    def _base_loss(im_input, im_output):
        """L1 distance between the last 3 input channels and the target."""
        return torch.abs(im_input[:, -3:, :, :] - im_output).sum().data[0]

    @staticmethod
    def _endpoint_error(flow, gt_motion):
        """Mean Euclidean distance (norm over axis 1) between two flows."""
        epe = (flow - gt_motion) * (flow - gt_motion)
        epe = torch.sqrt(epe.sum(1))
        epe = epe.sum() / epe.numel()
        return epe.cpu().data[0]

    @staticmethod
    def _running_average(history, value, window=100):
        """Append ``value``, keep at most ``window`` entries, return the mean."""
        history.append(value)
        if len(history) > window:
            history.pop(0)
        return sum(history) / float(len(history))

    @staticmethod
    def _report(label, test_loss, base_loss, test_epe):
        """Log averaged losses and endpoint error; return the improvement.

        The returned ``improve_percent`` is the ratio
        ``(base - test) / (base + 1e-5)``; it is not multiplied by 100.
        """
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average %s loss: %.2f, base loss: %.2f',
                     label, test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        logging.info('average %s endpoint error: %.2f', label,
                     numpy.mean(numpy.asarray(test_epe)))
        return improve_percent

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------
    def train_unsupervised(self):
        """Run ``train_epoch`` Adam steps, validating every ``test_interval``."""
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            im, _, _, _ = self.data.get_next_batch(self.data.train_images)
            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)

            im_pred, m_mask, occlude, unocclude = self.model(im_input)
            loss = self._masked_loss(im_pred, im_output, unocclude)
            loss.backward()
            optimizer.step()

            ave_train_loss = self._running_average(train_loss, loss.data[0])
            ave_base_loss = self._running_average(
                base_loss, self._base_loss(im_input, im_output))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test_unsupervised(self):
        """Evaluate ``self.model`` for ``test_epoch`` batches.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        for epoch in range(self.test_epoch):
            im, motion, _, _ = self.data.get_next_batch(self.data.test_images)
            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)
            gt_motion = self._to_variable(motion[:, -2, :, :, :])

            im_pred, m_mask, occlude, unocclude = self.model(im_input)
            loss = self._masked_loss(im_pred, im_output, unocclude)
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input, im_output))

            flow = self.motion2flow(m_mask)
            test_epe.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(
                    im_input, im_output, im_pred, flow, gt_motion, unocclude,
                    occlude, 'test_%d.png' % epoch)

        return self._report('test', test_loss, base_loss, test_epe)

    def test_gt_unsupervised(self):
        """Evaluate ``self.model_gt`` fed with ground-truth motion labels.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        for epoch in range(self.test_epoch):
            im, motion, motion_label, _ = self.data.get_next_batch(
                self.data.test_images)
            im_input, im_output = self._split_frames(im)
            im_input = self._to_variable(im_input)
            im_output = self._to_variable(im_output)
            gt_motion = self._to_variable(motion[:, -2, :, :, :])
            # Labels keep their original dtype (no float conversion).
            gt_motion_label = self._to_variable(
                motion_label[:, -2, :, :, :], as_float=False)

            im_pred, m_mask, occlude, unocclude = self.model_gt(
                im_input, gt_motion_label)
            loss = self._masked_loss(im_pred, im_output, unocclude)
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input, im_output))

            flow = self.motion2flow(m_mask)
            test_epe.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(
                    im_input, im_output, im_pred, flow, gt_motion, unocclude,
                    occlude, 'test_gt.png')

        return self._report('ground truth test', test_loss, base_loss,
                            test_epe)
class Demo(BaseDemo):
    """Training and evaluation driver for a bidirectional ``Net``.

    Each batch is split around its middle axis-1 entry: the entries before it
    form the forward input, the entries after it (in reversed order) form the
    backward input, and the middle entry is the prediction target.  Both
    networks are built with a hard-coded channel count of 3.  Configuration
    attributes (``im_size``, ``batch_size``, ...), ``validate`` and
    ``motion2flow`` are not defined in this class and are expected to be
    provided by ``BaseDemo``.
    """

    def __init__(self, args):
        """Swap in the bidirectional dataset, then build models and visualizer.

        For ``args.data`` values other than ``'box'`` / ``'mnist'`` the
        ``self.data`` left by ``BaseDemo.__init__`` is kept.
        """
        super(Demo, self).__init__(args)
        if args.data == 'box':
            self.data = BoxDataBidirect(args)
        elif args.data == 'mnist':
            self.data = MnistDataBidirect(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)
        # Floor division: the value is used as a slice bound and an index, so
        # it must stay an int on Python 3 as well.
        self.num_inputs = (self.num_frame - 1) // 2

    def init_model(self, m_kernel):
        """Create ``Net`` and ``GtNet``, move them to GPU and load weights.

        Args:
            m_kernel: motion kernel array, passed to both networks together
                with its ``shape[1]``.

        Returns:
            Tuple ``(model, model_gt)``; both are also stored on ``self``.
        """
        # Computed locally because this runs before `self.num_inputs` is set.
        num_inputs = (self.num_frame - 1) // 2
        self.model = Net(self.im_size, self.im_size, 3, num_inputs,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, 3, num_inputs,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Truthiness instead of `is not ''`: identity comparison with a string
        # literal is not a reliable emptiness test.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    # ------------------------------------------------------------------
    # Private helpers shared by the train / test entry points
    # ------------------------------------------------------------------
    def _split_bidirect(self, im):
        """Return ``(im_input_f, im_input_b, im_output)`` as numpy arrays."""
        mid = self.num_inputs
        im_input_f = im[:, :mid, :, :, :].reshape(
            self.batch_size, -1, self.im_size, self.im_size)
        # `[:mid:-1]` walks from the last axis-1 entry down to `mid + 1`.
        im_input_b = im[:, :mid:-1, :, :, :].reshape(
            self.batch_size, -1, self.im_size, self.im_size)
        im_output = im[:, mid, :, :, :]
        return im_input_f, im_input_b, im_output

    @staticmethod
    def _to_variable(array, as_float=True):
        """Wrap a numpy array as a ``Variable``, on GPU when available.

        With ``as_float=False`` the tensor keeps the dtype of ``array``.
        """
        tensor = torch.from_numpy(array)
        if as_float:
            tensor = tensor.float()
        var = Variable(tensor)
        if torch.cuda.is_available():
            var = var.cuda()
        return var

    @staticmethod
    def _base_loss(im_input, im_output):
        """L1 distance between the last 3 input channels and the target."""
        return torch.abs(im_input[:, -3:, :, :] - im_output).sum().data[0]

    @staticmethod
    def _endpoint_error(flow, gt_motion):
        """Mean Euclidean distance (norm over axis 1) between two flows."""
        epe = (flow - gt_motion) * (flow - gt_motion)
        epe = torch.sqrt(epe.sum(1))
        epe = epe.sum() / epe.numel()
        return epe.cpu().data[0]

    @staticmethod
    def _running_average(history, value, window=100):
        """Append ``value``, keep at most ``window`` entries, return the mean."""
        history.append(value)
        if len(history) > window:
            history.pop(0)
        return sum(history) / float(len(history))

    @staticmethod
    def _report(label, test_loss, base_loss, test_epe):
        """Log averaged losses and endpoint error; return the improvement.

        The returned ``improve_percent`` is the ratio
        ``(base - test) / (base + 1e-5)``; it is not multiplied by 100.
        """
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average %s loss: %.2f, base loss: %.2f',
                     label, test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        logging.info('average %s endpoint error: %.2f', label,
                     numpy.mean(numpy.asarray(test_epe)))
        return improve_percent

    # ------------------------------------------------------------------
    # Public entry points
    # ------------------------------------------------------------------
    def train_unsupervised(self):
        """Run ``train_epoch`` Adam steps, validating every ``test_interval``."""
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            im, _, _, _, _, _ = self.data.get_next_batch(
                self.data.train_images)
            im_input_f, im_input_b, im_output = self._split_bidirect(im)
            im_input_f = self._to_variable(im_input_f)
            im_input_b = self._to_variable(im_input_b)
            im_output = self._to_variable(im_output)

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model(im_input_f, im_input_b)
            loss = torch.abs(im_pred - im_output).sum()
            loss.backward()
            optimizer.step()

            ave_train_loss = self._running_average(train_loss, loss.data[0])
            # Two base-loss samples are recorded per step (forward and
            # backward input); the window is trimmed after each append so it
            # stays bounded at 100 entries.
            self._running_average(
                base_loss, self._base_loss(im_input_f, im_output))
            ave_base_loss = self._running_average(
                base_loss, self._base_loss(im_input_b, im_output))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test_unsupervised(self):
        """Evaluate ``self.model`` for ``test_epoch`` batches.

        Base loss and endpoint error are computed for the forward direction
        only; the backward flow is still produced for visualisation.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        for epoch in range(self.test_epoch):
            im, motion, motion_r, _, _, _ = self.data.get_next_batch(
                self.data.test_images)
            im_input_f, im_input_b, im_output = self._split_bidirect(im)
            im_input_f = self._to_variable(im_input_f)
            im_input_b = self._to_variable(im_input_b)
            im_output = self._to_variable(im_output)
            gt_motion_f = self._to_variable(
                motion[:, self.num_inputs - 1, :, :, :])
            gt_motion_b = self._to_variable(
                motion_r[:, self.num_inputs + 1, :, :, :])

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model(im_input_f, im_input_b)
            loss = torch.abs(im_pred - im_output).sum()
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input_f, im_output))

            flow_f = self.motion2flow(m_mask_f)
            test_epe.append(self._endpoint_error(flow_f, gt_motion_f))
            flow_b = self.motion2flow(m_mask_b)

            if self.display:
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, disappear_f, attn_f, flow_b, gt_motion_b,
                    disappear_b, attn_b, 'test_%d.png' % epoch)

        return self._report('test', test_loss, base_loss, test_epe)

    def test_gt_unsupervised(self):
        """Evaluate ``self.model_gt`` fed with ground-truth motion.

        Base loss and endpoint error are computed for the forward direction
        only; the backward flow is still produced for visualisation.

        Returns:
            The improvement ratio of the model loss over the base loss.
        """
        base_loss, test_loss, test_epe = [], [], []
        for epoch in range(self.test_epoch):
            im, motion, motion_r, motion_label, motion_label_r, gt_depth = \
                self.data.get_next_batch(self.data.test_images)
            im_input_f, im_input_b, im_output = self._split_bidirect(im)
            im_input_f = self._to_variable(im_input_f)
            im_input_b = self._to_variable(im_input_b)
            im_output = self._to_variable(im_output)
            gt_motion_f = self._to_variable(
                motion[:, self.num_inputs - 1, :, :, :])
            gt_motion_b = self._to_variable(
                motion_r[:, self.num_inputs + 1, :, :, :])
            # NOTE(review): the label variables are prepared but never passed
            # to `model_gt` below, which receives the float motions instead.
            # Confirm which input `GtNet` is meant to consume.
            gt_motion_label_f = self._to_variable(
                motion_label[:, self.num_inputs - 1, :, :, :], as_float=False)
            gt_motion_label_b = self._to_variable(
                motion_label_r[:, self.num_inputs + 1, :, :, :],
                as_float=False)

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model_gt(
                 im_input_f, im_input_b, gt_motion_f, gt_motion_b)
            loss = torch.abs(im_pred - im_output).sum()
            test_loss.append(loss.data[0])
            base_loss.append(self._base_loss(im_input_f, im_output))

            flow_f = self.motion2flow(m_mask_f)
            test_epe.append(self._endpoint_error(flow_f, gt_motion_f))
            flow_b = self.motion2flow(m_mask_b)

            if self.display:
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, disappear_f, attn_f, flow_b, gt_motion_b,
                    disappear_b, attn_b, 'test_gt.png')

        return self._report('ground truth test', test_loss, base_loss,
                            test_epe)
class Demo(BaseDemo):
    """Single-direction motion-prediction demo with TensorBoard logging.

    Builds the learned network (``Net``) and its ground-truth-driven counterpart
    (``GtNet``) from the dataset's motion kernel and provides the training and
    evaluation loops.  ``self.data``, the hyper-parameters (``batch_size``,
    ``im_size``, ``im_channel``, ...), ``validate`` and ``motion2flow`` are not
    defined in this class and are expected to be provided by ``BaseDemo``.

    NOTE(review): batches are indexed as 5-D arrays below; presumably they are
    laid out as (batch, frame, channel, height, width) -- confirm against the
    data loader.
    """

    def __init__(self, args):
        """Initialise the base demo, then build the models and the visualizer."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``self.model`` / ``self.model_gt`` and optionally load weights.

        Args:
            m_kernel: motion kernel array; ``m_kernel.shape[1]`` is passed to
                both networks (presumably the number of motion classes).

        Returns:
            Tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel, self.num_frame,
                         m_kernel.shape[1], self.m_range, m_kernel, self.net_depth)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel, self.num_frame,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value: ``is not ''`` tests object identity, which is not
        # guaranteed for strings and is a SyntaxWarning on Python >= 3.8.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    @staticmethod
    def _endpoint_error(flow, gt_motion):
        """Return the mean end-point error between two flow fields as a scalar."""
        diff = flow - gt_motion
        epe = torch.sqrt((diff * diff).sum(1))
        epe = epe.sum() / epe.numel()
        return epe.cpu().data[0]

    def train(self):
        """Train ``self.model`` for ``self.train_epoch`` iterations.

        Each iteration fetches one batch, predicts the last frame from the
        preceding ones and minimises the per-sample L1 reconstruction error.
        The loss is written to TensorBoard, a checkpoint is saved every
        ``self.save_interval`` iterations, and ``self.validate()`` is run every
        ``self.test_interval`` iterations.  Exits the process for unsupported
        dataset names.
        """
        from collections import deque

        writer = SummaryWriter(self.tensorboard_path)
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        # Rolling windows over the 100 most recent iterations.
        base_loss_all, train_loss_all = deque(maxlen=100), deque(maxlen=100)
        try:
            for epoch in range(self.train_epoch):
                optimizer.zero_grad()
                if self.data.name in ['box', 'mnist', 'chair']:
                    im, _, _, _ = self.data.get_next_batch(self.data.train_images)
                elif self.data.name in ['robot', 'mpii', 'viper', 'kitti', 'robotc']:
                    im = self.data.get_next_batch(self.data.train_images)
                else:
                    logging.error('%s data not supported', self.data.name)
                    sys.exit()

                # All frames but the last are stacked into one input tensor.
                im_input = im[:, :-1, :, :, :].reshape(
                    self.batch_size, -1, self.im_size, self.im_size)
                im_output = im[:, -1, :, :, :]
                im_input = Variable(torch.from_numpy(im_input).float())
                im_output = Variable(torch.from_numpy(im_output).float())
                if torch.cuda.is_available():
                    im_input, im_output = im_input.cuda(), im_output.cuda()

                im_pred, m_mask = self.model(im_input, im_output)
                im_diff = im_pred - im_output
                loss = torch.abs(im_diff).sum() / self.batch_size
                loss.backward()
                optimizer.step()

                loss_value = loss.data[0]
                writer.add_scalar('train_loss', loss_value, epoch)
                train_loss_all.append(loss_value)
                ave_train_loss = sum(train_loss_all) / float(len(train_loss_all))

                # Baseline: copy the most recent input frame as the prediction.
                im_base = im_input[:, -self.im_channel:, :, :]
                base_loss = torch.abs(im_base - im_output).sum() / self.batch_size
                base_loss_all.append(base_loss.data[0])
                ave_base_loss = sum(base_loss_all) / float(len(base_loss_all))

                logging.info(
                    'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                    epoch, loss_value, ave_train_loss, ave_base_loss)

                if (epoch + 1) % self.save_interval == 0:
                    logging.info('epoch %d, saving model', epoch)
                    # torch.save writes bytes, so the file must be opened in binary mode.
                    with open(os.path.join(self.save_dir, '%d.pth' % epoch), 'wb') as handle:
                        torch.save(self.model.state_dict(), handle)

                if (epoch + 1) % self.test_interval == 0:
                    logging.info('epoch %d, testing', epoch)
                    # NOTE(review): validate() is not defined in this class and must
                    # return exactly two values; test() below returns three.
                    test_loss, test_epe = self.validate()
                    writer.add_scalar('test_loss', test_loss, epoch)
                    if test_epe is not None:
                        writer.add_scalar('test_epe', test_epe, epoch)
        finally:
            # Flush and release the event file even on error or sys.exit().
            writer.close()

    def test(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        Returns:
            Tuple ``(test_loss, test_epe, improve_percent)``: mean L1 loss, mean
            end-point error (``None`` when the dataset provides no ground-truth
            motion) and the improvement over the copy-last-frame baseline
            relative to that baseline.
        """
        base_loss_all, test_loss_all = [], []
        test_epe_all = []
        motion = None
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'chair']:
                im, motion, _, _ = self.data.get_next_batch(self.data.test_images)
            elif self.data.name in ['robot', 'mpii', 'viper', 'kitti', 'robotc']:
                im, motion = self.data.get_next_batch(self.data.test_images), None
            elif self.data.name in ['mpii_sample', 'kitti_sample']:
                im, motion = self.data.get_next_batch(self.data.test_images), None
                # Keep only the trailing num_frame frames of each sample.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            im_input = im[:, :-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float(), volatile=True)
            im_output = Variable(torch.from_numpy(im_output).float(), volatile=True)
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()

            im_pred, m_mask = self.model(im_input, im_output)
            flow = self.motion2flow(m_mask)

            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum() / self.batch_size
            test_loss_all.append(loss.data[0])

            im_base = im_input[:, -self.im_channel:, :, :]
            base_loss = torch.abs(im_base - im_output).sum() / self.batch_size
            base_loss_all.append(base_loss.data[0])

            if motion is None:
                gt_motion = None
            else:
                gt_motion = motion[:, -2, :, :, :]
                gt_motion = Variable(torch.from_numpy(gt_motion).float())
                if torch.cuda.is_available():
                    gt_motion = gt_motion.cuda()
                test_epe_all.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred, flow,
                                                 gt_motion, 'test_%d.png' % epoch)
            if self.display_all:
                # NOTE(review): these per-sample file names share the 'test_%d.png'
                # pattern used above, so the two sets can overwrite each other.
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output, im_pred, flow,
                                                     gt_motion, 'test_%d.png' % i, i)

        test_loss = numpy.mean(numpy.asarray(test_loss_all))
        base_loss = numpy.mean(numpy.asarray(base_loss_all))
        improve_loss = base_loss - test_loss
        # The small epsilon guards against a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        if motion is not None:
            test_epe = numpy.mean(numpy.asarray(test_epe_all))
            logging.info('average test endpoint error: %.2f', test_epe)
        else:
            test_epe = None
        return test_loss, test_epe, improve_percent

    def test_gt(self):
        """Evaluate ``self.model_gt`` when it is fed ground-truth motion labels.

        Only the 'box', 'mnist' and 'chair' datasets are supported; any other
        dataset name logs an error and exits the process.

        Returns:
            Improvement of the ground-truth model over the copy-last-frame
            baseline, relative to that baseline.
        """
        base_loss_all, test_loss_all = [], []
        test_epe_all = []
        for epoch in range(self.test_epoch):
            if self.data.name not in ['box', 'mnist', 'chair']:
                logging.error('%s data not supported in test_gt', self.data.name)
                sys.exit()
            im, motion, motion_label, _ = self.data.get_next_batch(self.data.test_images)
            gt_motion_label = motion_label[:, -2, :, :, :]
            gt_motion_label = Variable(torch.from_numpy(gt_motion_label))
            if torch.cuda.is_available():
                gt_motion_label = gt_motion_label.cuda()

            im_input = im[:, :-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            gt_motion = motion[:, -2, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float(), volatile=True)
            im_output = Variable(torch.from_numpy(im_output).float(), volatile=True)
            gt_motion = Variable(torch.from_numpy(gt_motion).float(), volatile=True)
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
                gt_motion = gt_motion.cuda()

            # Unsupported datasets have already exited above, so no second
            # dataset check is needed before calling the ground-truth model.
            im_pred, m_mask = self.model_gt(im_input, im_output, gt_motion_label, 'label')
            flow = self.motion2flow(m_mask)

            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum() / self.batch_size
            test_loss_all.append(loss.data[0])

            im_base = im_input[:, -self.im_channel:, :, :]
            base_loss = torch.abs(im_base - im_output).sum() / self.batch_size
            base_loss_all.append(base_loss.data[0])

            test_epe_all.append(self._endpoint_error(flow, gt_motion))

            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred, flow,
                                                 gt_motion, 'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output, im_pred, flow,
                                                     gt_motion, 'test_gt_%d.png' % i, i)

        test_loss = numpy.mean(numpy.asarray(test_loss_all))
        base_loss = numpy.mean(numpy.asarray(base_loss_all))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f',
                     test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        test_epe = numpy.mean(numpy.asarray(test_epe_all))
        logging.info('average ground truth test endpoint error: %.2f', test_epe)
        return improve_percent
class Demo(BaseBiDemo):
    """Bidirectional motion-prediction demo with depth (occlusion) masks.

    The middle frame of each sequence is predicted from the frames before it
    (forward inputs) and the frames after it in reverse order (backward
    inputs).  ``self.data``, the hyper-parameters (``batch_size``, ``im_size``,
    ``num_inputs``, ``m_range``, ...), ``validate`` and ``motion2flow`` are not
    defined in this class and are expected to be provided by ``BaseBiDemo``.

    NOTE(review): batches are indexed as 5-D arrays below; presumably they are
    laid out as (batch, frame, channel, height, width) -- confirm against the
    data loader.
    """

    def __init__(self, args):
        """Initialise the base demo, then build the models and the visualizer."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``self.model`` / ``self.model_gt`` and optionally load weights.

        Args:
            m_kernel: motion kernel array; ``m_kernel.shape[1]`` is passed to
                both networks (presumably the number of motion classes).

        Returns:
            Tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel, self.num_inputs,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel, self.num_inputs,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value: ``is not ''`` tests object identity, which is not
        # guaranteed for strings and is a SyntaxWarning on Python >= 3.8.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    @staticmethod
    def _endpoint_error(flow, gt_motion):
        """Return the mean end-point error between two flow fields as a scalar."""
        diff = flow - gt_motion
        epe = torch.sqrt((diff * diff).sum(1))
        epe = epe.sum() / epe.numel()
        return epe.cpu().data[0]

    def train(self):
        """Train ``self.model`` for ``self.train_epoch`` iterations.

        The loss is the L1 reconstruction error of the middle frame with a
        border of ``self.m_range`` pixels cropped from every side.  A
        checkpoint is saved every ``self.save_interval`` iterations and
        ``self.validate()`` is run every ``self.test_interval`` iterations.
        Exits the process for unsupported dataset names.
        """
        from collections import deque

        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        # Rolling windows over the 100 most recent iterations.
        base_loss, train_loss = deque(maxlen=100), deque(maxlen=100)
        m = self.m_range
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, _, _, _, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128',
                                    'viper64', 'viper128']:
                im = self.data.get_next_batch(self.data.train_images)
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            # Forward inputs: frames before the target.  Backward inputs: frames
            # after the target, in reverse order.
            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()

            im_pred, m_mask_f, d_mask_f, attn_f, m_mask_b, d_mask_b, attn_b = \
                self.model(im_input_f, im_input_b)
            im_diff = im_pred - im_output
            # NOTE(review): with m_range == 0 this slice is empty and the loss is 0.
            loss = torch.abs(im_diff[:, :, m:-m, m:-m]).sum()
            loss.backward()
            optimizer.step()

            loss_value = loss.data[0]
            train_loss.append(loss_value)
            ave_train_loss = sum(train_loss) / float(len(train_loss))

            # Baseline: average of the nearest forward and backward input frames.
            im_base = 0.5 * im_input_f[:, -self.im_channel:, :, :] + \
                0.5 * im_input_b[:, -self.im_channel:, :, :]
            base_loss.append(torch.abs(im_base - im_output).sum().data[0])
            ave_base_loss = sum(base_loss) / float(len(base_loss))

            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss_value, ave_train_loss, ave_base_loss)

            if (epoch + 1) % self.save_interval == 0:
                logging.info('epoch %d, saving model', epoch)
                # torch.save writes bytes, so the file must be opened in binary mode.
                with open(os.path.join(self.save_dir, '%d.pth' % epoch), 'wb') as handle:
                    torch.save(self.model.state_dict(), handle)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        End-point error is computed for both directions when the dataset
        provides ground-truth motion.

        Returns:
            Improvement over the averaged-neighbour baseline, relative to that
            baseline.
        """
        base_loss, test_loss = [], []
        test_epe = []
        motion = None
        m = self.m_range
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, motion_r, _, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128',
                                    'viper64', 'viper128']:
                im, motion, motion_r = \
                    self.data.get_next_batch(self.data.test_images), None, None
            elif self.data.name in ['mpii64_sample']:
                im, motion, motion_r = \
                    self.data.get_next_batch(self.data.test_images), None, None
                # Keep only the trailing num_frame frames of each sample.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported', self.data.name)
                sys.exit()

            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float(), volatile=True)
            im_input_b = Variable(torch.from_numpy(im_input_b).float(), volatile=True)
            im_output = Variable(torch.from_numpy(im_output).float(), volatile=True)
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()

            im_pred, m_mask_f, d_mask_f, attn_f, m_mask_b, d_mask_b, attn_b = \
                self.model(im_input_f, im_input_b)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff[:, :, m:-m, m:-m]).sum()
            test_loss.append(loss.data[0])

            im_base = 0.5 * im_input_f[:, -self.im_channel:, :, :] + \
                0.5 * im_input_b[:, -self.im_channel:, :, :]
            base_loss.append(torch.abs(im_base - im_output).sum().data[0])

            flow_f = self.motion2flow(m_mask_f)
            depth_f = self.mask2depth(d_mask_f)
            flow_b = self.motion2flow(m_mask_b)
            depth_b = self.mask2depth(d_mask_b)

            if motion is None:
                gt_motion_f = None
                gt_motion_b = None
            else:
                gt_motion_f = motion[:, self.num_inputs - 1, :, :, :]
                gt_motion_f = Variable(torch.from_numpy(gt_motion_f).float())
                if torch.cuda.is_available():
                    gt_motion_f = gt_motion_f.cuda()
                test_epe.append(self._endpoint_error(flow_f, gt_motion_f))

                gt_motion_b = motion_r[:, self.num_inputs + 1, :, :, :]
                gt_motion_b = Variable(torch.from_numpy(gt_motion_b).float())
                if torch.cuda.is_available():
                    gt_motion_b = gt_motion_b.cuda()
                test_epe.append(self._endpoint_error(flow_b, gt_motion_b))

            if self.display:
                self.visualizer.visualize_result(
                    im_input_f, im_input_b, im_output, im_pred, flow_f, gt_motion_f,
                    depth_f, attn_f, flow_b, gt_motion_b, depth_b, attn_b,
                    'test_%d.png' % epoch)
            if self.display_all:
                # NOTE(review): these per-sample file names share the 'test_%d.png'
                # pattern used above, so the two sets can overwrite each other.
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input_f, im_input_b, im_output, im_pred, flow_f, gt_motion_f,
                        depth_f, attn_f, flow_b, gt_motion_b, depth_b, attn_b,
                        'test_%d.png' % i, i)

        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small epsilon guards against a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        if motion is not None:
            test_epe = numpy.mean(numpy.asarray(test_epe))
            logging.info('average test endpoint error: %.2f', test_epe)
        return improve_percent

    def test_gt(self):
        """Evaluate ``self.model_gt`` fed with ground-truth motion labels and depth.

        Only the 'box', 'mnist' and 'box_complex' datasets are supported; any
        other dataset name logs an error and exits the process.

        Returns:
            Improvement of the ground-truth model over the averaged-neighbour
            baseline, relative to that baseline.
        """
        base_loss, test_loss = [], []
        test_epe = []
        m = self.m_range
        for epoch in range(self.test_epoch):
            if self.data.name not in ['box', 'mnist', 'box_complex']:
                logging.error('%s data not supported in test_gt', self.data.name)
                sys.exit()
            im, motion, motion_r, motion_label, motion_label_r, depth = \
                self.data.get_next_batch(self.data.test_images)

            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            gt_motion_f = motion[:, self.num_inputs - 1, :, :, :]
            gt_motion_b = motion_r[:, self.num_inputs + 1, :, :, :]
            gt_motion_label_f = motion_label[:, self.num_inputs - 1, :, :, :]
            gt_motion_label_b = motion_label_r[:, self.num_inputs + 1, :, :, :]
            gt_depth_f = depth[:, self.num_inputs - 1, :, :]
            gt_depth_b = depth[:, self.num_inputs + 1, :, :]

            im_input_f = Variable(torch.from_numpy(im_input_f).float(), volatile=True)
            im_input_b = Variable(torch.from_numpy(im_input_b).float(), volatile=True)
            im_output = Variable(torch.from_numpy(im_output).float(), volatile=True)
            gt_motion_f = Variable(torch.from_numpy(gt_motion_f).float())
            gt_motion_b = Variable(torch.from_numpy(gt_motion_b).float())
            # Labels and depth keep the dtype delivered by the data loader.
            gt_motion_label_f = Variable(torch.from_numpy(gt_motion_label_f))
            gt_motion_label_b = Variable(torch.from_numpy(gt_motion_label_b))
            gt_depth_f = Variable(torch.from_numpy(gt_depth_f))
            gt_depth_b = Variable(torch.from_numpy(gt_depth_b))
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()
                gt_motion_f, gt_motion_b = gt_motion_f.cuda(), gt_motion_b.cuda()
                gt_motion_label_f = gt_motion_label_f.cuda()
                gt_motion_label_b = gt_motion_label_b.cuda()
                gt_depth_f, gt_depth_b = gt_depth_f.cuda(), gt_depth_b.cuda()

            im_pred, m_mask_f, d_mask_f, attn_f, m_mask_b, d_mask_b, attn_b = self.model_gt(
                im_input_f, im_input_b, gt_motion_label_f, gt_depth_f,
                gt_motion_label_b, gt_depth_b, 'label')
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff[:, :, m:-m, m:-m]).sum()
            test_loss.append(loss.data[0])

            im_base = 0.5 * im_input_f[:, -self.im_channel:, :, :] + \
                0.5 * im_input_b[:, -self.im_channel:, :, :]
            base_loss.append(torch.abs(im_base - im_output).sum().data[0])

            flow_f = self.motion2flow(m_mask_f)
            depth_f = self.mask2depth(d_mask_f)
            test_epe.append(self._endpoint_error(flow_f, gt_motion_f))

            flow_b = self.motion2flow(m_mask_b)
            depth_b = self.mask2depth(d_mask_b)
            test_epe.append(self._endpoint_error(flow_b, gt_motion_b))

            if self.display:
                self.visualizer.visualize_result(
                    im_input_f, im_input_b, im_output, im_pred, flow_f, gt_motion_f,
                    depth_f, attn_f, flow_b, gt_motion_b, depth_b, attn_b, 'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input_f, im_input_b, im_output, im_pred, flow_f, gt_motion_f,
                        depth_f, attn_f, flow_b, gt_motion_b, depth_b, attn_b,
                        'test_gt_%d.png' % i, i)

        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f',
                     test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        test_epe = numpy.mean(numpy.asarray(test_epe))
        logging.info('average ground truth test endpoint error: %.2f', test_epe)
        return improve_percent

    @staticmethod
    def mask2depth(d_mask):
        """Collapse a per-depth-level mask into a single-channel depth map.

        Each output pixel is the sum over channel ``i`` of ``d_mask[:, i] * i``.

        Args:
            d_mask: 4-D Variable of size (batch, num_depth, height, width).

        Returns:
            Variable of size (batch, 1, height, width), on the GPU whenever
            CUDA is available.
        """
        [batch_size, num_depth, height, width] = d_mask.size()
        # Channel i of depth_number is filled with the constant i.
        depth_number = Variable(torch.zeros(batch_size, num_depth, height, width))
        if torch.cuda.is_available():
            depth_number = depth_number.cuda()
        for i in range(num_depth):
            depth_number[:, i, :, :] = i
        depth = Variable(torch.zeros(batch_size, 1, height, width))
        if torch.cuda.is_available():
            depth = depth.cuda()
        depth[:, 0, :, :] = (d_mask * depth_number).sum(1)
        return depth
class Demo(BaseDemo):
    """Bidirectional motion-prediction demo evaluated by motion-class accuracy.

    The middle frame of each sequence is predicted from the frames before it
    (forward inputs) and the frames after it in reverse order (backward
    inputs).  For the 'box' and 'mnist' datasets the data source created by
    ``BaseDemo`` is replaced with its bidirectional variant.  Hyper-parameters
    (``batch_size``, ``im_size``, ``num_frame``, ...) and ``validate`` are not
    defined in this class and are expected to be provided by ``BaseDemo``.

    NOTE(review): batches are indexed as 5-D arrays below; presumably they are
    laid out as (batch, frame, channel, height, width) -- confirm against the
    data loader.  The channel count is hard-coded to 3 throughout.
    """

    def __init__(self, args):
        """Initialise the base demo, pick the data source, build the models."""
        super(Demo, self).__init__(args)
        if args.data == 'box':
            self.data = BoxDataBidirect(args)
        elif args.data == 'mnist':
            self.data = MnistDataBidirect(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)
        # Floor division: the value is used as a slice bound and frame index,
        # which must be an int (``/`` yields a float on Python 3).
        self.num_inputs = (self.num_frame - 1) // 2

    def init_model(self, m_kernel):
        """Create ``self.model`` / ``self.model_gt`` and optionally load weights.

        Args:
            m_kernel: motion kernel array; ``m_kernel.shape[1]`` is passed to
                both networks (presumably the number of motion classes).

        Returns:
            Tuple ``(self.model, self.model_gt)``.
        """
        # Number of input frames on each side of the predicted middle frame.
        num_inputs = (self.num_frame - 1) // 2
        self.model = Net(self.im_size, self.im_size, 3, num_inputs,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, 3, num_inputs,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value: ``is not ''`` tests object identity, which is not
        # guaranteed for strings and is a SyntaxWarning on Python >= 3.8.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train_unsupervised(self):
        """Train ``self.model`` on L1 reconstruction of the middle frame.

        Runs ``self.train_epoch`` iterations and calls ``self.validate()``
        every ``self.test_interval`` iterations.  The logged averages cover the
        most recent 100 recorded values; the baseline records two values per
        iteration (forward and backward neighbour frame).
        """
        from collections import deque

        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        # Bounded windows.  The baseline gets two entries per iteration, so a
        # single pop per iteration would let the window grow without limit.
        base_loss, train_loss = deque(maxlen=100), deque(maxlen=100)
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            im, motion, motion_r = self.data.get_next_batch(self.data.train_images)

            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()

            im_pred, m_mask_f, disappear_f, attn_f, m_mask_b, disappear_b, attn_b = \
                self.model(im_input_f, im_input_b)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum()
            loss.backward()
            optimizer.step()

            loss_value = loss.data[0]
            train_loss.append(loss_value)
            ave_train_loss = sum(train_loss) / float(len(train_loss))

            # Baselines: copy the nearest forward / backward input frame.
            base_loss.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])
            base_loss.append(
                torch.abs(im_input_b[:, -3:, :, :] - im_output).sum().data[0])
            ave_base_loss = sum(base_loss) / float(len(base_loss))

            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss_value, ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test_unsupervised(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        Logs the mean L1 loss, the copy-neighbour baseline and the per-pixel
        accuracy of the arg-max motion class against the ground truth.

        Returns:
            Improvement over the baseline, relative to that baseline.
        """
        base_loss, test_loss = [], []
        test_accuracy = []
        for epoch in range(self.test_epoch):
            im, motion, motion_r = self.data.get_next_batch(self.data.test_images)

            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            gt_motion_f = motion[:, self.num_inputs - 1, :, :, :]
            gt_motion_b = motion_r[:, self.num_inputs + 1, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            # Ground-truth motion keeps the loader's dtype; it is compared with
            # arg-max class indices below.
            gt_motion_f = Variable(torch.from_numpy(gt_motion_f))
            gt_motion_b = Variable(torch.from_numpy(gt_motion_b))
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()
                gt_motion_f, gt_motion_b = gt_motion_f.cuda(), gt_motion_b.cuda()

            im_pred, m_mask_f, disappear_f, attn_f, m_mask_b, disappear_b, attn_b = \
                self.model(im_input_f, im_input_b)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum()
            test_loss.append(loss.data[0])
            base_loss.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])
            base_loss.append(
                torch.abs(im_input_b[:, -3:, :, :] - im_output).sum().data[0])

            # Predicted motion class = arg-max over the class dimension.
            pred_motion_f = m_mask_f.max(1)[1]
            pred_motion_b = m_mask_b.max(1)[1]
            accuracy_f = pred_motion_f.eq(gt_motion_f).float().sum() / gt_motion_f.numel()
            accuracy_b = pred_motion_b.eq(gt_motion_b).float().sum() / gt_motion_b.numel()
            test_accuracy.append(accuracy_f.cpu().data[0])
            test_accuracy.append(accuracy_b.cpu().data[0])

            if self.display:
                flow_f = self.motion2flow(m_mask_f)
                flow_b = self.motion2flow(m_mask_b)
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f, gt_motion_f,
                    disappear_f, attn_f, flow_b, gt_motion_b, disappear_b, attn_b,
                    'test_%d.png' % epoch)

        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small epsilon guards against a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        test_accuracy = numpy.mean(numpy.asarray(test_accuracy))
        logging.info('average test accuracy: %.2f', test_accuracy)
        return improve_percent

    def test_gt_unsupervised(self):
        """Evaluate ``self.model_gt`` when it is fed the ground-truth motion.

        Mirrors ``test_unsupervised`` but calls the ground-truth network with
        the forward and backward ground-truth motion as extra inputs.

        Returns:
            Improvement over the baseline, relative to that baseline.
        """
        base_loss, test_loss = [], []
        test_accuracy = []
        for epoch in range(self.test_epoch):
            im, motion, motion_r = self.data.get_next_batch(self.data.test_images)

            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            gt_motion_f = motion[:, self.num_inputs - 1, :, :, :]
            gt_motion_b = motion_r[:, self.num_inputs + 1, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            gt_motion_f = Variable(torch.from_numpy(gt_motion_f))
            gt_motion_b = Variable(torch.from_numpy(gt_motion_b))
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()
                gt_motion_f, gt_motion_b = gt_motion_f.cuda(), gt_motion_b.cuda()

            im_pred, m_mask_f, disappear_f, attn_f, m_mask_b, disappear_b, attn_b = \
                self.model_gt(im_input_f, im_input_b, gt_motion_f, gt_motion_b)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum()
            test_loss.append(loss.data[0])
            base_loss.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])
            base_loss.append(
                torch.abs(im_input_b[:, -3:, :, :] - im_output).sum().data[0])

            pred_motion_f = m_mask_f.max(1)[1]
            pred_motion_b = m_mask_b.max(1)[1]
            accuracy_f = pred_motion_f.eq(gt_motion_f).float().sum() / gt_motion_f.numel()
            accuracy_b = pred_motion_b.eq(gt_motion_b).float().sum() / gt_motion_b.numel()
            test_accuracy.append(accuracy_f.cpu().data[0])
            test_accuracy.append(accuracy_b.cpu().data[0])

            if self.display:
                flow_f = self.motion2flow(m_mask_f)
                flow_b = self.motion2flow(m_mask_b)
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f, gt_motion_f,
                    disappear_f, attn_f, flow_b, gt_motion_b, disappear_b, attn_b,
                    'test_gt.png')

        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average groundtruth test loss: %.2f, base loss: %.2f',
                     test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f',
                     improve_loss, improve_percent)
        test_accuracy = numpy.mean(numpy.asarray(test_accuracy))
        logging.info('average groundtruth test accuracy: %.2f', test_accuracy)
        return improve_percent

    def motion2flow(self, m_mask):
        """Convert a per-class motion mask into a two-channel flow field.

        Every class index ``i`` maps to a displacement ``(m_x, m_y)`` through
        ``self.data.reverse_m_dict``; the flow is the mask-weighted sum of
        those displacements.

        Args:
            m_mask: 4-D Variable of size (batch, num_class, height, width).

        Returns:
            Variable of size (batch, 2, height, width); channel 0 holds the x
            component and channel 1 the y component.
        """
        reverse_m_dict = self.data.reverse_m_dict
        [batch_size, num_class, height, width] = m_mask.size()
        # kernel_x / kernel_y hold the displacement of class i in channel i.
        kernel_x = Variable(torch.zeros(batch_size, num_class, height, width))
        kernel_y = Variable(torch.zeros(batch_size, num_class, height, width))
        if torch.cuda.is_available():
            kernel_x = kernel_x.cuda()
            kernel_y = kernel_y.cuda()
        for i in range(num_class):
            (m_x, m_y) = reverse_m_dict[i]
            kernel_x[:, i, :, :] = m_x
            kernel_y[:, i, :, :] = m_y
        # NOTE(review): unlike the kernels, ``flow`` is never moved to the GPU,
        # so the result is a CPU Variable even when CUDA is used -- confirm
        # that callers expect this.
        flow = Variable(torch.zeros(batch_size, 2, height, width))
        flow[:, 0, :, :] = (m_mask * kernel_x).sum(1)
        flow[:, 1, :, :] = (m_mask * kernel_y).sum(1)
        return flow