Example #1
def evaluate(eval_loader, model, writer, step, Save_model, epoch):

    top_prec = AverageMeter()
    softmax = nn.Softmax(dim=1).cuda()  # dim is required; class scores lie along dim 1

    model.eval()
    with torch.no_grad():  # inference only; Variable is a no-op in modern PyTorch
        for i, (images, labels, names) in enumerate(eval_loader):
            images = images.cuda()
            labels = labels.cuda()
            gender_pred = softmax(model(images))

            prec = accuracy(gender_pred, labels, topk=(1,))
            top_prec.update(prec[0].item())

    print('evaluate * Prec@1 {top:.3f}'.format(top=top_prec.avg))

    writer.add_scalar('prec', top_prec.avg, step)

    Save_model.save(model, top_prec.avg, epoch)
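
Every example on this page leans on an AverageMeter helper that is never shown. A minimal sketch, assuming the common implementation from the PyTorch ImageNet example (its val/avg/sum/count attributes match how the snippets here use the meter):

class AverageMeter(object):
    """Tracks the most recent value and a running (weighted) average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0    # most recent value
        self.avg = 0.0    # running average
        self.sum = 0.0    # weighted sum of all values
        self.count = 0    # total weight (e.g. number of samples)

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count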
Example #2
    def __init__(self,
                 seq,
                 node_name,
                 cloud_topic_name,
                 tf_topic_name,
                 dataset,
                 global_tf_name="map",
                 child_tf_name="car"):
        rospy.init_node(node_name)
        self.cloud_pub = rospy.Publisher(cloud_topic_name,
                                         PointCloud2,
                                         queue_size=queue_size)
        self.transform_broadcaster = tf2_ros.TransformBroadcaster()
        self.est_tf_pub = rospy.Publisher(
            tf_topic_name, TransformStamped,
            queue_size=queue_size)  # for visualization
        self.gt_tf_pub = rospy.Publisher(
            "gt_pose", TransformStamped,
            queue_size=queue_size)  # for visualization
        self.cap_pub = rospy.Publisher("CAP",
                                       CloudAndPose,
                                       queue_size=queue_size)
        self.rate = rospy.Rate(sleep_rate)
        self.header = Header(frame_id=global_tf_name)
        self.child_tf_name = child_tf_name  # base name before appending prefix
        self.dataset = dataset
        self.seq = seq

        transform_dict = OrderedDict()
        transform_dict[GridSampling([args.grid_size] * 3)] = ["train", "test"]
        transform_dict[NormalizeScale()] = ["train", "test"]
        transform = ComposeAdapt(transform_dict)
        self.model = Net(graph_input=LOAD_GRAPH,
                         act="LeakyReLU",
                         transform=transform,
                         dof=7)
        if args.model_path is not None and osp.exists(args.model_path):
            self.model.load_state_dict(
                torch.load(args.model_path, map_location=torch.device("cpu")))
            print("loaded weights from", args.model_path)
        self.model.eval()

        self.absolute_gt_pose = np.eye(4)[:3, :]
        self.absolute_est_pose = np.eye(4)[:3, :]
        self.infer_time_meter = AverageMeter()
        self.tr_error_meter = AverageMeter()
        self.rot_error_meter = AverageMeter()

        self.fields = [
            PointField('x', 0, PointField.FLOAT32, 1),
            PointField('y', 4, PointField.FLOAT32, 1),
            PointField('z', 8, PointField.FLOAT32, 1),
            PointField('intensity', 12, PointField.FLOAT32, 1)
        ]
        self.pose_list = []
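
For context, a node like this would typically assemble and publish a cloud from an (N, 4) array using the fields defined above. A minimal sketch, assuming ROS1's sensor_msgs.point_cloud2 helpers; the publish_cloud method name is hypothetical:

    def publish_cloud(self, points):
        # points: iterable of (x, y, z, intensity) rows, e.g. an (N, 4) numpy array
        from sensor_msgs import point_cloud2
        self.header.stamp = rospy.Time.now()
        cloud_msg = point_cloud2.create_cloud(self.header, self.fields, points)
        self.cloud_pub.publish(cloud_msg)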
Example #3
	def train_amortized(self, imgs, classes, model_dir, tensorboard_dir):
		self.amortized_model = AmortizedModel(self.config)
		self.amortized_model.modulation.load_state_dict(self.latent_model.modulation.state_dict())
		self.amortized_model.generator.load_state_dict(self.latent_model.generator.state_dict())

		data = dict(
			img=torch.from_numpy(imgs).permute(0, 3, 1, 2),
			img_id=torch.from_numpy(np.arange(imgs.shape[0])),
			class_id=torch.from_numpy(classes.astype(np.int64))
		)

		dataset = NamedTensorDataset(data)
		data_loader = DataLoader(
			dataset, batch_size=self.config['train']['batch_size'],
			shuffle=True, sampler=None, batch_sampler=None,
			num_workers=1, pin_memory=True, drop_last=True
		)

		self.latent_model.to(self.device)
		self.amortized_model.to(self.device)

		reconstruction_criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(self.device)
		embedding_criterion = nn.MSELoss()

		optimizer = Adam(
			params=self.amortized_model.parameters(),
			lr=self.config['train_encoders']['learning_rate']['max'],
			betas=(0.5, 0.999)
		)

		scheduler = CosineAnnealingLR(
			optimizer,
			T_max=self.config['train_encoders']['n_epochs'] * len(data_loader),
			eta_min=self.config['train_encoders']['learning_rate']['min']
		)

		summary = SummaryWriter(log_dir=tensorboard_dir)

		train_loss = AverageMeter()
		for epoch in range(self.config['train_encoders']['n_epochs']):
			self.latent_model.eval()
			self.amortized_model.train()

			train_loss.reset()

			pbar = tqdm(iterable=data_loader)
			for batch in pbar:
				batch = {name: tensor.to(self.device) for name, tensor in batch.items()}

				optimizer.zero_grad()

				target_content_code = self.latent_model.content_embedding(batch['img_id'])
				target_class_code = self.latent_model.class_embedding(batch['class_id'])

				out = self.amortized_model(batch['img'])

				loss_reconstruction = reconstruction_criterion(out['img'], batch['img'])
				loss_content = embedding_criterion(out['content_code'], target_content_code)
				loss_class = embedding_criterion(out['class_code'], target_class_code)

				loss = loss_reconstruction + 10 * loss_content + 10 * loss_class

				loss.backward()
				optimizer.step()
				scheduler.step()

				train_loss.update(loss.item())
				pbar.set_description_str('epoch #{}'.format(epoch))
				pbar.set_postfix(loss=train_loss.avg)

			pbar.close()
			self.save(model_dir, latent=False, amortized=True)

			summary.add_scalar(tag='loss-amortized', scalar_value=loss.item(), global_step=epoch)
			summary.add_scalar(tag='rec-loss-amortized', scalar_value=loss_reconstruction.item(), global_step=epoch)
			summary.add_scalar(tag='content-loss-amortized', scalar_value=loss_content.item(), global_step=epoch)
			summary.add_scalar(tag='class-loss-amortized', scalar_value=loss_class.item(), global_step=epoch)

			fixed_sample_img = self.generate_samples_amortized(dataset, randomized=False)
			random_sample_img = self.generate_samples_amortized(dataset, randomized=True)

			summary.add_image(tag='sample-fixed-amortized', img_tensor=fixed_sample_img, global_step=epoch)
			summary.add_image(tag='sample-random-amortized', img_tensor=random_sample_img, global_step=epoch)

		summary.close()
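
NamedTensorDataset is external to this snippet. A minimal sketch, assuming it simply indexes a dict of equally sized tensors (consistent with how the batches above are unpacked as name/tensor pairs):

from torch.utils.data import Dataset

class NamedTensorDataset(Dataset):
    """Dataset over a dict of tensors that share their first dimension."""

    def __init__(self, named_tensors):
        sizes = {tensor.size(0) for tensor in named_tensors.values()}
        assert len(sizes) == 1, 'all tensors must have the same length'
        self.named_tensors = named_tensors

    def __getitem__(self, index):
        return {name: tensor[index] for name, tensor in self.named_tensors.items()}

    def __len__(self):
        return next(iter(self.named_tensors.values())).size(0)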
Example #4
def infer(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # prepare model
    if args.model_type == "GLU_Transformer":
        model = GLU_Transformer(phone_size=args.phone_size,
                                embed_size=args.embedding_size,
                                hidden_size=args.hidden_size,
                                glu_num_layers=args.glu_num_layers,
                                dropout=args.dropout,
                                output_dim=args.feat_dim,
                                dec_nhead=args.dec_nhead,
                                n_mels=args.n_mels,
                                local_gaussian=args.local_gaussian,
                                dec_num_block=args.dec_num_block)
    elif args.model_type == "PureTransformer":
        model = TransformerSVS(
            phone_size=args.phone_size,
            embed_size=args.embedding_size,
            hidden_size=args.hidden_size,
            glu_num_layers=args.glu_num_layers,
            dropout=args.dropout,
            output_dim=args.feat_dim,
            dec_nhead=args.dec_nhead,
            dec_num_block=args.dec_num_block,
            n_mels=args.n_mels,
            local_gaussian=args.local_gaussian,
        )
    elif args.model_type == "PureTransformer_norm":
        model = TransformerSVS_norm(
            stats_file=args.stats_file,
            stats_mel_file=args.stats_mel_file,
            phone_size=args.phone_size,
            embed_size=args.embedding_size,
            hidden_size=args.hidden_size,
            glu_num_layers=args.glu_num_layers,
            dropout=args.dropout,
            output_dim=args.feat_dim,
            dec_nhead=args.dec_nhead,
            dec_num_block=args.dec_num_block,
            n_mels=args.n_mels,
            local_gaussian=args.local_gaussian,
        )
    elif args.model_type == "GLU_Transformer_norm":
        model = GLU_TransformerSVS_norm(
            stats_file=args.stats_file,
            stats_mel_file=args.stats_mel_file,
            phone_size=args.phone_size,
            embed_size=args.embedding_size,
            hidden_size=args.hidden_size,
            glu_num_layers=args.glu_num_layers,
            dropout=args.dropout,
            output_dim=args.feat_dim,
            dec_nhead=args.dec_nhead,
            dec_num_block=args.dec_num_block,
            n_mels=args.n_mels,
            local_gaussian=args.local_gaussian,
        )

    else:
        raise ValueError('Unsupported model type: %s' % args.model_type)

    # Load model weights
    print("Loading pretrained weights from {}".format(args.model_file))
    checkpoint = torch.load(args.model_file, map_location=device)
    state_dict = checkpoint['state_dict']
    model_dict = model.state_dict()
    state_dict_new = {}
    para_list = []
    for k, v in state_dict.items():
        assert k in model_dict
        if model_dict[k].size() == state_dict[k].size():
            state_dict_new[k] = v
        else:
            para_list.append(k)

    print("Total {} parameters, loaded {} parameters".format(
        len(state_dict), len(state_dict_new)))

    if len(para_list) > 0:
        print("Not loading {} because of different sizes".format(
            ", ".join(para_list)))
    model_dict.update(state_dict_new)
    model.load_state_dict(model_dict)
    print("Loaded checkpoint {}".format(args.model_file))
    model = model.to(device)
    model.eval()

    # Decode
    test_set = SVSDataset(align_root_path=args.test_align,
                          pitch_beat_root_path=args.test_pitch,
                          wav_root_path=args.test_wav,
                          char_max_len=args.char_max_len,
                          max_len=args.num_frames,
                          sr=args.sampling_rate,
                          preemphasis=args.preemphasis,
                          nfft=args.nfft,
                          frame_shift=args.frame_shift,
                          frame_length=args.frame_length,
                          n_mels=args.n_mels,
                          power=args.power,
                          max_db=args.max_db,
                          ref_db=args.ref_db,
                          standard=args.standard,
                          sing_quality=args.sing_quality)
    collate_fn_svs = SVSCollator(args.num_frames, args.char_max_len,
                                 args.use_asr_post, args.phone_size)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              collate_fn=collate_fn_svs,
                                              pin_memory=True)

    if args.loss == "l1":
        criterion = MaskedLoss("l1")  # named criterion so the loss calls below resolve
    elif args.loss == "mse":
        criterion = MaskedLoss("mse")
    else:
        raise ValueError("Unsupported loss type: %s" % args.loss)

    losses = AverageMeter()
    spec_losses = AverageMeter()
    if args.perceptual_loss > 0:
        pe_losses = AverageMeter()
    if args.n_mels > 0:
        mel_losses = AverageMeter()

    if not os.path.exists(args.prediction_path):
        os.makedirs(args.prediction_path)

    # NOTE: the loader must also yield the mel target used below;
    # its position in this tuple is assumed.
    for step, (phone, beat, pitch, spec, mel, real, imag, length, chars,
               char_len_list) in enumerate(test_loader, 1):
        if step >= args.decode_sample:
            break
        phone = phone.to(device)
        beat = beat.to(device)
        pitch = pitch.to(device).float()
        spec = spec.to(device).float()
        mel = mel.to(device).float()
        real = real.to(device).float()
        imag = imag.to(device).float()
        length_mask = length.unsqueeze(2)
        length_mel_mask = length_mask.repeat(1, 1, mel.shape[2]).float()
        length_mask = length_mask.repeat(1, 1, spec.shape[2]).float()
        length_mask = length_mask.to(device)
        length_mel_mask = length_mel_mask.to(device)
        length = length.to(device)
        char_len_list = char_len_list.to(device)

        if not args.use_asr_post:
            chars = chars.to(device)
            char_len_list = char_len_list.to(device)
        else:
            phone = phone.float()

        if args.model_type == "GLU_Transformer":
            output, att, output_mel = model(chars,
                                            phone,
                                            pitch,
                                            beat,
                                            pos_char=char_len_list,
                                            pos_spec=length)
        elif args.model_type == "LSTM":
            output, hidden, output_mel = model(phone, pitch, beat)
            att = None
        elif args.model_type == "PureTransformer":
            output, att, output_mel = model(chars,
                                            phone,
                                            pitch,
                                            beat,
                                            pos_char=char_len_list,
                                            pos_spec=length)
        elif args.model_type in ("PureTransformer_norm",
                                 "GLU_Transformer_norm"):
            output, att, output_mel, spec_norm, mel_norm = model(
                spec,
                mel,
                chars,
                phone,
                pitch,
                beat,
                pos_char=char_len_list,
                pos_spec=length)
            output, _ = model.normalizer.inverse(output)

        if args.normalize:
            global_normalizer = GlobalMVN(args.stats_file)
            output, _ = global_normalizer.inverse(output, length)
        spec_loss = criterion(output, spec, length_mask)
        if args.n_mels > 0:
            mel_loss = criterion(
                output_mel, mel,
                length_mel_mask)  # FIXME: mel_loss is computed on the recovered (denormalized) mel
        else:
            mel_loss = 0

        final_loss = mel_loss + spec_loss

        losses.update(final_loss.item(), phone.size(0))
        spec_losses.update(spec_loss.item(), phone.size(0))
        if args.n_mels > 0:
            mel_losses.update(mel_loss.item(), phone.size(0))

        if step % 1 == 0:
            log_figure(step, output, spec, att, length, args.prediction_path,
                       args)
    print("loss avg for test is {}".format(losses.avg))
Example #5
    def train(self, imgs, classes, model_dir, tensorboard_dir):
        imgs = torch.from_numpy(imgs).permute(0, 3, 1, 2)
        class_ids = torch.from_numpy(classes.astype(int))
        img_ids = torch.arange(imgs.shape[0])

        tensor_dataset = TensorDataset(imgs, img_ids, class_ids)
        data_loader = DataLoader(tensor_dataset,
                                 batch_size=self.config['train']['batch_size'],
                                 shuffle=True,
                                 sampler=None,
                                 batch_sampler=None,
                                 num_workers=1,
                                 pin_memory=True,
                                 drop_last=True)

        self.model.init()
        self.model.to(self.device)

        criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(
            self.device)

        optimizer = Adam(
            [{
                'params': self.model.generator.parameters(),
                'lr': self.config['train']['learning_rate']['generator']
            }, {
                'params': self.model.modulation.parameters(),
                'lr': self.config['train']['learning_rate']['generator']
            }, {
                'params': self.model.embeddings.parameters(),
                'lr': self.config['train']['learning_rate']['latent']
            }],
            betas=(0.5, 0.999))

        scheduler = CosineAnnealingLR(
            optimizer,
            T_max=self.config['train']['n_epochs'] * len(data_loader),
            eta_min=self.config['train']['learning_rate']['min'])

        with SummaryWriter(
                log_dir=os.path.join(tensorboard_dir, 'stage1')) as summary:
            train_loss = AverageMeter()
            for epoch in range(1, self.config['train']['n_epochs'] + 1):
                self.model.train()
                train_loss.reset()

                with tqdm(iterable=data_loader) as pbar:
                    for batch in pbar:
                        batch_imgs, batch_img_ids, batch_class_ids = (
                            tensor.to(self.device) for tensor in batch)
                        generated_imgs, batch_content_codes, batch_class_codes = self.model(
                            batch_img_ids, batch_class_ids)

                        optimizer.zero_grad()

                        content_penalty = torch.sum(batch_content_codes**2,
                                                    dim=1).mean()
                        loss = criterion(
                            generated_imgs, batch_imgs
                        ) + self.config['content_decay'] * content_penalty
                        loss.backward()

                        optimizer.step()
                        scheduler.step()

                        train_loss.update(loss.item())
                        pbar.set_description_str('epoch #{}'.format(epoch))
                        pbar.set_postfix(loss=train_loss.avg)

                torch.save(self.model.generator.state_dict(),
                           os.path.join(model_dir, 'generator.pth'))
                torch.save(self.model.embeddings.state_dict(),
                           os.path.join(model_dir, 'embeddings.pth'))
                torch.save(self.model.modulation.state_dict(),
                           os.path.join(model_dir, 'class_modulation.pth'))

                self.model.eval()
                fixed_sample_img = self.evaluate(imgs,
                                                 img_ids,
                                                 class_ids,
                                                 randomized=False)
                random_sample_img = self.evaluate(imgs,
                                                  img_ids,
                                                  class_ids,
                                                  randomized=True)

                summary.add_scalar(tag='loss',
                                   scalar_value=train_loss.avg,
                                   global_step=epoch)
                summary.add_image(tag='sample-fixed',
                                  img_tensor=fixed_sample_img,
                                  global_step=epoch)
                summary.add_image(tag='sample-random',
                                  img_tensor=random_sample_img,
                                  global_step=epoch)
Example #6
    def train_encoders(self, imgs, classes, model_dir, tensorboard_dir):
        imgs = torch.from_numpy(imgs).permute(0, 3, 1, 2)
        class_ids = torch.from_numpy(classes.astype(int))
        img_ids = torch.arange(imgs.shape[0])

        tensor_dataset = TensorDataset(imgs, img_ids, class_ids)
        data_loader = DataLoader(
            tensor_dataset,
            batch_size=self.config['train_encoders']['batch_size'],
            shuffle=True,
            sampler=None,
            batch_sampler=None,
            num_workers=1,
            pin_memory=True,
            drop_last=True)

        self.embeddings = LordEmbeddings(self.config)
        self.modulation = LordModulation(self.config)
        self.encoders = LordEncoders(self.config)
        self.generator = LordGenerator(self.config)
        self.embeddings.load_state_dict(
            torch.load(os.path.join(model_dir, 'embeddings.pth')))
        self.modulation.load_state_dict(
            torch.load(os.path.join(model_dir, 'class_modulation.pth')))
        self.generator.load_state_dict(
            torch.load(os.path.join(model_dir, 'generator.pth')))
        self.encoders.init()

        self.model = LordStage2(self.encoders, self.modulation, self.generator)

        self.model.to(self.device)
        self.embeddings.to(self.device)

        criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(
            self.device)

        optimizer = Adam([{
            'params': self.model.encoders.parameters(),
            'lr': self.config['train_encoders']['learning_rate']
        }, {
            'params': self.model.modulation.parameters(),
            'lr': self.config['train_encoders']['learning_rate']
        }, {
            'params': self.model.generator.parameters(),
            'lr': self.config['train_encoders']['learning_rate']
        }],
                         betas=(0.5, 0.999))

        scheduler = ReduceLROnPlateau(optimizer,
                                      mode='min',
                                      factor=0.5,
                                      patience=20,
                                      verbose=1)

        with SummaryWriter(
                log_dir=os.path.join(tensorboard_dir, 'stage2')) as summary:
            train_loss = AverageMeter()
            for epoch in range(1, self.config['train_encoders']['n_epochs'] + 1):
                self.model.train()
                train_loss.reset()

                with tqdm(iterable=data_loader) as pbar:
                    for batch in pbar:
                        batch_imgs, batch_img_ids, batch_class_ids = (
                            tensor.to(self.device) for tensor in batch)
                        batch_content_codes, batch_class_codes = self.embeddings(
                            batch_img_ids, batch_class_ids)

                        generated_imgs, predicted_content_codes, predicted_class_codes = self.model(
                            batch_imgs)

                        optimizer.zero_grad()

                        perc_loss = criterion(generated_imgs, batch_imgs)
                        loss_content = F.mse_loss(batch_content_codes,
                                                  predicted_content_codes)
                        loss_class = F.mse_loss(batch_class_codes,
                                                predicted_class_codes)
                        loss = perc_loss + 10 * loss_content + 10 * loss_class
                        loss.backward()

                        optimizer.step()

                        train_loss.update(loss.item())
                        pbar.set_description_str('epoch #{}'.format(epoch))
                        pbar.set_postfix(loss=train_loss.avg)

                torch.save(self.model.encoders.state_dict(),
                           os.path.join(model_dir, 'encoders.pth'))
                torch.save(self.model.generator.state_dict(),
                           os.path.join(model_dir, 'generator.pth'))
                torch.save(self.model.modulation.state_dict(),
                           os.path.join(model_dir, 'class_modulation.pth'))

                scheduler.step(train_loss.avg)

                self.model.eval()
                fixed_sample_img = self.encoder_evaluate(imgs,
                                                         randomized=False)
                random_sample_img = self.encoder_evaluate(imgs,
                                                          randomized=True)

                summary.add_scalar(tag='loss',
                                   scalar_value=train_loss.avg,
                                   global_step=epoch)
                summary.add_image(tag='sample-fixed',
                                  img_tensor=fixed_sample_img,
                                  global_step=epoch)
                summary.add_image(tag='sample-random',
                                  img_tensor=random_sample_img,
                                  global_step=epoch)
Example #7
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer,
                dataloader_train, cfg):
    model.train()
    num_classes = cfg['num_classes']
    train_loss = AverageMeter()
    train_acc = AverageMeter()

    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size

    dataiter = iter(dataloader_train)
    time_now = time.time()
    loss_sum = 0
    acc_sum = 0

    summary['epoch'] = epoch

    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)

        output = model(data)
        output = F.relu(output)
        output = output.view(img.size(0), num_classes)
        target = target.view(img.size(0), num_classes)
        # print(output[:, 1].shape)
        # output = output.view(int(batch_size))
        loss = loss_fn(output[:, 0], target[:, 0]) + \
            0.5*loss_fn(output[:, 1], target[:, 1])

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        # loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        # scheduler.step()
        # lr = scheduler.get_last_lr()[0]
        target_class = ((target[:, 0]) * 3 >= 15)
        predicts_class = ((output[:, 0]) * 3 >= 15)

        acc = (predicts_class == target_class).type(
            torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)

        r2 = r2_score(target.cpu().detach().numpy(),
                      output.cpu().detach().numpy())

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)

        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()

            logging.info('Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                         'R2 : {:.3f}, Acc : {:.3f}, Run Time : {:.2f}'.format(
                             summary['epoch'], summary['step'], train_loss.avg,
                             r2, train_acc.avg, time_spent))

            summary['step'] += 1

        img, target = prefetcher.next()

    if args.local_rank == 0:
        time_spent = time.time() - time_now
        time_now = time.time()
        summary_writer.add_scalar('train/loss', train_loss.avg, epoch)
        summary_writer.add_scalar('train/R2', r2, epoch)
        summary_writer.add_scalar('train/Acc', train_acc.avg, epoch)
        # summary_writer.add_scalar(
        #     'learning_rate', lr, summary['step'] + steps*epoch)
        summary['epoch'] = epoch
        summary_writer.flush()

    return summary
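
reduce_tensor, to_python_float and data_prefetcher follow the NVIDIA apex ImageNet recipe. Minimal sketches, assuming all-reduce averaging across workers and a side CUDA stream for prefetching (the field layout of each batch is whatever the DataLoader yields):

import torch
import torch.distributed as dist

def reduce_tensor(tensor, reduction=True):
    # sum across all workers; optionally average (reduction=True)
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    if reduction:
        rt /= dist.get_world_size()
    return rt

def to_python_float(t):
    # works for both 0-dim tensors and plain numbers
    return t.item() if hasattr(t, 'item') else float(t)

class data_prefetcher(object):
    """Prefetches the next batch to the GPU on a side CUDA stream."""

    def __init__(self, loader):
        self.loader = loader  # an iterator over tuples of CPU tensors
        self.stream = torch.cuda.Stream()
        self.preload()

    def preload(self):
        try:
            self.next_batch = next(self.loader)
        except StopIteration:
            self.next_batch = None
            return
        with torch.cuda.stream(self.stream):
            self.next_batch = tuple(
                t.cuda(non_blocking=True) for t in self.next_batch)

    def next(self):
        # wait for the async copy, hand the batch out, start the next copy
        torch.cuda.current_stream().wait_stream(self.stream)
        batch = self.next_batch
        self.preload()
        return batch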
Example #8
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer, dataloader_train, cfg):
    model.train()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']

    train_loss = AverageMeter()
    train_acc = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)

    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size

    dataiter = iter(dataloader_train)
    time_now = time.time()
    loss_sum = 0
    acc_sum = 0

    summary['epoch'] = epoch

    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target, mask = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)

        # # mixup
        # # generate mixed inputs, two one-hot label vectors and mixing coefficient
        # data, target_a, target_b, lam = mixup_data(
        #     data, target, args.alpha, use_cuda)
        # print(data.shape)
        output = model(data)
        output = output.view(int(batch_size), num_classes)
        target = target.view(int(batch_size))
        mask = mask.view(int(batch_size))
        # target = target.long()

        conf_targets = target[mask]
        conf_preds = output[mask]
        # print("conf_preds", conf_preds.shape)
        loss = loss_fn(conf_preds, conf_targets)
        # loss = loss_func(loss_fn, output)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        # loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        # scheduler.step()
        # lr = scheduler.get_last_lr()[0]
        probs = F.softmax(output, dim=1)
        # torch.max(probs, 1) returns, for each row, the maximum value (FloatTensor)
        # together with the column index of that maximum (LongTensor)
        _, predicts = torch.max(probs, 1)

        # target = (target >= class_point).long()

        acc = (predicts[mask] == conf_targets).type(
            torch.cuda.FloatTensor).sum() * 1.0 / conf_targets.size(0)
        for t in range(num_classes):
            for p in range(num_classes):
                count = (predicts[mask][conf_targets == t] == p).type(
                    torch.cuda.FloatTensor).sum()
                reduced_count = reduce_tensor(count.data, reduction=False)

                confusion_matrix.update(t, p, to_python_float(reduced_count))

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)

        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()

            logging.info(
                'Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                'Training Acc : {:.3f}, Run Time : {:.2f}'
                .format(
                    summary['epoch'] + 1,
                    summary['step'] + 1, train_loss.avg, train_acc.avg, time_spent))

            summary['step'] += 1

        img, target, mask = prefetcher.next()

    if args.local_rank == 0:
        time_spent = time.time() - time_now
        time_now = time.time()
        summary_writer.add_scalar(
            'train/loss', train_loss.avg, epoch)  # epoch average, not last batch
        summary_writer.add_scalar(
            'train/acc', train_acc.avg, epoch)
        # summary_writer.add_scalar(
        #     'learning_rate', lr, summary['step'] + steps*epoch)
        summary_writer.flush()
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='train/Confusion matrix')
        # summary['loss'] = train_loss.avg
        # summary['acc'] = acc_sum / (steps * (batch_size))
        # summary['acc'] = train_acc.avg
        summary['epoch'] = epoch

    return summary
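
ConfusionMatrix is another unshown helper. A minimal sketch, assuming update(t, p, count) accumulates counts of true class t predicted as p into a matrix attribute, which plot_confusion_matrix then renders:

import numpy as np

class ConfusionMatrix(object):
    """Accumulates a (num_classes x num_classes) count matrix."""

    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.matrix = np.zeros((num_classes, num_classes))

    def update(self, true_class, pred_class, count=1.0):
        self.matrix[true_class, pred_class] += count

    def reset(self):
        self.matrix.fill(0.0)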
Example #9
def train(train_data, valid_data, model, optimizers, schedulers, epoch, args,
          logger, summary_writer):
    ce_loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    entropy_meter = AverageMeter()
    n_entropy_meter = AverageMeter()
    prob_ratio_meter = AverageMeter()

    device = args.gpu_id
    model.train()
    global best_val_accuracy
    with tqdm(total=len(train_data), desc=f"Train Epoch #{epoch+1}") as t:
        for batch_idx, (labels, premises, p_mask, hypotheses,
                        h_mask) in enumerate(train_data):
            if torch.cuda.is_available():
                labels = labels.to(device=device)
                premises = premises.to(device=device)
                p_mask = p_mask.to(device=device)
                hypotheses = hypotheses.to(device=device)
                h_mask = h_mask.to(device=device)
            pred_labels, ce_loss, rewards, actions, actions_log_prob, entropy, normalized_entropy = model(
                premises, p_mask, hypotheses, h_mask, labels)
            ce_loss.backward()
            optimizers["environment"].step()
            optimizers["environment"].zero_grad()
            for k in range(args.ppo_updates):
                if k == 0:
                    new_normalized_entropy, new_actions_log_prob = normalized_entropy, actions_log_prob
                else:
                    new_normalized_entropy, new_actions_log_prob = model.evaluate_actions(
                        premises, p_mask, actions["p_actions"], hypotheses,
                        h_mask, actions["h_actions"])
                prob_ratio = (new_actions_log_prob -
                              actions_log_prob.detach()).exp()
                clamped_prob_ratio = prob_ratio.clamp(1.0 - args.epsilon,
                                                      1.0 + args.epsilon)
                ppo_loss = torch.max(prob_ratio * rewards,
                                     clamped_prob_ratio * rewards).mean()
                loss = ppo_loss - args.entropy_weight * new_normalized_entropy.mean(
                )
                loss.backward()
                optimizers["policy"].step()
                optimizers["policy"].zero_grad()
            entropy = entropy.mean()
            normalized_entropy = normalized_entropy.mean()
            n = p_mask.shape[0]
            accuracy = (labels == pred_labels).to(dtype=torch.float32).mean()
            accuracy_meter.update(accuracy.item(), n)
            ce_loss_meter.update(ce_loss.item(), n)
            entropy_meter.update(entropy.item(), n)
            n_entropy_meter.update(normalized_entropy.item(), n)
            prob_ratio_meter.update(
                (1.0 - prob_ratio.detach()).abs().mean().item(), n)

            global global_step
            summary_writer["train"].add_scalar(tag="ce",
                                               scalar_value=ce_loss.item(),
                                               global_step=global_step)
            summary_writer["train"].add_scalar(tag="accuracy",
                                               scalar_value=accuracy.item(),
                                               global_step=global_step)
            summary_writer["train"].add_scalar(
                tag="n_entropy",
                scalar_value=normalized_entropy.item(),
                global_step=global_step)
            summary_writer["train"].add_scalar(
                tag="prob_ratio",
                scalar_value=prob_ratio_meter.val,
                global_step=global_step)

            global_step += 1

            if (batch_idx + 1) % max(1, len(train_data) // 10) == 0:  # guard tiny datasets
                logger.info(
                    f"Train: epoch: {epoch} batch_idx: {batch_idx + 1} ce_loss: {ce_loss_meter.avg:.4f} "
                    f"accuracy: {accuracy_meter.avg:.4f} entropy: {entropy_meter.avg:.4f} "
                    f"n_entropy: {n_entropy_meter.avg:.4f}")
                new_val_accuracy = validate(valid_data, model, epoch, device,
                                            logger, summary_writer)
                # TODO(siyu) how scheduler works
                schedulers["environment"].step(new_val_accuracy)
                schedulers["policy"].step(new_val_accuracy)
                global best_model_path, best_val_accuracy
                if new_val_accuracy > best_val_accuracy:
                    best_model_path = f"{args.model_dir}/{epoch}-{batch_idx}.mdl"
                    logger.info("saving model to" + best_model_path)
                    torch.save(
                        {
                            "epoch": epoch,
                            "batch_idx": batch_idx,
                            "state_dict": model.state_dict()
                        }, best_model_path)
                    best_val_accuracy = new_val_accuracy

            t.set_postfix({
                'loss': ce_loss_meter.avg,
                'accuracy': 100. * accuracy_meter.avg
            })
            # 'env_lr': schedulers["environment"].get_lr(),
            # 'policy_lr': schedulers["policy"].get_lr()})
            t.update(1)
Example #10
def validate(valid_data, model, epoch, device, logger, summary_writer):
    model.eval()
    ce_loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    entropy_meter = AverageMeter()
    n_entropy_meter = AverageMeter()

    with torch.no_grad():
        for labels, premises, p_mask, hypotheses, h_mask in valid_data:
            if torch.cuda.is_available():
                labels = labels.to(device=device)
                premises = premises.to(device=device)
                p_mask = p_mask.to(device=device)
                hypotheses = hypotheses.to(device=device)
                h_mask = h_mask.to(device=device)

            pred_labels, ce_loss, rewards, actions, actions_log_prob, entropy, normalized_entropy = model(
                premises, p_mask, hypotheses, h_mask, labels)
            entropy = entropy.mean()
            normalized_entropy = normalized_entropy.mean()
            n = p_mask.shape[0]
            accuracy = (labels == pred_labels).to(dtype=torch.float32).mean()
            accuracy_meter.update(accuracy.item(), n)
            ce_loss_meter.update(ce_loss.item(), n)
            entropy_meter.update(entropy.item(), n)
            n_entropy_meter.update(normalized_entropy.item(), n)
    logger.info(
        f"Valid: epoch: {epoch} ce_loss: {ce_loss_meter.avg:.4f} accuracy: {accuracy_meter.avg:.4f} "
        f"entropy: {entropy_meter.avg:.4f} n_entropy: {n_entropy_meter.avg:.4f} "
    )
    summary_writer["valid"].add_scalar(tag="ce",
                                       scalar_value=ce_loss_meter.avg,
                                       global_step=global_step)
    summary_writer["valid"].add_scalar(tag="accuracy",
                                       scalar_value=accuracy_meter.avg,
                                       global_step=global_step)
    summary_writer["valid"].add_scalar(tag="n_entropy",
                                       scalar_value=n_entropy_meter.avg,
                                       global_step=global_step)

    model.train()
    return accuracy_meter.avg
Example #11
def test(test_data, model, epoch, device, logger):
    model.eval()
    ce_loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    entropy_meter = AverageMeter()
    n_entropy_meter = AverageMeter()

    with torch.no_grad():
        for labels, premises, p_mask, hypotheses, h_mask in test_data:
            if torch.cuda.is_available():
                labels = labels.to(device=device)
                premises = premises.to(device=device)
                p_mask = p_mask.to(device=device)
                hypotheses = hypotheses.to(device=device)
                h_mask = h_mask.to(device=device)
            pred_labels, ce_loss, rewards, actions, actions_log_prob, entropy, normalized_entropy = model(
                premises, p_mask, hypotheses, h_mask, labels)
            entropy = entropy.mean()
            normalized_entropy = normalized_entropy.mean()
            accuracy = (labels == pred_labels).to(dtype=torch.float32).mean()
            n = p_mask.shape[0]
            accuracy_meter.update(accuracy.item(), n)
            ce_loss_meter.update(ce_loss.item(), n)
            entropy_meter.update(entropy.item(), n)
            n_entropy_meter.update(normalized_entropy.item(), n)

    logger.info(
        f"Test: ce_loss: {ce_loss_meter.avg:.4f} accuracy: {accuracy_meter.avg:.4f} "
        f"entropy: {entropy_meter.avg:.4f} n_entropy: {n_entropy_meter.avg:.4f} "
    )
    return accuracy_meter.avg
Example #12
def valid_epoch(summary, summary_writer, epoch, model, loss_fn, dataloader_valid, cfg):
    logger = log.logger()
    model.eval()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']

    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    eval_pred_posit = AverageMeter()
    eval_label_posit = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=(num_classes)+1)

    dataloader = [dataloader_valid]

    name = cfg['labels']

    time_now = time.time()
    loss_sum = 0
    acc_sum = 0
    count = 0
    steps_count = 0
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # torch.no_grad() builds a context in which gradient tracking is disabled
        with torch.no_grad():
            acc_tmp = 0
            loss_tmp = 0
            prefetcher = data_prefetcher(dataiter)
            img, target, label, label_degree = prefetcher.next()

            for step in range(steps):
                # data, target = next(dataiter)
                data = img.to(device)
                target = target.to(device)

                output = model(data)
                output = output.view(img.size(0), num_classes)
                target = target.view(img.size(0), num_classes)
                label = label.view(img.size(0))

                conf_preds = torch.sigmoid(output)
                # print("conf_preds", conf_preds.shape)
                loss = loss_fn(conf_preds, target)

                torch.cuda.synchronize()

                predicts = (conf_preds >= 0.5)
                d = torch.Tensor([0] * img.size(0)
                                 ).reshape(-1, 1).to(device)
                predicts = torch.cat((d, predicts.float()), 1)
                logger.get_info(predicts)
                # _, predicts = torch.max(predicts, 1)
                predicts = MaxIndex(predicts, batch_size)
                # logger.get_info(predicts)


                acc = (predicts == label).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)
                recall_pred = (predicts[label_degree >= 20] > 1).type(
                    torch.cuda.FloatTensor).sum() * 1.0
                recall_label = (label_degree >= 20).sum()

                for t in range(num_classes+1):
                    for p in range(num_classes+1):
                        count = (predicts[label == t] == p).type(
                            torch.cuda.FloatTensor).sum()
                        reduced_count = reduce_tensor(
                            count.data, reduction=False)

                        confusion_matrix.update(t, p,
                                                to_python_float(reduced_count))

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)
                reduced_pred_20 = reduce_tensor(recall_pred.data)
                reduced_label_20 = reduce_tensor(recall_label)

                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))
                eval_pred_posit.update(to_python_float(reduced_pred_20))
                eval_label_posit.update(to_python_float(reduced_label_20))

                if args.local_rank == 0:
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'Testing Acc : {:.3f}, Run Time : {:.2f}'
                        .format(
                            str(i),
                            summary['step'] + 1, reduced_loss, reduced_acc, time_spent))
                    summary['step'] += 1

                img, target, label, label_degree = prefetcher.next()

    if args.local_rank == 0:
        recall = eval_pred_posit.sum/float(eval_label_posit.sum)
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='Confusion matrix')
        summary['loss'] = eval_loss.avg
        summary['recall'] = recall
        # summary['acc'] = acc_sum / (steps * (batch_size))
        summary['acc'] = eval_acc.avg
        print("Recall >=20:", recall)
    return summary
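
MaxIndex is not defined in these snippets. Judging from the commented-out torch.max(predicts, 1) line it replaces, it most likely returns the per-row argmax; a hypothetical sketch (the batch_size argument is kept only to match the call sites):

import torch

def MaxIndex(predicts, batch_size):
    # hypothetical: column index of the row-wise maximum
    # (batch_size is unused here; retained to match the callers)
    return torch.argmax(predicts, dim=1)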
Example #13
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer, dataloader_train, cfg):
    # logger = log.logger()
    model.train()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']

    train_loss = AverageMeter()
    train_acc = AverageMeter()
    train_pred_posit = AverageMeter()
    train_label_posit = AverageMeter()

    confusion_matrix = ConfusionMatrix(num_classes=(num_classes)+1)

    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size

    dataiter = iter(dataloader_train)
    time_now = time.time()
    loss_sum = 0
    acc_sum = 0

    summary['epoch'] = epoch

    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target, label, label_degree = prefetcher.next()
    for step in range(steps):
        # logger.get_info('...........'+'step' + str(step) + '............')
        data = img.to(device)
        target = target.to(device)

        # # mixup
        # # generate mixed inputs, two one-hot label vectors and mixing coefficient
        # data, target_a, target_b, lam = mixup_data(
        #     data, target, args.alpha, use_cuda)
        # print(data.shape)
        output = model(data)
        output = output.view(int(batch_size), num_classes)
        target = target.view(int(batch_size), num_classes)
        label = label.view(int(batch_size))
        # target = target.long()
        conf_preds = torch.sigmoid(output)

        # print("conf_preds", conf_preds.shape)
        loss = loss_fn(conf_preds, target)

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        # loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        # scheduler.step()
        # lr = scheduler.get_last_lr()[0]
        # print(conf_preds.shape)
        # torch.max(a, 1) returns, for each row, the maximum value (FloatTensor)
        # together with the column index of that maximum (LongTensor)
        predicts = (conf_preds >= 0.5)
        d = torch.Tensor([0] * int(batch_size)).reshape(-1, 1).to(device)
        predicts = torch.cat((d, predicts.float()), 1)
        # logger.get_info(predicts)
        predicts = MaxIndex(predicts, batch_size)
        # logger.get_info(predicts)

        # target = (target >= class_point).long()

        acc = (predicts == label).type(
            torch.cuda.FloatTensor).sum() * 1.0 / label.size(0)

        # print(type(predicts), predicts[label_degree >= 20])

        recall_pred = (predicts[label_degree >= 20] > 1).type(
            torch.cuda.FloatTensor).sum() * 1.0
        recall_label = (label_degree >= 20).sum()
        # print('recall_pred : {}, recall_label : {}'.format(recall_pred, recall_label))

        for t in range(num_classes+1):
            for p in range(num_classes+1):
                count = (predicts[label == t] == p).type(
                    torch.cuda.FloatTensor).sum()
                reduced_count = reduce_tensor(count.data, reduction=False)

                confusion_matrix.update(t, p, to_python_float(reduced_count))

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)
        reduced_pred_20 = reduce_tensor(recall_pred.data)
        reduced_label_20 = reduce_tensor(recall_label)

        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))
        train_pred_posit.update(to_python_float(reduced_pred_20))
        train_label_posit.update(to_python_float(reduced_label_20))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()

            logging.info(
                'Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                'Training Acc : {:.3f}, Run Time : {:.2f}'
                .format(
                    summary['epoch'] + 1,
                    summary['step'] + 1, train_loss.avg, train_acc.avg, time_spent))

            summary['step'] += 1

        img, target, label, label_degree = prefetcher.next()

    if args.local_rank == 0:
        time_spent = time.time() - time_now
        time_now = time.time()
        recall = train_pred_posit.sum/float(train_label_posit.sum)
        summary_writer.add_scalar(
            'train/loss', train_loss.avg, epoch)  # epoch average, not last batch
        summary_writer.add_scalar(
            'train/acc', train_acc.avg, epoch)
        summary_writer.add_scalar('train/recall', recall, epoch)
        # summary_writer.add_scalar(
        #     'learning_rate', lr, summary['step'] + steps*epoch)
        summary_writer.flush()
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='train/Confusion matrix')
        # summary['loss'] = train_loss.avg
        # summary['acc'] = acc_sum / (steps * (batch_size))
        # summary['acc'] = train_acc.avg
        summary['epoch'] = epoch
        print("Recall >=20:", recall)
    return summary
Example #14
def infer(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    # prepare model
    if args.model_type == "GLU_Transformer":
        model = GLU_Transformer(phone_size=args.phone_size,
                                embed_size=args.embedding_size,
                                hidden_size=args.hidden_size,
                                glu_num_layers=args.glu_num_layers,
                                dropout=args.dropout,
                                output_dim=args.feat_dim,
                                dec_nhead=args.dec_nhead,
                                dec_num_block=args.dec_num_block)
    else:
        raise ValueError('Unsupported model type: %s' % args.model_type)


    # Load model weights
    print("Loading pretrained weights from {}".format(args.model_file))
    checkpoint = torch.load(args.model_file, map_location=device)
    state_dict = checkpoint['state_dict']
    model_dict = model.state_dict()
    state_dict_new = {}
    para_list = []
    for k, v in state_dict.items():
        assert k in model_dict
        if model_dict[k].size() == state_dict[k].size():
            state_dict_new[k] = v
        else:
            para_list.append(k)

    print("Total {} parameters, loaded {} parameters".format(len(state_dict), len(state_dict_new)))

    if len(para_list) > 0:
        print("Not loading {} because of different sizes".format(", ".join(para_list)))
    model_dict.update(state_dict_new)
    model.load_state_dict(model_dict)
    print("Loaded checkpoint {}".format(args.model_file))
    model = model.to(device)
    model.eval()
    

    # Decode
    test_set = SVSDataset(align_root_path=args.test_align,
                          pitch_beat_root_path=args.test_pitch,
                          wav_root_path=args.test_wav,
                          char_max_len=args.char_max_len,
                          max_len=args.num_frames,
                          sr=args.sampling_rate,
                          preemphasis=args.preemphasis,
                          frame_shift=args.frame_shift,
                          frame_length=args.frame_length,
                          n_mels=args.n_mels,
                          power=args.power,
                          max_db=args.max_db,
                          ref_db=args.ref_db)
    collate_fn_svs = SVSCollator(args.num_frames, args.char_max_len)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              collate_fn=collate_fn_svs,
                                              pin_memory=True)

    if args.loss == "l1":
        loss = MaskedLoss("l1")
    elif args.loss == "mse":
        loss = MaskedLoss("mse")
    else:
        raise ValueError("Not Support Loss Type")

    losses = AverageMeter()

    if not os.path.exists(args.prediction_path):
        os.makedirs(args.prediction_path)

    for step, (phone, beat, pitch, spec, length, chars, char_len_list) in enumerate(test_loader, 1):
        if step >= args.decode_sample:
            break
        phone = phone.to(device)
        beat = beat.to(device)
        pitch = pitch.to(device).float()
        spec = spec.to(device).float()

        chars = chars.to(device)
        length_mask = create_src_key_padding_mask(length, args.num_frames)
        length_mask = length_mask.unsqueeze(2)
        length_mask = length_mask.repeat(1, 1, spec.shape[2]).float()
        length_mask = length_mask.to(device)
        length = length.to(device)
        char_len_list = char_len_list.to(device)

        output, att = model(chars, phone, pitch, beat,
                            src_key_padding_mask=length,
                            char_key_padding_mask=char_len_list)

        test_loss = loss(output, spec, length_mask)
        if step % 1 == 0:
            # save wav and plot spectrogram
            output = output.cpu().detach().numpy()[0]
            out_spec = spec.cpu().detach().numpy()[0]
            length = length.cpu().detach().numpy()[0]
            att = att.cpu().detach().numpy()[0]
            # np.save("output.npy", output)
            # np.save("out_spec.npy", out_spec)
            # np.save("att.npy", att)
            output = output[:length]
            out_spec = out_spec[:length]
            att = att[:, :length, :length]
            wav = spectrogram2wav(output, args.max_db, args.ref_db,
                                  args.preemphasis, args.power,
                                  args.sampling_rate, args.frame_shift,
                                  args.frame_length)
            wav_true = spectrogram2wav(out_spec, args.max_db, args.ref_db,
                                       args.preemphasis, args.power,
                                       args.sampling_rate, args.frame_shift,
                                       args.frame_length)
            write_wav(os.path.join(args.prediction_path,
                                   '{}.wav'.format(step)),
                      wav, args.sampling_rate)
            write_wav(os.path.join(args.prediction_path,
                                   '{}_true.wav'.format(step)),
                      wav_true, args.sampling_rate)
            plt.clf()  # start a fresh figure for the spectrogram comparison
            plt.subplot(1, 2, 1)
            specshow(output.T)
            plt.title("prediction")
            plt.subplot(1, 2, 2)
            specshow(out_spec.T)
            plt.title("ground_truth")
            plt.savefig(os.path.join(args.prediction_path,
                                     '{}.png'.format(step)))
            plt.clf()  # clear the figure before plotting the attention heads
            plt.subplot(1, 4, 1)
            specshow(att[0])
            plt.subplot(1, 4, 2)
            specshow(att[1])
            plt.subplot(1, 4, 3)
            specshow(att[2])
            plt.subplot(1, 4, 4)
            specshow(att[3])
            plt.savefig(os.path.join(args.prediction_path,
                                     '{}_att.png'.format(step)))
        losses.update(test_loss.item(), phone.size(0))
    print("loss avg for test is {}".format(losses.avg))
Example #15
	def train_latent(self, imgs, classes, model_dir, tensorboard_dir, retrain=False):

		data = dict(
			img=torch.from_numpy(imgs).permute(0, 3, 1, 2),
			img_id=torch.from_numpy(np.arange(imgs.shape[0])),
			class_id=torch.from_numpy(classes.astype(np.int64))
		)

		dataset = NamedTensorDataset(data)
		data_loader = DataLoader(
			dataset, batch_size=self.config['train']['batch_size'],
			shuffle=True, sampler=None, batch_sampler=None,
			num_workers=1, pin_memory=True, drop_last=True
		)

		if not retrain:
			self.latent_model = LatentModel(self.config)
			self.latent_model.init()
		self.latent_model.to(self.device)

		criterion = VGGDistance(self.config['perceptual_loss']['layers']).to(self.device)
		# content_criterion = nn.KLDivLoss()

		optimizer = Adam([
			{
				'params': itertools.chain(self.latent_model.modulation.parameters(), self.latent_model.generator.parameters()),
				'lr': self.config['train']['learning_rate']['generator']
			},
			{
				'params': itertools.chain(self.latent_model.content_embedding.parameters(), self.latent_model.class_embedding.parameters()),
				'lr': self.config['train']['learning_rate']['latent']
			}
		], betas=(0.5, 0.999))

		scheduler = CosineAnnealingLR(
			optimizer,
			T_max=self.config['train']['n_epochs'] * len(data_loader),
			eta_min=self.config['train']['learning_rate']['min']
		)

		summary = SummaryWriter(log_dir=tensorboard_dir)

		train_loss = AverageMeter()
		for epoch in range(self.config['train']['n_epochs']):
			self.latent_model.train()
			train_loss.reset()

			pbar = tqdm(iterable=data_loader)
			for batch in pbar:
				batch = {name: tensor.to(self.device) for name, tensor in batch.items()}

				optimizer.zero_grad()
				out = self.latent_model(batch['img_id'], batch['class_id'])

				content_penalty = torch.sum(out['content_code'] ** 2, dim=1).mean()
				# content_penalty = content_criterion(out['content_code'], torch.normal(0, self.config['content_std'], size=out['content_code'].shape).to(self.device))
				loss = criterion(out['img'], batch['img']) + self.config['content_decay'] * content_penalty

				loss.backward()
				optimizer.step()
				scheduler.step()

				train_loss.update(loss.item())
				pbar.set_description_str('epoch #{}'.format(epoch))
				pbar.set_postfix(loss=train_loss.avg)

			pbar.close()
			self.save(model_dir, latent=True, amortized=False)

			summary.add_scalar(tag='loss', scalar_value=train_loss.avg, global_step=epoch)

			fixed_sample_img = self.generate_samples(dataset, randomized=False)
			random_sample_img = self.generate_samples(dataset, randomized=True)

			summary.add_image(tag='sample-fixed', img_tensor=fixed_sample_img, global_step=epoch)
			summary.add_image(tag='sample-random', img_tensor=random_sample_img, global_step=epoch)

		summary.close()
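A detail worth noting before the next snippet: train_latent above calls scheduler.step() once per batch, so its CosineAnnealingLR is built with T_max = n_epochs * len(data_loader); the train_model function below steps its scheduler once per epoch and passes args.max_epoch directly. Both conventions are valid, but T_max must match the stepping granularity.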
Example #16
0
def train_model(args, model, train_loader, val_loader, logging):
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=1e-4)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.max_epoch)
    criterion = nn.MSELoss().cuda()
    reg1 = nn.L1Loss().cuda()

    def save_model(name):
        torch.save(dict(params=model.state_dict()),
                   osp.join(args.save_path1, name + '.pth'))

    trlog = {}
    trlog['args'] = vars(args)
    trlog['train_loss'] = []
    trlog['val_loss'] = []
    trlog['train_mae'] = []
    trlog['train_mse'] = []
    trlog['val_mae'] = []
    trlog['val_mse'] = []

    trlog['max_mae'] = 1000000
    trlog['max_mse'] = 1000000
    trlog['max_mae_epoch'] = 0

    trlog['max_mae_last10'] = 1000000
    trlog['max_mse_last10'] = 1000000
    trlog['max_mae_last10_epoch'] = 0

    timer = Timer()
    global_count = 0
    writer = SummaryWriter(logdir=args.save_path1)
    epoch_time = AverageMeter()

    for epoch in range(1, args.max_epoch + 1):
        epoch_start = time.time()
        model.train()
        t1 = AverageMeter()
        maes = AverageMeter()
        mses = AverageMeter()

        batch_time = AverageMeter()

        for i, batch in enumerate(train_loader, 1):
            batch_start = time.time()
            global_count = global_count + 1

            if args.model_type == 'SACANet':
                data, gt_label = batch[0].cuda(), batch[1].cuda()
                pred_map = model(data)
                pred_map = torch.squeeze(pred_map)
                gt_label = torch.squeeze(gt_label)
                loss = criterion(pred_map, gt_label)

            else:
                raise ValueError('unsupported model_type: {}'.format(args.model_type))

            pred_map = pred_map.data.cpu().numpy()
            gt_label = gt_label.data.cpu().numpy()

            for i_img in range(pred_map.shape[0]):

                pred_cnt = np.sum(pred_map[i_img]) / args.LOG_PARA

                gt_count = np.sum(gt_label[i_img]) / args.LOG_PARA

                maes.update(abs(gt_count - pred_cnt))
                mses.update((gt_count - pred_cnt) * (gt_count - pred_cnt))

            writer.add_scalar('data/loss', float(loss), global_count)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            torch.cuda.synchronize()
            batch_duration = time.time() - batch_start
            batch_time.update(batch_duration)

            t1.update(loss.item(), data.size(0))

        t1 = t1.avg
        mae = maes.avg

        mse = np.sqrt(mses.avg)

        lr_scheduler.step()

        epoch_duration = time.time() - epoch_start
        epoch_time.update(epoch_duration)
        logging.info(
            'epoch {}, loss={:4f}, train mae={:4f}, train mse={:4f}'.format(
                epoch, float(t1), float(mae), float(mse)))
        logging.info('Epoch time: {:3f}s'.format(epoch_duration))

        v1 = AverageMeter()
        vmaes = AverageMeter()
        vmses = AverageMeter()
        model.eval()

        with torch.no_grad():
            for i, batch in enumerate(val_loader, 1):

                # match the training branch above (only SACANet is supported)
                if args.model_type == 'SACANet':
                    data, gt_label = batch[0].cuda(), batch[1].cuda()
                    pred_map = model(data)
                    pred_map = torch.squeeze(pred_map)
                    gt_label = torch.squeeze(gt_label)
                    loss = criterion(pred_map, gt_label)

                else:
                    raise ValueError('unsupported model_type: {}'.format(args.model_type))

                v1.update(loss.item(), data.size(0))

                pred_map = pred_map.data.cpu().numpy()
                gt_label = gt_label.data.cpu().numpy()
                for i_img in range(pred_map.shape[0]):
                    pred_cnt = np.sum(pred_map[i_img]) / args.LOG_PARA
                    gt_count = np.sum(gt_label[i_img]) / args.LOG_PARA
                    vmaes.update(abs(gt_count - pred_cnt))
                    vmses.update((gt_count - pred_cnt) * (gt_count - pred_cnt))

        v1 = v1.avg
        vmae = vmaes.avg
        vmse = np.sqrt(vmses.avg)

        writer.add_scalar('data/val_loss', float(v1), epoch)
        logging.info('epoch {}, val mae={:}, val mse={:}'.format(
            epoch, vmae, vmse))

        if epoch % 10 == 0 or epoch > (args.max_epoch - 30):
            if vmae < trlog['max_mae']:
                trlog['max_mae'] = vmae
                trlog['max_mse'] = vmse
                trlog['max_mae_epoch'] = epoch
                save_model('max_acc')

            if epoch >= (args.max_epoch - 10):
                if vmae <= trlog['max_mae_last10']:
                    trlog['max_mae_last10'] = vmae
                    trlog['max_mse_last10'] = vmse
                    trlog['max_mae_last10_epoch'] = epoch

            trlog['train_loss'].append(t1)
            trlog['train_mae'].append(mae)
            trlog['train_mse'].append(mse)
            trlog['val_loss'].append(v1)
            trlog['val_mae'].append(vmae)
            trlog['val_mse'].append(vmse)

            torch.save(trlog, osp.join(args.save_path1, 'trlog'))

            logging.info(
                'best epoch {}, best val mae={:.4f}, best val mse={:.4f}'.
                format(trlog['max_mae_epoch'], trlog['max_mae'],
                       trlog['max_mse']))
            logging.info(
                'best val mae last 10 epoch {}, val mae last10={}, val mse last10={:.4f}'
                .format(trlog['max_mae_last10_epoch'], trlog['max_mae_last10'],
                        trlog['max_mse_last10']))
            logging.info('ETA:{}/{}'.format(
                timer.measure(), timer.measure(epoch / args.max_epoch)))

        logging.info(
            'Total epoch training time: {:.3f}s, average: {:.3f}s'.format(
                epoch_time.sum, epoch_time.avg))

    writer.close()

    logging.info(args.save_path1)
    return model
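The metric bookkeeping above is a pattern worth isolating: the meters accumulate per-image absolute and squared count errors, so MAE is simply the running average, while RMSE requires a square root after averaging. A minimal sketch of the idea (pred_counts and gt_counts are illustrative stand-ins for the per-image counts):

import numpy as np

maes = AverageMeter()
mses = AverageMeter()
for pred_cnt, gt_count in zip(pred_counts, gt_counts):
    err = gt_count - pred_cnt
    maes.update(abs(err))        # accumulate |error|
    mses.update(err * err)       # accumulate squared error
mae = maes.avg                   # mean absolute error
mse = np.sqrt(mses.avg)          # RMSE: sqrt of the mean squared error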
def test_model(args, model, test_loader, logging):
    trlog = torch.load(osp.join(args.save_path1, 'trlog'))
    model.load_state_dict(
        torch.load(osp.join(args.save_path1, 'max_acc.pth'))['params'])
    criterion = nn.MSELoss().cuda()  # same loss as training; undefined in the snippet otherwise

    t1 = AverageMeter()
    tmaes = AverageMeter()
    tmses = AverageMeter()
    model.eval()

    logging.info(
        'Best Epoch {}, best val mae={:.4f}, best val mse={:.4f}'.format(
            trlog['max_mae_epoch'], trlog['max_mae'], trlog['max_mse']))

    with torch.no_grad():
        for i, batch in enumerate(test_loader, 1):

            if args.model_type == 'SACANet':
                data, gt_label = batch[0].cuda(), batch[1].cuda()
                pred_map = model(data)
                loss = criterion(pred_map, gt_label)

            else:
                raise ValueError('unsupported model_type: {}'.format(args.model_type))

            pred_map = pred_map[:, 1, :, :].data.cpu().numpy()
            gt_label = gt_label[:, 1, :, :].data.cpu().numpy()
            for i_img in range(pred_map.shape[0]):

                pred_cnt = np.sum(pred_map[i_img]) / args.LOG_PARA
                gt_count = np.sum(gt_label[i_img]) / args.LOG_PARA

                tmaes.update(abs(gt_count - pred_cnt))
                tmses.update((gt_count - pred_cnt) * (gt_count - pred_cnt))

            t1.update(loss.item(), data.size(0))

    t1 = t1.avg
    tmae = tmaes.avg
    tmse = np.sqrt(tmses.avg)

    logging.info('Test mae={:.4f}, mse={:.4f}'.format(tmae, tmse))
Example #18
0
def valid_epoch(summary, summary_writer, epoch, model, loss_fn, dataloader_valid, cfg):
    model.eval()
    num_classes = cfg['num_classes']
    class_point = cfg['class_point']

    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)

    dataloader = [dataloader_valid]

    name = cfg['labels']

    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # run validation under torch.no_grad() so gradients are not tracked
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target, mask = prefetcher.next()

            for step in range(steps):
                # data, target = next(dataiter)
                data = img.to(device)
                target = target.to(device)

                output = model(data)
                output = output.view(int(batch_size), num_classes)
                target = target.view(int(batch_size))
                mask = mask.view(int(batch_size))

                conf_targets = target[mask]
                conf_preds = output[mask]
                loss = loss_fn(conf_preds, conf_targets)

                torch.cuda.synchronize()
                probs = F.softmax(output, dim=1)
                _, predicts = torch.max(probs, 1)

                acc = (predicts[mask] == conf_targets).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / conf_targets.size(0)
                for t in range(num_classes):
                    for p in range(num_classes):
                        count = (predicts[mask][conf_targets == t] == p).type(
                            torch.cuda.FloatTensor).sum()
                        reduced_count = reduce_tensor(
                            count.data, reduction=False)

                        confusion_matrix.update(t, p,
                                                to_python_float(reduced_count))

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)

                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))

                if args.local_rank == 0:
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'Testing Acc : {:.3f}, Run Time : {:.2f}'
                        .format(
                            str(i),
                            summary['step'] + 1, reduced_loss, reduced_acc, time_spent))
                    summary['step'] += 1

                img, target, mask = prefetcher.next()

    if args.local_rank == 0:
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['loss'] = eval_loss.avg
        # summary['acc'] = acc_sum / (steps * (batch_size))
        summary['acc'] = eval_acc.avg

    return summary
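This example and Example #20 below call reduce_tensor and to_python_float, distributed-training helpers that are not part of the snippets. A sketch of what such helpers conventionally look like, patterned on NVIDIA's apex/ImageNet examples (an assumption, not the repos' actual code):

import torch
import torch.distributed as dist

def reduce_tensor(tensor, reduction=True):
    # sum a tensor across all processes; average it unless reduction=False
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    if reduction:
        rt /= dist.get_world_size()
    return rt

def to_python_float(t):
    # convert a 0-dim tensor (or 1-element buffer) to a plain Python number
    if hasattr(t, 'item'):
        return t.item()
    return t[0]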
Example #19
0
def main():
    # NOTE: the original snippet is truncated here; the parser setup and the
    # '--file' option are reconstructed from the surviving help string and
    # the later use of args.file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--file',
        type=str,
        help=
        'some manifest file with the first col containing the wav file path')
    parser.add_argument('--hold_idx', default=-1, type=int)
    parser.add_argument('--stats', dest='stats', action='store_true')
    parser.add_argument('--scramble_repeat', default=-1, type=int)
    args = parser.parse_args()

    root = os.getcwd()  # the root is the current working directory
    filepath = osp.join(os.getcwd(), args.file)
    print("\n\nOpening: {}".format(filepath))
    print("Root: {}".format(root))
    if args.stats:
        manifest_file = filepath + "_stats"
        make_folder(manifest_file)
        make_file(manifest_file)
        audio_dur = AverageMeter()
    elif args.scramble_repeat > 1:
        manifest_file = filepath + "_scram_rep"
        make_folder(manifest_file)
        make_file(manifest_file)
    else:
        manifest_file = make_manifest(filepath, root, args.hold_idx)
    print("Manifest made: {}".format(manifest_file))

    with open(filepath) as f:
        summary = csv.reader(f, delimiter=',')
        tot = 0
        hold_file = ""
        hold_entry = ""
        repeat_store = []
        for i, row in enumerate(summary):
            pass  # loop body truncated in the original snippet
Example #20
0
def valid_epoch(summary, summary_writer, epoch, model, loss_fn,
                dataloader_valid, cfg):
    model.eval()
    eval_loss = AverageMeter()
    eval_acc = AverageMeter()

    num_classes = cfg['num_classes']
    dataloader = [dataloader_valid]

    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
            # run validation under torch.no_grad() so gradients are not tracked
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target = prefetcher.next()

            for step in range(steps):
                # data, target = next(dataiter)
                data = img.to(device)
                target = target.to(device)

                output = model(data)
                output = F.relu(output)
                output = output.view(img.size(0), num_classes)
                target = target.view(img.size(0), num_classes)

                loss = loss_fn(output[:, 0], target[:, 0]) + \
                    0.5*loss_fn(output[:, 1], target[:, 1])
                torch.cuda.synchronize()

                target_class = ((target[:, 0]) * 3 >= 15)
                predicts_class = ((output[:, 0]) * 3 >= 15)

                acc = (predicts_class == target_class).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)

                r2 = r2_score(target.cpu().detach().numpy(),
                              output.cpu().detach().numpy())

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)

                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))

                if args.local_rank == 0:
                    print('target', target[:, 0] * 3)
                    print('output', output[:, 0] * 3)
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'R2 : {:.3f}, Acc : {:.3f}, Run Time : {:.2f}'.format(
                            str(i), summary['step'] + 1, reduced_loss, r2,
                            reduced_acc, time_spent))
                    summary['step'] += 1

                img, target = prefetcher.next()

    if args.local_rank == 0:

        summary['loss'] = eval_loss.avg
        # summary['acc'] = acc_sum / (steps * (batch_size))
        summary['acc'] = eval_acc.avg
        summary['r2'] = r2
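        # note: r2 here comes from the last validation batch only, not a running average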

    return summary
Example #21
0
class CloudPublishNode:
    def __init__(self,
                 seq,
                 node_name,
                 cloud_topic_name,
                 tf_topic_name,
                 dataset,
                 global_tf_name="map",
                 child_tf_name="car"):
        rospy.init_node(node_name)
        self.cloud_pub = rospy.Publisher(cloud_topic_name,
                                         PointCloud2,
                                         queue_size=queue_size)
        self.transform_broadcaster = tf2_ros.TransformBroadcaster()
        self.est_tf_pub = rospy.Publisher(
            tf_topic_name, TransformStamped,
            queue_size=queue_size)  # for visualization
        self.gt_tf_pub = rospy.Publisher(
            "gt_pose", TransformStamped,
            queue_size=queue_size)  # for visualization
        self.cap_pub = rospy.Publisher("CAP",
                                       CloudAndPose,
                                       queue_size=queue_size)
        self.rate = rospy.Rate(sleep_rate)
        self.header = Header(frame_id=global_tf_name)
        self.child_tf_name = child_tf_name  # base name before appending prefix
        self.dataset = dataset
        self.seq = seq

        transform_dict = OrderedDict()
        transform_dict[GridSampling([args.grid_size] * 3)] = ["train", "test"]
        transform_dict[NormalizeScale()] = ["train", "test"]
        transform = ComposeAdapt(transform_dict)
        self.model = Net(graph_input=LOAD_GRAPH,
                         act="LeakyReLU",
                         transform=transform,
                         dof=7)
        if args.model_path is not None and osp.exists(args.model_path):
            self.model.load_state_dict(
                torch.load(args.model_path, map_location=torch.device("cpu")))
            print("loaded weights from", args.model_path)
        self.model.eval()

        self.absolute_gt_pose = np.eye(4)[:3, :]
        self.absolute_est_pose = np.eye(4)[:3, :]
        self.infer_time_meter = AverageMeter()
        self.tr_error_meter = AverageMeter()
        self.rot_error_meter = AverageMeter()

        self.fields = [
            PointField('x', 0, PointField.FLOAT32, 1),
            PointField('y', 4, PointField.FLOAT32, 1),
            PointField('z', 8, PointField.FLOAT32, 1),
            PointField('intensity', 12, PointField.FLOAT32, 1)
        ]
        self.pose_list = []

    def estimate_pose(self, target_cloud, source_cloud):
        source_cloud = torch.from_numpy(source_cloud)
        target_cloud = torch.from_numpy(target_cloud)

        begin = time.time()
        pose = self.model(
            (source_cloud.unsqueeze(0), target_cloud.unsqueeze(0),
             torch.tensor(len(source_cloud)).unsqueeze(0),
             torch.tensor(len(target_cloud)).unsqueeze(0)))

        self.infer_time_meter.update(time.time() - begin)
        pose = pose.detach().numpy()
        self.pose_list.append(pose)
        return pose[0, :3], pose[0, 3:]

    def tq2tf_msg(self, translation, quaternion, header, typ="gt"):
        assert typ in ["gt", "est"]
        t = TransformStamped()
        t.header = header
        t.child_frame_id = "{}_{}".format(typ, self.child_tf_name)
        t.transform.translation.x = translation[0]
        t.transform.translation.y = translation[1]
        t.transform.translation.z = translation[2]
        t.transform.rotation.x = quaternion[0]
        t.transform.rotation.y = quaternion[1]
        t.transform.rotation.z = quaternion[2]
        t.transform.rotation.w = quaternion[3]
        return t

    def mat2tf_msg(self, transform_mat, header, typ):
        translation = transform_mat[:3, -1]
        quat = Rotation.from_matrix(transform_mat[:3, :3]).as_quat()
        return self.tq2tf_msg(translation, quat, header, typ)

    def serve(self, idx):
        self.header.seq = idx
        self.header.stamp = rospy.Time.from_sec(
            self.dataset.timestamps[idx].total_seconds())

        current_cloud = self.dataset.get_velo(idx)
        if idx == 0:
            # guess 0 pose at first time frame
            tr, quat = np.zeros((3, )), np.array([0., 0., 0., 1.])
        else:
            # estimate coarse pose relative to the previous frame with model
            prev_cloud = self.dataset.get_velo(idx - 1)
            tr, quat = self.estimate_pose(prev_cloud, current_cloud)

        gt_pose = self.dataset.poses[idx]

        est_mat = trq2mat(tr, quat)
        delta_gt_pose = delta_poses(gt_pose.copy(),
                                    self.absolute_gt_pose.copy())
        self.absolute_gt_pose = gt_pose
        trans_error, rot_error = pose_error(delta_gt_pose, est_mat.copy())
        self.tr_error_meter.update(trans_error)
        self.rot_error_meter.update(rot_error)

        # correct the axis system of the estimated pose
        c_est_mat = kitti2rvizaxis(est_mat.copy())
        c_tr, c_quat = mat2trq(c_est_mat)
        cap_msg = CloudAndPose()
        cap_msg.seq = idx

        cap_msg.point_cloud2 = point_cloud2.create_cloud(
            self.header, self.fields, [point for point in current_cloud])
        cap_msg.init_guess = self.tq2tf_msg(*mat2trq(delta_gt_pose),
                                            self.header, "est")

        self.absolute_est_pose = add_poses(self.absolute_est_pose, c_est_mat)

        est_mat_temp = self.absolute_est_pose.copy()

        est_tf = self.mat2tf_msg(est_mat_temp, self.header, "est")
        gt_tf = self.mat2tf_msg(kitti2rvizaxis(gt_pose.copy()), self.header,
                                "gt")
        self.est_tf_pub.publish(est_tf)
        self.gt_tf_pub.publish(gt_tf)
        self.transform_broadcaster.sendTransform(gt_tf)
        self.transform_broadcaster.sendTransform(est_tf)
        self.cloud_pub.publish(
            point_cloud2.create_cloud(Header(frame_id="gt_car"), self.fields,
                                      [point for point in current_cloud]))
        self.cap_pub.publish(cap_msg)

        print(
            "[{}] inference spent: {:.2f} ms\t\t| Trans : {}\t\t| GT Trans: {}\t\t| Trans error: {:.4f}\t\t| "
            "Rot error: {:.4f}".format(
                idx, self.infer_time_meter.avg, list(c_tr),
                list(delta_gt_pose[:3, -1].reshape(3, )), trans_error,
                rot_error))
        self.rate.sleep()

    def __call__(self):
        for idx in range(args.start, len(self.dataset.poses)):
            if rospy.is_shutdown():
                break
            self.serve(idx)
        print("Avg Tr Error: {:.3e}\tAvg Rot Error: {:.3e}".format(
            self.tr_error_meter.avg, self.rot_error_meter.avg))
        save_pose_predictions(
            np.eye(4)[:3, :], self.pose_list, f"{self.seq}.txt")
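The serve() method above relies on small pose-conversion helpers (trq2mat, mat2trq) whose definitions are not part of the snippet. A plausible sketch, assuming trq2mat composes a homogeneous transform from a translation vector and an (x, y, z, w) quaternion via scipy, as the rest of the class already does with Rotation:

import numpy as np
from scipy.spatial.transform import Rotation

def trq2mat(tr, quat):
    # build a 4x4 homogeneous transform from translation + quaternion
    mat = np.eye(4)
    mat[:3, :3] = Rotation.from_quat(quat).as_matrix()
    mat[:3, 3] = tr
    return mat

def mat2trq(mat):
    # split a (3x4 or 4x4) transform back into translation + quaternion
    translation = mat[:3, -1]
    quaternion = Rotation.from_matrix(mat[:3, :3]).as_quat()
    return translation, quaternion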