Example 1
    def __init__(self, cfg: ConfigTree):
        self.cfg = cfg
        print(cfg)
        self.summary_writer = SummaryWriter(log_dir=experiment_path)
        self.model_builder = ModelFactory(cfg)
        self.dataset_builder = DataLoaderFactory(cfg)

        self.train_ds = self.dataset_builder.build(split='train')
        self.test_ds = self.dataset_builder.build(split='val')
        self.ds: YoutubeDataset = self.train_ds.dataset

        self.train_criterion = nn.CrossEntropyLoss(
            ignore_index=self.ds.PAD_IDX)
        self.val_criterion = nn.CrossEntropyLoss(ignore_index=self.ds.PAD_IDX)
        self.model: nn.Module = self.model_builder.build(
            device=torch.device('cuda'), wrapper=nn.DataParallel)
        optimizer = optim.Adam(self.model.parameters(),
                               lr=0.,
                               betas=(0.9, 0.98),
                               eps=1e-9)
        self.optimizer = CustomSchedule(
            self.cfg.get_int('model.emb_dim'),
            optimizer=optimizer,
        )

        self.num_epochs = cfg.get_int('num_epochs')

        logger.info(f'Use control: {self.ds.use_control}')
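
The Adam optimizer above is created with lr=0 and then wrapped in CustomSchedule, which is expected to drive the learning rate itself. CustomSchedule is not defined in these listings; a minimal sketch of a Noam-style warmup wrapper exposing the same call sites used here (zero_grad, step, state_dict, load_state_dict) could look like the following — the class name, warmup value, and formula are assumptions, not the project's actual implementation.

import torch.optim as optim

class NoamSchedule:
    """Hypothetical stand-in for CustomSchedule: Transformer warmup LR."""

    def __init__(self, emb_dim: int, optimizer: optim.Optimizer, warmup: int = 4000):
        self.optimizer = optimizer
        self.emb_dim = emb_dim
        self.warmup = warmup
        self._step = 0

    def step(self):
        # Noam schedule: lr = d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        self._step += 1
        lr = self.emb_dim ** -0.5 * min(self._step ** -0.5,
                                        self._step * self.warmup ** -1.5)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()

    def state_dict(self):
        return {'step': self._step, 'optimizer': self.optimizer.state_dict()}

    def load_state_dict(self, state):
        self._step = state['step']
        self.optimizer.load_state_dict(state['optimizer'])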
Example 2
    def __init__(self, cfg: ConfigTree, args):

        #pdb.set_trace()

        self.cfg = cfg
        self.device = self.cfg.get_string('device')
        self.summary_writer = SummaryWriter(log_dir=experiment_path)
        self.model_builder = ModelFactory(cfg)
        self.dataset_builder = DataLoaderFactory(cfg)

        self.train_ds = self.dataset_builder.build(split='train')
        self.test_ds = self.dataset_builder.build(split='val')
        self.ds: YoutubeDataset = self.train_ds.dataset

        self.train_criterion = nn.CrossEntropyLoss(
            ignore_index=self.ds.PAD_IDX)
        self.val_criterion = nn.CrossEntropyLoss(ignore_index=self.ds.PAD_IDX)

        self.model: nn.Module = self.model_builder.build(
            device=torch.device(self.device), wrapper=nn.DataParallel)

        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=3e-4,
                                    betas=(0.9, 0.98),
                                    eps=1e-9,
                                    weight_decay=1e-5)
        """
        self.optimizer = CustomSchedule(
            self.cfg.get_int('model.emb_dim'),
            optimizer=optimizer,
        )
        """
        self.num_epochs = cfg.get_int('num_epochs')

        if args.load:
            print("loading model...")
            checkpoint = torch.load(experiment_path + '/checkpoint.pth.tar')
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epoch = checkpoint['epoch']
            self.num_epochs -= epoch
            self.epochs_left = epoch
            #self.loss = checkpoint['loss']
        else:
            self.epochs_left = 0
            #checkpoint = torch.load('../Foley-Music/exps/urmp-vn/checkpoint.pth.tar') #Transfer learning
            #self.model.load_state_dict(checkpoint['model_state_dict'], strict=False)

        logger.info(f'Use control: {self.ds.use_control}')
        self.duration = self.cfg.get_float('dataset.duration')
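
The load branch above expects a checkpoint containing 'model_state_dict', 'optimizer_state_dict', and 'epoch', which matches what run() writes in Example 5. A minimal, hypothetical resume helper built around that contract (the function name is illustrative):

import torch

def load_checkpoint(path, model, optimizer, device='cpu'):
    """Restore model/optimizer state; return the epoch to resume from.

    Assumes the checkpoint layout used in these examples:
    {'epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'}.
    """
    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return checkpoint['epoch']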
Example 3
def main(args):
    torch.set_grad_enabled(False)

    checkpoint_path = Path(args.checkpoint)
    output_dir = Path(args.output)
    if args.control is not None:
        control_tensor = utils.midi.pitch_histogram_string_to_control_tensor(
            args.control)
    else:
        control_tensor = None

    cp = torch.load(checkpoint_path)
    cfg = ConfigFactory.parse_file(checkpoint_path.parent / 'config.conf')
    instrument = cfg.get_string('dataset.instrument', args.instrument)
    pprint(cfg)
    print('Using Instrument:', instrument)

    model_factory = ModelFactory(cfg)
    dataloader_factory = DataLoaderFactory(cfg)
    device = cfg.get_string('device')
    rnn = cfg.get_bool('model.rnn')

    #model: MusicTransformer = model_factory.build(device=DEVICE)
    model: nn.Module = model_factory.build(device=torch.device(device),
                                           wrapper=nn.DataParallel)

    model.load_state_dict(cp['model_state_dict'])
    model.eval()

    dl = dataloader_factory.build(split='test')
    ds: YoutubeDataset = dl.dataset
    pprint(ds.samples[:5])

    length = cfg.get_float('dataset.duration')  # duration of the generated audio
    # Output directories: one for generated audio, one for generated video
    os.makedirs(output_dir / 'audio', exist_ok=True)
    os.makedirs(output_dir / 'video', exist_ok=True)

    test_criterion = nn.CrossEntropyLoss(ignore_index=ds.PAD_IDX)

    print(len(ds), "samples")

    for data in tqdm(ds):

        #pdb.set_trace()
        index = data['index']
        imu = data['imu']
        midi_x, midi_y = data['midi_x'], data['midi_y']

        if device == 'cuda':
            imu = imu.cuda(non_blocking=True)
            if control_tensor is not None:
                control_tensor = control_tensor.cuda(non_blocking=True)

        sample = ds.samples[index]

        if rnn:
            imu = model.module.forward_imu_net(imu.unsqueeze(0))
            events = torch.from_numpy(
                model.module.transformer.predict(imu.squeeze(1),
                                                 ds.num_events))
            events = model.module.softmax(
                model.module.get_output(events.unsqueeze(1)))
            events = torch.max(events, dim=2)[1]
        else:
            events = model.module.generate(
                imu.unsqueeze(0),
                target_seq_length=ds.num_events,
                beam=5,
                pad_idx=ds.PAD_IDX,
                sos_idx=ds.SOS_IDX,
                eos_idx=ds.EOS_IDX,
                control=control_tensor,
            )
        #pdb.set_trace()
        if events.shape[1] <= 0:
            print('=' * 100)
            print('no events generated')
            print(sample)
            print('=' * 100)
            continue

        print('events shape:', events.shape)
        print('events length:', len(events))
        """
        mask = (midi_x != ds.PAD_IDX)

        out = events.squeeze()[mask]
        tgt = midi_x[mask]
        num_right = (out == tgt)
        num_right = torch.sum(num_right).float()

        acc = num_right / len(tgt)
        print("Accuracy", acc)
        """
        """
        loss = test_criterion(events, midi_x)

        acc = compute_epiano_accuracy(events, midi_x)

        batch_size = len(midi_x)
        loss_meter.update(loss.item(), batch_size)
        acc_meter.update(acc.item(), batch_size)
        logger.info(
                f'Val [{epoch}]/{self.num_epochs}][{i}/{num_iters}]\t'
                f'{loss_meter}\t{acc_meter}'
        )
        """

        ss = change_time_format(sample.start_time)
        dd = change_time_format(sample.start_time + length)
        add_name = '-' + ss + '-' + dd

        folder_name = "samples"
        midi_filename = sample.vid
        audio_filename = sample.vid
        midi_dir = output_dir / 'midi'
        os.makedirs(midi_dir, exist_ok=True)
        midi_dir2 = output_dir / 'midi2'
        os.makedirs(midi_dir2, exist_ok=True)
        midi_path = midi_dir / f'{midi_filename}{add_name}.midi'
        pm = utils.midi.tensor_to_pm(events.squeeze(), instrument=instrument)
        pm.write(str(midi_path))

        midi_path2 = midi_dir2 / f'{midi_filename}{add_name}.midi'
        pm2 = utils.midi.tensor_to_pm(midi_x, instrument=instrument)
        pm2.write(str(midi_path2))

        audio_dir = output_dir / 'audio' / f'{folder_name}'
        os.makedirs(audio_dir, exist_ok=True)
        audio_path = audio_dir / f'{audio_filename}{add_name}.wav'

        utils.midi.pm_to_wav(
            pm,
            audio_path,
            rate=22050,
        )
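
utils.midi.pm_to_wav is external to these listings. A plausible sketch, assuming it renders the PrettyMIDI object with FluidSynth and writes a WAV at the requested rate (the helper name mirrors the call above; the normalization step is an assumption):

import numpy as np
import pretty_midi
from scipy.io import wavfile

def pm_to_wav(pm: pretty_midi.PrettyMIDI, wav_path, rate: int = 22050):
    # Render the MIDI to a waveform (requires FluidSynth and a soundfont).
    audio = pm.fluidsynth(fs=rate)
    # Scale to the int16 range before writing the WAV file.
    audio = np.int16(audio / (np.max(np.abs(audio)) + 1e-9) * 32767)
    wavfile.write(str(wav_path), rate, audio)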
Example 4
def main(args):
    torch.set_grad_enabled(False)

    checkpoint_path = Path(args.checkpoint)
    video_dir = Path(args.video)
    output_dir = Path(args.output)
    if args.control is not None:
        control_tensor = utils.midi.pitch_histogram_string_to_control_tensor(
            args.control)
    else:
        control_tensor = None

    cp = torch.load(checkpoint_path)
    cfg = ConfigFactory.parse_file(checkpoint_path.parent / 'config.conf')
    instrument = cfg.get_string('dataset.instrument', args.instrument)
    pprint(cfg)
    print('Using Instrument:', instrument)

    model_factory = ModelFactory(cfg)
    dataloader_factory = DataLoaderFactory(cfg)

    model: MusicTransformer = model_factory.build(device=DEVICE)

    model.load_state_dict(cp['state_dict'])
    model.eval()

    dl = dataloader_factory.build(split='val')
    ds: YoutubeDataset = dl.dataset
    pprint(ds.samples[:5])

    length = cfg.get_float(
        'dataset.duration')  # how long is your produced audio
    os.makedirs(output_dir / 'audio', exist_ok=True)
    os.makedirs(output_dir / 'video', exist_ok=True)

    for data in tqdm(ds):
        index = data['index']
        pose = data['pose']

        pose = pose.cuda(non_blocking=True)
        if control_tensor is not None:
            control_tensor = control_tensor.cuda(non_blocking=True)
        sample = ds.samples[index]

        events = model.generate(
            pose.unsqueeze(0),
            target_seq_length=ds.num_events,
            beam=5,
            pad_idx=ds.PAD_IDX,
            sos_idx=ds.SOS_IDX,
            eos_idx=ds.EOS_IDX,
            control=control_tensor,
        )
        if events.shape[1] <= 0:
            print('=' * 100)
            print('no events generated')
            print(sample)
            print('=' * 100)
            continue

        print('events shape:', events.shape)
        print('events length:', len(events))
        try:
            video_path = next(video_dir.glob(f'{sample.vid}.*'))
        except Exception as e:
            print(e)
            print('skip')
            if not args.only_audio:
                continue

        ss = change_time_format(sample.start_time)
        dd = change_time_format(sample.start_time + length)
        add_name = '-' + ss + '-' + dd

        midi_dir = output_dir / 'midi' / f'{sample.vid}'
        os.makedirs(midi_dir, exist_ok=True)
        midi_path = midi_dir / f'{sample.vid}{add_name}.midi'
        pm = utils.midi.tensor_to_pm(events.squeeze(), instrument=instrument)
        pm.write(str(midi_path))

        audio_dir = output_dir / 'audio' / f'{sample.vid}'
        os.makedirs(audio_dir, exist_ok=True)
        audio_path = audio_dir / f'{sample.vid}{add_name}.wav'

        utils.midi.pm_to_wav(
            pm,
            audio_path,
            rate=22050,
        )

        if not args.only_audio:
            # find the source video (only videos listed in val.csv)
            in_path = get_video_path(video_dir, sample.vid)
            vid_name = sample.vid
            vid_dir = os.path.join(output_dir, 'video', vid_name)
            if not os.path.exists(vid_dir):
                os.mkdir(vid_dir)

            # cut video to fixed length
            vid_dir_name = sample.vid  # just name, no suffix like .mp4
            cut_name = str(vid_dir_name) + add_name + '_middle.mp4'

            # concat audio and video
            vid_path = os.path.join(vid_dir,
                                    str(vid_dir_name) + add_name + '.mp4')
            cmd2 = f'ffmpeg -y -ss {ss} -i {in_path} -t {length} -i {str(audio_path)} -t {length} -map 0:v:0 -map 1:a:0 -c:v libx264 -c:a aac -strict experimental {vid_path}'

            os.system(cmd2)
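
The os.system call interpolates file paths straight into a shell string, which breaks on paths containing spaces. A sketch of the same mux step using subprocess with an argument list (same ffmpeg flags; the helper name is illustrative):

import subprocess

def mux_audio_video(in_path, audio_path, vid_path, start, length):
    # Cut `length` seconds of video starting at `start`, pair it with the
    # generated WAV, and re-encode to H.264 video with AAC audio.
    cmd = [
        'ffmpeg', '-y',
        '-ss', str(start), '-i', str(in_path), '-t', str(length),
        '-i', str(audio_path), '-t', str(length),
        '-map', '0:v:0', '-map', '1:a:0',
        '-c:v', 'libx264', '-c:a', 'aac', '-strict', 'experimental',
        str(vid_path),
    ]
    subprocess.run(cmd, check=True)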
Example 5
class Engine(BaseEngine):
    def __init__(self, cfg: ConfigTree, args):

        #pdb.set_trace()

        self.cfg = cfg
        self.device = self.cfg.get_string('device')
        self.summary_writer = SummaryWriter(log_dir=experiment_path)
        self.model_builder = ModelFactory(cfg)
        self.dataset_builder = DataLoaderFactory(cfg)

        self.train_ds = self.dataset_builder.build(split='train')
        self.test_ds = self.dataset_builder.build(split='val')
        self.ds: YoutubeDataset = self.train_ds.dataset

        self.train_criterion = nn.CrossEntropyLoss(
            ignore_index=self.ds.PAD_IDX)
        self.val_criterion = nn.CrossEntropyLoss(ignore_index=self.ds.PAD_IDX)

        self.model: nn.Module = self.model_builder.build(
            device=torch.device(self.device), wrapper=nn.DataParallel)

        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=3e-4,
                                    betas=(0.9, 0.98),
                                    eps=1e-9,
                                    weight_decay=1e-5)
        """
        self.optimizer = CustomSchedule(
            self.cfg.get_int('model.emb_dim'),
            optimizer=optimizer,
        )
        """
        self.num_epochs = cfg.get_int('num_epochs')

        if args.load:
            print("loading model...")
            checkpoint = torch.load(experiment_path + '/checkpoint.pth.tar')
            self.model.load_state_dict(checkpoint['model_state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epoch = checkpoint['epoch']
            self.num_epochs -= epoch
            self.epochs_left = epoch
            #self.loss = checkpoint['loss']
        else:
            self.epochs_left = 0
            #checkpoint = torch.load('../Foley-Music/exps/urmp-vn/checkpoint.pth.tar') #Transfer learning
            #self.model.load_state_dict(checkpoint['model_state_dict'], strict=False)

        logger.info(f'Use control: {self.ds.use_control}')
        self.duration = self.cfg.get_float('dataset.duration')

    def train(self, epoch=0):
        loss_meter = AverageMeter('Loss')
        acc_meter = AverageMeter('Acc')
        num_iters = len(self.train_ds)
        self.model.train()
        # Count trainable parameters (printed at the start of each training pass)
        model_parameters = filter(lambda p: p.requires_grad,
                                  self.model.parameters())
        count = sum(np.prod(p.size()) for p in model_parameters)
        print(f'Trainable parameters: {count}')

        for i, data in enumerate(self.train_ds):
            midi_x, midi_y = data['midi_x'], data['midi_y']

            #pdb.set_trace()
            if self.ds.use_pose:
                feat = data['pose']
            elif self.ds.use_rgb:
                feat = data['rgb']
            elif self.ds.use_flow:
                feat = data['flow']
            elif self.ds.use_imu:
                feat = data['imu']
            else:
                raise Exception('No feature!')

            if self.device == 'cuda':
                feat, midi_x, midi_y = (feat.cuda(non_blocking=True),
                                        midi_x.cuda(non_blocking=True),
                                        midi_y.cuda(non_blocking=True))

            if self.ds.use_control:
                control = data['control']
                control = control.cuda(non_blocking=True)
            else:
                control = None

            output = self.model(feat,
                                midi_x,
                                pad_idx=self.ds.PAD_IDX,
                                control=control)

            loss = self.train_criterion(output.view(-1, output.shape[-1]),
                                        midi_y.flatten())

            self.optimizer.zero_grad()
            loss.backward()

            self.optimizer.step()

            acc = compute_epiano_accuracy(output,
                                          midi_y,
                                          pad_idx=self.ds.PAD_IDX)

            batch_size = len(midi_x)
            loss_meter.update(loss.item(), batch_size)
            acc_meter.update(acc.item(), batch_size)

            logger.info(
                f'Train [{epoch}]/{self.num_epochs}][{i}/{num_iters}]\t'
                f'{loss_meter}\t{acc_meter}')
        self.summary_writer.add_scalar('train/loss', loss_meter.avg, epoch)
        self.summary_writer.add_scalar('train/acc', acc_meter.avg, epoch)
        return loss_meter.avg

    def test(self, epoch=0):
        loss_meter = AverageMeter('Loss')
        acc_meter = AverageMeter('Acc')
        num_iters = len(self.test_ds)
        self.model.eval()

        with torch.no_grad():
            for i, data in enumerate(self.test_ds):
                midi_x, midi_y = data['midi_x'], data['midi_y']
                #pdb.set_trace()
                if self.ds.use_pose:
                    feat = data['pose']
                elif self.ds.use_rgb:
                    feat = data['rgb']
                elif self.ds.use_flow:
                    feat = data['flow']
                elif self.ds.use_imu:
                    feat = data['imu']
                else:
                    raise Exception('No feature!')

                if self.device == 'cuda':
                    feat, midi_x, midi_y = (feat.cuda(non_blocking=True),
                                            midi_x.cuda(non_blocking=True),
                                            midi_y.cuda(non_blocking=True))

                if self.ds.use_control:
                    control = data['control']
                    control = control.cuda(non_blocking=True)
                else:
                    control = None

                output = self.model(feat,
                                    midi_x,
                                    pad_idx=self.ds.PAD_IDX,
                                    control=control)
                """
                For CrossEntropy
                output: [B, T, D] -> [BT, D]
                target: [B, T] -> [BT]
                """
                loss = self.val_criterion(output.view(-1, output.shape[-1]),
                                          midi_y.flatten())

                acc = compute_epiano_accuracy(output, midi_y)

                batch_size = len(midi_x)
                loss_meter.update(loss.item(), batch_size)
                acc_meter.update(acc.item(), batch_size)
                logger.info(
                    f'Val [{epoch}]/{self.num_epochs}][{i}/{num_iters}]\t'
                    f'{loss_meter}\t{acc_meter}')
            self.summary_writer.add_scalar('val/loss', loss_meter.avg, epoch)
            self.summary_writer.add_scalar('val/acc', acc_meter.avg, epoch)

        return loss_meter.avg

    @staticmethod
    def epoch_time(start_time: float, end_time: float):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    def run(self):
        best_loss = float('inf')
        for epoch in range(self.num_epochs):
            start_time = time.time()
            _train_loss = self.train(epoch)
            loss = self.test(epoch)
            end_time = time.time()
            epoch_mins, epoch_secs = self.epoch_time(start_time, end_time)

            logger.info(
                f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')

            is_best = loss < best_loss
            best_loss = min(loss, best_loss)

            torch.save(
                {
                    'epoch': epoch + self.epochs_left,
                    'model_state_dict': self.model.state_dict(),
                    'optimizer_state_dict': self.optimizer.state_dict(),
                    'loss': loss,
                }, experiment_path + '/' + 'checkpoint.pth.tar')

    def close(self):
        self.summary_writer.close()
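
AverageMeter is used throughout train() and test() but not defined in these listings. The conventional implementation (as in the PyTorch ImageNet example) tracks a running sum and count; a minimal sketch assuming that behaviour, matching the .update(), .avg, and string formatting used above:

class AverageMeter:
    """Track the latest value and running average of a metric."""

    def __init__(self, name: str):
        self.name = name
        self.val = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val: float, n: int = 1):
        self.val = val
        self.sum += val * n
        self.count += n

    @property
    def avg(self) -> float:
        return self.sum / max(self.count, 1)

    def __str__(self) -> str:
        return f'{self.name} {self.val:.4f} ({self.avg:.4f})'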
Example 6
class Engine(BaseEngine):
    def __init__(self, cfg: ConfigTree):
        self.cfg = cfg
        print(cfg)
        self.summary_writer = SummaryWriter(log_dir=experiment_path)
        self.model_builder = ModelFactory(cfg)
        self.dataset_builder = DataLoaderFactory(cfg)

        self.train_ds = self.dataset_builder.build(split='train')
        self.test_ds = self.dataset_builder.build(split='val')
        self.ds: YoutubeDataset = self.train_ds.dataset

        self.train_criterion = nn.CrossEntropyLoss(
            ignore_index=self.ds.PAD_IDX)
        self.val_criterion = nn.CrossEntropyLoss(ignore_index=self.ds.PAD_IDX)
        self.model: nn.Module = self.model_builder.build(
            device=torch.device('cuda'), wrapper=nn.DataParallel)
        optimizer = optim.Adam(self.model.parameters(),
                               lr=0.,
                               betas=(0.9, 0.98),
                               eps=1e-9)
        self.optimizer = CustomSchedule(
            self.cfg.get_int('model.emb_dim'),
            optimizer=optimizer,
        )

        self.num_epochs = cfg.get_int('num_epochs')

        logger.info(f'Use control: {self.ds.use_control}')

    def train(self, epoch=0):
        loss_meter = AverageMeter('Loss')
        acc_meter = AverageMeter('Acc')
        num_iters = len(self.train_ds)
        self.model.train()
        for i, data in enumerate(self.train_ds):
            midi_x, midi_y = data['midi_x'], data['midi_y']

            if self.ds.use_pose:
                feat = data['pose']
            elif self.ds.use_rgb:
                feat = data['rgb']
            elif self.ds.use_flow:
                feat = data['flow']
            else:
                raise Exception('No feature!')

            feat, midi_x, midi_y = (feat.cuda(non_blocking=True),
                                    midi_x.cuda(non_blocking=True),
                                    midi_y.cuda(non_blocking=True))

            if self.ds.use_control:
                control = data['control']
                control = control.cuda(non_blocking=True)
            else:
                control = None

            output = self.model(feat,
                                midi_x,
                                pad_idx=self.ds.PAD_IDX,
                                control=control)

            loss = self.train_criterion(output.view(-1, output.shape[-1]),
                                        midi_y.flatten())

            self.optimizer.zero_grad()
            loss.backward()

            self.optimizer.step()

            acc = compute_epiano_accuracy(output,
                                          midi_y,
                                          pad_idx=self.ds.PAD_IDX)

            batch_size = len(midi_x)
            loss_meter.update(loss.item(), batch_size)
            acc_meter.update(acc.item(), batch_size)

            logger.info(
                f'Train [{epoch}]/{self.num_epochs}][{i}/{num_iters}]\t'
                f'{loss_meter}\t{acc_meter}')
        self.summary_writer.add_scalar('train/loss', loss_meter.avg, epoch)
        self.summary_writer.add_scalar('train/acc', acc_meter.avg, epoch)
        return loss_meter.avg

    def test(self, epoch=0):
        loss_meter = AverageMeter('Loss')
        acc_meter = AverageMeter('Acc')
        num_iters = len(self.test_ds)
        self.model.eval()

        with torch.no_grad():
            for i, data in enumerate(self.test_ds):
                midi_x, midi_y = data['midi_x'], data['midi_y']

                if self.ds.use_pose:
                    feat = data['pose']
                elif self.ds.use_rgb:
                    feat = data['rgb']
                elif self.ds.use_flow:
                    feat = data['flow']
                else:
                    raise Exception('No feature!')

                feat, midi_x, midi_y = (feat.cuda(non_blocking=True),
                                        midi_x.cuda(non_blocking=True),
                                        midi_y.cuda(non_blocking=True))

                if self.ds.use_control:
                    control = data['control']
                    control = control.cuda(non_blocking=True)
                else:
                    control = None

                output = self.model(feat,
                                    midi_x,
                                    pad_idx=self.ds.PAD_IDX,
                                    control=control)
                """
                For CrossEntropy
                output: [B, T, D] -> [BT, D]
                target: [B, T] -> [BT]
                """
                loss = self.val_criterion(output.view(-1, output.shape[-1]),
                                          midi_y.flatten())

                acc = compute_epiano_accuracy(output, midi_y)

                batch_size = len(midi_x)
                loss_meter.update(loss.item(), batch_size)
                acc_meter.update(acc.item(), batch_size)
                logger.info(
                    f'Val [{epoch}]/{self.num_epochs}][{i}/{num_iters}]\t'
                    f'{loss_meter}\t{acc_meter}')
            self.summary_writer.add_scalar('val/loss', loss_meter.avg, epoch)
            self.summary_writer.add_scalar('val/acc', acc_meter.avg, epoch)

        return loss_meter.avg

    @staticmethod
    def epoch_time(start_time: float, end_time: float):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    def run(self):
        best_loss = float('inf')
        for epoch in range(self.num_epochs):
            start_time = time.time()
            _train_loss = self.train(epoch)
            loss = self.test(epoch)
            end_time = time.time()
            epoch_mins, epoch_secs = self.epoch_time(start_time, end_time)

            logger.info(
                f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')

            is_best = loss < best_loss
            best_loss = min(loss, best_loss)
            save_checkpoint(
                {
                    'state_dict': self.model.module.state_dict(),
                    'optimizer': self.optimizer.state_dict()
                },
                is_best=is_best,
                folder=experiment_path)

    def close(self):
        self.summary_writer.close()
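
compute_epiano_accuracy is also external to these listings. Given how it is called (logits of shape [B, T, D], targets of shape [B, T], optional pad_idx), a plausible sketch is per-token accuracy over non-padded positions — the real function may differ in detail:

import torch

def compute_epiano_accuracy(output: torch.Tensor,
                            target: torch.Tensor,
                            pad_idx=None) -> torch.Tensor:
    # output: [B, T, D] logits; target: [B, T] token ids.
    pred = output.argmax(dim=-1)
    if pad_idx is not None:
        mask = target != pad_idx            # ignore padded positions
        correct = (pred == target) & mask
        total = mask.sum().clamp(min=1)
    else:
        correct = pred == target
        total = torch.tensor(target.numel(), device=target.device)
    return correct.sum().float() / total.float()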