def main():
    """Train a 2-speaker ConvTasNet on AVSpeech, optionally resuming training
    from the checkpoint configured in ``config.pretrained_train``."""

    # Snapshot the current code base alongside the experiment outputs.
    os.system('cp -r ../ConvTasNet "{0}"'.format(config.basePath +
                                                 '/savedCode'))

    model = DataParallel(ConvTasNet(C=2))
    print('Total Parameters: ', sum(p.numel() for p in model.parameters()))

    dataset = AVSpeech('train')
    loss_func = SISNRPIT()

    if config.use_cuda:
        model = model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr[1])

    saved_loss = None
    if config.pretrained:
        # Restore model/optimizer state plus the recorded loss history.
        checkpoint = torch.load(config.pretrained_train)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        saved_loss = np.load(config.loss_path).tolist()

    train_loader = DataLoader(dataset,
                              batch_size=config.batchsize['train'],
                              num_workers=config.num_workers['train'],
                              worker_init_fn=init_fn)

    train(model, train_loader, optimizer, loss_func, saved_loss)
# --- Example 2 ---
def main():
    """Evaluate ASR-augmented ConvTasNet checkpoints on the AVSpeech test split."""

    # Snapshot the current code base alongside the experiment outputs.
    os.system('cp -r ../Oracle "{0}"'.format(config.basePath + '/savedCode'))

    model = DataParallel(ConvTasNet(C=2, test_with_asr=True))
    test_loader = DataLoader(AVSpeech('test'),
                             batch_size=config.batchsize['test'],
                             num_workers=config.num_workers['test'],
                             worker_init_fn=init_fn)

    if config.use_cuda:
        model = model.cuda()

    # Override the configured checkpoint list with the one under evaluation.
    config.pretrained_test = [
        '/home/SharedData/Pragya/Experiments/Oracle/2020-05-20 15:23:34.411560/116662.pth'
    ]

    for ckpt_path in config.pretrained_test:
        ckpt_name = ckpt_path.split('/')[-1]
        print('Currently working on: ', ckpt_name)

        model.load_state_dict(torch.load(ckpt_path)['model_state_dict'])

        total_loss = test(model, test_loader)

        torch.cuda.empty_cache()

        print('Average Loss for ', ckpt_name, 'is: ', np.mean(total_loss))
# --- Example 3 ---
def main():
    """Evaluate audio-only ConvTasNet checkpoints on the AVSpeech test split."""

    # Snapshot the current code base alongside the experiment outputs.
    os.system('cp -r ../ConvTasNet "{0}"'.format(config.basePath +
                                                 '/savedCode'))

    model = DataParallel(ConvTasNet(C=2))
    test_loader = DataLoader(AVSpeech('test'),
                             batch_size=config.batchsize['test'],
                             num_workers=config.num_workers['test'],
                             worker_init_fn=init_fn)
    loss_func = SISNRPIT()

    if config.use_cuda:
        model = model.cuda()

    # Override the configured checkpoint list with the one under evaluation.
    config.pretrained_test = [
        '/home/SharedData/Pragya/ModelsToUse/AudioOnlyConvTasNet.pth',
    ]

    for ckpt_path in config.pretrained_test:
        ckpt_name = ckpt_path.split('/')[-1]
        print('Currently working on: ', ckpt_name)

        model.load_state_dict(torch.load(ckpt_path)['model_state_dict'])

        # test() is keyed by the checkpoint's basename without its extension.
        total_loss = test(ckpt_name.split('.')[0], model, test_loader,
                          loss_func)

        torch.cuda.empty_cache()

        print('Average Loss for ', ckpt_name, 'is: ', np.mean(total_loss))
# --- Example 4 ---
def main(args):
    """Build the data pipeline, Conv-TasNet model and optimizer from ``args``,
    then train via ``Solver``."""
    # Training / validation datasets share sample rate and segment length.
    tr_dataset = AudioDataset(args.train_json,
                              sample_rate=args.sample_rate,
                              segment_length=args.segment_length)
    cv_dataset = AudioDataset(args.valid_json,
                              sample_rate=args.sample_rate,
                              segment_length=args.segment_length)

    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=args.batch_size,
                                shuffle=args.shuffle,
                                num_workers=args.num_workers)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=args.batch_size,
                                num_workers=0)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # Model.
    model = ConvTasNet(args.N, args.L, args.B, args.H, args.P, args.X,
                       args.R, args.C,
                       norm_type=args.norm_type,
                       causal=args.causal,
                       mask_nonlinear=args.mask_nonlinear)
    print(model)
    if args.use_cuda:
        model = torch.nn.DataParallel(model)
        model.cuda()

    # Effective learning rate is scaled down by the gradient-accumulation
    # step count.
    lr = args.lr / args.batch_per_step
    if args.optimizer == 'sgd':
        optim = torch.optim.SGD(model.parameters(),
                                lr=lr,
                                momentum=args.momentum,
                                weight_decay=args.l2)
    elif args.optimizer == 'adam':
        optim = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=args.l2)
    else:
        print("Not support optimizer")
        return

    # Solver drives the actual training loop.
    Solver(data, model, optim, args).train()
# --- Example 5 ---
def main(args):
    """Build directory-backed datasets, Conv-TasNet model and optimizer from
    ``args``, then train via ``Solver``."""
    # NOTE(review): loaders use batch_size=1 while AudioDataset receives the
    # real batch size — presumably batching happens inside the dataset;
    # confirm against AudioDataset.
    tr_dataset = AudioDataset(args.train_dir,
                              args.batch_size,
                              sample_rate=args.sample_rate,
                              segment=args.segment)
    cv_dataset = AudioDataset(args.valid_dir,
                              batch_size=1,  # 1 -> use less GPU memory to do cv
                              sample_rate=args.sample_rate,
                              segment=-1,  # -1 -> use full audio
                              cv_maxlen=args.cv_maxlen)
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                shuffle=args.shuffle, num_workers=4)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1, num_workers=4,
                                pin_memory=True)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    # Model.
    model = ConvTasNet(args.N, args.L, args.B, args.H, args.P, args.X,
                       args.R, args.C,
                       norm_type=args.norm_type,
                       causal=args.causal,
                       mask_nonlinear=args.mask_nonlinear)
    if args.use_cuda:
        model = torch.nn.DataParallel(model)
        model.cuda()

    # Optimizer.
    if args.optimizer == 'sgd':
        optim = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.l2)
    elif args.optimizer == 'adam':
        optim = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.l2)
    else:
        print("Not support optimizer")
        return

    # Solver drives the actual training loop.
    Solver(data, model, optim, args).train()
def main():
    """Compare the plain audio ConvTasNet against the ASR-augmented variant
    on the AVSpeech test split."""

    # Snapshot the current code base alongside the experiment outputs.
    os.system('cp -r ../Oracle "{0}"'.format(config.basePath + '/savedCode'))

    # Both models go straight to GPU (unconditionally, unlike the other
    # scripts here which gate on config.use_cuda).
    with_asr = DataParallel(ConvTasNet(C=2, test_with_asr=True)).cuda()
    without_asr = DataParallel(ConvTasNet(C=2, asr_addition=False)).cuda()

    test_loader = DataLoader(AVSpeech('test'),
                             batch_size=config.batchsize['test'],
                             num_workers=config.num_workers['test'],
                             worker_init_fn=init_fn)
    loss_func = SISNRPIT()

    convtasnet_model = config.convtasnet_audio_model
    convtasnet_asr_model = [
        '/home/SharedData/Pragya/Experiments/Oracle/2020-05-20 15:23:34.411560/116662.pth'
    ]

    for conv_asr_test in convtasnet_asr_model:
        print('Currently working convtasnet on: ',
              convtasnet_model.split('/')[-1])
        print('Currently working E2ESpeechSaparation on: ',
              conv_asr_test.split('/')[-1])

        without_asr.load_state_dict(
            torch.load(convtasnet_model)['model_state_dict'])
        with_asr.load_state_dict(
            torch.load(conv_asr_test)['model_state_dict'])

        total_loss = test(without_asr, with_asr, test_loader, loss_func)

        torch.cuda.empty_cache()

        print('Average Loss for ',
              conv_asr_test.split('/')[-1], 'is: ', np.mean(total_loss))
def main(train_dir, batch_size, sample_rate, segment, valid_dir, cv_maxlen,
         shuffle, num_workers, N, L, B, H, P, X, R, C, norm_type, causal,
         mask_nonlinear, use_cuda, optimizer, lr, momentum, l2,
         epochs=100, half_lr=0, early_stop=0, max_norm=5,
         save_folder='exp/temp', checkpoint=0, continue_from='',
         model_path='final.pth.tar', print_freq=10,
         visdom=0, visdom_epoch=0, visdom_id='TasNet training'):
    """Build datasets, Conv-TasNet model and optimizer, then train via Solver.

    Bug fix: the original body passed ``epochs``, ``half_lr``, ``early_stop``,
    ``max_norm``, ``save_folder``, ``checkpoint``, ``continue_from``,
    ``model_path``, ``print_freq``, ``visdom``, ``visdom_epoch`` and
    ``visdom_id`` to ``Solver`` without ever defining them, which raised
    ``NameError``. They are now keyword parameters with defaults (defaults
    mirror the reference Conv-TasNet train script — confirm against this
    project's ``Solver``), keeping the original positional interface intact.
    """
    # data
    # NOTE(review): loaders use batch_size=1 while AudioDataset receives the
    # real batch size — presumably batching happens inside the dataset.
    tr_dataset = AudioDataset(train_dir, batch_size,
                              sample_rate=sample_rate, segment=segment)
    cv_dataset = AudioDataset(valid_dir, batch_size=1,  # 1 -> use less GPU memory to do cv
                              sample_rate=sample_rate,
                              segment=-1, cv_maxlen=cv_maxlen)  # -1 -> use full audio
    tr_loader = AudioDataLoader(tr_dataset, batch_size=1,
                                shuffle=shuffle,
                                num_workers=num_workers)
    cv_loader = AudioDataLoader(cv_dataset, batch_size=1,
                                num_workers=0)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    model = ConvTasNet(N, L, B, H, P, X, R, C,
                       norm_type=norm_type, causal=causal,
                       mask_nonlinear=mask_nonlinear)
    print(model)
    if use_cuda:
        model = torch.nn.DataParallel(model)
        model.cuda()
    # optimizer -- keep the string parameter `optimizer` untouched for
    # callers; bind the constructed instance to a separate local instead of
    # shadowing it.
    if optimizer == 'sgd':
        optim_instance = torch.optim.SGD(model.parameters(),
                                         lr=lr,
                                         momentum=momentum,
                                         weight_decay=l2)
    elif optimizer == 'adam':
        optim_instance = torch.optim.Adam(model.parameters(),
                                          lr=lr,
                                          weight_decay=l2)
    else:
        print("Not support optimizer")
        return

    # solver
    solver = Solver(data, model, optim_instance, use_cuda, epochs, half_lr,
                    early_stop, max_norm, save_folder, checkpoint,
                    continue_from, model_path, print_freq, visdom,
                    visdom_epoch, visdom_id)
    solver.train()
# --- Example 8 ---
def main(args):
    """Run evaluation of a pretrained separation + ASR pipeline over the
    full evaluation set via ``Solver.eval``."""
    # A checkpoint to restore from is mandatory for evaluation.
    if args.continue_from == '':
        return

    ev_dataset = EvalAllDataset(args.train_dir,
                                args.mix_json,
                                args.batch_size,
                                sample_rate=args.sample_rate)
    ev_loader = EvalAllDataLoader(ev_dataset,
                                  batch_size=1,
                                  num_workers=args.num_workers)
    data = {'tr_loader': None, 'ev_loader': ev_loader}

    # Separation model.
    sep_model = ConvTasNet(args.N, args.L, args.B, args.H, args.P, args.X,
                           args.R, args.C,
                           norm_type=args.norm_type,
                           causal=args.causal,
                           mask_nonlinear=args.mask_nonlinear)
    # ASR model.
    asr_model = AttentionModel(args.NUM_HIDDEN_NODES, args.NUM_ENC_LAYERS,
                               args.NUM_CLASSES)

    if args.use_cuda:
        sep_model = torch.nn.DataParallel(sep_model)
        asr_model = torch.nn.DataParallel(asr_model)
        sep_model.cuda()
        asr_model.cuda()

    # One optimizer per model, built by the same factory.
    if args.optimizer == 'sgd':
        def make_opt(params):
            return torch.optim.SGD(params, lr=args.lr,
                                   momentum=args.momentum,
                                   weight_decay=args.l2)
    elif args.optimizer == 'adam':
        def make_opt(params):
            return torch.optim.Adam(params, lr=args.lr,
                                    weight_decay=args.l2)
    else:
        print("Not support optimizer")
        return
    sep_optimizier = make_opt(sep_model.parameters())
    asr_optimizier = make_opt(asr_model.parameters())

    # Solver in evaluation mode; DEVICE is a module-level global here.
    solver = Solver(data, sep_model, asr_model, sep_optimizier,
                    asr_optimizier, args, DEVICE, ev=True)
    solver.eval(args.EOS_ID)
# --- Example 9 ---
    def __init__(self, training):
        """Wire configuration into a TF Conv-TasNet model plus checkpointing,
        then either start training immediately or just restore the latest
        checkpoint for inference.

        Args:
            training: truthy builds the training pipeline and runs
                ``self.train_epochs()`` before this constructor returns;
                falsy only restores the model.
        """
        super(ModelAccess, self).__init__()
        self.training = training
        # Split the grouped config tables apart for convenient access.
        self.main_config = ConfigTables()
        self.ioconfig = self.main_config.io_config
        self.dataconfig = self.main_config.data_config
        self.trainconfig = self.main_config.train_config
        self.modelconfig = self.main_config.model_config
        self.mulaw = self.dataconfig["mulaw"]
        self.audio_length = self.dataconfig["audio_length"]
        self.lr = self.trainconfig["lr"]
        self.batch_size = self.trainconfig["batch_size"]
        self.optimizer = self.trainconfig["optimizer"].lower()
        # NOTE(review): "adma" looks like a typo for "adam", but it is used
        # consistently in the assert and the dispatch below, so existing
        # configs must spell it "adma" to get the Adam optimizer.
        assert self.optimizer in ["sgd", "adma", "rmsprop"], "Not include other optimzier"
        if self.optimizer == "sgd":
            self.optimizer = tf.train.GradientDescentOptimizer(self.lr)
        elif self.optimizer == "adma":
            self.optimizer = tf.train.AdamOptimizer(self.lr)
        else:
            self.optimizer = tf.train.RMSPropOptimizer(self.lr)
        self.ckpt_dir = self.trainconfig["ckpt_dir"]
        self.epoches = self.trainconfig["epoches"]
        self.output_dir = self.dataconfig["output_dir"]
        os.makedirs(self.ckpt_dir, exist_ok=True)
        os.makedirs(self.output_dir, exist_ok=True)
        self.dataset = RecordMaker(training).dataset
        self.max_to_keep = self.trainconfig["max_to_keep"]
        # Conv-TasNet hyper-parameters: encoder filters/kernels, separator
        # block sizes and repeats, speaker count, normalisation, causality
        # and mask activation.
        self.filters_e = self.modelconfig["filters_e"]
        self.plot_pertire = self.trainconfig["plot_pertire"]
        self.kernel_size_e = self.modelconfig["kernel_size_e"]
        self.bottle_filter = self.modelconfig["bottle_filter"]
        self.filters_block = self.modelconfig["filters_block"]
        self.kernel_size_block = self.modelconfig["kernel_size_block"]
        self.num_conv_block = self.modelconfig["num_conv_block"]
        self.number_repeat = self.modelconfig["number_repeat"]
        self.spk_num = self.modelconfig["spk_num"]
        self.norm_type = self.modelconfig["norm_type"]
        self.causal = self.modelconfig["causal"]
        self.mask_nonlinear = self.modelconfig["mask_nonlinear"]
        self.savemodel_periter = self.trainconfig["savemodel_periter"]
        self.convtasnet = ConvTasNet(filters_e=self.filters_e,
                                     kernel_size_e=self.kernel_size_e,
                                     bottle_filter=self.bottle_filter,
                                     filters_block=self.filters_block,
                                     kernel_size_block=self.kernel_size_block,
                                     num_conv_block=self.num_conv_block,
                                     number_repeat=self.number_repeat,
                                     spk_num=self.spk_num,
                                     norm_type=self.norm_type,
                                     causal=self.causal,
                                     mask_nonlinear=self.mask_nonlinear,
                                     speech_length=self.audio_length)
        # Track optimizer + model in one checkpoint object. NOTE(review): the
        # "contasnet" key spelling is pre-existing; saved checkpoints depend
        # on it, so do not rename it casually.
        self.checkpoint = tf.train.Checkpoint(optimizer=self.optimizer,
                                              contasnet=self.convtasnet)
        self.ckpt_manager = tf.contrib.checkpoint.CheckpointManager(
            self.checkpoint,
            directory=self.ckpt_dir,
            max_to_keep=self.max_to_keep)

        # Silently restores nothing when no checkpoint exists yet
        # (latest_checkpoint returns None in that case).
        self.checkpoint.restore(tf.train.latest_checkpoint(self.ckpt_dir))

        if training:
            self.train_epochs()
        else:
            print("finish load model!")
# --- Example 10 ---
def separate(args):
    """Separate mixtures with a trained Conv-TasNet and write one wav for the
    mixture plus one per estimated source into ``args.out_dir``.

    Fixes over the original:
    * ``os.path.basename(filename).strip('.wav')`` stripped the *characters*
      ``.``, ``w``, ``a``, ``v`` from both ends rather than removing the
      extension (e.g. ``"saw.wav"`` -> ``"s"``); ``os.path.splitext`` is the
      correct tool.
    * ``write`` divided by the signal peak, which is a division by zero for
      an all-zero signal; now guarded.
    * The outer batch index was shadowed by the inner per-file index; the
      unused outer index is gone.
    """
    if args.mix_dir is None and args.mix_json is None:
        print("Must provide mix_dir or mix_json! When providing mix_dir, "
              "mix_json is ignored.")

    # Load model -- hyper-parameters are hard-coded to match the trained
    # checkpoint (N=256, L=20, B=256, H=512, P=3, X=8, R=4, C=2).
    model = ConvTasNet(256, 20, 256, 512, 3, 8, 4, 2,
                       norm_type="gLN", causal=0, mask_nonlinear="relu")
    model.cuda()
    model.load_state_dict(torch.load(args.model_path)['sep_state_dict'])
    print(model)
    model.eval()

    # Load data
    eval_dataset = EvalDataset(args.mix_dir,
                               args.mix_json,
                               batch_size=args.batch_size,
                               sample_rate=args.sample_rate)
    eval_loader = EvalDataLoader(eval_dataset, batch_size=1)
    os.makedirs(args.out_dir, exist_ok=True)

    def write(inputs, filename, sr=args.sample_rate):
        # Peak-normalise; skip all-zero signals to avoid division by zero.
        peak = max(np.abs(inputs))
        if peak > 0:
            inputs = inputs / peak
        sf.write(filename, inputs, sr)

    with torch.no_grad():
        for data in eval_loader:
            # Get batch data
            mixture, mix_lengths, filenames = data
            if args.use_cuda:
                mixture, mix_lengths = mixture.cuda(), mix_lengths.cuda()
            # Forward
            estimate_source = model(mixture)  # [B, C, T]
            # Remove padding and flat
            flat_estimate = remove_pad(estimate_source, mix_lengths)
            mixture = remove_pad(mixture, mix_lengths)
            # Write result: the mixture itself plus one file per source.
            for i, filename in enumerate(filenames):
                stem = os.path.splitext(os.path.basename(filename))[0]
                out_base = os.path.join(args.out_dir, stem)
                write(mixture[i], out_base + '.wav')
                C = flat_estimate[i].shape[0]
                for c in range(C):
                    write(flat_estimate[i][c],
                          out_base + '_s{}.wav'.format(c + 1))
def get_initial_model_optimizer():
    """Load the pretrained audio-only ConvTasNet, widen its first separator
    layers with random weights to make room for ASR-derived features, attach
    a DomainTranslation block and the pretrained ASR model, and return the
    assembled model together with an Adam optimizer.

    Returns:
        (convtasnet_audio_with_asr_model, optimizer_init) -- the DataParallel
        model and a ``torch.optim.Adam`` over its (pre-ASR-attachment)
        parameters.
    """
    from ETESpeechRecognition.model import E2E as ASR
    from domainTranslation import DomainTranslation
    import ETESpeechRecognition.config as asrConfig

    # loading convtasnet model (on CPU so this works without a GPU)
    trained_convtasnet_audio_model = torch.load(
        config.convtasnet_audio_model, map_location=torch.device('cpu'))

    convtasnet_audio_with_asr_model = DataParallel(ConvTasNet(C=2))

    model_state_dict = trained_convtasnet_audio_model['model_state_dict']

    # adding random weights to model for new block addition
    # NOTE(review): each cat along dim=1 grows the layer by 512 channels --
    # presumably the width of the incoming ASR feature block; the randomly
    # initialised slice is expected to be trained afterwards. Confirm the
    # 512 figure against the DomainTranslation output size.
    model_state_dict['module.separator.network.0.gamma'] = torch.cat([
        model_state_dict['module.separator.network.0.gamma'],
        torch.randn(size=[1, 512, 1])
    ],
                                                                     dim=1)
    model_state_dict['module.separator.network.0.beta'] = torch.cat([
        model_state_dict['module.separator.network.0.beta'],
        torch.randn(size=[1, 512, 1])
    ],
                                                                    dim=1)
    model_state_dict['module.separator.network.1.weight'] = torch.cat([
        model_state_dict['module.separator.network.1.weight'],
        torch.randn(size=[512, 512, 1])
    ],
                                                                      dim=1)

    # The widened state dict must be loaded BEFORE the extra modules are
    # attached, since those modules have no entries in the checkpoint.
    convtasnet_audio_with_asr_model.load_state_dict(
        trained_convtasnet_audio_model['model_state_dict'])

    print('Total Parameters in ConvTasNet without ASR model: ',
          sum(p.numel() for p in convtasnet_audio_with_asr_model.parameters()))

    convtasnet_audio_with_asr_model.module.domainTranslation = DomainTranslation(
    )

    # NOTE(review): the optimizer is created before the ASR model is
    # attached, so the ASR parameters are NOT optimized -- presumably the
    # ASR model is meant to stay frozen; confirm.
    optimizer_init = torch.optim.Adam(
        convtasnet_audio_with_asr_model.parameters(), lr=config.lr[1])

    if config.use_cuda:
        convtasnet_audio_with_asr_model = convtasnet_audio_with_asr_model.cuda(
        )

    # Loading ASR model
    asr_model = DataParallel(
        ASR(idim=80, odim=5002, args=asrConfig.ModelArgs(), get_features=True))
    if config.use_cuda:
        trained_asr_model = torch.load(config.asr_model)
    else:
        trained_asr_model = torch.load(config.asr_model,
                                       map_location=torch.device('cpu'))

    asr_model.load_state_dict(trained_asr_model['model'])

    convtasnet_audio_with_asr_model.module.asr = asr_model

    print('Total Parameters in ConvTasNet with ASR model: ',
          sum(p.numel() for p in convtasnet_audio_with_asr_model.parameters()))

    return convtasnet_audio_with_asr_model, optimizer_init
# --- Example 12 ---
        'P': args.P,
        'X': args.X,
        'R': args.R,
        'C': args.C,
        'norm_type': args.norm_type,
        'causal': args.causal,
        'mask_nonlinear': args.mask_nonlinear
    }

    train_args = {
        'lr': args.lr,
        'batch_size': args.batch_size,
        'epochs': args.epochs
    }

    model = ConvTasNet(**model_args)

    if args.evaluate == 0 and args.separate == 0:
        dataset = AudioDataset(args.data_dir, sr=args.sr, mode='train', seq_len=args.seq_len, verbose=0, voice_only=args.voice_only)

        print('DataLoading Done')

        train(model, dataset, **train_args)
    elif args.evaluate == 1:
        model.load_state_dict(torch.load(args.model, map_location='cpu'))

        dataset = AudioDataset(args.data_dir, sr=args.sr, mode='test', seq_len=args.seq_len, verbose=0, voice_only=args.voice_only)

        evaluate(model, dataset, args.batch_size, 0, args.cal_sdr)
    else:
        model.load_state_dict(torch.load(args.model, map_location='cpu'))