def train(model,
          training_generator,
          optimizer,
          mean_tr,
          std_tr,
          epoch,
          history,
          n_batches,
          n_sources,
          training_labels=''):
    model.train()
    bar = ChargingBar("Training for epoch: {}...".format(epoch),
                      max=n_batches)
    for batch_data in training_generator:
        (abs_tfs, masks) = batch_data
        if torch.cuda.is_available():
            input_tfs, index_ys = abs_tfs.cuda(), masks.cuda()
        else:
            input_tfs, index_ys = abs_tfs, masks
        # the input sequence is determined by time and not freqs
        # before: input_tfs = batch_size x (n_fft/2+1) x n_timesteps
        input_tfs = input_tfs.permute(0, 2, 1).contiguous()
        index_ys = index_ys.permute(0, 2, 1).contiguous()

        # normalize with mean and variance from the training dataset
        input_tfs -= mean_tr
        input_tfs /= std_tr

        if training_labels == 'raw_phase_diff':
            flatened_ys = index_ys.view(index_ys.size(0), -1, 1)
        else:
            # index_ys = index_ys.permute(0, 2, 1).contiguous()
            one_hot_ys = converters.one_hot_3Dmasks(index_ys,
                                                    n_sources)
            if torch.cuda.is_available(): 
                flatened_ys = one_hot_ys.view(one_hot_ys.size(0),
                                            -1,
                                            one_hot_ys.size(-1)).cuda()
            else:
                flatened_ys = one_hot_ys.view(one_hot_ys.size(0),
                                            -1,
                                            one_hot_ys.size(-1))

        optimizer.zero_grad()
        vs = model(input_tfs)


        loss = affinity_losses.paris_naive(vs, flatened_ys)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 100.)
        optimizer.step()

        update_history.values_update([('loss', loss)],
                                     history, update_mode='batch')
        bar.next()
    bar.finish()
Exemple #2
0
def train(args, model, training_generator, optimizer, mean_tr, std_tr, epoch,
          history, n_batches):
    model.train()
    timing_dic = {
        'Loading batch': 0.,
        'Transformations and Forward': 0.,
        'Loss Computation and Backprop': 0.
    }
    before = time.time()
    bar = ChargingBar("Training for epoch: {}...".format(epoch), max=n_batches)
    for batch_data in training_generator:
        abs_tfs, masks, wavs_lists, real_tfs, imag_tfs = batch_data
        timing_dic['Loading batch'] += time.time() - before
        before = time.time()
        input_tfs, index_ys = abs_tfs.cuda(), masks.cuda()
        # the input sequence is determined by time and not freqs
        # before: input_tfs = batch_size x (n_fft/2+1) x n_timesteps
        input_tfs = input_tfs.permute(0, 2, 1).contiguous()
        index_ys = index_ys.permute(0, 2, 1).contiguous()

        # normalize with mean and variance from the training dataset
        input_tfs -= mean_tr
        input_tfs /= std_tr

        # index_ys = index_ys.permute(0, 2, 1).contiguous()
        one_hot_ys = converters.one_hot_3Dmasks(index_ys, args.n_sources)

        optimizer.zero_grad()
        vs = model(input_tfs)

        flatened_ys = one_hot_ys.view(one_hot_ys.size(0), -1,
                                      one_hot_ys.size(-1)).cuda()

        timing_dic['Transformations and Forward'] += time.time() - \
                                                     before
        before = time.time()
        loss = affinity_losses.paris_naive(vs, flatened_ys)
        # loss = affinity_losses.diagonal(vs.view(vs.size(0),
        #                                         one_hot_ys.size(1),
        #                                         one_hot_ys.size(2),
        #                                         vs.size(-1)),
        #                                 one_hot_ys.cuda())

        loss.backward()
        nn.utils.clip_grad_norm(model.parameters(), 100.)
        optimizer.step()
        timing_dic['Loss Computation and Backprop'] += time.time() - \
                                                       before

        update_history.values_update([('loss', loss)],
                                     history,
                                     update_mode='batch')
        before = time.time()
        bar.next()
    bar.finish()

    pprint(timing_dic)
Exemple #3
0
def convergence_of_LSTM(args):
    visible_cuda_ids = ','.join(map(str, args.cuda_available_devices))
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_cuda_ids

    (training_generator, mean_tr, std_tr, n_tr_batches) = \
        fast_data_gen.get_data_generator(args,
                                         return_stats=True)

    val_args = copy.copy(args)
    val_args.partition = 'val'
    val_generator, n_val_batches = \
        fast_data_gen.get_data_generator(val_args,
                                         get_top=args.n_eval)

    model = LSTM_enc.BLSTMEncoder(num_layers=args.n_layers,
                                  hidden_size=args.hidden_size,
                                  embedding_depth=args.embedding_depth,
                                  bidirectional=args.bidirectional)
    model = nn.DataParallel(model).cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.999))

    k_means_obj = KMeans(n_clusters=2)
    # just iterate over the data
    history = {}
    for epoch in np.arange(args.epochs):

        train(args, model, training_generator, optimizer, mean_tr,
              std_tr, epoch, history, n_tr_batches)

        update_history.values_update([('loss', None)],
                                     history,
                                     update_mode='epoch')


        if epoch % args.evaluate_per == 0:
            eval(args, model, val_generator, mean_tr,
                 std_tr, epoch, history, n_val_batches, k_means_obj)

            update_history.values_update([('sdr', None),
                                          ('sir', None),
                                          ('sar', None)],
                                         history,
                                         update_mode='epoch')

        pprint(history['loss'][-1])
        pprint(history['sdr'][-1])
        pprint(history['sir'][-1])
        pprint(history['sar'][-1])
        print(
            "BEST SDR: {}, SIR: {}, SAR {}".format(max(history['sdr']),
                                                   max(history['sir']),
                                                   max(history['sar'])))
def eval(model,
         val_generator,
         mean_tr,
         std_tr,
         epoch,
         history,
         n_batches,
         k_means_obj,
         n_sources,
         batch_size):

    model.eval()
    with torch.no_grad():
        bar = ChargingBar("Evaluating for epoch: {}...".format(epoch),
                          max=n_batches*batch_size)
        for batch_data in val_generator:
            abs_tfs, wavs_lists, real_tfs, imag_tfs = batch_data
            if torch.cuda.is_available():
                input_tfs = abs_tfs.cuda()
            else:
                input_tfs = abs_tfs
            # the input sequence is determined by time and not freqs
            # before: input_tfs = batch_size x (n_fft/2+1) x n_timesteps
            input_tfs = input_tfs.permute(0, 2, 1).contiguous()

            # normalize with mean and variance from the training dataset
            input_tfs -= mean_tr
            input_tfs /= std_tr

            vs = model(input_tfs)
            for b in np.arange(vs.size(0)):

                embedding_features = vs[b, :, :].data.cpu().numpy()

                embedding_labels = np.array(k_means_obj.fit_predict(
                                            embedding_features))

                sdr, sir, sar = numpy_eval.naive_cpu_bss_eval(
                    embedding_labels,
                    real_tfs[b].data.numpy(),
                    imag_tfs[b].data.numpy(),
                    wavs_lists[b].data.numpy(),
                    n_sources,
                    batch_index=b)

                update_history.values_update([('sdr', sdr),
                                              ('sir', sir),
                                              ('sar', sar)],
                                             history,
                                             update_mode='batch')

                bar.next()
        bar.finish()
Exemple #5
0
def eval(args, model, val_generator, mean_tr, std_tr, epoch, history,
         n_batches):
    timing_dic = {
        'Loading batch': 0.,
        'Transformations and Forward': 0.,
        'BSS CPU evaluation': 0.,
        'Kmeans evaluation': 0.
    }
    r_kmeans = robust_kmeans.RobustKmeans(n_true_clusters=args.n_sources,
                                          n_used_clusters=args.n_sources)
    z_scaler = StandardScaler()

    # make some evaluation
    model.eval()
    before = time.time()
    with torch.no_grad():
        bar = ChargingBar("Evaluating for epoch: {}...".format(epoch),
                          max=n_batches)
        before = time.time()
        for batch_data in val_generator:
            (abs_tfs, real_tfs, imag_tfs, duet_masks, ground_truth_masks,
             sources_raw, amplitudes, n_sources) = batch_data
            timing_dic['Loading batch'] += time.time() - before
            before = time.time()
            input_tfs, index_ys = abs_tfs.cuda(), duet_masks.cuda()
            # the input sequence is determined by time and not freqs
            # before: input_tfs = batch_size x (n_fft/2+1) x n_timesteps
            input_tfs = input_tfs.permute(0, 2, 1).contiguous()

            # normalize with mean and variance from the training dataset
            input_tfs -= mean_tr
            input_tfs /= std_tr

            vs = model(input_tfs)
            for b in np.arange(vs.size(0)):
                embedding_features = z_scaler.fit_transform(
                    vs[b, :, :].data.cpu().numpy())

                embedding_labels = r_kmeans.fit(embedding_features)

                sdr, sir, sar = numpy_eval.naive_cpu_bss_eval(
                    embedding_labels, real_tfs[b].data.numpy(),
                    imag_tfs[b].data.numpy(), sources_raw[b].data.numpy(),
                    n_sources[0].data.numpy())

                update_history.values_update([('sdr', sdr), ('sir', sir),
                                              ('sar', sar)],
                                             history,
                                             update_mode='batch')

            before = time.time()
            bar.next()
        bar.finish()
Exemple #6
0
def eval(args,
         model,
         val_generator,
         mean_tr,
         std_tr,
         epoch,
         history,
         n_batches,
         k_means_obj):
    timing_dic = {'Standard Scaler': 0.,
                  'Kmeans': 0.,
                  'Dummy BSS evaluation': 0.}

    # make some evaluation
    model.eval()
    before = time.time()
    with torch.no_grad():
        bar = ChargingBar("Evaluating for epoch: {}...".format(epoch),
                          max=n_batches)
        before = time.time()
        for batch_data in val_generator:
            abs_tfs, masks, wavs_lists, real_tfs, imag_tfs = batch_data
            input_tfs = abs_tfs.cuda()
            # the input sequence is determined by time and not freqs
            # before: input_tfs = batch_size x (n_fft/2+1) x n_timesteps
            input_tfs = input_tfs.permute(0, 2, 1).contiguous()

            # normalize with mean and variance from the training dataset
            input_tfs -= mean_tr
            input_tfs /= std_tr

            vs = model(input_tfs)
            for b in np.arange(vs.size(0)):

                # possibly go into GPU ?
                # before = time.time()
                # embedding_features = z_scaler.fit_transform(
                #     vs[b, :, :].data.cpu().numpy())
                # timing_dic['Standard Scaler'] += time.time() - before

                embedding_features = vs[b, :, :].data.cpu().numpy()
                # embedding_features = masks[b, :, :].view(-1, 1).data.numpy()
                # embedding_labels = masks[b].data.numpy()
                # embedding_features = flatened_ys[b, :, :].data.cpu().numpy()



                # possibly perform kmeans on GPU?
                before = time.time()
                embedding_labels = np.array(k_means_obj.fit_predict(
                                            embedding_features))
                timing_dic['Kmeans'] += time.time() - before

                # possibly do it on GPU?
                before = time.time()
                sdr, sir, sar = numpy_eval.naive_cpu_bss_eval(
                    embedding_labels,
                    real_tfs[b].data.numpy(),
                    imag_tfs[b].data.numpy(),
                    wavs_lists[b].data.numpy(),
                    args.n_sources,
                    batch_index=b)
                timing_dic['Dummy BSS evaluation'] += time.time() - before

                update_history.values_update([('sdr', sdr),
                                              ('sir', sir),
                                              ('sar', sar)],
                                             history,
                                             update_mode='batch')

            bar.next()
        pprint(timing_dic)
        bar.finish()
def run_LSTM_experiment(args):
    visible_cuda_ids = ','.join(map(str, args.cuda_available_devices))
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_cuda_ids

    (training_generator, mean_tr, std_tr, n_tr_batches, n_tr_sources) =\
    fast_data_gen.get_data_generator(args.train,
                                     partition='train',
                                     num_workers=args.num_workers,
                                     return_stats=True,
                                     get_top=args.n_train,
                                     batch_size=args.batch_size,
                                     return_n_batches=True,
                                     labels_mask=args.training_labels,
                                     return_n_sources=True)

    val_generator, n_val_batches, n_val_sources = \
    fast_data_gen.get_data_generator(args.val,
                                     partition='val',
                                     num_workers=args.num_workers,
                                     return_stats=False,
                                     get_top=args.n_val,
                                     batch_size=args.batch_size,
                                     return_n_batches=True,
                                     labels_mask=None,
                                     return_n_sources=True)

    model = LSTM_enc.BLSTMEncoder(num_layers=args.n_layers,
                                  hidden_size=args.hidden_size,
                                  embedding_depth=args.embedding_depth,
                                  bidirectional=args.bidirectional,
                                  dropout=args.dropout)
    if torch.cuda.is_available():
        model = nn.DataParallel(model).cuda()
    else:
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.999))

    assert n_val_sources == n_tr_sources, "Number of sources in both " \
                                          "training and evaluation " \
                                          "should be equal while " \
                                          "training"
    k_means_obj = KMeans(n_clusters=n_tr_sources)
    # just iterate over the data
    history = {}
    for epoch in np.arange(args.epochs):

        train(model, training_generator, optimizer, mean_tr,
              std_tr, epoch, history, n_tr_batches, n_tr_sources,
              training_labels=args.training_labels)

        update_history.values_update([('loss', None)],
                                     history,
                                     update_mode='epoch')

        # added the second term so it will save the model on the last epoch
        if (epoch % args.eval_per == 0) or (epoch == (args.epochs - 1)):
            eval(model, val_generator, mean_tr, std_tr, epoch,
                 history, n_val_batches, k_means_obj, n_val_sources,
                 args.batch_size)

            update_history.values_update([('sdr', None),
                                          ('sir', None),
                                          ('sar', None)],
                                         history,
                                         update_mode='epoch')

            # keep track of best performances so far
            epoch_performance_dic = {
                'sdr': history['sdr'][-1],
                'sir': history['sir'][-1],
                'sar': history['sar'][-1]
            }
            update_history.update_best_performance(
                           epoch_performance_dic, epoch, history,
                           buffer_size=args.save_best)


            # save the model if it is one of the best according to SDR
            if (history['sdr'][-1] >=
                history['best_performances'][-1][0]['sdr']):
                dataset_id = os.path.basename(args.train)

                model_logger.save(model,
                                  optimizer,
                                  args,
                                  epoch,
                                  epoch_performance_dic,
                                  dataset_id,
                                  mean_tr,
                                  std_tr,
                                  training_labels=args.training_labels)


        pprint(history['loss'][-1])
        pprint(history['best_performances'])