def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_hike_datalist(
        meta_paths=args.train_meta_data_path,
        data_paths=args.train_data_path,
        mode=model_config['loss'])
    vallist, vallb = toolkits.get_hike_datalist(
        meta_paths=args.val_meta_data_path,
        data_paths=args.val_data_path,
        mode=model_config['loss'])

    input_length = int(args.audio_length * 25)
    num_class = len(score_rule)
    # construct the data generator.
    params = {
        'dim': (513, input_length, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 1024,
        'spec_len': input_length,
        'win_length': 1024,
        'hop_length': 640,
        'n_classes': num_class,
        'sampling_rate': 16000,
        'batch_size': model_config['batch_size'],
        'shuffle': True,
        'normalize': True,
        'loss': model_config['loss'],
        'data_format': args.data_format
    }
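    # Note on the numbers above: dim[0] = 513 is the one-sided STFT bin count
    # (nfft // 2 + 1 for nfft=1024), dim[1] = spec_len is the frame count, and
    # the trailing 1 is the channel axis for 2-D convolutions. input_length is
    # audio_length * 25 because sampling_rate / hop_length = 16000 / 640 = 25
    # frames per second.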

    # Datasets
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Experiment tracking
    wandb.init(project='vgg_speaker')

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    val_gen = generator.DataGenerator(partition['val'], labels['val'],
                                      **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=model_config)
    # # val data
    # val_data = [params['mp_pooler'].apply_async(ut.load_data,
    #                                 args=(ID, params['win_length'], params['sampling_rate'], params['hop_length'],
    #                                       params['nfft'], params['spec_len'], 'train', args.data_format)) for ID in partition['val']]
    # val_data = np.expand_dims(np.array([p.get() for p in val_data]), -1)

    # ==> load pre-trained model (if resuming)
    print(keras.backend.tensorflow_backend._get_available_gpus())

    if args.resume:
        print("Attempting to load", args.resume)
        if os.path.isfile(args.resume):
            network.load_weights(args.resume,
                                 by_name=True,
                                 skip_mismatch=True)
            print('==> successfully loaded model {}.'.format(args.resume))
        else:
            raise ValueError("==> no checkpoint found at '{}'".format(
                args.resume))

    network.summary()  # summary() prints directly; wrapping it in print() would also emit 'None'
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              model_config['loss'], model_config['aggregation_mode'],
              model_config['ohem_level']))

    model_path, log_path = set_path(args, model_config)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    # tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
    #                                           update_freq=model_config['batch_size'] * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(
            os.path.join(model_path, 'weights-{epoch:02d}-{loss:.3f}.h5'),
            monitor='loss',
            mode='min',
            save_best_only=True,
            period=20,
        ), normal_lr,
        WandbCallback()
    ]

    if model_config['ohem_level'] > 1:  # online hard negative mining will be used
        candidate_steps = int(
            len(partition['train']) // model_config['batch_size'])
        iters_per_epoch = int(
            len(partition['train']) //
            (model_config['ohem_level'] * model_config['batch_size']))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, model_config['ohem_level'],
            model_config['batch_size'], params['dim'], params['n_classes'])

        A = ohem_generator.next()  # the generator needs one warm-up call before fitting

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, model_config['ohem_level'],
            model_config['batch_size'], params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=model_config['epochs'],
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        # The 'mse' and classification losses share identical fit settings,
        # so a single call covers both.
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(
                                  len(partition['train']) //
                                  model_config['batch_size']),
                              epochs=model_config['epochs'],
                              max_queue_size=10,
                              validation_data=val_gen,
                              validation_freq=1,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
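step_decay is passed to every LearningRateScheduler in these examples but never
shown. A minimal sketch of a typical staircase schedule; Keras calls it with the
epoch index, and the base rate, drop factor, and interval below are illustrative
assumptions, not the authors' values:

def step_decay(epoch):
    # Hypothetical staircase schedule: halve the learning rate every 10 epochs.
    initial_lr, drop, epochs_per_drop = 1e-3, 0.5, 10
    return initial_lr * (drop ** (epoch // epochs_per_drop))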
Example #2
def train_model(model, data_path, net_path, epochs=None, epoch_break=10, batch_size=32):
    print("Epochs: {}\nEpoch_break={}".format(epochs, epoch_break))
    #Derive the network name from this script's filename, without the extension
    name = os.path.splitext(os.path.basename(__file__))[0]
    
    #Store the results of training (i.e. the loss)
    results = []
    
    #If epochs is None, try to train until the losses of the training set and the test set separate by too much
    if epochs is None:
        raise NotImplementedError('Self-stopping at overfitting is not implemented.')
        #NOTE: the code below the raise is unreachable scaffolding for the planned
        #auto-stop mode; it also references data not loaded in this branch.
        keepRunning = True
        curr_counter = 0
        
        #Keep training for the number of epochs specified in epoch_break
        while keepRunning:
            #Fit data to model
            model.fit(train_data, train_labels, epochs=epoch_break)
            
            curr_counter += epoch_break
            
            #Save after every training-cycle
            model.save(os.path.join(net_path, name + "_epoch_" + str(curr_counter) + ".hf5"))
            
            #Evaluate the net and store the values
            results.append([curr_counter, model.evaluate(train_data, train_labels), model.evaluate(test_data, test_labels)])
            
            #Train at least 5 times, after that keep training only if no overfitting happens
            if len(results) >= 5:
                start_index = int(curr_counter / epoch_break) - 1
                train_loss = [dat[1][0] for dat in results[start_index:start_index+5]]
                test_loss = [dat[2][0] for dat in results[start_index:start_index+5]]
                keepRunning = not evaluate_overfitting(train_loss, test_loss)
                
    else:
        #Check whether epochs is a simple multiple of epoch_break, i.e. training runs for an integer number of cycles
        if epochs % epoch_break == 0:
            ran = int(epochs / epoch_break)
        #If not, add one extra cycle and train only for the remaining epochs in that last cycle.
        else:
            ran = int(epochs / epoch_break) + 1
        
        #Count how many epochs have passed
        curr_counter = 0
        
        (train_data, train_labels), (test_data, test_labels) = get_formatted_data(data_path)
        
        training_generator = g.DataGenerator(train_data, train_labels, batch_size=batch_size)
        testing_generator = g.DataGenerator(test_data, test_labels, batch_size=batch_size)
        
        for i in range(ran):
            print("ran: {}\ni: {}".format(ran, i))
            #If epochs were not an integer multiple of epoch_break, the last training cycle has to be smaller
            if i == int(epochs / epoch_break):
                epoch_break = epochs - (ran - 1) * epoch_break
                #The remainder epochs - (ran - 1) * epoch_break is always in
                #(0, epoch_break) given how ran is computed, so it is safe to use directly.
            
            #Fit data to model
            model.fit_generator(generator=training_generator, epochs=epoch_break)
            
            #Iterate counter
            curr_counter += epoch_break
            print(curr_counter)
            
            #Store model after each training-cycle
            model.save(os.path.join(net_path, name + "_epoch_" + str(curr_counter) + ".hf5"))
            print("Stored net")
            
            #Evaluate the performance of the net after every cycle and store it.
            results.append([curr_counter, model.evaluate_generator(generator=training_generator), model.evaluate_generator(generator=testing_generator)])
            #print("Results: {}".format(results))
    
    #Save the results to a file.
    with open(os.path.join(net_path, name + '_results.json'), "w+") as FILE:
        json.dump(results, FILE, indent=4)
    
    return model
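evaluate_overfitting, referenced in the unreachable branch above, is not defined
in this file. A hedged sketch of one plausible criterion matching the call
signature evaluate_overfitting(train_loss, test_loss); the gap-based test and
the threshold are assumptions:

def evaluate_overfitting(train_loss, test_loss, threshold=0.1):
    # Flag overfitting when the test/train loss gap has widened by more than
    # `threshold` across the window of recent cycles passed in by the caller.
    gaps = [te - tr for tr, te in zip(train_loss, test_loss)]
    return gaps[-1] - gaps[0] > threshold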
Example #3
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(args, path='../meta/voxlb2_val.txt')

    # construct the data generator.
    params = {'dim': (257, 250, 1),
              'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
              'nfft': 512,
              'spec_len': 250,
              'win_length': 400,
              'hop_length': 160,
              'n_classes': 5994,
              'sampling_rate': 16000,
              'batch_size': args.batch_size,
              'shuffle': True,
              'normalize': True,
              }

    # Datasets
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'], **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train', args=args)
    # ==> load pre-trained model (if resuming)
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())

    if args.resume:
        print("Attempting to load", args.resume)
        if os.path.isfile(args.resume):
            if mgpu == 1:
                # by_name=True, skip_mismatch=True
                # https://github.com/WeidiXie/VGG-Speaker-Recognition/issues/46
                network.load_weights(args.resume, by_name=True, skip_mismatch=True)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loaded model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(args.gpu, len(partition['train']), np.max(labels['train']),
                                                             args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [keras.callbacks.ModelCheckpoint(os.path.join(model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                                 monitor='loss',
                                                 mode='min',
                                                 save_best_only=True),
                 normal_lr, tbcallbacks]

    if args.ohem_level > 1:     # online hard negative mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(len(partition['train']) // (args.ohem_level*args.batch_size))

        ohem_generator = generator.OHEM_generator(network,
                                                  trn_gen,
                                                  candidate_steps,
                                                  args.ohem_level,
                                                  args.batch_size,
                                                  params['dim'],
                                                  params['n_classes']
                                                  )

        A = ohem_generator.next()   # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(network, trn_gen, iters_per_epoch,
                                                       args.ohem_level, args.batch_size,
                                                       params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(len(partition['train'])//args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
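set_path is called in every example (with slightly different signatures) but is
not defined in these listings. A minimal sketch under the single-argument form
used above; the directory layout and naming scheme are assumptions:

def set_path(args):
    # Hypothetical layout: group checkpoints and logs by loss/aggregation mode.
    exp_name = '{}_{}'.format(args.loss, args.aggregation_mode)
    model_path = os.path.join('../model', exp_name)
    log_path = os.path.join('../log', exp_name)
    for path in (model_path, log_path):
        os.makedirs(path, exist_ok=True)
    return model_path, log_path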
Example #4
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator

    # ==================================
    #       Get Train/Val.
    # ==================================
    feats_path = os.path.join(args.kaldi_data_dir, 'feats.scp')
    utt2spk_path = os.path.join(args.kaldi_data_dir, 'utt2spk')
    assert os.path.exists(feats_path), 'Path `{}` does not exist.'.format(feats_path)
    assert os.path.exists(utt2spk_path), 'Path `{}` does not exist.'.format(utt2spk_path)

    utt2ark = {}
    with open(feats_path) as f:
        for line in f:
            key, ark = line.split()
            if args.use_clean_only:
                if not is_clean(key):
                    continue
            ark, position = ark.split(':')
            utt2ark[key] = (key, ark, int(position))

    label2count, utt2label, label2int, label2utts = {}, {}, {}, {}
    with open(utt2spk_path) as f:
        for line in f:
            utt, label = line.split()
            if args.use_clean_only:
                if not is_clean(utt):
                    continue
            if label not in label2int:
                label2int[label] = len(label2int)
            label = label2int[label]
            utt2label[utt] = label
            if label not in label2count:
                label2count[label] = 0
            label2count[label] += 1
            if label not in label2utts:
                label2utts[label] = []
            label2utts[label].append(utt2ark[utt])

    # balancing classes
    trnlist, vallist, trnlb, vallb = [], [], [], []
    max_utts = max(label2count.values())
    for label in label2utts:
        # print('Balancing', label)
        validation_thr = label2count[label] * args.validation_ratio
        random.shuffle(label2utts[label])
        utts_array = np.array(label2utts[label])
        # Oversample with replacement so every class contributes max_utts items;
        # np.random.randint's `high` bound is exclusive, so pass the full count.
        random_indexes = np.random.randint(low=0, high=label2count[label], size=max_utts)
        trn_indexes = random_indexes[random_indexes > validation_thr]
        val_indexes = random_indexes[random_indexes <= validation_thr]
        trnlist.extend([(x[0], x[1], int(x[2])) for x in utts_array[trn_indexes]])
        trnlb.extend([label] * len(trn_indexes))  # one label per selected utterance
        vallist.extend([(x[0], x[1], int(x[2])) for x in utts_array[val_indexes]])
        vallb.extend([label] * len(val_indexes))

    # trnlb = keras.utils.to_categorical(trnlb)
    # vallb = keras.utils.to_categorical(vallb)

    # construct the data generator.
    params = {
        'dim': (args.num_dim, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=4 * len(args.gpu.split(',')) + 1),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': len(label2count),
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
        'use_clean_only': args.use_clean_only
    }

    # Datasets
    partition = {'train': trnlist, 'val': vallist}
    labels = {'train': np.array(trnlb), 'val': np.array(vallb)}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'], **params)
    val_gen = generator.DataGenerator(partition['val'], labels['val'], **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train', args=args)

    # ==> load pre-trained model (if resuming)
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loaded model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()
    print('==> gpu: {}, training {} features, validating {} features, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(args.gpu, len(partition['train']),
                                                             len(partition['val']), np.max(labels['train']),
                                                             args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [keras.callbacks.ModelCheckpoint(os.path.join(model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                                 monitor='loss',
                                                 mode='min',
                                                 save_best_only=True),
                 normal_lr, tbcallbacks]

    if args.ohem_level > 1:     # online hard negative mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(len(partition['train']) // (args.ohem_level*args.batch_size))

        ohem_generator = generator.OHEM_generator(network,
                                                  trn_gen,
                                                  candidate_steps,
                                                  args.ohem_level,
                                                  args.batch_size,
                                                  params['dim'],
                                                  params['n_classes']
                                                  )

        A = ohem_generator.next()   # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(network, trn_gen, iters_per_epoch,
                                                       args.ohem_level, args.batch_size,
                                                       params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1,
                              validation_data=val_gen,
                              validation_steps=int(len(vallist) // args.batch_size))

    else:
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(len(partition['train'])//args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1,
                              validation_data=val_gen,
                              validation_steps=int(len(vallist) // args.batch_size))
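is_clean, used above to honor --use_clean_only, is also undefined here. A
plausible sketch that keeps only non-augmented utterances; the suffix list
follows common Kaldi augmentation recipes and is an assumption:

def is_clean(utt_key):
    # Kaldi augmentation recipes typically tag augmented copies with suffixes
    # such as '-noise', '-music', '-babble', or '-reverb'.
    return not utt_key.endswith(('-noise', '-music', '-babble', '-reverb'))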
Example #5
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_val.txt')

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    # Datasets
    #numpy's ndarray.flatten() returns a 1-D copy, collapsing each datalist into a
    #flat vector of entries (not the Keras Flatten layer, which reshapes n*c*h*w tensors)
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    #make data
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    # create the model according to args
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)

    # ==> load pre-trained model (if resuming)
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loaded model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    # tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
    #   update_freq=args.batch_size * 16)
    '''
    Keras callbacks return information from a training algorithm while training
    is taking place. A callback is a set of functions applied at given stages of
    the training procedure; callbacks give a view of the model's internal states
    and statistics during training.
    '''
    # `callbacks` must stay defined: the OHEM branch below passes it to
    # fit_generator. TensorBoard stays disabled along with tbcallbacks above.
    callbacks = [keras.callbacks.ModelCheckpoint(os.path.join(model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                                 monitor='loss',
                                                 mode='min',
                                                 save_best_only=True),
                 normal_lr]

    if args.ohem_level > 1:  # online hard negative mining will be used

        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(
            len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes'])

        A = ohem_generator.next(
        )  # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        network.fit_generator(
            trn_gen,
            # Debug setting: a full run would use
            # int(len(partition['train']) // args.batch_size) steps per epoch.
            steps_per_epoch=2,
            epochs=args.epochs,
            max_queue_size=2,
            callbacks=callbacks,
            use_multiprocessing=True,
            workers=1,
            verbose=1)
Example #6
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    # config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    # _ = tf.Session(config=config)

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': utils.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': args.n_classes,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    if args.train_data is None and args.val_data is None:
        # Datasets
        trnlist, trnlb = utils.get_voxceleb2_datalist(path=args.train_list)
        vallist, vallb = utils.get_voxceleb2_datalist(path=args.val_list)
        partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
        labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}
        # Generators
        trn_gen = generator.DataGenerator(partition['train'],
                                          labels=labels['train'],
                                          **params)
        val_gen = generator.DataGenerator(partition['val'],
                                          labels=labels['val'],
                                          **params)
        image_len = len(partition['train'])
    else:
        trainAudioData = AudioData(args.train_data)
        testAudioData = AudioData(args.val_data)
        trn_gen = generator.DataGenerator(list(trainAudioData.get_keys()),
                                          audioData=trainAudioData,
                                          **params)
        val_gen = generator.DataGenerator(list(testAudioData.get_keys()),
                                          audioData=testAudioData,
                                          **params)
        image_len = len(list(trainAudioData.get_keys()))

    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)

    # ==> load pre-trained model
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    initial_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            # Recover the epoch from the checkpoint name 'weights-{epoch:02d}-...'
            # so training resumes where it left off.
            initial_epoch = int(os.path.basename(args.resume).split('-')[1])
            print('==> successfully loaded model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()
    print(
        '==> gpu: {}, training {} images, classes: 0-{}, loss: {}, aggregation: {}'
        .format(args.gpu, image_len, args.n_classes, args.loss,
                args.aggregation_mode))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path,
                                              histogram_freq=0,
                                              write_graph=True,
                                              write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(os.path.join(
            model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                        monitor='loss',
                                        mode='min',
                                        save_best_only=True), normal_lr,
        tbcallbacks
    ]

    network.fit_generator(generator=trn_gen,
                          steps_per_epoch=int(image_len // args.batch_size),
                          epochs=args.epochs,
                          initial_epoch=initial_epoch,
                          max_queue_size=10,
                          callbacks=callbacks,
                          use_multiprocessing=True,
                          validation_data=val_gen,
                          workers=4,
                          verbose=1)
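A quick sanity check of the initial_epoch parsing above: the checkpoint pattern
'weights-{epoch:02d}-{acc:.3f}.h5' splits on '-' into three fields, so field 1
recovers the epoch number (the path below is illustrative):

import os
resume = '/some/dir/weights-07-0.932.h5'
assert int(os.path.basename(resume).split('-')[1]) == 7  # resumes at epoch 7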