Example #1
def main():
    in_arg = get_input_args()  # Creates and returns command line arguments

    print('\nPath To Image:\n', in_arg.path_to_image, '\n', '\nCheckpoint:\n',
          in_arg.checkpoint, '\n')

    print('Optional Command Line Arguments:\n', 'Top K [--top_k]: ',
          in_arg.top_k, '\n', 'Category Names [--category_names]: ',
          in_arg.category_names, '\n', 'GPU [--gpu]: ', in_arg.gpu, '\n')

    label_count, hidden_units, arch, class_to_idx, classifier_state_dict, epochs = mod.load_checkpoint(
        in_arg.checkpoint, in_arg.gpu)  # Load checkpoint

    model = mod.build_model(label_count, hidden_units, arch,
                            class_to_idx)  # Build model

    model.classifier.load_state_dict(classifier_state_dict)
    criterion = nn.NLLLoss()

    image = util.process_image(in_arg.path_to_image)  # Pre-process image

    labels = util.get_labels(
        in_arg.category_names)  # Get dict of categories mapped to real names

    mod.predict(image, model, labels, in_arg.top_k,
                in_arg.gpu)  # Prints Top K Labels and Probabilities
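
`get_input_args` is not shown; judging by the attributes read above, it presumably wraps argparse. A minimal sketch, assuming these flag names and defaults (the actual defaults are not in the source):

import argparse

def get_input_args():
    # Hypothetical reconstruction of the CLI consumed by main() above
    parser = argparse.ArgumentParser(description='Predict image classes from a checkpoint')
    parser.add_argument('path_to_image', type=str, help='Path to the input image')
    parser.add_argument('checkpoint', type=str, help='Path to the saved checkpoint')
    parser.add_argument('--top_k', type=int, default=5, help='Number of top classes to report')
    parser.add_argument('--category_names', type=str, default=None, help='JSON file mapping categories to names')
    parser.add_argument('--gpu', action='store_true', help='Run inference on the GPU')
    return parser.parse_args()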
Example #2
def main(_):

    if not os.path.exists(FLAGS.checkpoint_dir):
        print("Houston tengo un problem: No checkPoint directory found")
        return 0
    if not os.path.exists(FLAGS.feature_dir):
        os.makedirs(FLAGS.feature_dir)
    if not os.path.exists(FLAGS.sample_dir):
        os.makedirs(FLAGS.sample_dir)

    #with tf.device("/gpu:0"):
    with tf.device("/cpu:0"):

        x = tf.placeholder(tf.float32, [
            FLAGS.batch_size, FLAGS.output_size, FLAGS.output_size, FLAGS.c_dim
        ],
                           name='d_input_images')

        d_netx, Dx, Dfx = network.discriminator(x,
                                                is_train=FLAGS.is_train,
                                                reuse=False)

        saver = tf.train.Saver()

    with tf.Session() as sess:
        print("starting session")
        sess.run(tf.global_variables_initializer())

        model_dir = "%s_%s_%s" % (FLAGS.train_dataset, 64, FLAGS.output_size)
        save_dir = os.path.join(FLAGS.checkpoint_dir, model_dir)
        labels = utilities.get_labels(FLAGS.num_labels, FLAGS.labels_file)

        saver.restore(sess, save_dir)
        print("Model restored from file: %s" % save_dir)

        #extracting features from train dataset
        extract_features(x, labels, sess, Dfx)

        #extracting features from test dataset
        extract_features(x, labels, sess, Dfx, training=False)

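(The `with tf.Session()` block already closes the session on exit, so no explicit `sess.close()` is needed.)

`extract_features` is also not shown. Given the graph above, it presumably runs image batches through the discriminator's feature tensor `Dfx`. A rough sketch under that assumption; `load_batches` is a hypothetical stand-in for whatever batch loader the real code uses:

import numpy as np

def extract_features(x, labels, sess, Dfx, training=True):
    # Hypothetical: run each batch through Dfx and stack the features.
    # load_batches is an assumed helper, not part of the source.
    features = []
    for batch in load_batches(training=training):
        features.append(sess.run(Dfx, feed_dict={x: batch}))
    return np.concatenate(features, axis=0)  # labels would drive per-class saving in the real code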
Example #3
    def __init__(self, model, data_generator, taxonomy_level, cuda=True, 
        verbose=False):
        '''Evaluator to evaluate prediction performance. 
        
        Args: 
          model: object
          data_generator: object
          taxonomy_level: 'fine' | 'coarse'
          cuda: bool
          verbose: bool
        '''

        self.model = model
        self.data_generator = data_generator
        self.taxonomy_level = taxonomy_level
        self.cuda = cuda
        self.verbose = verbose
        
        self.frames_per_second = config.frames_per_second
        self.labels = get_labels(taxonomy_level)
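
For context, a minimal usage sketch matching how the training examples below construct and call this class (the argument values are illustrative):

evaluator = Evaluator(model=model,
                      data_generator=data_generator,
                      taxonomy_level='coarse',
                      cuda=torch.cuda.is_available(),
                      verbose=True)
train_statistics = evaluator.evaluate(data_type='train', max_iteration=None)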
Example #4
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'none', where '1' indicates using validation and
          'none' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    seq_len = 640
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)  # Resolve the model class from its name (the class must be imported in this module)
    model = Model(classes_num, seq_len, mel_bins, cuda)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)
    print('cliqueNet parameters:',
          sum(param.numel() for param in model.parameters()))
    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        # Train
        model.train()
        batch_output = model(batch_data_dict['feature'])

        # loss
        batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Stop learning
        if iteration == 3000:
            break

        iteration += 1
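
A side note on the schedule above: the learning rate is multiplied by 0.9 every 200 iterations, so after n iterations the effective rate is 1e-3 * 0.9**(n // 200). A quick check of the decay over this 3000-iteration run:

for n in (0, 200, 1000, 3000):
    print(n, 1e-3 * 0.9 ** (n // 200))
# 0 -> 0.001 | 200 -> 0.0009 | 1000 -> ~0.00059 | 3000 -> ~0.00021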
Example #5
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'none', where '1' indicates using validation and
          'none' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    plt_x = []
    plt_y = []
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    best_map = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best2.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 1000 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)

        feature, spacetime, targets_a, targets_b, lam = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['{}_target'.format(taxonomy_level)],
            alpha=1.0,
            use_cuda=cuda)  # honor the --cuda flag rather than hard-coding GPU use

        # Train
        model.train()
        criterion = nn.BCELoss().cuda() if cuda else nn.BCELoss()
        batch_output = model(feature, spacetime)

        # loss
        #batch_target = batch_data_dict['{}_target'.format(taxonomy_level)]
        loss = mixup_criterion(criterion, batch_output, targets_a, targets_b,
                               lam)
        #loss = binary_cross_entropy(batch_output, batch_target)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if iteration % 100 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())
        if iteration % 10000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('training loss', fontsize=18)
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                '/home/fangjunyan/count/' +
                time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())) +
                '{}'.format(holdout_fold) + '{}.jpg'.format(taxonomy_level))
        # Stop learning
        if iteration == 10000:
            break

        iteration += 1
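
`mixup_data` and `mixup_criterion` are referenced but not defined in this example; Example #9 below defines matching inline helpers. For reference, the criterion combination used here reduces to the following (same form as in Example #9):

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # Convex combination of the loss against both target pairings
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)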
Example #6
def inference_evaluation(args):
    '''Inference on evaluation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: 'none', which means using model trained on all development data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    evaluate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'evaluate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'best2.pth')

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'best2_submission.csv')
    create_folder(os.path.dirname(submission_path))

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = TestDataGenerator(hdf5_path=evaluate_hdf5_path,
                                       scalar=scalar,
                                       batch_size=batch_size)

    # Forward
    output_dict = forward(model=model,
                          generate_func=data_generator.generate(),
                          cuda=cuda,
                          return_target=False)

    # Write submission
    write_submission_csv(audio_names=output_dict['audio_name'],
                         outputs=output_dict['output'],
                         taxonomy_level=taxonomy_level,
                         submission_path=submission_path)
Example #7
def inference_validation(args):
    '''Inference and calculate metrics on validation data. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      iteration: int
      holdout_fold: '1', which means using validation data
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
      visualize: bool
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    iteration = args.iteration
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    visualize = args.visualize
    filename = args.filename

    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoint_path = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        '{}_iterations.pth'.format(iteration))

    submission_path = os.path.join(
        workspace, 'submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, 'submission.csv')
    create_folder(os.path.dirname(submission_path))

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Load model
    Model = eval(model_type)
    model = Model(classes_num)
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    if cuda:
        model.cuda()

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=True)

    # Evaluate on validation data
    evaluator.evaluate(data_type='validate',
                       submission_path=submission_path,
                       annotation_path=annotation_path,
                       yaml_path=yaml_path,
                       max_iteration=None)

    # Visualize
    if visualize:
        evaluator.visualize(data_type='validate')
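
For reference, the `args` attributes read by `inference_validation` (and its siblings above) imply a CLI roughly like the following sketch; the subcommand layout and defaults are assumptions, and `args.filename`/`args.mode` are presumably attached to `args` after parsing:

import argparse

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest='mode')
parser_iv = subparsers.add_parser('inference_validation')
parser_iv.add_argument('--dataset_dir', type=str, required=True)
parser_iv.add_argument('--workspace', type=str, required=True)
parser_iv.add_argument('--taxonomy_level', type=str, choices=['fine', 'coarse'], required=True)
parser_iv.add_argument('--model_type', type=str, required=True)
parser_iv.add_argument('--iteration', type=int, required=True)
parser_iv.add_argument('--holdout_fold', type=str, default='1')
parser_iv.add_argument('--batch_size', type=int, default=32)
parser_iv.add_argument('--cuda', action='store_true')
parser_iv.add_argument('--mini_data', action='store_true')
parser_iv.add_argument('--visualize', action='store_true')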
Example #8
    def progress():
        log.debug("Starting progress")
        starttime = time.process_time()
        labels = get_labels(data)
        log.info("So far seen {} examples".format(metadata.nseen))
        store.append(
            "nseen",
            pd.Series(data=[metadata.nseen], index=[metadata.nprogress]))
        metadata.nprogress += 1
        assignments_window, accuracy_window = get_windows(
            metadata.nseen, progress_assignments_window,
            progress_accuracy_window)
        for name in population_names + input_population_names:
            log.debug(f"Progress for population {name}")
            subpop_e = name + "e"
            csc = store.select(f"cumulative_spike_counts/{subpop_e}")
            spikecounts_present = spike_counts_from_cumulative(
                csc,
                n_data,
                metadata.nseen,
                n_neurons[subpop_e],
                start=-accuracy_window)
            n_spikes_present = spikecounts_present["count"].sum()
            if n_spikes_present > 0:
                spikerates = (
                    spikecounts_present.groupby("i")["count"].mean().astype(
                        np.float32))
                # this reindex no longer necessary?
                spikerates = spikerates.reindex(np.arange(n_neurons[subpop_e]),
                                                fill_value=0)
                spikerates = add_nseen_index(spikerates, metadata.nseen)
                store.append(f"rates/{subpop_e}", spikerates)
                store.flush()
                fn = os.path.join(config.output_path,
                                  "spikerates-summary-{}.pdf".format(subpop_e))
                plot_rates_summary(store.select(f"rates/{subpop_e}"),
                                   filename=fn,
                                   label=subpop_e)
            if name in population_names:
                if not test_mode:
                    spikecounts_past = spike_counts_from_cumulative(
                        csc,
                        n_data,
                        metadata.nseen,
                        n_neurons[subpop_e],
                        end=-accuracy_window,
                        atmost=assignments_window,
                    )
                    n_spikes_past = spikecounts_past["count"].sum()
                    log.debug(
                        "Assignments based on {} spikes".format(n_spikes_past))
                    if name == "O":
                        assignments = pd.DataFrame({
                            "label":
                            np.arange(n_neurons[subpop_e], dtype=np.int32)
                        })
                    else:
                        assignments = get_assignments(spikecounts_past, labels)
                    assignments = add_nseen_index(assignments, metadata.nseen)
                    store.append(f"assignments/{subpop_e}", assignments)
                else:
                    assignments = store.select(f"assignments/{subpop_e}")
                if n_spikes_present == 0:
                    log.debug(
                        "No spikes in present interval - skipping accuracy estimate"
                    )
                else:
                    log.debug(
                        "Accuracy based on {} spikes".format(n_spikes_present))
                    predictions = get_predictions(spikecounts_present,
                                                  assignments, labels)
                    accuracy = get_accuracy(predictions, metadata.nseen)
                    store.append(f"accuracy/{subpop_e}", accuracy)
                    store.flush()
                    accuracy_msg = (
                        "Accuracy [{}]: {:.1f}%  ({:.1f}–{:.1f}% 1σ conf. int.)\n"
                        "{:.1f}% of examples have no prediction\n"
                        "Accuracy excluding non-predictions: "
                        "{:.1f}%  ({:.1f}–{:.1f}% 1σ conf. int.)")

                    log.info(
                        accuracy_msg.format(subpop_e, *accuracy.values.flat))
                    fn = os.path.join(config.output_path,
                                      "accuracy-{}.pdf".format(subpop_e))
                    plot_accuracy(store.select(f"accuracy/{subpop_e}"),
                                  filename=fn)
                    fn = os.path.join(config.output_path,
                                      "spikerates-{}.pdf".format(subpop_e))
                    plot_quantity(
                        spikerates,
                        filename=fn,
                        label=f"spike rate {subpop_e}",
                        nseen=metadata.nseen,
                    )
                theta = theta_to_pandas(subpop_e, neuron_groups,
                                        metadata.nseen)
                store.append(f"theta/{subpop_e}", theta)
                fn = os.path.join(config.output_path,
                                  "theta-{}.pdf".format(subpop_e))
                plot_quantity(
                    theta,
                    filename=fn,
                    label=f"theta {subpop_e} (mV)",
                    nseen=metadata.nseen,
                )
                fn = os.path.join(config.output_path,
                                  "theta-summary-{}.pdf".format(subpop_e))
                plot_theta_summary(store.select(f"theta/{subpop_e}"),
                                   filename=fn,
                                   label=subpop_e)
        if not test_mode or metadata.nseen == 0:
            for conn in config.save_conns:
                log.info(f"Saving connection {conn}")
                conn_df = connections_to_pandas(connections[conn],
                                                metadata.nseen)
                store.append(f"connections/{conn}", conn_df)
            for conn in config.plot_conns:
                log.info(f"Plotting connection {conn}")
                subpop = conn[-2:]
                if "O" in conn:
                    assignments = None
                else:
                    try:
                        assignments = store.select(
                            f"assignments/{subpop}",
                            where="nseen == metadata.nseen")
                        assignments = assignments.reset_index("nseen",
                                                              drop=True)
                    except KeyError:
                        assignments = None
                fn = os.path.join(config.output_path,
                                  "weights-{}.pdf".format(conn))
                plot_weights(
                    connections[conn],
                    assignments,
                    theta=None,
                    filename=fn,
                    max_weight=None,
                    nseen=metadata.nseen,
                    output=("O" in conn),
                    feedback=("O" in conn[:2]),
                    label=conn,
                )
            if monitoring:
                for km, vm in spike_monitors.items():
                    states = vm.get_states()
                    with open(
                            os.path.join(config.output_path,
                                         f"saved-spikemonitor-{km}.pickle"),
                            "wb",
                    ) as f:
                        pickle.dump(states, f)

                for km, vm in state_monitors.items():
                    states = vm.get_states()
                    with open(
                            os.path.join(config.output_path,
                                         f"saved-statemonitor-{km}.pickle"),
                            "wb",
                    ) as f:
                        pickle.dump(states, f)

        log.debug("progress took {:.3f} seconds".format(time.process_time() -
                                                        starttime))
Example #9
def train(args):
    '''Training. Model will be saved after several iterations. 
    
    Args: 
      dataset_dir: string, directory of dataset
      workspace: string, directory of workspace
      taxonomy_level: 'fine' | 'coarse'
      model_type: string, e.g. 'Cnn_9layers_MaxPooling'
      holdout_fold: '1' | 'none', where '1' indicates using validation and
          'none' indicates using full data for training
      batch_size: int
      cuda: bool
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arugments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    taxonomy_level = args.taxonomy_level
    model_type = args.model_type
    holdout_fold = args.holdout_fold
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    plt_x = []
    plt_y = []
    T_max = 300
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    max_iteration = 10  # Number of mini-batches to evaluate on training data
    reduce_lr = True

    labels = get_labels(taxonomy_level)
    classes_num = len(labels)

    def mixup_data(x1, x2, y, alpha=1.0, use_cuda=True):  # Mixup data augmentation (see the referenced blog post)
        '''Returns mixed inputs, pairs of targets, and lambda'''
        if alpha > 0:
            lam = np.random.beta(alpha, alpha)  # Sample the mixing coefficient from Beta(alpha, alpha)
        else:
            lam = 1

        batch_size = x1.size()[0]
        if use_cuda:
            index = torch.randperm(
                batch_size).cuda()  # Random permutation of 0..batch_size-1, on the GPU
        else:
            index = torch.randperm(batch_size)  # CPU fallback

        mixed_x1 = lam * x1 + (1 - lam) * x1[index, :]
        mixed_x2 = lam * x2 + (1 - lam) * x2[index, :]  # Mix both input streams with the same pairing
        y_a, y_b = y, y[index]
        return mixed_x1, mixed_x2, y_a, y_b, lam

    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    # Paths
    if mini_data:
        prefix = 'minidata_'
    else:
        prefix = ''

    train_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    validate_hdf5_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'validate.h5')

    scalar_path = os.path.join(
        workspace, 'scalars',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins), 'train.h5')

    checkpoints_dir = os.path.join(
        workspace, 'checkpoints', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(checkpoints_dir)

    _temp_submission_path = os.path.join(
        workspace, '_temp_submissions', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type, '_submission.csv')
    create_folder(os.path.dirname(_temp_submission_path))

    validate_statistics_path = os.path.join(
        workspace, 'statistics', filename,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type,
        'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))
    loss_path = os.path.join(
        workspace, 'loss',
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)
    create_folder(loss_path)

    annotation_path = os.path.join(dataset_dir, 'annotations.csv')

    yaml_path = os.path.join(dataset_dir, 'dcase-ust-taxonomy.yaml')

    logs_dir = os.path.join(
        workspace, 'logs', filename, args.mode,
        '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second,
                                             mel_bins),
        'taxonomy_level={}'.format(taxonomy_level),
        'holdout_fold={}'.format(holdout_fold), model_type)

    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Load scalar
    scalar = load_scalar(scalar_path)

    # Model
    Model = eval(model_type)
    model = Model(classes_num)
    logging.info(
        'Space_Duo_Cnn_9_Avg: one extra 258*258 layer, separate FC heads; '
        'requires spacetime labels; loss monitoring on; zero-stripped one-hot targets')

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-08,
                           weight_decay=0.,
                           amsgrad=True)

    logging.info('model parameters: {}'.format(
        sum(param.numel() for param in model.parameters())))
    # Count of trainable model parameters

    # Data generator
    data_generator = DataGenerator(train_hdf5_path=train_hdf5_path,
                                   validate_hdf5_path=validate_hdf5_path,
                                   holdout_fold=holdout_fold,
                                   scalar=scalar,
                                   batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(model=model,
                          data_generator=data_generator,
                          taxonomy_level=taxonomy_level,
                          cuda=cuda,
                          verbose=False)

    # Statistics
    validate_statistics_container = StatisticsContainer(
        validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0
    best_inde = {}
    best_inde['micro_auprc'] = np.array([0.0])
    best_inde['micro_f1'] = np.array([0.0])
    best_inde['macro_auprc'] = np.array([0.0])
    best_inde['average_precision'] = np.array([0.0])
    best_inde['sum'] = best_inde['micro_auprc'] + best_inde[
        'micro_f1'] + best_inde['macro_auprc']
    last_loss1 = []
    last_loss2 = []
    last_loss = []
    best_map = 0
    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 200 == 0:
            logging.info('------------------------------------')
            logging.info('Iteration: {}, {} level statistics:'.format(
                iteration, taxonomy_level))

            train_fin_time = time.time()

            # Evaluate on training data
            if mini_data:
                raise Exception('`mini_data` flag must be set to False to use '
                                'the official evaluation tool!')

            train_statistics = evaluator.evaluate(data_type='train',
                                                  max_iteration=None)
            if iteration > 5000:
                if best_map < np.mean(train_statistics['average_precision']):
                    best_map = np.mean(train_statistics['average_precision'])
                    logging.info('best_map= {}'.format(best_map))
                    # logging.info('iter= {}'.format(iteration))
                    checkpoint = {
                        'iteration': iteration,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'indicators': train_statistics
                    }
                    checkpoint_path = os.path.join(checkpoints_dir,
                                                   'best7.pth')
                    torch.save(checkpoint, checkpoint_path)
                    logging.info(
                        'best_models saved to {}'.format(checkpoint_path))

            # Evaluate on validation data
            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(
                    data_type='validate',
                    submission_path=_temp_submission_path,
                    annotation_path=annotation_path,
                    yaml_path=yaml_path,
                    max_iteration=None)

                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info('Train time: {:.3f} s, validate time: {:.3f} s'
                         ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Reduce learning rate
        if reduce_lr and iteration % 200 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9
        batch_data2_dict = batch_data_dict.copy()
        n = []  # indices of examples whose targets end up all-zero after thresholding

        # Binarize the 8 coarse targets at 0.6; flag rows where every label is zero
        for i, l in enumerate(batch_data2_dict['coarse_target']):
            k = 0
            for j in range(0, 8):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
                if k == 8:
                    if taxonomy_level == 'coarse':
                        n.append(i)

        # Binarize the 29 fine targets at 0.6; flag rows where every label is zero
        for i, l in enumerate(batch_data2_dict['fine_target']):
            k = 0
            for j in range(0, 29):
                if l[j] > 0.6:
                    l[j] = 1
                else:
                    l[j] = 0
                    k += 1
                if k == 29:
                    if taxonomy_level == 'fine':
                        n.append(i)

        batch_data2_dict['fine_target'] = np.delete(
            batch_data2_dict['fine_target'], n, axis=0)
        batch_data2_dict['coarse_target'] = np.delete(
            batch_data2_dict['coarse_target'], n, axis=0)
        batch_data2_dict['audio_name'] = np.delete(
            batch_data2_dict['audio_name'], n, axis=0)
        batch_data2_dict['feature'] = np.delete(batch_data2_dict['feature'],
                                                n,
                                                axis=0)
        batch_data2_dict['spacetime'] = np.delete(
            batch_data2_dict['spacetime'], n, axis=0)
        if batch_data2_dict['audio_name'].size == 0:
            iteration += 1
            continue
        # To train on the raw probability targets, comment out the next line;
        # keep it to use the zero-stripped one-hot targets.
        batch_data_dict = batch_data2_dict

        # if iteration <8655:
        #      batch_data_dict = batch_data2_dict
        # elif iteration >=8655 and  iteration % 2 == 0:
        #     batch_data_dict = batch_data2_dict

        # Move data to GPU (optionally also 'external_target', 'external_feature')
        for key in batch_data_dict.keys():
            if key in ['feature', 'fine_target', 'coarse_target', 'spacetime']:
                batch_data_dict[key] = move_data_to_gpu(
                    batch_data_dict[key], cuda)
        # Train
        model.train()
        # Mixup data augmentation
        feature1, spacetime1, targets1_a, targets1_b, lam1 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['fine_target'],
            alpha=1.0,
            use_cuda=True)
        feature2, spacetime2, targets2_a, targets2_b, lam2 = mixup_data(
            batch_data_dict['feature'],
            batch_data_dict['spacetime'],
            batch_data_dict['coarse_target'],
            alpha=1.0,
            use_cuda=True)
        batch_output1 = model.forward1(feature1, spacetime1)
        batch_output2 = model.forward2(feature2, spacetime2)
        # lam1/lam2 stay floats: casting them to int would collapse the mixup
        # weight to 0 and train only against the permuted targets.
        loss1 = (lam1 * binary_cross_entropy(batch_output1, targets1_a) +
                 (1 - lam1) * binary_cross_entropy(batch_output1, targets1_b))
        loss2 = (lam2 * binary_cross_entropy(batch_output2, targets2_a) +
                 (1 - lam2) * binary_cross_entropy(batch_output2, targets2_b))

        # To train without mixup, use the following code instead:
        # batch_target1 = batch_data_dict['fine_target']
        # batch_output1 = model.forward1(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # batch_target2 = batch_data_dict['coarse_target']
        # batch_output2 = model.forward2(batch_data_dict['feature'], batch_data_dict['spacetime'])
        # loss1 = binary_cross_entropy(batch_output1, batch_target1)
        # loss2 = binary_cross_entropy(batch_output2, batch_target2)

        loss = loss1 + loss2

        # Loss-spike monitoring: keep the following block enabled, or comment it out to disable
        if iteration > 4320:
            new_loss = loss.item()
            if len(last_loss) < 5:
                last_loss.append(new_loss)
            else:
                cha = 0  # cumulative absolute change over the last five recorded losses
                for i in range(4):
                    cha += abs(last_loss[i + 1] - last_loss[i])
                if new_loss > last_loss[4] and cha >= (new_loss -
                                                       last_loss[4]) > cha / 2:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' drop iteration:{}'.format(iteration))
                    iteration += 1
                    continue
                elif new_loss > last_loss[4] and (new_loss -
                                                  last_loss[4]) > cha / 2.75:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss
                    logging.info(' low weightiteration:{}'.format(iteration))
                    loss = loss / 2

                else:
                    for i in range(4):
                        last_loss[i] = last_loss[i + 1]
                    last_loss[4] = new_loss

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if iteration % 50 == 0:
            plt_x.append(iteration)
            plt_y.append(loss.item())  # store a float, not the live tensor

        if iteration % 13000 == 0 and iteration != 0:
            plt.figure(1)
            plt.suptitle('training loss', fontsize=18)
            plt.plot(plt_x, plt_y, 'r-', label='loss')
            plt.legend(loc='best')
            plt.savefig(
                loss_path + '/' +
                time.strftime('%m%d_%H%M%S', time.localtime(time.time())) +
                'loss.jpg')
            plt.savefig(loss_path + '/loss.jpg')

        # Stop learning
        if iteration == 13000:
            # logging.info("best_micro_auprc:{:.3f}".format(best_inde['micro_auprc']))
            # logging.info("best_micro_f1:{:.3f}".format(best_inde['micro_f1']))
            # logging.info("best_macro_auprc:{:.3f}".format(best_inde['macro_auprc']))
            # labels = get_labels(taxonomy_level)
            # for k, label in enumerate(labels):
            #     logging.info('    {:<40}{:.3f}'.format(label, best_inde['average_precision'][k]))
            break
        iteration += 1
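
A quick sanity check of the inline mixup helpers on dummy tensors (assuming they were hoisted out of `train` to module level; `use_cuda=False` keeps it CPU-runnable, and the shapes are illustrative only):

import numpy as np
import torch

x1 = torch.randn(4, 8)                    # stand-in for 'feature'
x2 = torch.randn(4, 3)                    # stand-in for 'spacetime'
y = torch.randint(0, 2, (4, 29)).float()  # stand-in for 'fine_target'
mx1, mx2, y_a, y_b, lam = mixup_data(x1, x2, y, alpha=1.0, use_cuda=False)
assert mx1.shape == x1.shape and mx2.shape == x2.shape
assert 0.0 <= lam <= 1.0  # Beta(1, 1) draws lie in [0, 1]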
Example #10
def train_model(args):

    sr = args.sr
    hop_length = args.hop_length
    freq_res = args.freq_res
    representation = args.representation

    pt_weights = args.pt_weights
    network = args.network
    problem = args.problem
    cluster = args.cluster

    use_only_curated = args.use_only_curated
    epochs = args.epochs
    lr = args.lr
    batch_size = args.batch_size
    log = vars(args)

    path_dataset = '/home/edoardobucheli/Datasets/FSDKaggle2018'

    if sr == 16000:
        sample_rate = '16k'
        path_train = os.path.join(path_dataset, 'audio_train_16k')
        path_test = os.path.join(path_dataset, 'audio_test_16k')
    elif sr == 32000:
        sample_rate = '32k'
        path_train = os.path.join(path_dataset, 'audio_train_32k')
        path_test = os.path.join(path_dataset, 'audio_test_32k')
    else:
        print("Sample Rate option not available")
        raise SystemExit(1)  # exit() is meant for interactive use

    # Load label data
    train_data = pd.read_csv(
        os.path.join(path_dataset, 'train_post_competition.csv'))
    test_data = pd.read_csv(
        os.path.join(path_dataset, 'test_post_competition_scoring_clips.csv'))

    num_to_label, label_to_num, n_classes = get_all_classes_dict(train_data)
    label_to_meta, label_num_to_meta = get_classes_to_meta_dict(label_to_num)

    data_cur = train_data[train_data['manually_verified'] == 1]
    data_noi = train_data[train_data['manually_verified'] == 0]

    meta_labels_all, labels_all = get_labels(train_data, label_to_meta,
                                             label_to_num)
    meta_labels_cur, labels_cur = get_labels(data_cur, label_to_meta,
                                             label_to_num)
    meta_labels_noi, labels_noi = get_labels(data_noi, label_to_meta,
                                             label_to_num)
    meta_labels_test, labels_test = get_labels(test_data, label_to_meta,
                                               label_to_num)

    n_meta_classes = len(np.unique(meta_labels_all))

    # Load Data

    file_length = 64000
    frames = int(np.ceil(file_length / hop_length))

    if representation == 'WF':

        if problem == 'Cluster':
            experiment_name = '{}-C{}-{}-{}'.format(network, cluster,
                                                    sample_rate,
                                                    representation)
        elif problem == 'MC':
            experiment_name = '{}-MC-{}-{}'.format(network, sample_rate,
                                                   representation)

        pickle_train = './preprocessed_train/{}-{}-64k'.format(
            representation, sample_rate)
        pickle_test = './preprocessed_test/{}-{}-64k'.format(
            representation, sample_rate)
        input_shape = [
            file_length,
        ]
    else:

        if problem == 'Cluster':
            experiment_name = '{}-C{}-{}-{}-{}-HL{}'.format(
                network, cluster, sample_rate, representation, freq_res,
                hop_length)
        elif problem == 'MC':
            experiment_name = '{}-MC-{}-{}-{}-{}'.format(
                network, sample_rate, representation, freq_res, hop_length)

        pickle_train = './preprocessed_train/{}-{}-HL{}-WF{}-64k'.format(
            representation, freq_res, hop_length, sample_rate)
        pickle_test = './preprocessed_test/{}-{}-HL{}-WF{}-64k'.format(
            representation, freq_res, hop_length, sample_rate)
        input_shape = [freq_res, frames]

    with open(pickle_train, 'rb') as fp:
        x_train = pickle.load(fp)
    with open(pickle_test, 'rb') as fp:
        x_test = pickle.load(fp)

    if problem == 'Cluster':

        if use_only_curated:

            is_curated = train_data['manually_verified'].tolist()
            indx_curated = [i for i, f in enumerate(is_curated) if f == 1]
            x_cur = [x_train[f] for f in indx_curated]

            x_train_2, new_labels_train, mc_new_label_mapping = get_x_and_labels(
                x_cur, labels_cur, meta_labels_cur, cluster=cluster)
        else:
            x_train_2, new_labels_train, mc_new_label_mapping = get_x_and_labels(
                x_train, labels_all, meta_labels_all, cluster=cluster)

        x_test_2, new_labels_test, _ = get_x_and_labels(x_test,
                                                        labels_test,
                                                        meta_labels_test,
                                                        cluster=cluster)

        # Load Network
        model = load_network(network,
                             input_shape,
                             len(mc_new_label_mapping) + 1,
                             lr,
                             weights=pt_weights,
                             new_head=False,
                             train_only_head=False)

    elif problem == 'MC':

        x_train_2 = x_train
        new_labels_train = meta_labels_all
        x_test_2 = x_test
        new_labels_test = meta_labels_test

        model = load_network(network,
                             input_shape,
                             len(np.unique(new_labels_train)),
                             lr,
                             weights=pt_weights,
                             new_head=False,
                             train_only_head=False)

    model.compile(optimizer=Adam(lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # Make Split
    X_train, X_val, y_train, y_val = train_test_split(x_train_2,
                                                      new_labels_train,
                                                      test_size=0.1,
                                                      random_state=7,
                                                      shuffle=True)

    # Create Generators

    if representation == 'WF':
        train_generator = DataGeneratorWave(X_train,
                                            y_train,
                                            batch_size=batch_size,
                                            sr=sr,
                                            file_length=file_length)
        val_generator = DataGeneratorWave(X_val,
                                          y_val,
                                          batch_size=batch_size,
                                          sr=sr,
                                          file_length=file_length)
    else:
        train_generator = DataGenerator(X_train,
                                        y_train,
                                        batch_size=batch_size,
                                        freq_res=freq_res,
                                        frames=frames)
        val_generator = DataGenerator(X_val,
                                      y_val,
                                      batch_size=batch_size,
                                      freq_res=freq_res,
                                      frames=frames)

    # Train Model

    if problem == 'Cluster':

        version = 0
        while os.path.exists('./outputs/txt_logs/{}[{}].txt'.format(
                experiment_name, version)):
            version += 1

        best_filepath = './outputs/best_weights/{}[{}].h5'.format(
            experiment_name, version)

    elif problem == 'MC':

        version = 0
        while os.path.exists('./outputs_mc/txt_logs/{}[{}].txt'.format(
                experiment_name, version)):
            version += 1

        best_filepath = './outputs_mc/best_weights/{}[{}].h5'.format(
            experiment_name, version)

    checkpoint = ModelCheckpoint(best_filepath,
                                 monitor='val_acc',
                                 verbose=0,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 mode='max')
    callbacks_list = [checkpoint]

    history_callback = model.fit_generator(train_generator,
                                           epochs=epochs,
                                           validation_data=val_generator,
                                           callbacks=callbacks_list)

    print('\n\nDone Training! Preparing Test\n\n')

    log2 = deepcopy(log)

    log2['acc_history'] = history_callback.history['acc']
    log2['val_acc_history'] = history_callback.history['val_acc']
    log2['loss_history'] = history_callback.history['loss']
    log2['val_loss_history'] = history_callback.history['val_loss']

    model.load_weights(best_filepath)

    if representation == 'WF':
        test_me = create_quick_test_wave(x_test_2, file_length)

    else:
        test_me = create_quick_test_2d(x_test_2, freq_res, frames)

    test_loss, test_acc = model.evaluate(test_me, new_labels_test)
    print("Test Accuracy: {}".format(test_acc))

    y_scores = model.predict(test_me)
    y_hat = np.argmax(y_scores, axis=1)

    log2['y_scores'] = y_scores
    log2['y_hat'] = y_hat
    log2['test_loss'] = test_loss
    log2['test_acc'] = test_acc

    log['test_loss'] = test_loss
    log['test_acc'] = test_acc
    #print(y_hat)

    if problem == 'Cluster':

        with open(
                './outputs/txt_logs/{}[{}].txt'.format(experiment_name,
                                                       version), 'w') as f:
            f.write(json.dumps(log, indent=4, separators=(',', ':')))

        with open(
                './outputs/pickle_logs/{}[{}].p'.format(
                    experiment_name, version), 'wb') as fp:
            pickle.dump(log2, fp)

        my_labels = list(mc_new_label_mapping.keys())

        labels = [num_to_label[f] for f in my_labels]
        labels.append('Unknown')

        if cluster == 2:
            xrotation = 90
        else:
            xrotation = 0

        plot_cm(new_labels_test,
                y_hat,
                figsize=(15, 15),
                labels=labels,
                xrotation=xrotation)
        plt.savefig('./outputs/confusion_matrices/{}[{}].eps'.format(
            experiment_name, version))
        #plt.show()

        model.save_weights('./outputs/weights/{}[{}].h5'.format(
            experiment_name, version))
        del model

    elif problem == 'MC':
        with open(
                './outputs_mc/txt_logs/{}[{}].txt'.format(
                    experiment_name, version), 'w') as f:
            f.write(json.dumps(log, indent=4, separators=(',', ':')))

        with open(
                './outputs_mc/pickle_logs/{}[{}].p'.format(
                    experiment_name, version), 'wb') as fp:
            pickle.dump(log2, fp)

        #my_labels = list(mc_new_label_mapping.keys())

        #labels = [num_to_label[f] for f in my_labels]
        #labels.append('Unknown')

        #if cluster == 2:
        #    xrotation = 90
        #else:
        #    xrotation = 0

        plot_cm(new_labels_test, y_hat, figsize=(15, 15))
        plt.savefig('./outputs_mc/confusion_matrices/{}[{}].eps'.format(
            experiment_name, version))
        #plt.show()

        model.save_weights('./outputs_mc/weights/{}[{}].h5'.format(
            experiment_name, version))
        del model
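
Note that `get_labels` here has a different signature from the DCASE examples above: it takes a metadata dataframe plus two mapping dicts and returns `(meta_labels, labels)`. A plausible sketch, assuming the FSDKaggle2018 metadata CSVs expose a 'label' column with the class name of each clip:

import numpy as np

def get_labels(df, label_to_meta, label_to_num):
    # Hypothetical: map each clip's class name to numeric meta/fine labels
    labels = np.array([label_to_num[l] for l in df['label']])
    meta_labels = np.array([label_to_meta[l] for l in df['label']])
    return meta_labels, labels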