Example #1
def test(args):

    data_dir = args.data_dir
    workspace = args.workspace
    #mini_data = args.mini_data
    balance_type = args.balance_type
    #learning_rate = args.learning_rate
    filename = args.filename
    model_type = args.model_type
    model = args.model
    #batch_size = args.batch_size

    # Test data
    test_hdf5_path = os.path.join(data_dir, "eval1.h5")
    (test_x, test_y, test_id_list) = utilities.load_data(test_hdf5_path)

    # Output directories
    sub_dir = os.path.join(filename, 'balance_type={}'.format(balance_type),
                           'model_type={}'.format(model_type))

    models_dir = os.path.join(workspace, "models", sub_dir)
    utilities.create_folder(models_dir)

    stats_dir = os.path.join(workspace, "stats", sub_dir)
    utilities.create_folder(stats_dir)

    probs_dir = os.path.join(workspace, "probs", sub_dir)
    utilities.create_folder(probs_dir)

    iteration = 1200

    # Optimization method
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-07)

    # Load the saved model checkpoint

    PATH = os.path.join(models_dir,
                        "md_{}_iters_300batchsize.tar".format(iteration))
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

    logging.info("Training data shape: {}".format(test_x.shape))
    logging.info("Training data shape: {}".format(test_y.shape))

    logging.info("Test statistics:")
    evaluate(model=model,
             input=test_x,
             target=test_y,
             stats_dir=os.path.join(stats_dir, "test"),
             probs_dir=os.path.join(probs_dir, "test"),
             iteration=iteration)

    print('ready')
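
# Hypothetical driver for test(). The argument names mirror the attributes
# read from `args` above; attaching an already-built model to `args.model`
# is an assumption, since test() expects a PyTorch module there, and
# build_model() is a placeholder for whatever constructs that model.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', required=True)
    parser.add_argument('--workspace', required=True)
    parser.add_argument('--balance_type', required=True)
    parser.add_argument('--model_type', required=True)
    parser.add_argument('--filename', required=True)
    args = parser.parse_args()

    args.model = build_model(args.model_type)  # placeholder model constructor
    test(args)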
def organise_data(config,
                  logger,
                  file,
                  database,
                  min_samples,
                  learning_procedure,
                  mode_label='train'):
    amcs = config.EXPERIMENT_DETAILS['AUDIO_MODE_IS_CONCAT_NOT_SHORTEN']
    freq_bins = config.EXPERIMENT_DETAILS['FREQ_BINS']
    feature_exp = config.EXPERIMENT_DETAILS['FEATURE_EXP']
    convert_to_im = config.EXPERIMENT_DETAILS['CONVERT_TO_IMAGE']
    feature_dim = config.EXPERIMENT_DETAILS['FEATURE_DIMENSIONS']
    window_size = config.WINDOW_SIZE
    hop = config.HOP_SIZE
    labels = util.load_labels(file)
    features = util.load_data(database, labels)

    seconds_segment = determine_seconds_segment(
        config.EXPERIMENT_DETAILS['SECONDS_TO_SEGMENT'], feature_dim,
        window_size, hop, learning_procedure, feature_exp)

    features, labels, loc = process_data(amcs, freq_bins, features, labels,
                                         mode_label, min_samples, logger,
                                         seconds_segment, convert_to_im,
                                         feature_exp)

    if learning_procedure == 'chunked_file':
        feat_shape = features.shape
        features, loc = group_data(features, feat_shape, feature_dim,
                                   freq_bins, convert_to_im)

    index = [0, 0]
    class_weights = [1, 1]
    zeros = ones = 0
    if mode_label == 'dev':
        _, zero_index, _, one_index, class_weights = data_info(
            labels, mode_label, logger, config)
        index = [zero_index, one_index]

    return features, labels, index, loc, (zeros, ones, class_weights)
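
# Hypothetical usage of organise_data(). Only the signature and the return
# tuple come from the code above; the config object, logger, and the label
# file / feature database paths are placeholders.
features, labels, index, loc, (zeros, ones, class_weights) = organise_data(
    config=config,
    logger=logger,
    file='train_labels.csv',    # placeholder label file
    database='features.h5',     # placeholder feature database
    min_samples=1,
    learning_procedure='chunked_file',
    mode_label='train')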
Example #3
def train(args):
    """Train a model.
    """

    data_dir = args.data_dir
    workspace = args.workspace
    mini_data = args.mini_data
    balance_type = args.balance_type
    learning_rate = args.learning_rate
    filename = args.filename
    model_type = args.model_type
    model = args.model
    batch_size = args.batch_size

    # Path of hdf5 data
    bal_train_hdf5_path = os.path.join(data_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(data_dir, "unbal_train.h5")
    test_hdf5_path = os.path.join(data_dir, "eval.h5")

    # Load data
    load_time = time.time()

    if mini_data:
        # Only load balanced data
        (bal_train_x, bal_train_y,
         bal_train_id_list) = utilities.load_data(bal_train_hdf5_path)

        train_x = bal_train_x
        train_y = bal_train_y
        train_id_list = bal_train_id_list

    else:
        # Load both balanced and unbalanced data
        (bal_train_x, bal_train_y,
         bal_train_id_list) = utilities.load_data(bal_train_hdf5_path)

        (unbal_train_x, unbal_train_y,
         unbal_train_id_list) = utilities.load_data(unbal_train_hdf5_path)

        train_x = np.concatenate((bal_train_x, unbal_train_x))
        train_y = np.concatenate((bal_train_y, unbal_train_y))
        train_id_list = bal_train_id_list + unbal_train_id_list

    # Test data
    (test_x, test_y, test_id_list) = utilities.load_data(test_hdf5_path)

    logging.info("Loading data time: {:.3f} s".format(time.time() - load_time))
    logging.info("Training data shape: {}".format(train_x.shape))

    # Optimization method
    optimizer = Adam(lr=learning_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)

    # Output directories
    sub_dir = os.path.join(filename, 'balance_type={}'.format(balance_type),
                           'model_type={}'.format(model_type))

    models_dir = os.path.join(workspace, "models", sub_dir)
    utilities.create_folder(models_dir)

    stats_dir = os.path.join(workspace, "stats", sub_dir)
    utilities.create_folder(stats_dir)

    probs_dir = os.path.join(workspace, "probs", sub_dir)
    utilities.create_folder(probs_dir)

    # Data generator
    if balance_type == 'no_balance':
        DataGenerator = data_generator.VanillaDataGenerator

    elif balance_type == 'balance_in_batch':
        DataGenerator = data_generator.BalancedDataGenerator

    else:
        raise Exception("Incorrect balance_type!")

    train_gen = DataGenerator(x=train_x,
                              y=train_y,
                              batch_size=batch_size,
                              shuffle=True,
                              seed=1234)

    iteration = 0
    call_freq = 1000
    train_time = time.time()

    for (batch_x, batch_y) in train_gen.generate():

        # Compute statistics every few iterations
        if iteration % call_freq == 0:

            logging.info("------------------")

            logging.info("Iteration: {}, train time: {:.3f} s".format(
                iteration,
                time.time() - train_time))

            logging.info("Balance train statistics:")
            evaluate(model=model,
                     input=bal_train_x,
                     target=bal_train_y,
                     stats_dir=os.path.join(stats_dir, 'bal_train'),
                     probs_dir=os.path.join(probs_dir, 'bal_train'),
                     iteration=iteration)

            logging.info("Test statistics:")
            evaluate(model=model,
                     input=test_x,
                     target=test_y,
                     stats_dir=os.path.join(stats_dir, "test"),
                     probs_dir=os.path.join(probs_dir, "test"),
                     iteration=iteration)

            train_time = time.time()

        # Update params
        (batch_x, batch_y) = utilities.transform_data(batch_x, batch_y)
        model.train_on_batch(x=batch_x, y=batch_y)

        iteration += 1

        # Save model
        save_out_path = os.path.join(models_dir,
                                     "md_{}_iters.h5".format(iteration))
        model.save(save_out_path)

        # Stop training when the maximum iteration is reached
        if iteration == 50001:
            break
def train(args):
    """Train a model.
    """

    data_dir = args.data_dir
    workspace = args.workspace
    mini_data = args.mini_data
    balance_type = args.balance_type
    learning_rate = args.learning_rate
    filename = args.filename
    model_type = args.model_type
    model = args.model
    batch_size = args.batch_size
    cuda = True

    # Move model to GPU
    if cuda:
        model.cuda()

    # Path of hdf5 data
    bal_train_hdf5_path = os.path.join(data_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(data_dir, "unbal_train.h5")
    test_hdf5_path = os.path.join(data_dir, "eval.h5")

    # Load data
    load_time = time.time()

    if mini_data:
        # Only load balanced data
        (bal_train_x, bal_train_y,
         bal_train_id_list) = utilities.load_data(bal_train_hdf5_path)

        train_x = bal_train_x
        train_y = bal_train_y
        train_id_list = bal_train_id_list

    else:
        # Load both balanced and unbalanced data
        (bal_train_x, bal_train_y,
         bal_train_id_list) = utilities.load_data(bal_train_hdf5_path)

        (unbal_train_x, unbal_train_y,
         unbal_train_id_list) = utilities.load_data(unbal_train_hdf5_path)

        train_x = np.concatenate((bal_train_x, unbal_train_x))
        train_y = np.concatenate((bal_train_y, unbal_train_y))
        train_id_list = bal_train_id_list + unbal_train_id_list

    # Test data
    (test_x, test_y, test_id_list) = utilities.load_data(test_hdf5_path)

    logging.info("Loading data time: {:.3f} s".format(time.time() - load_time))
    logging.info("Training data shape: {}".format(train_x.shape))

    # Optimization method
    optimizer = optim.Adam(model.parameters(),
                           lr=1e-3,
                           betas=(0.9, 0.999),
                           eps=1e-07)

    # Output directories
    sub_dir = os.path.join(filename, 'balance_type={}'.format(balance_type),
                           'model_type={}'.format(model_type))

    models_dir = os.path.join(workspace, "models", sub_dir)
    utilities.create_folder(models_dir)

    stats_dir = os.path.join(workspace, "stats", sub_dir)
    utilities.create_folder(stats_dir)

    probs_dir = os.path.join(workspace, "probs", sub_dir)
    utilities.create_folder(probs_dir)

    # Data generator
    if balance_type == 'no_balance':
        DataGenerator = data_generator.VanillaDataGenerator

    elif balance_type == 'balance_in_batch':
        DataGenerator = data_generator.BalancedDataGenerator

    else:
        raise Exception("Incorrect balance_type!")

    train_gen = DataGenerator(x=train_x,
                              y=train_y,
                              batch_size=batch_size,
                              shuffle=True,
                              seed=1234)

    iteration = 0
    call_freq = 1000
    train_time = time.time()

    for (batch_x, batch_y) in train_gen.generate():

        # Compute statistics every few iterations
        if iteration % call_freq == 0 and iteration > 1:

            logging.info("------------------")

            logging.info("Iteration: {}, train time: {:.3f} s".format(
                iteration,
                time.time() - train_time))

            logging.info("Balance train statistics:")
            evaluate(model=model,
                     input=bal_train_x,
                     target=bal_train_y,
                     stats_dir=os.path.join(stats_dir, 'bal_train'),
                     probs_dir=os.path.join(probs_dir, 'bal_train'),
                     iteration=iteration)

            logging.info("Test statistics:")
            evaluate(model=model,
                     input=test_x,
                     target=test_y,
                     stats_dir=os.path.join(stats_dir, "test"),
                     probs_dir=os.path.join(probs_dir, "test"),
                     iteration=iteration)

            train_time = time.time()

        (batch_x, batch_y) = utilities.transform_data(batch_x, batch_y)

        batch_x = move_data_to_gpu(batch_x, cuda)
        batch_y = move_data_to_gpu(batch_y, cuda)

        # Forward.
        model.train()
        output = model(batch_x)

        # Loss.
        loss = F.binary_cross_entropy(output, batch_y)

        # Backward.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        iteration += 1

        # Save model.
        if iteration % 5000 == 0:
            save_out_dict = {
                'iteration': iteration,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            save_out_path = os.path.join(models_dir,
                                         "md_{}_iters.tar".format(iteration))
            torch.save(save_out_dict, save_out_path)
            logging.info("Save model to {}".format(save_out_path))

        # Stop training when the maximum iteration is reached
        if iteration == 20001:
            break
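
# move_data_to_gpu() is used above but not defined in these examples. A
# minimal sketch, assuming it converts numpy batches to torch tensors and
# optionally moves them onto the GPU:
import torch

def move_data_to_gpu(x, cuda):
    """Convert a numpy array to a torch tensor, optionally on the GPU."""
    if 'float' in str(x.dtype):
        x = torch.Tensor(x)
    elif 'int' in str(x.dtype):
        x = torch.LongTensor(x)
    else:
        raise Exception("Unsupported dtype: {}".format(x.dtype))

    if cuda:
        x = x.cuda()

    return x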