Example #1
def list_audio_channels(ctx, channels):
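    """List the given audio channels with their channel counts and names."""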
    ctx.log(line())
    ctx.log('{0:2} {1:8} {2:30}'.format(' #', 'Channels', 'Name'))
    ctx.log(line())
    for i, chn in enumerate(channels):
        ctx.log('{0:2} {1:8} {2:30}'.format(
            i,
            chn.channels,
            chn.name[:30]))
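
list_audio_channels and the listing functions in the following examples rely on a line() helper for the table separators, which is not included in these excerpts. A minimal sketch, assuming it simply returns a dashed rule (the default length here is a guess):

def line(length=42):
    # Hypothetical separator helper: return a dashed rule of the given length.
    return '-' * length
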
Example #2
def list_models(ctx):
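    """List model files in MODELS_FOLDER with a validity status (✓/✘)."""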
    ctx.log(line())
    ctx.log('{0:2} {1:30} {2:10}'.format(' #', 'Name', 'Status'))
    ctx.log(line())
    model_dir = os.path.join(os.getcwd(), MODELS_FOLDER)
    for i, entry in enumerate(os.scandir(model_dir)):
        if entry.is_file():
            try:
                check_valid_model(entry.name.split('.')[0])
            except FileNotFoundError:
                status = click.style('✘', fg='red')
            else:
                status = click.style('✓', fg='green')
            ctx.log('{0:2} {1:30} {2:10}'.format(i, entry.name[:30], status))
Example #3
def list_voices(ctx):
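    """List voice directories in GENERATED_FOLDER with a validity status (✓/✘)."""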
    ctx.log(line())
    ctx.log('{0:2} {1:30} {2:10}'.format(' #', 'Name', 'Status'))
    ctx.log(line())
    voice_dir = os.path.join(os.getcwd(), GENERATED_FOLDER)
    for i, entry in enumerate(os.scandir(voice_dir)):
        if entry.is_dir():
            try:
                check_valid_voice(entry.name)
            except FileNotFoundError:
                status = click.style('✘', fg='red')
            else:
                status = click.style('✓', fg='green')
            ctx.log('{0:2} {1:30} {2:10}'.format(i, entry.name[:30], status))
Example #4
def train_sequence_model(ctx, primary_voice, secondary_voice, name, **kwargs):
    """Train a LSTM neural network on sequence data from a performance"""
    model_name = '{}.h5'.format(name)
    model_path = os.path.join(os.getcwd(), MODELS_FOLDER, model_name)
    resume = False

    if os.path.isfile(model_path):
        click.confirm(
            'Found model with same name! Do you want to resume training?',
            abort=True)
        resume = True

    # Parameters and Hyperparameters
    num_classes = kwargs.get('num_classes')
    batch_size = kwargs.get('batch_size')
    data_split = kwargs.get('data_split')
    seq_len = kwargs.get('seq_len')
    dropout = kwargs.get('dropout')
    epochs = kwargs.get('epochs')
    num_layers = kwargs.get('num_layers')
    num_units = kwargs.get('num_units')

    ctx.log('\nParameters:')
    ctx.log(line(length=32))
    ctx.log('name:\t\t{}'.format(name))
    ctx.log('num_classes:\t{}'.format(num_classes))
    ctx.log('batch_size:\t{}'.format(batch_size))
    ctx.log('data_split:\t{}'.format(data_split))
    ctx.log('seq_len:\t{}'.format(seq_len))
    ctx.log('epochs:\t\t{}'.format(epochs))
    if not resume:
        ctx.log('num_layers:\t{}'.format(num_layers))
        ctx.log('num_units:\t{}'.format(num_units))
    ctx.log(line(length=32))
    ctx.log('')

    primary_voice = Voice(primary_voice)
    secondary_voice = Voice(secondary_voice)

    # Generate training data from voice sequences
    ctx.log(click.style('1. Generate training data from voices', bold=True))
    ctx.log('Primary voice: "{}"'.format(primary_voice.name))
    ctx.log('Secondary voice: "{}"'.format(secondary_voice.name))

    data = generate_sequence(ctx,
                             primary_voice,
                             secondary_voice,
                             save_sequence=kwargs.get('save_sequence'))

    ctx.log('')

    # Encode data before training
    ctx.log(click.style('2. Encode data before training', bold=True))
    encoded_data, kmeans = k_means_encode_data(data, num_classes)
    ctx.log('Number of classes: {}\n'.format(num_classes))

    # Split in 3 sets for training, validation and testing
    ctx.log(click.style('3. Split data in sets', bold=True))
    validation_steps = round((data_split / 2) * len(data))

    train_max = len(data) - (validation_steps * 2)
    val_min = train_max + 1
    val_max = train_max + validation_steps + 1
    test_min = train_max + validation_steps + 2
    test_max = len(data) - 1

    training_steps = test_max - test_min
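    # Worked example with hypothetical numbers: for len(data) == 1000 and
    # data_split == 0.1, validation_steps = round(0.05 * 1000) = 50, so
    # train_max = 900, val_min = 901, val_max = 951, test_min = 952,
    # test_max = 999 and training_steps covers 47 test indices.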

    train_gen = generator(encoded_data,
                          seq_len=seq_len,
                          batch_size=batch_size,
                          min_index=0,
                          max_index=train_max)

    val_gen = generator(encoded_data,
                        seq_len=seq_len,
                        batch_size=batch_size,
                        min_index=val_min,
                        max_index=val_max)

    test_gen = generator(encoded_data,
                         seq_len=seq_len,
                         batch_size=batch_size,
                         min_index=test_min,
                         max_index=test_max)

    steps_per_epoch = train_max // batch_size

    ctx.log('Batch size: {}'.format(batch_size))
    ctx.log('Steps per epoch: {}'.format(steps_per_epoch))
    ctx.log('Split for validation & test @ {0:.2f}%'.format(data_split * 100))
    ctx.log('Training set: {}-{}'.format(0, train_max))
    ctx.log('Validation set: {}-{}'.format(val_min, val_max))
    ctx.log('Test set: {}-{}\n'.format(test_min, test_max))

    # Define model
    ctx.log(click.style('4. Define a model', bold=True))
    if resume:
        ctx.log('Loading existing model to resume training ...')
        try:
            model = load_model(model_path)
        except ValueError as err:
            ctx.elog('Could not load model: {}'.format(err))
            sys.exit(1)
    else:
        model = Sequential()
        model.add(
            layers.Embedding(input_dim=num_classes,
                             output_dim=num_units,
                             input_length=seq_len))
        for n in range(num_layers - 1):
            model.add(layers.LSTM(num_units, return_sequences=True))
            if dropout > 0.0:
                model.add(layers.Dropout(dropout))
        model.add(layers.LSTM(num_units))
        if dropout > 0.0:
            model.add(layers.Dropout(dropout))
        model.add(layers.Dense(num_classes, activation='softmax'))

        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])

        model.summary()

    ctx.log('')

    # Training!
    ctx.log(click.style('5. Training!', bold=True))
    model.fit_generator(train_gen,
                        steps_per_epoch=steps_per_epoch,
                        epochs=epochs,
                        validation_data=val_gen,
                        validation_steps=validation_steps)
    ctx.log('Finished training.\n')

    # Evaluate training
    ctx.log(click.style('6. Evaluation', bold=True))
    scores = []
    max_dist = .25

    for i in range((training_steps // batch_size)):
        # Predict point from model
        samples, targets = next(test_gen)
        results = model.predict(samples)

        for j, result in enumerate(results):
            # Decode data
            result_value = np.argmax(result)
            position = k_means_decode_data([[result_value]], kmeans).flatten()
            position_target = k_means_decode_data([[targets[j]]], kmeans)
            position_target = position_target.flatten()

            # Calculate distance between prediction and actual test target;
            # the appended score is 1.0 for an exact match and falls linearly
            # to 0.0 once the prediction is max_dist or further away.
            dist = max_dist - min(max_dist,
                                  np.linalg.norm(position - position_target))
            scores.append(0.0 if dist == 0.0 else dist / max_dist)

    score = np.average(scores)

    ctx.log('Score: {0:.2f}%\n'.format(score * 100))

    # Save model
    ctx.log(click.style('7. Store model weights', bold=True))
    model.save(model_path)
    ctx.log('Stored weights at "{}"'.format(model_path))
    ctx.log('Done!')
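
Both variants of train_sequence_model depend on a generator() helper (as well as encoding utilities such as k_means_encode_data) that are not part of these excerpts. The following is only a sketch of a windowing batch generator consistent with how generator() is called above; the real interface and implementation may differ.

import numpy as np

def generator(data, seq_len, batch_size, min_index, max_index):
    # Sketch under the assumption that `data` is a 1-D array of integer
    # class indices: each sample is a window of seq_len indices and the
    # target is the index that follows it, drawn from [min_index, max_index].
    i = min_index + seq_len
    while True:
        samples = np.zeros((batch_size, seq_len), dtype=np.int64)
        targets = np.zeros((batch_size,), dtype=np.int64)
        for b in range(batch_size):
            if i + 1 > max_index:
                i = min_index + seq_len
            samples[b] = data[i - seq_len:i]
            targets[b] = data[i]
            i += 1
        yield samples, targets
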
Example #5
def train_sequence_model(ctx, primary_voice, secondary_voice, name, **kwargs):
    """Train a LSTM neural network on sequence data from a performance"""

    # Prepare voices
    primary_voice = Voice(primary_voice)
    secondary_voice = Voice(secondary_voice)

    if primary_voice.version < 2 or secondary_voice.version < 2:
        ctx.elog('Given voices were generated with a version that is too old.')

    sr = primary_voice.meta['samplerate']

    if sr != secondary_voice.meta['samplerate']:
        ctx.elog('Voices need the same samplerate for correct training.')

    # Prepare model
    model_name = '{}.h5'.format(name)
    model_path = os.path.join(os.getcwd(), MODELS_FOLDER, model_name)
    resume = False

    if os.path.isfile(model_path):
        click.confirm(
            'Found model with same name! Do you want to resume training?',
            abort=True)
        resume = True

    # Parameters and Hyperparameters
    use_dynamics = kwargs.get('dynamics')
    use_durations = kwargs.get('durations')

    num_sound_classes = kwargs.get('num_classes')
    batch_size = kwargs.get('batch_size')
    data_split = kwargs.get('data_split')
    seq_len = kwargs.get('seq_len')
    dropout = kwargs.get('dropout')
    epochs = kwargs.get('epochs')
    num_layers = kwargs.get('num_layers')
    num_units = kwargs.get('num_units')

    # Calculate number of total classes
    num_classes = get_num_classes(num_sound_classes, use_dynamics,
                                  use_durations)

    ctx.log('\nParameters:')
    ctx.log(line(length=32))
    ctx.log('name:\t\t{}'.format(name))
    ctx.log('num_classes:\t{}'.format(num_classes))
    ctx.log('batch_size:\t{}'.format(batch_size))
    ctx.log('data_split:\t{}'.format(data_split))
    ctx.log('seq_len:\t{}'.format(seq_len))
    ctx.log('epochs:\t\t{}'.format(epochs))
    ctx.log('dropout:\t{}'.format(dropout))
    if not resume:
        ctx.log('num_layers:\t{}'.format(num_layers))
        ctx.log('num_units:\t{}'.format(num_units))
    ctx.log(line(length=32))
    ctx.log('')

    # Generate training data from voice sequences
    ctx.log(click.style('1. Generate training data from voices', bold=True))
    ctx.log('Primary voice: "{}"'.format(primary_voice.name))
    ctx.log('Secondary voice: "{}"'.format(secondary_voice.name))

    data = generate_sequence(ctx,
                             primary_voice,
                             secondary_voice,
                             save_sequence=kwargs.get('save_sequence'))

    ctx.log('')

    # Encode data before training
    ctx.log(click.style('2. Encode data before training', bold=True))
    encoded_data = encode_data(data, num_sound_classes, use_dynamics,
                               use_durations, sr)

    ctx.log('Number of classes: {}\n'.format(num_classes))

    # Split in 3 sets for training, validation and testing
    ctx.log(click.style('3. Split data in sets', bold=True))
    validation_steps = round((data_split / 2) * len(data))

    train_max = len(data) - (validation_steps * 2)
    val_min = train_max + 1
    val_max = train_max + validation_steps + 1
    test_min = train_max + validation_steps + 2
    test_max = len(data) - 1

    training_steps = test_max - test_min

    train_gen = generator(encoded_data,
                          seq_len=seq_len,
                          batch_size=batch_size,
                          min_index=0,
                          max_index=train_max)

    val_gen = generator(encoded_data,
                        seq_len=seq_len,
                        batch_size=batch_size,
                        min_index=val_min,
                        max_index=val_max)

    test_gen = generator(encoded_data,
                         seq_len=seq_len,
                         batch_size=batch_size,
                         min_index=test_min,
                         max_index=test_max)

    steps_per_epoch = train_max // batch_size

    ctx.log('Batch size: {}'.format(batch_size))
    ctx.log('Steps per epoch: {}'.format(steps_per_epoch))
    ctx.log('Split for validation & test @ {0:.2f}%'.format(data_split * 100))
    ctx.log('Training set: {}-{}'.format(0, train_max))
    ctx.log('Validation set: {}-{}'.format(val_min, val_max))
    ctx.log('Test set: {}-{}\n'.format(test_min, test_max))

    # Define model
    ctx.log(click.style('4. Define a model', bold=True))
    if resume:
        ctx.log('Loading existing model to resume training ...')
        try:
            model = load_model(model_path)
        except ValueError as err:
            ctx.elog('Could not load model: {}'.format(err))
            sys.exit(1)

        num_model_classes = model.layers[-1].output_shape[1]
        if num_model_classes != num_classes:
            ctx.elog('The given model was trained with a different '
                     'number of classes: got {}, but the model '
                     'expects {}.'.format(num_classes, num_model_classes))
    else:
        model = Sequential()
        model.add(
            layers.Embedding(input_dim=num_classes,
                             output_dim=num_units,
                             input_length=seq_len))
        for n in range(num_layers - 1):
            model.add(layers.LSTM(num_units, return_sequences=True))
            if dropout > 0.0:
                model.add(layers.Dropout(dropout))
        model.add(layers.LSTM(num_units))
        if dropout > 0.0:
            model.add(layers.Dropout(dropout))
        model.add(layers.Dense(num_classes, activation='softmax'))

        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])

        model.summary()

    ctx.log('')

    # Training!
    ctx.log(click.style('5. Training!', bold=True))
    if validation_steps == 0:
        model.fit_generator(train_gen,
                            steps_per_epoch=steps_per_epoch,
                            epochs=epochs)
    else:
        model.fit_generator(train_gen,
                            steps_per_epoch=steps_per_epoch,
                            epochs=epochs,
                            validation_data=val_gen,
                            validation_steps=validation_steps)
    ctx.log('Finished training.\n')

    # Evaluate training
    ctx.log(click.style('6. Evaluation', bold=True))
    score = 0
    total = 0

    for i in range((training_steps // batch_size)):
        # Predict point from model
        samples, targets = next(test_gen)
        results = model.predict(samples)

        for j, result in enumerate(results):
            result_class = np.argmax(result)
            target_class = targets[j]

            if result_class == target_class:
                score += 1
            total += 1

    ratio = score / total

    ctx.log('Score: {0:.2f}%\n'.format(ratio * 100))

    # Save model
    ctx.log(click.style('7. Store model weights', bold=True))
    model.save(model_path)
    ctx.log('Stored weights at "{}"'.format(model_path))
    ctx.log('Done!')
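
Because the model is compiled with metrics=['acc'], the manual accuracy loop in step 6 could also be expressed through Keras' generator-based evaluation. The lines below are a hypothetical alternative, not part of the original code, and assume the same test_gen and step count as above.

# Hypothetical alternative to the manual accuracy loop in step 6; these
# lines would live inside train_sequence_model, reusing test_gen.
loss, acc = model.evaluate_generator(test_gen,
                                     steps=training_steps // batch_size)
ctx.log('Test accuracy: {0:.2f}%'.format(acc * 100))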