Example #1
def generatepb(TIMESTAMP, CONF):
    sess = tf.InteractiveSession()
    # Create the model and load its weights.
    create_inference_graph(CONF['model_settings']['wanted_words'],
                           CONF['model_settings']['sample_rate'],
                           CONF['model_settings']['clip_duration_ms'],
                           CONF['audio_processor']['clip_stride_ms'],
                           CONF['model_settings']['window_size_ms'],
                           CONF['model_settings']['window_stride_ms'],
                           CONF['model_settings']['dct_coefficient_count'],
                           CONF['training_parameters']['model_architecture'])

    lastckpt = max(glob.glob(paths.get_timestamped_dir() + '/ckpts/*'),
                   key=os.path.getmtime)
    lastmodel = lastckpt.split(".meta", 1)[0]
    models.load_variables_from_checkpoint(sess, lastmodel)

    # Turn all the variables into inline constants inside the graph and save it.
    frozen_graph_def = graph_util.convert_variables_to_constants(
        sess, sess.graph_def, ['labels_softmax'])

    tf.train.write_graph(frozen_graph_def,
                         paths.get_checkpoints_dir(),
                         os.path.basename(
                             CONF['training_parameters']['output_file']),
                         as_text=False)
    tf.logging.info('Saved frozen graph to %s',
                    CONF['training_parameters']['output_file'])
    sess.close()
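
For reference, a frozen graph saved this way can be loaded back for inference. A minimal sketch, assuming TF 1.x and the `labels_softmax` output node frozen above (the path in the usage comment is illustrative):

import tensorflow as tf

def load_frozen_graph(pb_path):
    # Read the binary GraphDef produced by tf.train.write_graph(as_text=False).
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    # Import the constants-only graph into a fresh Graph for inference.
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph

# Illustrative usage: fetch the softmax output node frozen above.
# graph = load_frozen_graph('model.pb')
# with tf.Session(graph=graph) as sess:
#     scores = sess.run('labels_softmax:0', feed_dict={...})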
Example #2
def train_fn(TIMESTAMP, CONF):

    sess = tf.InteractiveSession()
    paths.timestamp = TIMESTAMP
    paths.CONF = CONF
    print(CONF)

    utils.create_dir_tree()
    # Activate only if you want to back up the splits used for training.
    # utils.backup_splits()

    #  logging.set_verbosity(logging.INFO)
    logging.basicConfig(filename=paths.get_logs_dir() + '/train_info.log',
                        level=logging.DEBUG)

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(
            input_data.prepare_words_list(
                CONF["model_settings"]["wanted_words"].split(','))),
        CONF["model_settings"]["sample_rate"],
        CONF["model_settings"]["clip_duration_ms"],
        CONF["model_settings"]["window_size_ms"],
        CONF["model_settings"]["window_stride_ms"],
        CONF['model_settings']['feature_bin_count'])
    audio_processor = input_data.AudioProcessor(
        paths.get_audio_url(), paths.get_audio_dir(),
        CONF["audio_processor"]["silence_percentage"],
        CONF["audio_processor"]["unknown_percentage"],
        CONF["model_settings"]["wanted_words"].split(','),
        CONF["training_parameters"]["validation_percentage"],
        CONF["training_parameters"]["testing_percentage"], model_settings)
    fingerprint_size = model_settings['fingerprint_size']
    label_count = model_settings['label_count']
    time_shift_samples = int((CONF["audio_processor"]["time_shift_ms"] *
                              CONF["audio_processor"]["sample_rate"]) / 1000)
    # Figure out the learning rates for each training phase. Since it's often
    # effective to have high learning rates at the start of training, followed by
    # lower levels towards the end, the number of steps and learning rates can be
    # specified as comma-separated lists to define the rate at each stage. For
    # example --how_many_training_steps=10000,3000 --learning_rate=0.001,0.0001
    # will run 13,000 training loops in total, with a rate of 0.001 for the first
    # 10,000, and 0.0001 for the final 3,000. (A standalone sketch of this
    # schedule lookup appears after this example.)
    training_steps_list = list(
        map(int,
            CONF['training_parameters']['how_many_training_steps'].split(',')))
    learning_rates_list = list(
        map(float, CONF['training_parameters']['learning_rate'].split(',')))
    if len(training_steps_list) != len(learning_rates_list):
        raise Exception(
            '--how_many_training_steps and --learning_rate must be equal length '
            'lists, but are %d and %d long instead' %
            (len(training_steps_list), len(learning_rates_list)))

    fingerprint_input = tf.placeholder(tf.float32, [None, fingerprint_size],
                                       name='fingerprint_input')

    logits, dropout_prob = models.create_model(
        fingerprint_input,
        model_settings,
        CONF['training_parameters']['model_architecture'],
        is_training=True)

    # Define loss and optimizer
    ground_truth_input = tf.placeholder(tf.int64, [None],
                                        name='groundtruth_input')

    # Optionally we can add runtime checks to spot when NaNs or other symptoms of
    # numerical errors start occurring during training.
    control_dependencies = []
    if CONF['training_parameters']['check_nans']:
        checks = tf.add_check_numerics_ops()
        control_dependencies = [checks]

    # Create the back propagation and training evaluation machinery in the graph.
    with tf.name_scope('cross_entropy'):
        cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
            labels=ground_truth_input, logits=logits)
    tf.summary.scalar('cross_entropy', cross_entropy_mean)
    with tf.name_scope('train'), tf.control_dependencies(control_dependencies):
        learning_rate_input = tf.placeholder(tf.float32, [],
                                             name='learning_rate_input')
        train_step = tf.train.GradientDescentOptimizer(
            learning_rate_input).minimize(cross_entropy_mean)
    predicted_indices = tf.argmax(logits, 1)
    correct_prediction = tf.equal(predicted_indices, ground_truth_input)
    confusion_matrix = tf.confusion_matrix(ground_truth_input,
                                           predicted_indices,
                                           num_classes=label_count)
    evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)

    global_step = tf.train.get_or_create_global_step()
    increment_global_step = tf.assign(global_step, global_step + 1)

    saver = tf.train.Saver(tf.global_variables())

    # Merge all the summaries and write them out to /tmp/retrain_logs (by default)
    merged_summaries = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(
        CONF['training_parameters']['summaries_dir'] + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(
        CONF['training_parameters']['summaries_dir'] + '/validation')

    tf.global_variables_initializer().run()

    start_step = 1

    if CONF['training_parameters']['start_checkpoint']:
        models.load_variables_from_checkpoint(
            sess, CONF['training_parameters']['start_checkpoint'])
        start_step = global_step.eval(session=sess)

    logging.info('Training from step: %d ', start_step)

    # Save graph.pbtxt.
    tf.train.write_graph(
        sess.graph_def, paths.get_checkpoints_dir(),
        CONF['training_parameters']['model_architecture'] + '.pbtxt')

    # Save the list of words.
    with gfile.GFile(
            os.path.join(
                paths.get_checkpoints_dir(),
                CONF['training_parameters']['model_architecture'] +
                '_labels.txt'), 'w') as f:
        f.write('\n'.join(audio_processor.words_list))

    # Training loop.
    training_steps_max = np.sum(training_steps_list)
    for training_step in range(start_step, training_steps_max + 1):
        # Figure out what the current learning rate is.
        training_steps_sum = 0
        for i in range(len(training_steps_list)):
            training_steps_sum += training_steps_list[i]
            if training_step <= training_steps_sum:
                learning_rate_value = learning_rates_list[i]
                break
        # Pull the audio samples we'll use for training.
        train_fingerprints, train_ground_truth = audio_processor.get_data(
            CONF['training_parameters']['batch_size'], 0, model_settings,
            CONF['training_parameters']['background_frequency'],
            CONF['training_parameters']['background_volume'],
            time_shift_samples, 'training', sess)
        # Run the graph with this batch of training data.
        train_summary, train_accuracy, cross_entropy_value, _, _ = sess.run(
            [
                merged_summaries, evaluation_step, cross_entropy_mean,
                train_step, increment_global_step
            ],
            feed_dict={
                fingerprint_input: train_fingerprints,
                ground_truth_input: train_ground_truth,
                learning_rate_input: learning_rate_value,
                dropout_prob: 0.5
            })
        train_writer.add_summary(train_summary, training_step)
        logging.info('Step #%d: rate %f, accuracy %.1f%%, cross entropy %f' %
                     (training_step, learning_rate_value, train_accuracy * 100,
                      cross_entropy_value))
        is_last_step = (training_step == training_steps_max)
        if (training_step % CONF['training_parameters']['eval_step_interval']
            ) == 0 or is_last_step:
            set_size = audio_processor.set_size('validation')
            total_accuracy = 0
            total_conf_matrix = None
            for i in range(0, set_size,
                           CONF['training_parameters']['batch_size']):
                validation_fingerprints, validation_ground_truth = (
                    audio_processor.get_data(
                        CONF['training_parameters']['batch_size'], i,
                        model_settings, 0.0, 0.0, 0, 'validation', sess))
                # Run a validation step and capture training summaries for TensorBoard
                # with the `merged` op.
                validation_summary, validation_accuracy, conf_matrix = sess.run(
                    [merged_summaries, evaluation_step, confusion_matrix],
                    feed_dict={
                        fingerprint_input: validation_fingerprints,
                        ground_truth_input: validation_ground_truth,
                        dropout_prob: 1.0
                    })
                validation_writer.add_summary(validation_summary,
                                              training_step)
                batch_size = min(CONF['training_parameters']['batch_size'],
                                 set_size - i)
                total_accuracy += (validation_accuracy * batch_size) / set_size
                if total_conf_matrix is None:
                    total_conf_matrix = conf_matrix
                else:
                    total_conf_matrix += conf_matrix
            logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
            logging.info('Step %d: Validation accuracy = %.1f%% (N=%d)' %
                         (training_step, total_accuracy * 100, set_size))

        # Save the model checkpoint periodically.
        if (training_step % CONF['training_parameters']['save_step_interval']
                == 0 or training_step == training_steps_max):
            checkpoints_path = os.path.join(
                paths.get_checkpoints_dir(),
                CONF['training_parameters']['model_architecture'] + '.ckpt')
            logging.info('Saving to "%s-%d"', checkpoints_path, training_step)
            saver.save(sess, checkpoints_path, global_step=training_step)

    set_size = audio_processor.set_size('testing')
    logging.info('set_size=%d', set_size)
    total_accuracy = 0
    total_conf_matrix = None
    for i in range(0, set_size, CONF['training_parameters']['batch_size']):
        test_fingerprints, test_ground_truth = audio_processor.get_data(
            CONF['training_parameters']['batch_size'], i, model_settings, 0.0,
            0.0, 0, 'testing', sess)
        test_accuracy, conf_matrix = sess.run(
            [evaluation_step, confusion_matrix],
            feed_dict={
                fingerprint_input: test_fingerprints,
                ground_truth_input: test_ground_truth,
                dropout_prob: 1.0
            })
        batch_size = min(CONF['training_parameters']['batch_size'],
                         set_size - i)
        total_accuracy += (test_accuracy * batch_size) / set_size
        if total_conf_matrix is None:
            total_conf_matrix = conf_matrix
        else:
            total_conf_matrix += conf_matrix
    logging.info('Confusion Matrix:\n %s' % (total_conf_matrix))
    logging.info('Final test accuracy = %.1f%% (N=%d)' %
                 (total_accuracy * 100, set_size))
    print('Saving data to {} folder.'.format(paths.get_timestamped_dir()))

    print('Saving the configuration ...')
    model_utils.save_conf(CONF)
    tf.reset_default_graph()
    freeze.generatepb(TIMESTAMP=TIMESTAMP, CONF=CONF)
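
The staged learning-rate lookup implemented in the training loop above can be isolated as follows. A minimal sketch, assuming the same comma-separated CONF strings; lr_for_step is a hypothetical helper, and the default values mirror the comment's example:

def lr_for_step(step, steps_csv='10000,3000', rates_csv='0.001,0.0001'):
    # Parse the comma-separated stage lengths and their learning rates.
    steps = list(map(int, steps_csv.split(',')))
    rates = list(map(float, rates_csv.split(',')))
    # Walk the stages until the cumulative step count covers `step`.
    total = 0
    for n, rate in zip(steps, rates):
        total += n
        if step <= total:
            return rate
    return rates[-1]

# Steps 1..10000 train at 0.001; steps 10001..13000 at 0.0001.
assert lr_for_step(1) == 0.001
assert lr_for_step(10001) == 0.0001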
Example #3
def get_callbacks(CONF, use_lr_decay=True):
    """
    Get a callback list to feed fit_generator.
    #TODO Use_remote callback needs proper configuration
    #TODO Add ReduceLROnPlateau callback?

    Parameters
    ----------
    CONF: dict
        Configuration dictionary.
    use_lr_decay: bool
        Whether to add the stepwise learning-rate decay scheduler.

    Returns
    -------
    List of callbacks
    """

    calls = []

    # Add mandatory callbacks
    calls.append(callbacks.TerminateOnNaN())
    calls.append(LRHistory())

    # Add optional callbacks
    if use_lr_decay:
        milestones = np.array(
            CONF['training']['lr_step_schedule']) * CONF['training']['epochs']
        milestones = milestones.astype(int)  # np.int was removed in NumPy >= 1.24
        calls.append(
            LR_scheduler(lr_decay=CONF['training']['lr_step_decay'],
                         epoch_milestones=milestones.tolist()))

    if CONF['monitor']['use_tensorboard']:
        calls.append(
            callbacks.TensorBoard(log_dir=paths.get_logs_dir(),
                                  write_graph=False))

        # # Let the user launch Tensorboard
        # print('Monitor your training in Tensorboard by executing the following comand on your console:')
        # print('    tensorboard --logdir={}'.format(paths.get_logs_dir()))
        # Run Tensorboard  on a separate Thread/Process on behalf of the user
        port = os.getenv('monitorPORT', 6006)
        port = int(port) if len(str(port)) >= 4 else 6006
        # Kill any previous process listening on that port.
        subprocess.run(['fuser', '-k', '{}/tcp'.format(port)])
        p = Process(target=launch_tensorboard, args=(port, ), daemon=True)
        p.start()

    if CONF['monitor']['use_remote']:
        calls.append(callbacks.RemoteMonitor())

    if CONF['training']['use_validation'] and CONF['training'][
            'use_early_stopping']:
        calls.append(
            callbacks.EarlyStopping(patience=int(0.1 *
                                                 CONF['training']['epochs'])))

    if CONF['training']['ckpt_freq'] is not None:
        calls.append(
            callbacks.ModelCheckpoint(os.path.join(paths.get_checkpoints_dir(),
                                                   'epoch-{epoch:02d}.hdf5'),
                                      verbose=1,
                                      period=max(
                                          1,
                                          int(CONF['training']['ckpt_freq'] *
                                              CONF['training']['epochs']))))

    if not calls:
        calls = None

    return calls
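
A typical way to consume this list is to pass it to a Keras training call. A hypothetical usage sketch; `model`, `train_gen`, and `val_gen` are assumed to exist, and CONF follows the structure used above:

# Build the callback list and hand it to fit_generator.
calls = get_callbacks(CONF, use_lr_decay=True)
model.fit_generator(train_gen,
                    epochs=CONF['training']['epochs'],
                    validation_data=val_gen,
                    callbacks=calls)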
Example #4
def load_inference_model():
    """
    Load a model for prediction.

    If several timestamps are available in `./models`, it will load `./models/api`, or the latest timestamp if `api`
    is not available.
    If several checkpoints are available in `./models/[timestamp]/ckpts`, it will load
    `./models/[timestamp]/ckpts/model.pb`, or the latest `.pb` checkpoint if `model.pb` is not available.
    """
    global loaded, conf, MODEL_NAME, LABELS_FILE

    # Set the timestamp
    timestamps = next(os.walk(paths.get_models_dir()))[1]
    if not timestamps:
        raise Exception(
            "You have no models in your `./models` folder to be used for inference. "
            "This module does not come with a pretrained model so you have to train a model to use it for prediction."
        )
    else:
        if 'api' in timestamps:
            TIMESTAMP = 'api'
        else:
            TIMESTAMP = sorted(timestamps)[-1]
        paths.timestamp = TIMESTAMP
        print('Using TIMESTAMP={}'.format(TIMESTAMP))

        # Set the checkpoint model to use to make the prediction
        ckpts = os.listdir(paths.get_checkpoints_dir())
        if not ckpts:
            raise Exception(
                "You have no checkpoints in your `./models/{}/ckpts` folder to be used for inference. "
                .format(TIMESTAMP) +
                "Therefore the API can only be used for training.")
        else:
            if 'model.pb' in ckpts:
                MODEL_NAME = 'model.pb'
            else:
                MODEL_NAME = sorted(
                    [name for name in ckpts if name.endswith('.pb')])[-1]
            print('Using MODEL_NAME={}'.format(MODEL_NAME))

            if 'conv_labels.txt' in ckpts:
                LABELS_FILE = 'conv_labels.txt'
            else:
                LABELS_FILE = sorted(
                    [name for name in ckpts if name.endswith('.txt')])[-1]
            print('Using LABELS_FILE={}'.format(LABELS_FILE))

            # Clear the previous loaded model
            K.clear_session()

            # Load the class names and info
            ckpts_dir = paths.get_checkpoints_dir()
            MODEL_NAME = os.path.join(ckpts_dir, MODEL_NAME)
            LABELS_FILE = os.path.join(ckpts_dir, LABELS_FILE)

            # Load training configuration
            conf_path = os.path.join(paths.get_conf_dir(), 'conf.json')
            with open(conf_path) as f:
                conf = json.load(f)

    # Set the model as loaded
    loaded = True
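
Once load_inference_model() has resolved MODEL_NAME and LABELS_FILE, prediction could look roughly like the sketch below. predict_wav is a hypothetical helper in TF 1.x style; the 'wav_data:0' and 'labels_softmax:0' node names are modeled on the speech_commands inference graph and are assumptions, not confirmed by this code:

import tensorflow as tf

def predict_wav(wav_path, top_k=3):
    # Load the label list written during training (one label per line).
    with open(LABELS_FILE) as f:
        labels = [line.strip() for line in f]
    # Import the frozen graph selected by load_inference_model().
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(MODEL_NAME, 'rb') as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    with tf.Session(graph=graph) as sess:
        with open(wav_path, 'rb') as f:
            wav_data = f.read()
        # Node names are assumed (see note above).
        scores = sess.run('labels_softmax:0', {'wav_data:0': wav_data})[0]
    # Return the top_k (label, score) pairs, highest score first.
    top = scores.argsort()[::-1][:top_k]
    return [(labels[i], float(scores[i])) for i in top]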