Beispiel #1
0
def check_get_hyper_param_dic():
    """Return the hyper parameters to use, from the config file or checkpoint.

    Creates ``FLAGS.checkpoint_dir`` if it does not exist, reads the config
    file via ``read_config_file()`` and reconciles the result with the
    ``HyperParameterHandler`` attached to the checkpoint directory:

    * ``checkExists()`` is false: the config-file values are saved through
      the handler and returned.
    * ``checkExists()`` is true and ``checkChanged(...)`` is false: the
      config-file values are returned untouched.
    * both are true and ``use_config_file_if_checkpoint_exists`` is falsy:
      the values from ``getParams()`` replace the config-file values.
    * both are true and the flag is truthy: a new timestamped
      sub-directory is created, ``FLAGS.checkpoint_dir`` is redirected to
      it, and the config-file values are saved there.

    Side effects:
        May create directories and may reassign ``FLAGS.checkpoint_dir``.

    Returns:
        The hyper parameter mapping selected by the rules above.
    """
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    serializer = hyperparams.HyperParameterHandler(FLAGS.checkpoint_dir)
    hyper_params = read_config_file()
    if serializer.checkExists():
        if serializer.checkChanged(hyper_params):
            if not hyper_params["use_config_file_if_checkpoint_exists"]:
                hyper_params = serializer.getParams()
                # print(...) with one argument behaves the same on
                # Python 2 and Python 3, unlike the print statement.
                print("Restoring hyper params from previous checkpoint...")
            else:
                # The config file wins: start a fresh checkpoint directory
                # so the previous run's files are left alone.
                new_checkpoint_dir = "{0}_hidden_size_{1}_numlayers_{2}_dropout_{3}".format(
                    int(time.time()), hyper_params["hidden_size"],
                    hyper_params["num_layers"], hyper_params["dropout"])
                new_checkpoint_dir = os.path.join(FLAGS.checkpoint_dir,
                                                  new_checkpoint_dir)
                os.makedirs(new_checkpoint_dir)
                # Redirect the global flag so later readers of
                # FLAGS.checkpoint_dir see the new directory.
                FLAGS.checkpoint_dir = new_checkpoint_dir
                serializer = hyperparams.HyperParameterHandler(
                    FLAGS.checkpoint_dir)
                serializer.saveParams(hyper_params)
        else:
            print("No hyper parameter changed detected, using old checkpoint...")
    else:
        serializer.saveParams(hyper_params)
        print("No hyper params detected at checkpoint... reading config file")
    return hyper_params
Beispiel #2
0
def get_ckpt_path_params():
    """Resolve the checkpoint sub-directory for the current config file.

    Creates ``FLAGS.checkpoint_dir`` if needed, reads the hyper parameters
    with ``read_config_file()`` and derives a sub-directory name from four
    of them. If that sub-directory is new, it is created and the hyper
    parameters are saved into it through a ``HyperParameterHandler``;
    otherwise a notice is printed and nothing is written.

    Returns:
        A ``(checkpoint_path, hyper_params)`` tuple.
    """
    if not os.path.exists(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)
    handler = hyperparams.HyperParameterHandler(FLAGS.checkpoint_dir)
    hyper_params = read_config_file()

    # NOTE(review): the last slot is labelled "vocab_size" but is filled
    # with the "dropout" value. Left as-is because renaming would change
    # the directory names of existing checkpoints -- confirm intent.
    name_keys = ("max_seq_length", "hidden_size", "num_layers", "dropout")
    subdir_name = "maxseqlen_{0}_hidden_size_{1}_numlayers_{2}_vocab_size_{3}".format(
        *[hyper_params[key] for key in name_keys])
    ckpt_path = os.path.join(FLAGS.checkpoint_dir, subdir_name)

    if not os.path.exists(ckpt_path):
        # First run with this parameter combination: record the parameters.
        os.makedirs(ckpt_path)
        handler = hyperparams.HyperParameterHandler(ckpt_path)
        handler.save_params(hyper_params)
    else:
        print("Existing checkpoint found, loading...")
    return ckpt_path, hyper_params
Beispiel #3
0
def main():
    """Entry point: train the RNN or process a single file.

    Parses the program arguments, loads the hyper parameters from the
    configured file and builds an ``AudioProcessor``. Training runs when
    ``prog_params['train']`` is exactly ``True``; otherwise
    ``prog_params['file']`` is processed.
    """
    prog_params = parse_args()
    handler = hyperparams.HyperParameterHandler(prog_params['config_file'])
    hyper_params = handler.getHyperParams()
    audio_processor = audioprocessor.AudioProcessor(
        hyper_params["max_input_seq_length"],
        hyper_params["load_save_input_vec"],
    )

    # Identity check kept on purpose: only the literal True selects training.
    if prog_params['train'] is True:
        train_rnn(hyper_params, prog_params)
        return
    process_file(audio_processor, hyper_params, prog_params['file'])
Beispiel #4
0
def main():
    """Entry point: train, process a file, or record, per the arguments.

    Loads the hyper parameters, builds an ``AudioProcessor`` and stores
    its ``feature_size`` as ``hyper_params["input_dim"]``. Then the first
    matching mode runs, in this order: ``train``, ``file``, ``record``.
    If none matches, nothing else happens.
    """
    prog_params = parse_args()
    handler = hyperparams.HyperParameterHandler(prog_params['config_file'])
    hyper_params = handler.getHyperParams()
    audio_processor = audioprocessor.AudioProcessor(
        hyper_params["max_input_seq_length"],
        hyper_params["signal_processing"],
    )
    # The RNN input dimension follows the chosen signal processing mode.
    hyper_params["input_dim"] = audio_processor.feature_size

    if prog_params['train'] is True:
        train_rnn(audio_processor, hyper_params, prog_params)
        return
    if prog_params['file'] is not None:
        process_file(audio_processor, hyper_params, prog_params['file'])
        return
    if prog_params['record'] is True:
        record_and_write(audio_processor, hyper_params)
Beispiel #5
0
def main():
    """Entry point: dispatch to the mode selected by the program arguments.

    Loads the hyper parameters and extends them with ``input_dim`` (from
    the ``AudioProcessor``) and ``char_map`` / ``char_map_length`` (from
    the ``SpeechRecognizer``). If ``start_ps`` is ``True`` the parameter
    server is started first, and dispatch still continues afterwards.
    Then the first matching mode runs, in this order: acoustic training
    (local or distributed), language training, file processing,
    recording, evaluation, text generation.
    """
    prog_params = parse_args()
    handler = hyperparams.HyperParameterHandler(prog_params['config_file'])
    hyper_params = handler.get_hyper_params()
    audio_processor = audioprocessor.AudioProcessor(
        hyper_params["max_input_seq_length"],
        hyper_params["signal_processing"],
    )
    # The RNN input dimension follows the chosen signal processing mode.
    hyper_params["input_dim"] = audio_processor.feature_size

    speech_reco = SpeechRecognizer(hyper_params["language"])
    hyper_params["char_map"] = speech_reco.get_char_map()
    hyper_params["char_map_length"] = speech_reco.get_char_map_length()

    if prog_params['start_ps'] is True:
        start_ps_server(prog_params)

    local_training = prog_params['train_acoustic'] is True
    if local_training or (prog_params['dtrain_acoustic'] is True):
        # The setting is compared as a string, not as a boolean.
        ordered = hyper_params["dataset_size_ordering"] in ['True', 'First_run_only']
        train_set, test_set = speech_reco.load_acoustic_dataset(
            hyper_params["training_dataset_dirs"],
            hyper_params["test_dataset_dirs"],
            hyper_params["training_filelist_cache"],
            ordered,
            hyper_params["train_frac"],
        )
        trainer = (train_acoustic_rnn if local_training
                   else distributed_train_acoustic_rnn)
        trainer(train_set, test_set, hyper_params, prog_params)
        return
    if prog_params['train_language'] is True:
        train_set, test_set = load_language_dataset(hyper_params)
        train_language_rnn(train_set, test_set, hyper_params, prog_params)
        return
    if prog_params['file'] is not None:
        process_file(audio_processor, hyper_params, prog_params['file'])
        return
    if prog_params['record'] is True:
        record_and_write(audio_processor, hyper_params)
        return
    if prog_params['evaluate'] is True:
        evaluate(hyper_params)
        return
    if prog_params['generate_text'] is True:
        generate_text(hyper_params)
Beispiel #6
0
def main():
    """Entry point: dispatch to the mode selected by the program arguments.

    Loads the hyper parameters and extends them with ``input_dim`` (from
    the ``AudioProcessor``) and ``char_map`` / ``char_map_length`` (from
    the ``SpeechRecognizer``). If ``start_ps`` is ``True`` the parameter
    server is started first, and dispatch still continues afterwards.
    Then the first matching mode runs, in this order: save acoustic
    datasets (and push them), acoustic training (local or distributed),
    language training, file processing, recording, evaluation, text
    generation.

    Raises:
        RuntimeError: in ``save_acoustic`` mode when the ``BUILD_ID``
            environment variable is not set. It is needed to build the
            pushed dataset version, and previously surfaced as an opaque
            ``TypeError`` from string concatenation.
    """
    all_params, prog_params = parse_args()
    serializer = hyperparams.HyperParameterHandler(
        prog_params['config_file'],
        checkpoint_dir=prog_params['train_dir'],
        program_params=all_params)
    hyper_params = serializer.get_hyper_params()
    audio_processor = audioprocessor.AudioProcessor(
        hyper_params["max_input_seq_length"],
        hyper_params["signal_processing"])
    # The RNN input dimension follows the chosen signal processing mode.
    hyper_params["input_dim"] = audio_processor.feature_size

    speech_reco = SpeechRecognizer(hyper_params["language"])
    hyper_params["char_map"] = speech_reco.get_char_map()
    hyper_params["char_map_length"] = speech_reco.get_char_map_length()

    def load_acoustic_sets():
        # Shared by the save and train branches. The ordering setting is
        # only read here, so modes that never load an acoustic dataset do
        # not depend on it.
        ordered = hyper_params["dataset_size_ordering"] in ('True', 'First_run_only')
        return speech_reco.load_acoustic_dataset(
            hyper_params["training_dataset_dirs"],
            hyper_params["test_dataset_dirs"],
            hyper_params["training_filelist_cache"],
            ordered,
            hyper_params["train_frac"])

    if prog_params['start_ps'] is True:
        start_ps_server(prog_params)
    if prog_params['save_acoustic'] is True:
        train_set, test_set = load_acoustic_sets()
        logging.info("Save datasets...")
        save_acoustic_rnn(train_set, "train", hyper_params, prog_params)
        save_acoustic_rnn(test_set, "test", hyper_params, prog_params)
        # Checked after the local save so the saved files survive a
        # missing variable; only the push is refused.
        build_id = os.environ.get('BUILD_ID')
        if build_id is None:
            raise RuntimeError(
                "BUILD_ID environment variable must be set to version "
                "the pushed dataset")
        kl = client.Client()
        # NOTE(review): WORKSPACE_NAME may also be unset (None); whether
        # the client accepts that is not visible here -- confirm.
        kl.datasets.push(os.environ.get('WORKSPACE_NAME'),
                         'librispeech-dev',
                         '1.0.' + build_id + '-tfrecords',
                         prog_params["train_dir"],
                         create=True)
    elif (prog_params['train_acoustic'] is True) or (prog_params['dtrain_acoustic'] is True):
        if prog_params['train_set'] is not None:
            # Datasets supplied through the program arguments take
            # precedence over loading from the dataset directories.
            train_set = prog_params['train_set']
            test_set = prog_params['test_set']
        else:
            train_set, test_set = load_acoustic_sets()
        if prog_params['train_acoustic'] is True:
            train_acoustic_rnn(train_set, test_set, hyper_params, prog_params)
        else:
            distributed_train_acoustic_rnn(train_set, test_set, hyper_params, prog_params)
    elif prog_params['train_language'] is True:
        train_set, test_set = load_language_dataset(hyper_params)
        train_language_rnn(train_set, test_set, hyper_params, prog_params)
    elif prog_params['file'] is not None:
        process_file(audio_processor, hyper_params, prog_params['file'])
    elif prog_params['record'] is True:
        record_and_write(audio_processor, hyper_params)
    elif prog_params['evaluate'] is True:
        evaluate(hyper_params)
    elif prog_params['generate_text'] is True:
        generate_text(hyper_params)