def do_single_file_inference(input_file_path):
    """Transcribe one audio file and print the first decoding result.

    Builds the inference graph for a batch of one, restores the checkpoint
    referenced by the checkpoint state in ``FLAGS.checkpoint_dir``, computes
    the logits for ``input_file_path`` and runs the CTC beam search decoder
    on them. A ``Scorer`` is only created when ``FLAGS.lm_binary_path`` is
    set; otherwise decoding runs without one.

    Terminates the process with exit status 1 when the checkpoint directory
    holds no valid checkpoint state.
    """
    # The bare ``exit`` builtin is injected by the ``site`` module for
    # interactive use and is not guaranteed to exist; ``sys.exit`` always is.
    import sys

    with tfv1.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        features, features_len = audiofile_to_features(input_file_path)

        # The recurrent state is fed explicitly; start from all zeros
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
            inputs['previous_state_c']: previous_state_c,
            inputs['previous_state_h']: previous_state_h,
        }, session=session)

        # Drop the size-1 dimensions before handing the logits to the decoder
        logits = np.squeeze(logits)

        if FLAGS.lm_binary_path:
            scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                            FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                            Config.alphabet)
        else:
            scorer = None
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width,
                                          scorer=scorer, cutoff_prob=FLAGS.cutoff_prob,
                                          cutoff_top_n=FLAGS.cutoff_top_n)
        # Print highest probability result
        print(decoded[0][1])
def do_single_file_inference(input_file_path):
    """Transcribe one audio file and print the first decoding result.

    Builds the inference graph for a batch of one, restores every global
    variable except those whose op name starts with ``previous_state_`` from
    the checkpoint referenced in ``FLAGS.checkpoint_dir``, runs the graph's
    ``initialize_state`` op, computes the logits for ``input_file_path`` and
    decodes them with the CTC beam search decoder and a ``Scorer``.

    Terminates the process with exit status 1 when the checkpoint directory
    holds no valid checkpoint state.
    """
    # The bare ``exit`` builtin is injected by the ``site`` module for
    # interactive use and is not guaranteed to exist; ``sys.exit`` always is.
    import sys

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph.
        # The previous_state_* variables are left out of the restore mapping;
        # they are set by the 'initialize_state' op below instead.
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        # TODO: This restores the most recent checkpoint, but if we use validation to counteract
        #       over-fitting, we may want to restore an earlier checkpoint.
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error(
                'Checkpoint directory ({}) does not contain a valid checkpoint state.'
                .format(FLAGS.checkpoint_dir))
            sys.exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        session.run(outputs['initialize_state'])

        features, features_len = audiofile_to_features(input_file_path)

        # Add batch dimension
        features = tf.expand_dims(features, 0)
        features_len = tf.expand_dims(features_len, 0)

        # Evaluate
        features = create_overlapping_windows(features).eval(session=session)
        features_len = features_len.eval(session=session)

        logits = outputs['outputs'].eval(feed_dict={
            inputs['input']: features,
            inputs['input_lengths']: features_len,
        }, session=session)

        # Drop the size-1 dimensions before handing the logits to the decoder
        logits = np.squeeze(logits)

        scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                        FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                        Config.alphabet)
        decoded = ctc_beam_search_decoder(logits, Config.alphabet, FLAGS.beam_width, scorer=scorer)
        # Print highest probability result
        print(decoded[0][1])
def do_single_file_inference(input_file_path):
    """Run inference on a single audio file and print the top transcription.

    The inference graph is built for a batch of one, variables are loaded
    through ``load_or_init_graph`` (trying 'best' then 'last' when
    ``FLAGS.load`` is 'auto', otherwise only the requested method), and the
    resulting logits are decoded with the CTC beam search decoder. A
    ``Scorer`` is used only when ``FLAGS.scorer_path`` is set.
    """
    with tfv1.Session(config=Config.session_config) as session:
        graph_inputs, graph_outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Restore variables from training checkpoint
        load_order = ['best', 'last'] if FLAGS.load == 'auto' else [FLAGS.load]
        load_or_init_graph(session, load_order)

        raw_features, raw_features_len = audiofile_to_features(input_file_path)

        # Zero-filled initial recurrent state for a batch of one
        state_shape = [1, Config.n_cell_dim]
        zero_state_c = np.zeros(state_shape)
        zero_state_h = np.zeros(state_shape)

        # Add the batch dimension to both tensors
        batched_features = tf.expand_dims(raw_features, 0)
        batched_len = tf.expand_dims(raw_features_len, 0)

        # Materialise the windowed features and their length as concrete values
        feature_windows = session.run(create_overlapping_windows(batched_features))
        feature_lengths = session.run(batched_len)

        feeds = {
            graph_inputs['input']: feature_windows,
            graph_inputs['input_lengths']: feature_lengths,
            graph_inputs['previous_state_c']: zero_state_c,
            graph_inputs['previous_state_h']: zero_state_h,
        }
        logits = np.squeeze(session.run(graph_outputs['outputs'], feed_dict=feeds))

        # Decode with an external scorer only when one was configured
        scorer = (
            Scorer(FLAGS.lm_alpha, FLAGS.lm_beta, FLAGS.scorer_path, Config.alphabet)
            if FLAGS.scorer_path
            else None
        )
        candidates = ctc_beam_search_decoder(
            logits,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )

        # Print highest probability result
        print(candidates[0][1])
def do_single_file_inference(input_file_path):
    """Run inference on a single audio file and print the top transcription.

    The inference graph is built for a batch of one and its variables are
    restored with ``try_loading``: the most recent checkpoint is tried when
    ``FLAGS.load`` is 'auto' or 'last', then the best validation checkpoint
    when it is 'auto' or 'best'. If nothing could be loaded a message is
    printed and the process exits with status 1. The logits are decoded with
    the CTC beam search decoder, using a ``Scorer`` only when
    ``FLAGS.lm_binary_path`` is set.
    """
    with tfv1.Session(config=Config.session_config) as session:
        graph_inputs, graph_outputs, _ = create_inference_graph(batch_size=1, n_steps=-1)

        # Create a saver using variables from the above newly created graph
        saver = tfv1.train.Saver()

        # Restore variables from training checkpoint: each entry lists the
        # FLAGS.load values that enable it, the checkpoint state file name and
        # the caption handed to try_loading. The first success wins.
        restore_attempts = (
            (('auto', 'last'), 'checkpoint', 'most recent'),
            (('auto', 'best'), 'best_dev_checkpoint', 'best validation'),
        )
        restored = False
        for enabling_modes, state_name, caption in restore_attempts:
            if FLAGS.load in enabling_modes:
                restored = try_loading(session, saver, state_name, caption, load_step=False)
                if restored:
                    break
        if not restored:
            print('Could not load checkpoint from {}'.format(FLAGS.checkpoint_dir))
            sys.exit(1)

        raw_features, raw_features_len = audiofile_to_features(input_file_path)

        # Zero-filled initial recurrent state for a batch of one
        state_shape = [1, Config.n_cell_dim]
        zero_state_c = np.zeros(state_shape)
        zero_state_h = np.zeros(state_shape)

        # Add the batch dimension to both tensors
        batched_features = tf.expand_dims(raw_features, 0)
        batched_len = tf.expand_dims(raw_features_len, 0)

        # Materialise the windowed features and their length as concrete values
        feature_windows = session.run(create_overlapping_windows(batched_features))
        feature_lengths = session.run(batched_len)

        feeds = {
            graph_inputs['input']: feature_windows,
            graph_inputs['input_lengths']: feature_lengths,
            graph_inputs['previous_state_c']: zero_state_c,
            graph_inputs['previous_state_h']: zero_state_h,
        }
        logits = np.squeeze(session.run(graph_outputs['outputs'], feed_dict=feeds))

        # Decode with an external scorer only when one was configured
        scorer = (
            Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                   FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                   Config.alphabet)
            if FLAGS.lm_binary_path
            else None
        )
        candidates = ctc_beam_search_decoder(
            logits,
            Config.alphabet,
            FLAGS.beam_width,
            scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob,
            cutoff_top_n=FLAGS.cutoff_top_n,
        )

        # Print highest probability result
        print(candidates[0][1])
def _apply_pruning_masks(session, layer_masks, skip_lstm=False):
    '''Multiplies the prunable weight matrices of the graph by their masks.

    Walks the trainable variables in order and consumes one entry of
    ``layer_masks`` for every variable whose name contains 'weights' or that
    is the LSTM kernel. Each per-unit mask is expanded to the shape of the
    weight matrix and the masked weights are assigned back in ``session``.
    Stops once all masks have been used.
    '''
    lstm_layer_name = 'cudnn_lstm/rnn/multi_rnn_cell/cell_0/cudnn_compatible_lstm_cell/kernel:0'
    n_layers_to_prune = len(layer_masks)
    i = 0
    for v in tfv1.trainable_variables():
        is_lstm = v.name == lstm_layer_name
        if 'weights' not in v.name and not is_lstm:
            continue
        if i >= n_layers_to_prune:
            break

        layer_mask = layer_masks[i]
        # make mask into the shape of the weights
        if is_lstm:
            if skip_lstm:
                continue
            # Shape of LSTM weights: [(2*neurons), (4*neurons)]
            cell_template = np.ones((2, 4))
            half_rows = v.shape[0] // 2
            mask = np.repeat(layer_mask, half_rows, axis=0)
            mask = mask.reshape([layer_mask.shape[0], half_rows])
            mask = np.swapaxes(mask, 0, 1)
            # Blow every mask entry up to a 2x4 block to cover the full kernel
            mask = np.kron(mask, cell_template)
        else:
            mask = np.repeat(layer_mask, v.shape[0], axis=0)
            mask = mask.reshape([layer_mask.shape[0], v.shape[0]])
            mask = np.swapaxes(mask, 0, 1)

        # apply mask to weights
        session.run(v.assign(tf.multiply(v, mask)))
        i += 1


def activations_common_voice_pertubed_sets(input_dir,
                                           output_dir,
                                           test_only=False,
                                           prune_percentage=0,
                                           scores_file=None,
                                           random=False,
                                           verbose=True,
                                           randomly_initialized=False):
    '''Obtains activations for wavs in input_dir and saves them to output_dir

    The sets to process are read from
    'data/pertubed_input_sets_balanced.json'. For every item of every set the
    intermediate layers 'layer_1', 'layer_2', 'layer_3', 'rnn_output',
    'layer_4' and 'layer_5' of the inference graph are evaluated and stored
    as one '.npy' file per input. Items whose '.npy' file already exists in
    the result directory are skipped.

    Args:
        input_dir: Directory holding the audio; each item is read from
            '{input_dir}/{name}.wav', where name is the item's 'path' without
            its last four characters.
        output_dir: Results go to '{output_dir}/activations/{set_id}/' or,
            when pruning, to
            '{output_dir}/activations/pruned-{prune_percentage * 100}/{set_id}/'
            (with a '-random' suffix on the 'pruned-...' part if ``random``).
        test_only: If true, sets whose id is listed in
            './results/set_ids_used.json' are skipped.
        prune_percentage: Falsy disables pruning; otherwise it is forwarded
            to ``prune_matrices``. Presumably a fraction, since the directory
            name multiplies it by 100 - TODO confirm.
        scores_file: File loaded with ``np.load`` to obtain the per-layer
            scores; only read when pruning.
        random: Forwarded to ``prune_matrices``.
        verbose: Controls the pruning progress messages and is forwarded to
            ``prune_matrices``. It does not influence whether pruning happens.
        randomly_initialized: If true, variables are initialized with the
            global variables initializer instead of being restored from a
            checkpoint.

    Returns:
        True once all sets have been processed. The process exits with
        status 1 if a checkpoint was requested but none could be loaded.
    '''
    inputs, _, layers = create_inference_graph(batch_size=1, n_steps=-1)
    intermediate_layer_names = [
        'layer_1', 'layer_2', 'layer_3', 'rnn_output', 'layer_4', 'layer_5'
    ]
    intermediate_layers = [
        layer for name, layer in layers.items()
        if name in intermediate_layer_names
    ]

    # Context managers make sure the JSON files are closed again
    with open('data/pertubed_input_sets_balanced.json') as sets_file:
        pertubed_sets = json.load(sets_file)
    skip_sets = []
    if test_only:
        with open('./results/set_ids_used.json') as used_ids_file:
            skip_sets = json.load(used_ids_file)

    if not prune_percentage:
        base_path = '{}/activations'.format(output_dir)
    else:
        base_path = '{}/activations/pruned-{}'.format(output_dir,
                                                      prune_percentage * 100)
        if random:
            base_path += '-random'

    # The LSTM kernel is pruned as well; the same value is handed to
    # prune_matrices and to the mask application so both stay in sync.
    skip_lstm = False

    with tfv1.Session(config=Config.session_config) as session:
        if not randomly_initialized:
            # Create a saver using variables from the above newly created graph
            saver = tfv1.train.Saver()

            # Restore variables from training checkpoint
            loaded = False
            if FLAGS.load in ['auto', 'last']:
                loaded = try_loading(session, saver, 'checkpoint',
                                     'most recent', load_step=False)
            if not loaded and FLAGS.load in ['auto', 'best']:
                loaded = try_loading(session, saver, 'best_dev_checkpoint',
                                     'best validation', load_step=False)
            if not loaded:
                print('Could not load checkpoint from {}'.format(
                    FLAGS.checkpoint_dir))
                sys.exit(1)
        else:
            initializer = tfv1.global_variables_initializer()
            session.run(initializer)

        ###### PRUNING PART ######
        # Whether to prune depends on prune_percentage only; verbose merely
        # toggles the progress output.
        if not prune_percentage:
            if verbose:
                print('No pruning done.')
        else:
            if verbose:
                print('-' * 80)
                print('pruning with {}%...'.format(prune_percentage))
            scores_per_layer = np.load(scores_file)
            layer_masks = prune_matrices(scores_per_layer,
                                         prune_percentage=prune_percentage,
                                         random=random,
                                         verbose=verbose,
                                         skip_lstm=skip_lstm)
            _apply_pruning_masks(session, layer_masks, skip_lstm=skip_lstm)
        ###### END PRUNING PART ######

        # Default states for LSTM cell
        previous_state_c = np.zeros([1, Config.n_cell_dim])
        previous_state_h = np.zeros([1, Config.n_cell_dim])

        sets_to_process = [
            input_set for input_set in pertubed_sets
            if str(input_set['set_id']) not in skip_sets
        ]
        print('{} sets found'.format(len(sets_to_process)))

        for input_set in sets_to_process:
            print('Processing set {}, {} items...'.format(
                input_set['set_id'], input_set['set_length']))

            # Only process files that are not yet available in results directory
            set_dir = '{}/{}'.format(base_path, input_set['set_id'])
            create_dir_if_not_exists(set_dir)
            files_done = {
                f[:-4] for f in os.listdir(set_dir) if f.endswith('.npy')
            }

            for item in input_set['set_items']:
                # Strip the four-character extension from the item path
                file_name = item['path'][:-4]
                print(file_name)
                if file_name in files_done:
                    print('Skipped.')
                    continue

                print('current file: {}'.format(file_name))
                input_file_path = '{}/{}.wav'.format(input_dir, file_name)

                # Prepare features
                features, features_len = audiofile_to_features(input_file_path)
                features = tf.expand_dims(features, 0)
                features_len = tf.expand_dims(features_len, 0)
                features = create_overlapping_windows(features).eval(
                    session=session)
                features_len = features_len.eval(session=session)

                feed_dict = {
                    inputs['input']: features,
                    inputs['input_lengths']: features_len,
                    inputs['previous_state_c']: previous_state_c,
                    inputs['previous_state_h']: previous_state_h,
                }
                intermediate_activations = session.run(intermediate_layers,
                                                       feed_dict=feed_dict)

                # Save activations of actual input
                save_to_path_activations = '{}/{}/{}.npy'.format(
                    base_path, input_set['set_id'], file_name)
                write_numpy_to_file(save_to_path_activations,
                                    np.array(intermediate_activations))
                print('Activations for {} are saved to: {}'.format(
                    file_name, save_to_path_activations))

    return True