def transform_input(batch_x, cfg):
    """Apply the configured feature transformation to a batch of raw audio.

    cfg.input_transformation selects 'mfcc', 'filterbank', or anything else
    for a no-op passthrough; cfg.dims_mfcc[1] and [2] supply the per-frame
    feature count and layer count for the transform helpers.
    """
    mode = cfg.input_transformation
    if mode == 'mfcc':
        features = [
            stacked_mfcc(sample,
                         numcep=cfg.dims_mfcc[1],
                         num_layers=cfg.dims_mfcc[2])
            for sample in batch_x
        ]
        return np.asarray(features)
    if mode == 'filterbank':
        features = [
            stacked_filterbank(sample,
                               nfilt=cfg.dims_mfcc[1],
                               num_layers=cfg.dims_mfcc[2])
            for sample in batch_x
        ]
        return np.asarray(features)
    # No transformation configured: hand the batch back untouched.
    return batch_x
def submission():
    """Run inference over the test corpus and write the submission CSV.

    Restores the saved model, predicts a label for every clip produced by
    the module-level ``batch_gen``, overrides the network output with
    'silence' when the silence detector fires, and writes fname,label rows.
    """
    fn_model = 'models/model40/model_mfcc_bsize512_e49.ckpt'
    id2name = corpus.decoder  # class index -> label string
    size = 158538  # expected number of test clips (kept from original; unused)
    results = dict()
    with tf.Session(graph=graph) as sess:
        # Restore trained variables from the checkpoint on disk.
        saver.restore(sess, fn_model)
        print("Model restored.")
        batch_idx = 0
        try:
            for batch_x, batch_y in batch_gen:
                features = [stacked_mfcc(clip) for clip in batch_x]
                if batch_idx % 1000 == 0:
                    logging.info(str(batch_idx))
                prediction = sess.run([pred],
                                      feed_dict={x: features, keep_prob: 1.0})
                for k, p in enumerate(prediction[0]):
                    fname = batch_y[k].decode()
                    # Silence detector takes precedence over the network.
                    if SC.is_silence(batch_x[k]):
                        label = 'silence'
                    else:
                        label = id2name[p]
                    results[fname] = label
                batch_idx += 1
        except EOFError:
            # Corpus generator exhausted: all batches processed.
            pass
    with open(os.path.join('assets/corpora/corpus12/',
                           'submission_test7.csv'), 'w') as fout:
        fout.write('fname,label\n')
        for fname, label in results.items():
            fout.write('{},{}\n'.format(fname, label))
def batch_gen(self, batch_size, input_transformation='filterbank',
              dims_input_transformation=(99, 26, 1)):
    """Yield (features, labels) batches read sequentially from self.fp.

    Each pickled item is a dict with 'wav' and 'label'. The waveform is
    transformed per *input_transformation* ('mfcc', 'filterbank', or left
    raw otherwise). Features are stacked into an ndarray; labels become an
    ndarray too, except in 'test' mode where they stay a plain list.

    Raises EOFError (from the unpickler) once the stream is exhausted.
    """
    feats, labels = [], []
    with open(self.fp, 'rb') as stream:
        unpickler = pickle.Unpickler(stream)
        while True:
            item = unpickler.load()  # EOFError at end of stream
            wav = item['wav']
            if input_transformation == 'mfcc':
                wav = stacked_mfcc(wav,
                                   numcep=dims_input_transformation[1],
                                   num_layers=dims_input_transformation[2])
            elif input_transformation == 'filterbank':
                wav = stacked_filterbank(
                    wav,
                    nfilt=dims_input_transformation[1],
                    num_layers=dims_input_transformation[2])
            feats.append(wav)
            labels.append(item['label'])
            if len(feats) == batch_size:
                batch_x = np.asarray(feats)
                # Test corpora keep labels (file names) as a raw list.
                if self.mode in ['test']:
                    batch_y = labels
                else:
                    batch_y = np.asarray(labels)
                yield batch_x, batch_y
                feats, labels = [], []
def batch_gen(self):
    """Infinite training-batch generator.

    Draws each example as 'known', 'unknown' or 'silence' according to the
    configured portions, optionally mixes background noise into the
    waveform, applies the configured input transformation, and yields
    (np.ndarray features, list labels) batches of ``self.batch_size``.
    Exhausted corpus generators are transparently restarted.

    Fix: the original compared the drawn type with ``is`` against string
    literals, but ``np.random.choice`` returns ``numpy.str_`` objects, so
    those identity checks were always False and every noised example fell
    through to the 'unknown' branch. Compare with ``==`` instead.
    """
    x = []
    y = []
    gen_train = self.gen_corpus(self.train_corpus.fp)
    gen_noise = self.gen_corpus(self.background_corpus.fp)
    gen_unknown = self.gen_corpus(self.unknown_corpus.fp)
    gen_silence = self.gen_corpus(self.background_corpus.fp)
    while True:
        # Sample which kind of example to emit next.
        sample_type = np.random.choice(
            ['known', 'unknown', 'silence'],
            p=[
                1 - self.portion_unknown - self.portion_silence,
                self.portion_unknown,
                self.portion_silence
            ])
        if sample_type == 'known':
            try:
                train_data = next(gen_train)
            except EOFError:
                # Corpus exhausted: restart from the beginning.
                print('restarting gen_train')
                gen_train = self.gen_corpus(self.train_corpus.fp)
                train_data = next(gen_train)
        elif sample_type == 'unknown':
            try:
                train_data = next(gen_unknown)
            except EOFError:
                print('restarting gen_unknown')
                gen_unknown = self.gen_corpus(self.unknown_corpus.fp)
                train_data = next(gen_unknown)
        else:
            try:
                # Silence is either pure zeros or a background-noise clip.
                silence_type = np.random.choice(['pure', 'noise'])
                if silence_type == 'noise':
                    train_data = next(gen_silence)
                else:
                    train_data = {
                        'wav': np.zeros(16000, dtype=np.float32),
                        'label': 11
                    }
            except EOFError:
                print('restarting gen_silence')
                gen_silence = self.gen_corpus(self.background_corpus.fp)
                train_data = next(gen_silence)
        try:
            noise = next(gen_noise)
        except EOFError:
            print('restarting gen_bg')
            gen_noise = self.gen_corpus(self.background_corpus.fp)
            noise = next(gen_noise)
        raw_wav = train_data['wav']
        noise_wav = noise['wav']
        if self.train_silence_detection:
            # Binary target: 1 = silence (label 11), 0 = everything else.
            label = 1 if train_data['label'] == 11 else 0
        else:
            label = train_data['label']
        # Fraction of the signal kept when mixing; the rest is noise.
        factor_mix = 1 - np.random.uniform(self.lower_bound_noise_mix,
                                           self.upper_bound_noise_mix)
        if np.random.rand() < self.portion_noised:
            if sample_type == 'silence':
                if self.noise_silence:
                    wav = self._combine_wav(raw_wav, noise_wav, factor_mix)
                else:
                    wav = raw_wav
            elif sample_type == 'known':
                wav = self._combine_wav(raw_wav, noise_wav, factor_mix)
            else:
                if self.noise_unknown:
                    wav = self._combine_wav(raw_wav, noise_wav, factor_mix)
                else:
                    wav = raw_wav
        else:
            wav = raw_wav
        if self.input_transformation == 'mfcc':
            signal = stacked_mfcc(
                wav,
                num_layers=self.dims_input_transformation[2],
                numcep=self.dims_input_transformation[1])
        elif self.input_transformation == 'filterbank':
            signal = stacked_filterbank(
                wav,
                num_layers=self.dims_input_transformation[2],
                nfilt=self.dims_input_transformation[1])
        else:
            signal = wav
        x.append(signal)
        y.append(label)
        if len(x) == self.batch_size:
            # Features become an ndarray; labels stay a plain list.
            x = np.asarray(x)
            yield x, y
            # NOTE: runs only after the consumer resumes the generator.
            self.batches_counter += 1
            x = []
            y = []
# NOTE(review): the next line is the tail of a list comprehension whose
# opening is outside this view; also `is not` compares string IDENTITY,
# not equality — almost certainly should be `!=` (confirm against the
# full expression).
if os.path.dirname(x) is not bn_dir ]
# --- Load data set ---
percentage_of_speech = 0.5  # target fraction of speech examples
# NOTE(review): by operator precedence only len(silence3_fnames) is divided
# by percentage_of_speech; if the intent was (total silence) / percentage,
# parentheses are missing — confirm with the author.
num_of_data = int(
    len(silence_fnames) + len(silence2_fnames) +
    len(silence3_fnames) / percentage_of_speech)
# Build a train/test split balanced between silence and speech files.
X_train, X_test, y_train, y_test, ss = get_balanced_corpus(
    silence_fnames,
    speech_fnames,
    num_of_data,
    percentage_of_speech,
    is_split=True)
# Turn raw waveforms into stacked MFCC features (26 cepstral coefficients).
X_train = np.asarray([stacked_mfcc(x, numcep=26) for x in X_train])
X_test = np.asarray([stacked_mfcc(x, numcep=26) for x in X_test])
print('Input dims: ')
print(X_train.shape)
from batch_gen import SoundCorpus
import pickle
# Mapping from file name to ground-truth label, produced elsewhere.
with open('assets/corpora/corpus14/' + 'fname2label.p', 'rb') as f:
    fname2label = pickle.load(f)
test_corpus = SoundCorpus(
    'assets/corpora/corpus14/',
    mode='own_test',
    fn='own_test_fname.p.soundcorpus.p')
SC = SilenceDetector()
pred = tf.argmax(logits, 1) #correct_prediction = tf.equal(pred, tf.reshape(y, [-1])) #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accu') #confusion_matrix = tf.confusion_matrix(tf.reshape(y, [-1]),pred,num_classes) #tf.summary.scalar('accuracy', accuracy) saver = tf.train.Saver() true_silence_ids = [id for id, item in enumerate(batch) if item['label'] == 11] silence_ids = [ id for id, item in enumerate(batch) if silence_classifier.is_silence(item['wav']) ] batch2 = [b for id, b in enumerate(batch) if id not in true_silence_ids] batch_x = [stacked_mfcc(item['wav']) for item in batch2] batch_y = [item['label'] for item in batch2] batch_x_incl_silence = [stacked_mfcc(item['wav']) for item in batch] batch_y_incl_silence = [item['label'] for item in batch] batch_y_test = [np.random.randint(0, 11) for b in batch_y] batch_y_incl_silence_test = [ np.random.randint(0, 11) for b in batch_y_incl_silence ] def predict(batch_x, batch_y): # fn_model = 'models/model40/model_mfcc_bsize512_e49.ckpt' fn_model = 'models/model48/model_mfcc_bsize512_e49.ckpt' # %%
# Dropout keep-probability fed at run time (1.0 for inference).
keep_prob = tf.placeholder(tf.float32, name="dropout")
with tf.variable_scope('logit'):
    logits = model.calc_logits(x, keep_prob, num_classes)
with tf.variable_scope('acc'):
    # Prediction op: index of the highest logit per example.
    pred = tf.argmax(logits, 1)
    #correct_prediction = tf.equal(pred, tf.reshape(y, [-1]))
    #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accu')
    #confusion_matrix = tf.confusion_matrix(tf.reshape(y, [-1]),pred,num_classes)
    #tf.summary.scalar('accuracy', accuracy)
saver = tf.train.Saver()
# Raw waveforms, MFCC features and labels for the current batch.
batch_x = [b['wav'] for b in batch]
batch_x_mfcc = [stacked_mfcc(b) for b in batch_x]
batch_y = [b['label'] for b in batch]


def predict(batch_x_mfcc, batch_y):
    # Restore the trained model and run prediction on the given features.
    # NOTE(review): definition continues beyond this view.
    fn_model = 'models/model40/model_mfcc_bsize512_e49.ckpt'
    # %%
    with tf.Session(graph=graph) as sess:
        # Restore variables from disk.
        saver.restore(sess, fn_model)
        print("Model restored.")
        k_batch = 0
        submission = {}
        # NOTE(review): call truncated in this view — the feed_dict
        # continues on lines not shown here.
        predic = sess.run([pred], feed_dict={
# Inference over the "own test" set, one example per pseudo-batch.
fn_model = 'models/model56/model_mfcc_bsize512_e47.ckpt'
submission_own_test_iter = dict()
with tf.Session(graph=graph) as sess:
    # Restore variables from disk.
    saver.restore(sess, fn_model)
    print("Model restored.")
    for k_batch in range(num_batches_own_test):
        try:
            # Single-example "batches" drawn from the own-test arrays.
            batch_x, batch_y = [batch_own_test_x[k_batch]
                                ], [batch_own_test_y[k_batch]]
            batch_x2 = [stacked_mfcc(b) for b in batch_x]
            if k_batch % 10 == 0:
                logging.info('Batch %s / %s' %
                             (k_batch + 1, num_batches_own_test))
            prediction = sess.run([pred],
                                  feed_dict={
                                      x: batch_x2,
                                      keep_prob: 1.0
                                  })
            for k, p in enumerate(prediction[0]):
                # The silence detector overrides the network prediction.
                # NOTE(review): loop body continues beyond this view
                # (the non-silence branch and the `except` are not shown).
                if SC.is_silence(batch_x[k]):
                    if own_test_corpus.mode == 'test':
                        # Kaggle test corpus stores file names as bytes.
                        fname, label = batch_y[k].decode(), 'silence'
                    else:
                        fname, label = batch_y[k], 'silence'