def compute_mfcc_feats(input_path, output_path, compress, compression_method,
                       write_num_frames, **kwargs):
    """Compute MFCC features for every item in ``input_path``.

    Each item produced by the reader is passed through ``MFCC.compute`` and
    the result is written under the same key to ``output_path``.

    Args:
      input_path: Passed to ``AR`` when the MFCC input step is ``'wave'``,
        otherwise to ``DRF.create``.
      output_path: Passed to ``DWF.create``.
      compress: Forwarded to ``DWF.create``.
      compression_method: Forwarded to ``DWF.create``.
      write_num_frames: Path of a text file that receives one
        ``"<key> <num_frames>"`` line per item, or ``None`` to skip it.
      **kwargs: Option pool; ``MFCC.filter_args`` and ``AR.filter_args`` /
        ``DRF.filter_args`` pick the entries they need from it.
    """
    mfcc_args = MFCC.filter_args(**kwargs)
    mfcc = MFCC(**mfcc_args)

    # The reader type (and the shape of the items it yields) depends on
    # whether the extractor starts from waveforms or from precomputed data.
    from_wave = mfcc.input_step == 'wave'
    if from_wave:
        input_args = AR.filter_args(**kwargs)
        reader = AR(input_path, **input_args)
    else:
        input_args = DRF.filter_args(**kwargs)
        reader = DRF.create(input_path, **input_args)

    writer = DWF.create(output_path, scp_sep=' ', compress=compress,
                        compression_method=compression_method)

    f_num_frames = None
    try:
        if write_num_frames is not None:
            f_num_frames = open(write_num_frames, 'w')

        for data in reader:
            if from_wave:
                # Wave readers also yield a third element (named ``fs`` in
                # the original code); it is not needed here.
                key, x, _fs = data
            else:
                key, x = data

            logging.info('Extracting MFCC for %s', key)
            t1 = time.time()
            y = mfcc.compute(x)
            dt = (time.time() - t1) * 1000  # elapsed time in ms

            # NOTE(review): assumes mfcc.frame_shift is expressed in ms so
            # that the ratio is dimensionless -- confirm against MFCC.
            # Guard against a coarse clock reporting zero elapsed time.
            if dt > 0:
                rtf = mfcc.frame_shift * y.shape[0] / dt
            else:
                rtf = float('inf')
            logging.info(
                'Extracted MFCC for %s num-frames=%d elapsed-time=%.2f ms. '
                'real-time-factor=%.2f', key, y.shape[0], dt, rtf)

            writer.write([key], [y])
            if f_num_frames is not None:
                f_num_frames.write('%s %d\n' % (key, y.shape[0]))

            # Reset the extractor before processing the next item.
            mfcc.reset()
    finally:
        # Close the frame-count file even if extraction fails mid-way, so
        # the lines already written are flushed and the handle is released.
        if f_num_frames is not None:
            f_num_frames.close()
def compute_mfcc_feats(input_path, output_path, compress, compression_method,
                       write_num_frames, use_gpu, nn_model_path, chunk_size,
                       context, **kwargs):
    """Compute features, run them through an enhancement network, store them.

    Each item produced by the reader is passed through ``MFCC.compute``, the
    result is processed by ``apply_nnet`` with a ``CAN`` model loaded from
    ``nn_model_path``, and the network output is written to ``output_path``.

    Args:
      input_path: Passed to ``AR`` when the MFCC input step is ``'wave'``,
        otherwise to ``DRF.create``.
      output_path: Passed to ``DWF.create``.
      compress: Forwarded to ``DWF.create``.
      compression_method: Forwarded to ``DWF.create``.
      write_num_frames: Path of a text file that receives one
        ``"<key> <num_frames>"`` line per item, or ``None`` to skip it.
      use_gpu: If true and CUDA is available, run the network on
        ``cuda:0``; otherwise on the CPU.
      nn_model_path: Checkpoint file; its ``'state_dict'`` entry is loaded
        into the network.
      chunk_size: Forwarded to ``apply_nnet``.
      context: Forwarded to ``apply_nnet``.
      **kwargs: Option pool; ``MFCC.filter_args`` and ``AR.filter_args`` /
        ``DRF.filter_args`` pick the entries they need from it.
    """
    # Open device.
    if use_gpu and torch.cuda.is_available():
        # The variable is only logged, so a missing value must not abort the
        # run: use .get() instead of indexing os.environ.
        logging.info('CUDA_VISIBLE_DEVICES=%s',
                     os.environ.get('CUDA_VISIBLE_DEVICES'))
        logging.info('init gpu device')
        device = torch.device('cuda', 0)
        # Move a tiny tensor to the device; presumably this forces CUDA
        # initialisation here so failures surface before any data is read.
        torch.tensor([0]).to(device)
    else:
        logging.info('init cpu device')
        device = torch.device('cpu')

    mfcc_args = MFCC.filter_args(**kwargs)
    mfcc = MFCC(**mfcc_args)

    # PUT YOUR NNET MODEL HERE!!!!
    enhancer = CAN(num_channels=45)
    checkpoint = torch.load(nn_model_path, map_location=device)
    enhancer.load_state_dict(checkpoint['state_dict'])
    enhancer.to(device)
    enhancer.eval()

    # The reader type (and the shape of the items it yields) depends on
    # whether the extractor starts from waveforms or from precomputed data.
    from_wave = mfcc.input_step == 'wave'
    if from_wave:
        input_args = AR.filter_args(**kwargs)
        reader = AR(input_path, **input_args)
    else:
        input_args = DRF.filter_args(**kwargs)
        reader = DRF.create(input_path, **input_args)

    writer = DWF.create(output_path, scp_sep=' ', compress=compress,
                        compression_method=compression_method)

    f_num_frames = None
    try:
        if write_num_frames is not None:
            f_num_frames = open(write_num_frames, 'w')

        for data in reader:
            if from_wave:
                # Wave readers also yield a third element (named ``fs`` in
                # the original code); it is not needed here.
                key, x, _fs = data
            else:
                key, x = data

            logging.info('Extracting filter-banks for %s', key)
            t1 = time.time()
            y = mfcc.compute(x)

            # Pass the features through the loaded enhancement network.
            logging.info('Running enhancement network')
            y = apply_nnet(y, enhancer, chunk_size, context, device)

            # Elapsed time (ms) covers feature extraction plus the network.
            dt = (time.time() - t1) * 1000
            # NOTE(review): assumes mfcc.frame_shift is expressed in ms so
            # that the ratio is dimensionless -- confirm against MFCC.
            # Guard against a coarse clock reporting zero elapsed time.
            if dt > 0:
                rtf = mfcc.frame_shift * y.shape[0] / dt
            else:
                rtf = float('inf')
            logging.info(
                'Extracted filter-banks for %s num-frames=%d '
                'elapsed-time=%.2f ms. real-time-factor=%.2f',
                key, y.shape[0], dt, rtf)

            writer.write([key], [y])
            if f_num_frames is not None:
                f_num_frames.write('%s %d\n' % (key, y.shape[0]))

            # Reset the extractor before processing the next item.
            mfcc.reset()
    finally:
        # Close the frame-count file even if extraction fails mid-way, so
        # the lines already written are flushed and the handle is released.
        if f_num_frames is not None:
            f_num_frames.close()
def compute_mfcc_feats(input_path, output_path, compress, compression_method,
                       write_num_frames, use_gpu, nn_model_path, chunk_size,
                       context, **kwargs):
    """Compute MFCCs with an enhancement network applied to the filter-banks.

    The MFCC pipeline is split in two stages: ``mfcc1`` stops at the
    ``'logfb'`` output step and ``mfcc2`` starts from the ``'logfb'`` input
    step. In between, the filter-bank columns are mean-normalised with
    ``MVN``, processed by ``apply_nnet`` with a ``CGN`` model loaded from
    ``nn_model_path``, and the log-energy column is rescaled to match.

    Args:
      input_path: Passed to ``AR`` when the first stage's input step is
        ``'wave'``, otherwise to ``DRF.create``.
      output_path: Passed to ``DWF.create``.
      compress: Forwarded to ``DWF.create``.
      compression_method: Forwarded to ``DWF.create``.
      write_num_frames: Path of a text file that receives one
        ``"<key> <num_frames>"`` line per item, or ``None`` to skip it.
      use_gpu: If true and CUDA is available, try to grab a free GPU;
        fall back to the CPU when every attempt fails.
      nn_model_path: Checkpoint file holding the network state dict.
      chunk_size: Forwarded to ``apply_nnet``.
      context: Forwarded to ``apply_nnet``.
      **kwargs: Option pool; ``MFCC.filter_args`` and ``AR.filter_args`` /
        ``DRF.filter_args`` pick the entries they need from it.
    """
    # Open device.
    if use_gpu and torch.cuda.is_available():
        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        max_tries = 100
        for attempt in range(max_tries):
            try:
                gpu_ids = find_free_gpus()
                os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids
                logging.info('CUDA_VISIBLE_DEVICES=%s',
                             os.environ['CUDA_VISIBLE_DEVICES'])
                logging.info('init gpu device')
                device = torch.device('cuda', 0)
                # Move a tiny tensor to the device so that an unusable GPU
                # raises here, inside the retry loop.
                torch.tensor([0]).to(device)
                break
            except Exception:
                # Catch Exception rather than everything: a bare except
                # would also swallow KeyboardInterrupt/SystemExit and make
                # the (up to ~1000 s) retry loop impossible to interrupt.
                if attempt < max_tries - 1:
                    logging.info('failing init gpu, trying again',
                                 exc_info=True)
                    time.sleep(10)
                else:
                    logging.info('failing init gpu, using cpu',
                                 exc_info=True)
                    device = torch.device('cpu')
    else:
        logging.info('init cpu device')
        device = torch.device('cpu')

    # Two extractor configurations sharing all options except where the
    # pipeline is cut: stage 1 ends at log filter-banks, stage 2 resumes
    # from them.
    mfcc_args1 = MFCC.filter_args(**kwargs)
    mfcc_args2 = copy.deepcopy(mfcc_args1)
    mfcc_args1['output_step'] = 'logfb'
    mfcc_args2['input_step'] = 'logfb'
    # Diagnostics go through logging rather than print() so they do not
    # end up on stdout.
    logging.debug('kwargs=%s', kwargs)
    logging.debug('mfcc_args1=%s', mfcc_args1)
    logging.debug('mfcc_args2=%s', mfcc_args2)

    mfcc1 = MFCC(**mfcc_args1)
    mfcc2 = MFCC(**mfcc_args2)

    mvn = MVN(norm_var=False, left_context=150, right_context=150)

    # PUT YOUR NNET MODEL HERE!!!!
    enhancer = CGN()
    # The checkpoint is loaded as a bare state dict (it is not indexed
    # with a 'state_dict' key).
    enhancer.load_state_dict(torch.load(nn_model_path, map_location=device))
    enhancer.to(device)
    enhancer.eval()

    # The reader type (and the shape of the items it yields) depends on
    # whether the extractor starts from waveforms or from precomputed data.
    from_wave = mfcc1.input_step == 'wave'
    if from_wave:
        input_args = AR.filter_args(**kwargs)
        reader = AR(input_path, **input_args)
    else:
        input_args = DRF.filter_args(**kwargs)
        reader = DRF.create(input_path, **input_args)

    writer = DWF.create(output_path, scp_sep=' ', compress=compress,
                        compression_method=compression_method)

    f_num_frames = None
    try:
        if write_num_frames is not None:
            f_num_frames = open(write_num_frames, 'w')

        for data in reader:
            if from_wave:
                # Wave readers also yield a third element (named ``fs`` in
                # the original code); it is not needed here.
                key, x, _fs = data
            else:
                key, x = data

            logging.info('Extracting filter-banks for %s', key)
            t1 = time.time()
            y = mfcc1.compute(x)

            # Separate logE (column 0) and filter-banks (remaining columns).
            logE = y[:, 0]
            y = y[:, 1:]

            # Estimate log energy from the original filter-banks.
            logEy1 = logsumexp(y, axis=-1)

            # Mean-normalise, then run the enhancement network.
            logging.info('Running enhancement network')
            y = mvn.normalize(y)
            y = apply_nnet(y, enhancer, chunk_size, context, device)

            # Rescale logE by the change in filter-bank energy introduced
            # by normalisation + enhancement.
            logEy2 = logsumexp(y, axis=-1)
            logE = logE + (logEy2 - logEy1)

            # Concatenate logE and filter-banks back into one matrix.
            y = np.concatenate((logE[:, None], y), axis=-1)

            # Apply DCT (second MFCC stage).
            logging.info('Applying DCT')
            y = mfcc2.compute(y)

            # Elapsed time (ms) covers both stages plus the network.
            dt = (time.time() - t1) * 1000
            # NOTE(review): assumes frame_shift is expressed in ms so that
            # the ratio is dimensionless -- confirm against MFCC.
            # Guard against a coarse clock reporting zero elapsed time.
            if dt > 0:
                rtf = mfcc1.frame_shift * y.shape[0] / dt
            else:
                rtf = float('inf')
            logging.info(
                'Extracted filter-banks for %s num-frames=%d '
                'elapsed-time=%.2f ms. real-time-factor=%.2f',
                key, y.shape[0], dt, rtf)

            writer.write([key], [y])
            if f_num_frames is not None:
                f_num_frames.write('%s %d\n' % (key, y.shape[0]))

            # NOTE(review): only the first stage is reset between items;
            # confirm mfcc2 and mvn keep no per-item state.
            mfcc1.reset()
    finally:
        # Close the frame-count file even if extraction fails mid-way, so
        # the lines already written are flushed and the handle is released.
        if f_num_frames is not None:
            f_num_frames.close()
logging.info('init gpu device') device = torch.device('cuda', 0) torch.tensor([0]).to(device) break except: if g < max_tries-1: logging.info('failing init gpu, trying again') time.sleep(10) else: logging.info('failing init gpu, using cpu') device = torch.device('cpu') else: logging.info('init cpu device') device = torch.device('cpu') mfcc_args1 = MFCC.filter_args(**kwargs) mfcc_args2 = copy.deepcopy(mfcc_args1) mfcc_args1['output_step'] = 'logfb' mfcc_args2['input_step'] = 'logfb' print(kwargs) print(mfcc_args1) print(mfcc_args2) mfcc1 = MFCC(**mfcc_args1) mfcc2 = MFCC(**mfcc_args2) # PUT YOUR NNET MODEL HERE!!!! enhancer = CGN() #enhancer.load_state_dict(torch.load(nn_model_path, map_location=device)['state_dict']) enhancer.load_state_dict(torch.load(nn_model_path, map_location=device)) enhancer.to(device) enhancer.eval()