def e_step(args_list):
    """Accumulate the expected log-likelihood and sufficient statistics
    over a list of feature files. Runs on a remote worker: ``model``,
    ``data_stats``, ``numpy`` and ``read_htk`` are pushed into the
    worker's namespace by the driver.
    """
    exp_llh = 0.
    acc_stats = None
    n_frames = 0

    for arg in args_list:
        fea_file = arg

        # Mean / variance normalization.
        data = read_htk(fea_file)
        data -= data_stats['mean']
        data /= numpy.sqrt(data_stats['var'])

        # Get the accumulated sufficient statistics for the
        # given set of features.
        s_stats = model.get_sufficient_stats(data)
        posts, llh, new_acc_stats = model.get_posteriors(s_stats,
                                                         accumulate=True)

        exp_llh += numpy.sum(llh)
        n_frames += len(data)
        if acc_stats is None:
            acc_stats = new_acc_stats
        else:
            acc_stats += new_acc_stats

    return (exp_llh, acc_stats, n_frames)
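# A minimal dispatch sketch for the E-step above, assuming an
# ipyparallel direct view ``dview`` and a ``fea_paths`` list as in the
# driver code below; the chunking scheme is illustrative, not the
# optimizer's actual one. (The workers must also have ``numpy`` and
# ``read_htk`` imported.)
def parallel_e_step(dview, model, data_stats, fea_paths):
    # Push the globals the workers need, then split the work.
    dview.push({'model': model, 'data_stats': data_stats})
    n_workers = len(dview)
    chunks = [fea_paths[i::n_workers] for i in range(n_workers)]
    results = dview.map_sync(e_step, chunks)

    # Combine the per-worker (exp_llh, acc_stats, n_frames) tuples.
    exp_llh = sum(r[0] for r in results)
    n_frames = sum(r[2] for r in results)
    acc_stats = results[0][1]
    for r in results[1:]:
        acc_stats += r[1]
    return exp_llh, acc_stats, n_frames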
def log_predictive(fea_file):
    """Lower-bound of the predictive distribution.

    Parameters
    ----------
    fea_file : str
        Path to a features file (HTK format).

    Returns
    -------
    out_path : str
        Path to a pickle file containing the statistics dictionary
        with the log-predictive probability.

    """
    # pylint: disable=too-many-locals
    # Because this function is meant to be executed in a
    # separate thread, we prefer to reduce the number of
    # function calls to simplify the dependencies.

    # pylint: disable=global-variable-not-assigned
    # pylint: disable=undefined-variable
    # The values of these global variables will be pushed
    # to the workers dynamically.
    global MODEL, TEMP_DIR

    # pylint: disable=redefined-outer-name
    # pylint: disable=reimported
    # These imports are done on the remote workers.
    import os
    import pickle

    # Extract the key of the utterance.
    basename = os.path.basename(fea_file)
    key, ext = os.path.splitext(basename)
    if '[' in ext:
        idx = ext.index('[')
        key += ext[idx:]

    # Load the features.
    data = read_htk(fea_file)

    # Only the expected log-likelihood is needed here.
    expected_llh, unit_stats, state_resps, comp_resps = \
        MODEL.expected_log_likelihood(data)

    # Add the normalizer to the stats to compute the
    # lower bound.
    stats = {}
    stats[-1] = {
        'E_log_X': expected_llh,
        'N': data.shape[0]
    }

    # Store the stats.
    out_path = os.path.join(TEMP_DIR, key)
    with open(out_path, 'wb') as file_obj:
        pickle.dump(stats, file_obj)

    return out_path
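# The key extraction above keeps HTK-style sub-segment suffixes
# ('file.fea[start,end]'), so two slices of the same recording get
# distinct keys. A quick illustration with made-up paths:
import os

def utterance_key(fea_file):
    basename = os.path.basename(fea_file)
    key, ext = os.path.splitext(basename)
    if '[' in ext:
        key += ext[ext.index('['):]
    return key

assert utterance_key('/data/spk1_utt3.fea') == 'spk1_utt3'
assert utterance_key('/data/spk1_utt3.fea[10,250]') == 'spk1_utt3[10,250]'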
def collect_data_stats(filename): """Job to collect the statistics.""" # We re-import this module here because this code will run # remotely. import amdtk data = amdtk.read_htk(filename) stats_0 = data.shape[0] stats_1 = data.sum(axis=0) stats_2 = (data**2).sum(axis=0) retval = (stats_0, stats_1, stats_2) return retval
def count_frames(fea_file):
    """Count the number of frames in the features file.

    Parameters
    ----------
    fea_file : str
        Path to the features file.

    Returns
    -------
    count : int
        Number of frames.

    """
    return read_htk(fea_file).shape[0]
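# count_frames takes a single path and returns an int, so it maps
# directly over a parallel view; a sketch assuming the ipyparallel
# ``dview`` and ``fea_paths`` used elsewhere in this file.
counts = dview.map_sync(count_frames, fea_paths)
print('corpus size: {} frames in {} files'.format(sum(counts), len(counts)))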
def collect_data_stats(filename):
    """Job to collect the statistics."""
    print("filename:", filename)
    # We re-import this module here because this code will run
    # remotely.
    import amdtk
    data = amdtk.read_htk(filename)
    amdtk.utils.test_import()
    stat_length = data.shape[0]
    stat_sum = data.sum(axis=0)
    stat_squared_sum = (data**2).sum(axis=0)
    return (
        stat_length,
        stat_sum,
        stat_squared_sum
    )
def e_step_nonstatic(self, args_list):
    model = self.model
    data_stats = self.data_stats

    exp_llh = 0.
    acc_stats = None
    n_frames = 0

    for arg in args_list:
        (fea_file, top_file) = arg

        # Mean / variance normalization.
        data = read_htk(fea_file)
        data -= data_stats['mean']
        data /= np.sqrt(data_stats['var'])

        # Read the top PLU sequence from file.
        with open(top_file, 'r') as f:
            topstring = f.read()
        tops = topstring.strip().split(',')
        tops = [int(x) for x in tops]

        # Get the accumulated sufficient statistics for the
        # given set of features.
        s_stats = model.get_sufficient_stats(data)
        posts, llh, new_acc_stats = model.get_posteriors(
            s_stats, tops, accumulate=True, filename=fea_file)

        exp_llh += np.sum(llh)
        n_frames += len(data)
        if acc_stats is None:
            acc_stats = new_acc_stats
        else:
            acc_stats += new_acc_stats

    return (exp_llh, acc_stats, n_frames)
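# The .top files read above are plain text: a single line of
# comma-separated integer ids giving the top-level PLU sequence. A
# round-trip illustration with made-up ids and a made-up file name:
def write_top_file(path, plu_ids):
    with open(path, 'w') as f:
        f.write(','.join(str(x) for x in plu_ids))

write_top_file('utt1.top', [3, 3, 7, 1, 4])
with open('utt1.top', 'r') as f:
    tops = [int(x) for x in f.read().strip().split(',')]
assert tops == [3, 3, 7, 1, 4]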
def std_exp(fea_file):
    """E-step of the standard Variational Bayes.

    Parameters
    ----------
    fea_file : str
        Path to a features file (HTK format).

    Returns
    -------
    out_path : str
        Path to a pickle file containing the statistics dictionary.

    """
    # pylint: disable=too-many-locals
    # Because this function is meant to be executed in a
    # separate thread, we prefer to reduce the number of
    # function calls to simplify the dependencies.

    # pylint: disable=global-variable-not-assigned
    # pylint: disable=undefined-variable
    # The values of these global variables will be pushed
    # to the workers dynamically.
    global MODEL, TEMP_DIR, ALIGNMENTS

    # pylint: disable=redefined-outer-name
    # pylint: disable=reimported
    # These imports are done on the remote workers.
    import os
    import pickle

    # Extract the key of the utterance.
    basename = os.path.basename(fea_file)
    key, ext = os.path.splitext(basename)
    if '[' in ext:
        idx = ext.index('[')
        key += ext[idx:]

    # Load the features.
    data = read_htk(fea_file)

    # Compute the responsibilities per component.
    # (``ali`` is retrieved here but not used below.)
    if ALIGNMENTS is not None:
        ali = ALIGNMENTS[fea_file]
    else:
        ali = None
    expected_llh, unit_stats, state_resps, comp_resps = \
        MODEL.expected_log_likelihood(data)

    # Get the sufficient statistics of the model given the
    # responsibilities.
    stats = MODEL.get_stats(data, unit_stats, state_resps, comp_resps)

    # Add the normalizer to the stats to compute the
    # lower bound.
    stats[-1] = {
        'E_log_X': expected_llh,
        'N': data.shape[0]
    }

    # Store the stats.
    out_path = os.path.join(TEMP_DIR, key)
    with open(out_path, 'wb') as file_obj:
        pickle.dump(stats, file_obj)

    return out_path
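# On the driver side, the pickled per-utterance dictionaries can be
# reduced into one accumulator. A sketch under two assumptions: the
# model statistics support '+', and the special key -1 holds the
# lower-bound normalizer written above. The helper name is
# hypothetical.
import pickle

def reduce_stats(stat_paths):
    exp_llh = 0.
    n_frames = 0
    acc_stats = None
    for path in stat_paths:
        with open(path, 'rb') as f:
            stats = pickle.load(f)
        norm = stats.pop(-1)
        exp_llh += norm['E_log_X']
        n_frames += norm['N']
        if acc_stats is None:
            acc_stats = stats
        else:
            for key in stats:
                acc_stats[key] += stats[key]
    # Per-frame lower bound on the corpus log-likelihood.
    return acc_stats, exp_llh / n_frames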
def run_amdtk_nc(args):
    elbo = []
    time = []

    def callback(cb_args):
        elbo.append(cb_args['objective'])
        time.append(cb_args['time'])
        print('elbo=' + str(elbo[-1]), 'time=' + str(time[-1]))

    rc = Client(profile=args.profile)
    rc.debug = DEBUG
    dview = rc[:]
    print('Connected to', len(dview), 'jobs.')
    print("done importing!")

    audio_dir = os.path.abspath(args.audio_dir)
    print('audio dir:', audio_dir)
    eval_dir = os.path.abspath(args.eval_dir)
    print('eval dir:', eval_dir)
    output_dir = os.path.abspath(args.output_dir)
    print('output dir:', output_dir)

    # Collect the .fea (features) and .top (top PLU sequence) files
    # under the audio directory.
    fea_paths = []
    top_paths = []
    for root, dirs, files in os.walk(audio_dir):
        for file in files:
            print('file:', file)
            if file.lower().endswith(".fea"):
                fea_paths.append(os.path.join(root, file))
            if file.lower().endswith(".top"):
                top_paths.append(os.path.join(root, file))
    # fea_paths = [os.path.abspath(fname) for fname in glob.glob(fea_path_mask)]
    # top_path_mask = os.path.join(audio_dir, '*top')
    # top_paths = [os.path.abspath(fname) for fname in glob.glob(top_path_mask)]

    for path in fea_paths:
        assert os.path.exists(path)

    print('fea_paths:', fea_paths)
    print('top_paths:', top_paths)

    # Pair each features file with its top PLU sequence file and check
    # that the two sides actually match.
    zipped_paths = list(zip(sorted(fea_paths), sorted(top_paths)))
    print('zipped_paths:', zipped_paths)
    assert len(zipped_paths) > 0
    for fea_path, top_path in zipped_paths:
        assert re.sub(r"\.fea", "", fea_path) == re.sub(r"\.top", "", top_path)
    print("There are {} files".format(len(fea_paths)))

    print("Getting mean and variance of input data...")
    data_stats = dview.map_sync(collect_data_stats_by_speaker, fea_paths)

    # Accumulate the statistics over all the utterances.
    final_data_stats = accumulate_stats_by_speaker(data_stats)

    # Read the top PLU sequences to get the size of the top PLU
    # alphabet.
    tops = []
    for top_path in top_paths:
        with open(top_path, 'r') as f:
            topstring = f.read()
        top_list = topstring.strip().split(',')
        tops += [int(x) for x in top_list]
    num_tops = max(tops) + 1

    elbo = []
    time = []

    print("Creating phone loop model...")
    conc = 0.1
    # ``resume`` and ``train`` are free variables here; they are
    # expected to be defined at module level (e.g., from the CLI).
    if resume is None:
        first_speaker = list(final_data_stats.keys())[0]
        model = amdtk.PhoneLoopNoisyChannel.create(
            n_units=str(args.bottom_plu_count),  # number of acoustic units
            n_states=3,                          # number of states per unit
            n_comp_per_state=args.n_comp,        # number of Gaussians per emission
            n_top_units=num_tops,                # size of top PLU alphabet
            mean=np.zeros_like(final_data_stats[first_speaker]['mean']),
            # NB: the division by 20 has no effect inside np.ones_like.
            var=np.ones_like(final_data_stats[first_speaker]['var'] / 20),
            max_slip_factor=args.max_slip_factor,
            extra_cond=args.extra_cond,
            limits={
                'plu_top': args.plu_top_limit,
                'mem': args.mem_limit,
                'time': args.time_limit
            })
    else:
        # Resume from a pickled model; the epoch and batch indices are
        # recovered from the checkpoint file name.
        with open(resume, 'rb') as f1:
            model = pickle.load(f1)
        numgex = re.compile(r"[\d]+")
        res_epoch, res_batch = [int(x) for x in numgex.findall(str(resume))]
        model.start_epoch = res_epoch
        model.starting_batch = res_batch

    if train:
        if not os.path.exists(os.path.join(output_dir, "models")):
            os.mkdir(os.path.join(output_dir, "models"))

        data_stats = final_data_stats

        print("Creating VB optimizer...")
        optimizer = amdtk.NoisyChannelOptimizer(
            dview,
            data_stats,
            args={
                'epochs': args.n_epochs,
                'batch_size': args.batch_size,
                'lrate': args.lrate,
                'output_dir': output_dir,
                'audio_dir': audio_dir,
                'eval_audio_dir': audio_dir,
                'audio_samples_per_sec': 100
            },
            model=model,
            pkl_path=os.path.join(output_dir, "models"))

        print("Running VB optimization...")
        begin = systime.time()
        print("running with {} paths".format(len(zipped_paths)))
        optimizer.run(zipped_paths, callback)
        end = systime.time()
        print("VB optimization took", end - begin, "seconds.")

    print("***ELBO***")
    for i, n in enumerate(elbo):
        print('Epoch ' + str(i) + ': ELBO=' + str(n))

    if args.decode:
        for (fea_path, top_path) in zipped_paths:
            data = amdtk.read_htk(fea_path)

            # Normalize the data.
            data_mean = np.mean(data)
            data_var = np.var(data)
            data = (data - data_mean) / np.sqrt(data_var)

            # Read the top PLU sequence from file.
            with open(top_path, 'r') as f:
                topstring = f.read()
            tops = topstring.strip().split(',')
            tops = [int(x) for x in tops]

            # result = model.decode(data, tops, state_path=False)
            # result_path = model.decode(data, tops, state_path=True)
            (result_intervals, edit_path, _) = model.decode(
                data, tops, phone_intervals=True, edit_ops=True)
            print("---")
            print("Phone sequence for file", fea_path, ":")
            print(result_intervals)
            print(edit_path)
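# For readability the decoded intervals can be converted from frame
# indices to seconds; a minimal sketch, assuming each interval is a
# (label, start_frame, end_frame) triple (an assumption, not part of
# the decode() contract) and the 100 frames-per-second rate passed to
# the optimizer above.
FRAMES_PER_SEC = 100  # matches 'audio_samples_per_sec' above

def intervals_to_seconds(intervals, rate=FRAMES_PER_SEC):
    return [(label, start / rate, end / rate)
            for label, start, end in intervals]

print(intervals_to_seconds([('a12', 0, 35), ('a3', 35, 62)]))
# [('a12', 0.0, 0.35), ('a3', 0.35, 0.62)]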
# Print a text histogram of ``dictionary`` (key -> count), one 'x' per
# ``num_per_x`` units; both names come from the enclosing scope.
graph_items = []
for key, value in sorted(dictionary.items()):
    how_many_xs = round(value / num_per_x)
    xs = 'x' * how_many_xs
    if xs == '' and value > 0:
        xs = '.'
    num_string = '(' + str(key) + ', {0:.2f}) '.format(value)
    graph_items.append((num_string, xs))

max_len = max([len(x[0]) for x in graph_items])
for item in graph_items:
    space_padding = ' ' * (max_len - len(item[0]))
    print(item[0] + space_padding + item[1])

# Decode each utterance with the trained model.
for path in paths:
    data = amdtk.read_htk(path)

    # Normalize the data.
    data_mean = np.mean(data)
    data_var = np.var(data)
    data = (data - data_mean) / np.sqrt(data_var)

    print("type of model is ", type(model))
    result = model.decode(data, state_path=False)
    result_path = model.decode(data, state_path=True)
    result_intervals = model.decode(data, phone_intervals=True)
    print("---")
    print("Phone sequence for file", path, ":")
    print(result)
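# A quick check of the histogram logic above, with made-up counts:
dictionary = {1: 12.0, 2: 30.0, 3: 4.5, 4: 0.4}
num_per_x = 5  # one 'x' per 5 units

# Running the loop above on this input prints:
# (1, 12.00) xx
# (2, 30.00) xxxxxx
# (3, 4.50)  x
# (4, 0.40)  .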
def e_step(args_list):
    import os
    from amdtk import read_htk

    exp_llh = 0.
    acc_stats = None
    n_frames = 0
    skipped = 0

    for arg in args_list:
        (fea_file, top_file) = arg

        # Mean / variance normalization with per-speaker statistics;
        # the speaker id is the name of the directory containing the
        # features file.
        data = read_htk(fea_file)
        speaker = os.path.split(os.path.split(fea_file)[0])[1]
        data -= data_stats[speaker]['mean']
        data /= numpy.sqrt(data_stats[speaker]['var'])

        # Read the top PLU sequence from file.
        with open(top_file, 'r') as f:
            data_str = f.read()
        tops = data_str.strip().split(',')
        tops = [int(x) for x in tops]

        # Get the accumulated sufficient statistics for the
        # given set of features.
        s_stats = model.get_sufficient_stats(data)

        # Optionally log per-utterance timing information.
        start_time = time.time()
        if curr_timing_dir is not None:
            curr_timing_file = os.path.join(
                curr_timing_dir, os.path.split(fea_file)[1][:-4])
            curr_timing_file = curr_timing_file + '.txt'
            with open(curr_timing_file, 'w') as f:
                f.write('PLU tops: {}\n'.format(len(tops)))
                f.write('PLU bottom types: {}\n'.format(model.n_units))
                # NB: shape[1] is the feature dimension; the frame
                # count is shape[0].
                f.write('Frames: {}\n'.format(data.shape[1]))
                f.write('Max slip factor: {}\n'.format(
                    model.max_slip_factor))
                f.write('\n')
                f.write('Start time: {}\n'.format(
                    time.strftime("%Y-%m-%d_%H:%M:%S",
                                  time.localtime(start_time))))
                f.write('\n')
        else:
            curr_timing_file = None

        posts, llh, new_acc_stats = model.get_posteriors(
            s_stats, tops, accumulate=True, filename=fea_file,
            output=curr_timing_file)
        end_time = time.time()

        if curr_timing_file is not None:
            with open(curr_timing_file, 'a') as f:
                f.write('\nEnd time: {}\n'.format(
                    time.strftime("%Y-%m-%d_%H:%M:%S",
                                  time.localtime(end_time))))
                f.write('Elapsed time: {}\n'.format(end_time - start_time))
                f.write('DONE')

        # Utterances for which no posterior could be computed return
        # llh = None and are counted as skipped.
        if llh is not None:
            exp_llh += numpy.sum(llh)
            n_frames += len(data)
            if new_acc_stats is not None:
                if acc_stats is None:
                    acc_stats = new_acc_stats
                else:
                    acc_stats += new_acc_stats
        else:
            skipped += 1

    return (exp_llh, acc_stats, n_frames, skipped)
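# The per-speaker lookup above keys the normalization statistics by the
# parent directory of each features file. A quick illustration with a
# made-up path:
import os

def speaker_of(fea_file):
    # Speaker id = name of the directory containing the file, matching
    # the lookup in e_step above.
    return os.path.split(os.path.split(fea_file)[0])[1]

assert speaker_of('/corpus/spk07/utt_001.fea') == 'spk07'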