stats_for_k = {} if not args['dont_run_syntactic']: # syntactic update step augmented_costs = [] costs = [] for block_num in xrange(args['syntactic_blocks_to_run']): training_block = ngram_reader.training_block(data_rng.random_sample()) block_size = training_block.shape[0] for count in xrange(block_size): if count % print_freq == 0: sys.stdout.write('\rk %i b%i: ngram %d of %d' % (model.k, block_num, count, block_size)) sys.stdout.flush() train_index = sample_cumulative_discrete_distribution(training_block[:,-1], rng=data_rng) correct_symbols, error_symbols, ngram_frequency = ngram_reader.contrastive_symbols_from_row(training_block[train_index], rng=data_rng) augmented_cost, cost = model.update_w(*(list(correct_symbols) + list(error_symbols))) if not np.isfinite(cost): print 'single nan detected' save_model('nan_dump.pkl.gz') import IPython IPython.embed() augmented_costs.append(augmented_cost) costs.append(cost) if args['syntactic_blocks_to_run'] > 1: print print '%i intermediate mean %f' % (block_num, np.mean(costs[-block_size:])) print if not np.isfinite(np.mean(costs)): print 'nan cost mean detected' save_model('nan_dump.pkl.gz')
print 'burn complete' while True: last_time = time.clock() model.increase_k() stats_for_k = {} if not args['dont_run_syntactic']: # syntactic update step augmented_costs = [] costs = [] for block_num in xrange(args['syntactic_blocks_to_run']): training_block = ngram_reader.training_block(data_rng.random_sample()) block_size = training_block.shape[0] for datum in syntactic_training_data(data_rng, output='k %i b %i' % (model.k, block_num)): augmented_cost, cost = model.update_w(*datum) augmented_costs.append(augmented_cost) costs.append(cost) if args['syntactic_blocks_to_run'] > 1: print print '%i intermediate mean %f' % (block_num, np.mean(costs[-block_size:])) print stats_for_k['syntactic_mean'] = np.mean(costs) stats_for_k['syntactic_std'] = np.std(costs) print 'training:' print 'syntactic mean cost \t%f' % stats_for_k['syntactic_mean'] print 'syntactic std cost \t%f' % stats_for_k['syntactic_std'] stats_for_k['syntactic_mean_augmented'] = np.mean(augmented_costs) stats_for_k['syntactic_std_augmented'] = np.std(augmented_costs) print 'syntactic mean augmented cost \t%f' % stats_for_k['syntactic_mean_augmented']
last_time = time.clock() model.increase_k() stats_for_k = {} if not args['dont_run_syntactic']: # syntactic update step augmented_costs = [] costs = [] for block_num in xrange(args['syntactic_blocks_to_run']): training_block = ngram_reader.training_block( data_rng.random_sample()) block_size = training_block.shape[0] for datum in syntactic_training_data(data_rng, output='k %i b %i' % (model.k, block_num)): augmented_cost, cost = model.update_w(*datum) augmented_costs.append(augmented_cost) costs.append(cost) if args['syntactic_blocks_to_run'] > 1: print print '%i intermediate mean %f' % ( block_num, np.mean(costs[-block_size:])) print stats_for_k['syntactic_mean'] = np.mean(costs) stats_for_k['syntactic_std'] = np.std(costs) print 'training:' print 'syntactic mean cost \t%f' % stats_for_k['syntactic_mean'] print 'syntactic std cost \t%f' % stats_for_k['syntactic_std'] stats_for_k['syntactic_mean_augmented'] = np.mean(augmented_costs) stats_for_k['syntactic_std_augmented'] = np.std(augmented_costs)
costs = [] for block_num in xrange(args['syntactic_blocks_to_run']): training_block = ngram_reader.training_block( data_rng.random_sample()) block_size = training_block.shape[0] for count in xrange(block_size): if count % print_freq == 0: sys.stdout.write( '\rk %i b%i: ngram %d of %d' % (model.k, block_num, count, block_size)) sys.stdout.flush() train_index = sample_cumulative_discrete_distribution( training_block[:, -1], rng=data_rng) correct_symbols, error_symbols, ngram_frequency = ngram_reader.contrastive_symbols_from_row( training_block[train_index], rng=data_rng) augmented_cost, cost = model.update_w( *(list(correct_symbols) + list(error_symbols))) if not np.isfinite(cost): print 'single nan detected' save_model('nan_dump.pkl.gz') import IPython IPython.embed() augmented_costs.append(augmented_cost) costs.append(cost) if args['syntactic_blocks_to_run'] > 1: print print '%i intermediate mean %f' % ( block_num, np.mean(costs[-block_size:])) print if not np.isfinite(np.mean(costs)): print 'nan cost mean detected'