def main(d):
    """Run one training phase of a stacked denoising autoencoder (TF1 graph API).

    d is a dictionary containing the auto-encoder design specifications and
    training phase specifications. Keys read here include (non-exhaustive):
    input/output paths, noise settings ('noise_distribution', 'noise_probability',
    'noise_sigma', 'noise_operation'), initialization settings, activation
    function name, optimizer hyperparameters, the layer schedule
    ('all_dimensions', 'current_hidden_layer', 'current_finetuning_run',
    'current_epochs'), and checkpoint/early-stopping controls
    ('firstcheckpoint', 'maxstepspercheckpoint', 'startsavingstep',
    'overfitting_score_max').

    Side effects: mutates d (adds derived keys such as 'current_dimensions',
    'batch_size', 'steps'), writes the design JSON, a tab-separated training
    log, intermediate/final variable pickles, an optimization-path pickle,
    embedding datamatrices, and PNG plots under d['output_path'].

    Returns (current_hidden_layer, current_finetuning_run, current_epochs).

    NOTE(review): another top-level function named `main` appears later in this
    file; if both live in the same module, the later definition shadows this one
    — confirm intended module layout.
    """
    # RESET DEFAULT GRAPH
    print('resetting default graph...', flush=True)
    tf.reset_default_graph()

    # FINISH CONFIGURATION
    print('finishing configuration...', flush=True)

    # specify noise distribution
    # NOTE(review): no else branch — any other 'noise_distribution' value leaves
    # noise_distribution unbound and raises NameError only later, at model
    # definition time. Same pattern for the two selections below.
    if d['noise_distribution'] == 'truncnorm':
        noise_distribution = tf.truncated_normal
    elif d['noise_distribution'] == 'uniform':
        noise_distribution = tf.random_uniform

    # specify distribution of initial weights
    if d['initialization_distribution'] == 'truncnorm':
        initialization_distribution = tf.truncated_normal

    # specify activation function
    # paired TF op (graph) and numpy implementation (post-hoc application)
    if d['activation_function'] == 'tanh':
        activation_function = {'tf': tf.tanh, 'np': sdae_apply_functions.tanh}
    elif d['activation_function'] == 'relu':
        activation_function = {'tf': tf.nn.relu, 'np': sdae_apply_functions.relu}
    elif d['activation_function'] == 'elu':
        activation_function = {'tf': tf.nn.elu, 'np': sdae_apply_functions.elu}
    elif d['activation_function'] == 'sigmoid':
        activation_function = {'tf': tf.sigmoid, 'np': sdae_apply_functions.sigmoid}

    # load data
    partitions = ['train', 'valid', 'test']
    dataset = {}
    for partition in partitions:
        dataset[partition] = datasetIO.load_datamatrix('{0}/{1}.pickle'.format(d['input_path'], partition))
        d['{0}_examples'.format(partition)] = dataset[partition].shape[0]

    # create output directory
    if not os.path.exists(d['output_path']):
        os.makedirs(d['output_path'])

    # initialize model architecture (number of layers and dimension of each layer)
    d['current_dimensions'] = d['all_dimensions'][:d['current_hidden_layer'] + 1]  # dimensions of model up to current depth

    # specify embedding function for current training phase
    # we want the option of skipping the embedding activation function to apply only to the full model
    if not d['apply_activation_to_embedding'] and d['current_dimensions'] == d['all_dimensions']:
        d['current_apply_activation_to_embedding'] = False
    else:
        d['current_apply_activation_to_embedding'] = True

    # initialize assignments of training examples to mini-batches and number of training steps for stochastic gradient descent
    # NOTE(review): batch_fraction * train_examples is a float if batch_fraction
    # is fractional — presumably create_batch_ids rounds/validates; confirm.
    d['batch_size'] = d['batch_fraction'] * d['train_examples']
    batch_ids = create_batch_ids(d['train_examples'], d['batch_size'])
    d['batches'] = np.unique(batch_ids).size
    d['steps'] = d['current_epochs'] * d['batches']

    # specify path to weights from previous training run
    d['previous_variables_path'] = '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['previous_hidden_layer'], d['previous_finetuning_run'])
    d['fix_or_init'] = 'fix' if d['current_finetuning_run'] == 0 else 'init'  # fix for pretraining, init for finetuning

    # specify rows and columns of figure showing data reconstructions
    # at most 100 validation examples shown, arranged twice as wide as tall
    d['reconstruction_rows'] = int(np.round(np.sqrt(np.min([100, d['valid_examples']]) / 2)))
    d['reconstruction_cols'] = 2 * d['reconstruction_rows']

    # print some design information
    print('input path: {0}'.format(d['input_path']), flush=True)
    print('output path: {0}'.format(d['output_path']), flush=True)
    print('previous variables path: {0}'.format(d['previous_variables_path']), flush=True)
    print('previous variables fix or init: {0}'.format(d['fix_or_init']), flush=True)

    # SAVE CURRENT DESIGN
    print('saving current design...', flush=True)
    with open('{0}/design_layer{1!s}_finetuning{2!s}.json'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), mode='wt', encoding='utf-8', errors='surrogateescape') as fw:
        json.dump(d, fw, indent=2)

    # DEFINE REPORTING VARIABLES
    print('defining reporting variables...', flush=True)
    reporting_steps = sdae_design_functions.create_reporting_steps(d['steps'], d['firstcheckpoint'], d['maxstepspercheckpoint'])
    valid_losses = np.zeros(reporting_steps.size, dtype='float32')
    train_losses = np.zeros(reporting_steps.size, dtype='float32')
    valid_noisy_losses = np.zeros(reporting_steps.size, dtype='float32')
    train_noisy_losses = np.zeros(reporting_steps.size, dtype='float32')
    print('reporting steps:', reporting_steps, flush=True)

    # DEFINE COMPUTATIONAL GRAPH
    # define placeholders for input data, use None to allow feeding different numbers of examples
    print('defining placeholders...', flush=True)
    noise_stdv = tf.placeholder(tf.float32, [])
    noise_prob = tf.placeholder(tf.float32, [])
    # train+valid rows are stacked into one resident tensor; a boolean mask
    # selects the rows (mini-batch, full train set, or valid set) per sess.run
    training_and_validation_data_initializer = tf.placeholder(tf.float32, [dataset['train'].shape[0] + dataset['valid'].shape[0], dataset['train'].shape[1]])
    selection_mask = tf.placeholder(tf.bool, [dataset['train'].shape[0] + dataset['valid'].shape[0]])

    # define variables
    # W contains the weights, bencode contains the biases for encoding, and bdecode contains the biases for decoding
    print('defining variables...', flush=True)
    # collections=[] keeps the data variable out of GLOBAL_VARIABLES so
    # global_variables_initializer does not touch it; it is initialized
    # explicitly below with the stacked data fed to its initializer
    training_and_validation_data = tf.Variable(training_and_validation_data_initializer, trainable=False, collections=[])
    if os.path.exists(d['previous_variables_path']):
        # update variables (if continuing from a previous training run)
        print('loading previous variables...', flush=True)
        global_step, W, bencode, bdecode = update_variables(d['current_dimensions'], initialization_distribution, d['initialization_sigma'], d['previous_variables_path'], d['fix_or_init'], d['include_global_step'])
    elif d['current_hidden_layer'] == 1 and d['current_finetuning_run'] == 0:
        # create variables
        global_step, W, bencode, bdecode = create_variables(d['current_dimensions'], initialization_distribution, d['initialization_sigma'])
    else:
        raise ValueError('could not find previous variables')

    # define model
    # h contains the activations from input layer to bottleneck layer
    # hhat contains the activations from bottleneck layer to output layer
    # xhat is a reference to the output layer (i.e. the reconstruction)
    print('defining model...', flush=True)
    x = tf.boolean_mask(training_and_validation_data, selection_mask)
    if d['noise_distribution'] == 'truncnorm':
        noise = noise_distribution(tf.shape(x), stddev=noise_stdv)
    else:
        noise = noise_distribution(tf.shape(x), minval=-noise_stdv, maxval=noise_stdv)
    # noise_mask: 1.0 where a feature is corrupted (probability noise_prob)
    noise_mask = tf.to_float(tf.random_uniform(tf.shape(x)) <= noise_prob)
    xnoisy = apply_noise(x, noise, noise_mask, d['noise_operation'])
    h, hhat, xhat = create_autoencoder(xnoisy, activation_function['tf'], d['apply_activation_to_output'], d['current_apply_activation_to_embedding'], W, bencode, bdecode)

    # define loss
    print('defining loss...', flush=True)
    # reconstruction loss is against the CLEAN input x, not xnoisy (denoising)
    loss = tf.reduce_mean(tf.squared_difference(x, xhat))  # squared error loss

    # define optimizer and training function
    print('defining optimizer and training function...', flush=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=d['learning_rate'], epsilon=d['epsilon'], beta1=d['beta1'], beta2=d['beta2'])
    train_fn = optimizer.minimize(loss, global_step=global_step)

    # define bottleneck layer preactivation
    # bottleneck_preactivation = tf.matmul(h[-2], W[-1]) + bencode[-1]

    # INITIALIZE TENSORFLOW SESSION
    print('initializing tensorflow session...', flush=True)
    init = tf.global_variables_initializer()
    session_config = configure_session(d['processor'], d['gpu_memory_fraction'])
    with tf.Session(config=session_config) as sess:
        sess.run(init)

        # TRAINING
        print('training...', flush=True)
        # load the stacked train+valid matrix into the non-trainable data variable
        sess.run(training_and_validation_data.initializer, feed_dict={training_and_validation_data_initializer: np.append(dataset['train'].matrix, dataset['valid'].matrix, 0)})
        # validation rows get id -1 so they never match any batch index
        validation_id = -1
        batch_and_validation_ids = np.full(dataset['train'].shape[0] + dataset['valid'].shape[0], validation_id, dtype=batch_ids.dtype)
        is_train = np.append(np.ones(dataset['train'].shape[0], dtype='bool'), np.zeros(dataset['valid'].shape[0], dtype='bool'))
        is_valid = ~is_train
        training_step = 0
        i = 0  # index into reporting_steps / the loss arrays
        overfitting_score = 0  # consecutive checkpoints with worsening valid loss
        stopearly = False
        starttime = time.time()

        with open('{0}/log_layer{1!s}_finetuning{2!s}.txt'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), mode='wt', buffering=1) as fl:
            fl.write('\t'.join(['step', 'train_loss', 'valid_loss', 'train_noisy_loss', 'valid_noisy_loss', 'time']) + '\n')

            for epoch in range(d['current_epochs']):
                if stopearly:
                    break

                # randomize assignment of training examples to batches
                np.random.shuffle(batch_ids)
                batch_and_validation_ids[is_train] = batch_ids

                for batch in range(d['batches']):
                    training_step += 1

                    # select mini-batch
                    selected = batch_and_validation_ids == batch

                    # update weights
                    sess.run(train_fn, feed_dict={selection_mask: selected, noise_prob: d['noise_probability'], noise_stdv: d['noise_sigma']})

                    # record training and validation errors
                    if training_step == reporting_steps[i]:
                        # clean losses use zero noise; noisy losses use the training noise
                        train_losses[i] = sess.run(loss, feed_dict={selection_mask: is_train, noise_prob: 0, noise_stdv: 0})
                        train_noisy_losses[i] = sess.run(loss, feed_dict={selection_mask: is_train, noise_prob: d['noise_probability'], noise_stdv: d['noise_sigma']})
                        valid_losses[i] = sess.run(loss, feed_dict={selection_mask: is_valid, noise_prob: 0, noise_stdv: 0})
                        valid_noisy_losses[i] = sess.run(loss, feed_dict={selection_mask: is_valid, noise_prob: d['noise_probability'], noise_stdv: d['noise_sigma']})
                        print('step:{0:1.6g}, train loss:{1:1.3g}, valid loss:{2:1.3g}, train noisy loss:{3:1.3g},valid noisy loss:{4:1.3g}, time:{5:1.6g}'.format(reporting_steps[i], train_losses[i], valid_losses[i], train_noisy_losses[i], valid_noisy_losses[i], time.time() - starttime), flush=True)
                        fl.write('\t'.join(['{0:1.6g}'.format(x) for x in [reporting_steps[i], train_losses[i], valid_losses[i], train_noisy_losses[i], valid_noisy_losses[i], time.time() - starttime]]) + '\n')

                        # save current weights, reconstructions, and projections
                        if training_step >= d['startsavingstep'] or training_step == reporting_steps[-1]:
                            with open('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], training_step), 'wb') as fw:
                                pickle.dump((sess.run(global_step), sess.run(W), sess.run(bencode), sess.run(bdecode)), fw)

                            # stop early if overfitting
                            # NOTE(review): overfitting bookkeeping runs only once
                            # checkpoints are being saved, so rollback always has a
                            # saved intermediate pickle to fall back on — confirm
                            # this nesting matches the intended schedule.
                            # first checkpoint compares against inf, so it never scores
                            if valid_losses[i] >= 1.01 * (np.insert(valid_losses[:i], 0, np.inf).min()):
                                overfitting_score += 1
                            else:
                                overfitting_score = 0
                            if overfitting_score == d['overfitting_score_max']:
                                stopearly = True
                                print('stopping early!', flush=True)
                                break

                        i += 1

    # end tensorflow session
    print('closing tensorflow session...', flush=True)

    # ROLL BACK IF OVERFITTING
    if stopearly:
        print('rolling back...', flush=True)
        # truncate the reporting arrays to the checkpoints actually recorded
        reporting_steps = reporting_steps[:i + 1]
        train_losses = train_losses[:i + 1]
        valid_losses = valid_losses[:i + 1]
        train_noisy_losses = train_noisy_losses[:i + 1]
        valid_noisy_losses = valid_noisy_losses[:i + 1]
        # selected_step = max([reporting_steps[i-d['overfitting_score_max']], d['startsavingstep']])
    else:
        print('completed all training steps...', flush=True)
        # selected_step = reporting_steps[-1]
    # pick the saved step with minimum validation loss, clamped to
    # [startsavingstep, last reporting step] so a saved pickle exists
    selected_step = min([max([reporting_steps[np.argmin(valid_losses)], d['startsavingstep']]), reporting_steps[-1]])
    print('selected step:{0}...'.format(selected_step), flush=True)

    # SAVE RESULTS
    print('saving results...', flush=True)
    with open('{0}/optimization_path_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'wb') as fw:
        pickle.dump({'reporting_steps': reporting_steps, 'valid_losses': valid_losses, 'train_losses': train_losses, 'valid_noisy_losses': valid_noisy_losses, 'train_noisy_losses': train_noisy_losses}, fw)
    # for the final model, copy (keep intermediates); otherwise move (clean up)
    if d['current_dimensions'] == d['all_dimensions'] and (not d['use_finetuning'] or d['current_finetuning_run'] > 0):
        shutil.copyfile('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']))
    else:
        shutil.move('{0}/intermediate_variables_layer{1!s}_finetuning{2!s}_step{3!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run'], selected_step), '{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']))
    with open('{0}/variables_layer{1!s}_finetuning{2!s}.pickle'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), 'rb') as fr:
        W, Be, Bd = pickle.load(fr)[1:]  # global_step, W, bencode, bdecode
    recon = {}
    embed = {}
    error = {}
    embed_preactivation = {}
    for partition in partitions:
        # apply the selected weights with the numpy implementation
        recon[partition], embed[partition], error[partition] = sdae_apply_functions.encode_and_decode(dataset[partition], W, Be, Bd, activation_function['np'], d['current_apply_activation_to_embedding'], d['apply_activation_to_output'], return_embedding=True, return_reconstruction_error=True)
        embed_preactivation[partition] = sdae_apply_functions.encode(dataset[partition], W, Be, activation_function['np'], apply_activation_to_embedding=False)
        print('{0} reconstruction error: {1:1.3g}'.format(partition, error[partition]), flush=True)
        # persist embeddings only for the final (fully trained) model
        if d['current_dimensions'] == d['all_dimensions'] and (not d['use_finetuning'] or d['current_finetuning_run'] > 0):
            datasetIO.save_datamatrix('{0}/{1}_embedding_layer{2!s}_finetuning{3!s}.pickle'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed[partition])
            datasetIO.save_datamatrix('{0}/{1}_embedding_layer{2!s}_finetuning{3!s}.txt.gz'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed[partition])
            if d['current_apply_activation_to_embedding']:
                datasetIO.save_datamatrix('{0}/{1}_embedding_preactivation_layer{2!s}_finetuning{3!s}.pickle'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed_preactivation[partition])
                datasetIO.save_datamatrix('{0}/{1}_embedding_preactivation_layer{2!s}_finetuning{3!s}.txt.gz'.format(d['output_path'], partition, d['current_hidden_layer'], d['current_finetuning_run']), embed_preactivation[partition])

    # PLOT LOSS
    print('plotting loss...', flush=True)
    fg, ax = plt.subplots(1, 1, figsize=(3.25, 2.25))
    ax.set_position([0.55 / 3.25, 0.45 / 2.25, 2.6 / 3.25, 1.7 / 2.25])
    ax.semilogx(reporting_steps, train_losses, ':r', linewidth=1, label='train')
    ax.semilogx(reporting_steps, valid_losses, '-g', linewidth=1, label='valid')
    ax.semilogx(reporting_steps, train_noisy_losses, '--b', linewidth=1, label='train,noisy')
    ax.semilogx(reporting_steps, valid_noisy_losses, '-.k', linewidth=1, label='valid,noisy')
    ax.legend(loc='best', fontsize=8)
    ax.set_ylabel('loss', fontsize=8)
    ax.set_xlabel('steps (selected step:{0!s})'.format(selected_step), fontsize=8)
    ax.set_xlim(reporting_steps[0] - 1, reporting_steps[-1] + 1)
    # ax.set_ylim(0, 1)
    ax.tick_params(axis='both', which='major', left='on', right='on', bottom='on', top='off', labelleft='on', labelright='off', labelbottom='on', labeltop='off', labelsize=8)
    fg.savefig('{0}/optimization_path_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600)
    plt.close()

    # PLOT RECONSTRUCTIONS
    # scatter of original vs reconstructed values, one panel per example
    print('plotting reconstructions...', flush=True)
    x_valid = dataset['valid'].matrix[:d['reconstruction_rows'] * d['reconstruction_cols'], :]
    xr_valid = recon['valid'].matrix[:d['reconstruction_rows'] * d['reconstruction_cols'], :]
    if x_valid.shape[1] > 1000:
        # cap at 1000 features per panel to keep the figure tractable
        x_valid = x_valid[:, :1000]
        xr_valid = xr_valid[:, :1000]
    lb = np.append(x_valid, xr_valid, 1).min(1)
    ub = np.append(x_valid, xr_valid, 1).max(1)
    fg, axs = plt.subplots(d['reconstruction_rows'], d['reconstruction_cols'], figsize=(6.5, 3.25))
    for i, ax in enumerate(axs.reshape(-1)):
        ax.plot(x_valid[i, :], xr_valid[i, :], 'ok', markersize=0.5, markeredgewidth=0)
        ax.set_ylim(lb[i], ub[i])
        ax.set_xlim(lb[i], ub[i])
        ax.tick_params(axis='both', which='major', left='off', right='off', bottom='off', top='off', labelleft='off', labelright='off', labelbottom='off', labeltop='off', pad=4)
        ax.set_frame_on(False)
        ax.axvline(lb[i], linewidth=1, color='k')
        ax.axvline(ub[i], linewidth=1, color='k')
        ax.axhline(lb[i], linewidth=1, color='k')
        ax.axhline(ub[i], linewidth=1, color='k')
    fg.savefig('{0}/reconstructions_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=1200)
    plt.close()

    # PLOT 2D EMBEDDING
    # only possible when the bottleneck is 2-dimensional and this is the final model
    if d['current_dimensions'][-1] == 2 and (not d['use_finetuning'] or d['current_finetuning_run'] > 0):
        print('plotting 2d embedding...', flush=True)
        fg, ax = plt.subplots(1, 1, figsize=(6.5, 6.5))
        ax.set_position([0.15 / 6.5, 0.15 / 6.5, 6.2 / 6.5, 6.2 / 6.5])
        ax.plot(embed['train'].matrix[:, 0], embed['train'].matrix[:, 1], 'ok', markersize=2, markeredgewidth=0, alpha=0.5, zorder=0)
        ax.plot(embed['valid'].matrix[:, 0], embed['valid'].matrix[:, 1], 'or', markersize=2, markeredgewidth=0, alpha=1.0, zorder=1)
        ax.tick_params(axis='both', which='major', bottom='off', top='off', labelbottom='off', labeltop='off', left='off', right='off', labelleft='off', labelright='off', pad=4)
        ax.set_frame_on(False)
        fg.savefig('{0}/embedding_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600)
        plt.close()
        if d['current_apply_activation_to_embedding']:
            fg, ax = plt.subplots(1, 1, figsize=(6.5, 6.5))
            ax.set_position([0.15 / 6.5, 0.15 / 6.5, 6.2 / 6.5, 6.2 / 6.5])
            ax.plot(embed_preactivation['train'].matrix[:, 0], embed_preactivation['train'].matrix[:, 1], 'ok', markersize=2, markeredgewidth=0, alpha=0.5, zorder=0)
            ax.plot(embed_preactivation['valid'].matrix[:, 0], embed_preactivation['valid'].matrix[:, 1], 'or', markersize=2, markeredgewidth=0, alpha=1.0, zorder=1)
            ax.tick_params(axis='both', which='major', bottom='off', top='off', labelbottom='off', labeltop='off', left='off', right='off', labelleft='off', labelright='off', pad=4)
            ax.set_frame_on(False)
            fg.savefig('{0}/embedding_preactivation_layer{1!s}_finetuning{2!s}.png'.format(d['output_path'], d['current_hidden_layer'], d['current_finetuning_run']), transparent=True, pad_inches=0, dpi=600)
            plt.close()

    print('done training phase.', flush=True)

    return d['current_hidden_layer'], d['current_finetuning_run'], d['current_epochs']
def main(design_dict_or_path):
    """Finish and persist an autoencoder design specification.

    Accepts either a path to a design JSON file or an already-loaded design
    dict. Derives and confirms the input/output paths, the layer dimensions,
    the per-phase reporting steps, and the layer training schedule, then
    writes the completed design to '<output_path>/design.json'.

    Parameters:
        design_dict_or_path: str path to a design JSON file, or dict with the
            design fields (study_name, orientation, noise/initialization/
            optimizer settings, epoch counts, etc.).

    Returns:
        str: path of the saved design JSON file.

    Raises:
        ValueError: if the argument is neither a dict nor a string.

    Side effects: creates the output directory if needed, reads
    '<input_path>/valid.pickle' to determine the input dimension, prints
    progress, and writes design.json.

    NOTE(review): the messages below still say 'finish_design' — presumably
    this function was renamed from finish_design to main; also another
    top-level `main` is defined earlier in this file, so if both share a
    module this definition shadows it. Left unchanged to avoid breaking
    log consumers and callers.
    """
    # load design
    # isinstance instead of type(...) == ... : idiomatic, and also accepts
    # str/dict subclasses (backward-compatible generalization)
    if isinstance(design_dict_or_path, str):
        print('loading design...', flush=True)
        print('base_design_path: {0}'.format(design_dict_or_path), flush=True)
        with open(design_dict_or_path, mode='rt', encoding='utf-8', errors='surrogateescape') as fr:
            d = json.load(fr)
    elif isinstance(design_dict_or_path, dict):
        d = design_dict_or_path
    else:
        raise ValueError('input to finish_design must be dict or string')

    # create paths
    # output path encodes every hyperparameter so distinct designs never collide
    print('creating input and output paths...', flush=True)
    d['input_path'] = 'data/prepared_data/{0}/{1}'.format(d['study_name'], d['orientation'])
    d['output_path'] = 'results/autoencoder/{0}/{1}/hl{2!s}_md{3!s}_fhlsf{4!s}_np{5!s}_ns{6!s}_nd{7}_no{8}_is{9!s}_id{10}_lr{11!s}_eps{12!s}_bf{13!s}_pte{14!s}_fte{15!s}_uft{16!s}_slt{17!s}_ubn{18!s}_aao{19!s}_aae{20!s}_{21}'\
        .format(d['study_name'], d['orientation'], d['hidden_layers'], d['min_dimension'], d['first_hidden_layer_scaling_factor'], d['noise_probability'], d['noise_sigma'], d['noise_distribution'], d['noise_operation'], d['initialization_sigma'], d['initialization_distribution'], d['learning_rate'], d['epsilon'], d['batch_fraction'], d['pretraining_epochs'], d['finetuning_epochs'], d['use_finetuning'], d['skip_layerwise_training'], d['use_batchnorm'], d['apply_activation_to_output'], d['apply_activation_to_embedding'], d['activation_function'])

    # confirm paths
    print('confirm paths...', flush=True)
    print('input path: {0}'.format(d['input_path']), flush=True)
    print('output path: {0}'.format(d['output_path']), flush=True)

    # create output directory
    print('creating output directory...', flush=True)
    if not os.path.exists(d['output_path']):
        os.makedirs(d['output_path'])

    # confirm dimensions
    # the input dimension is taken from the validation data matrix on disk
    print('confirm dimensions...', flush=True)
    with open('{0}/valid.pickle'.format(d['input_path']), 'rb') as fr:
        d['input_dimension'] = pickle.load(fr).shape[1]
    d['all_dimensions'] = sdae_design_functions.get_layer_dimensions(d['input_dimension'], d['min_dimension'], d['hidden_layers'], d['first_hidden_layer_scaling_factor'])
    print('all_dimensions:', d['all_dimensions'], flush=True)

    # confirm reporting steps
    # preview the checkpoint schedule for each training phase
    print('confirm reporting steps...', flush=True)
    for phase, phase_epochs in [('pretraining', d['pretraining_epochs']), ('finetuning', d['finetuning_epochs']), ('last_layer', d['last_layer_epochs'])]:
        phase_steps = int(phase_epochs / d['batch_fraction'])  # steps per phase = epochs * batches-per-epoch
        reporting_steps = sdae_design_functions.create_reporting_steps(phase_steps, d['firstcheckpoint'], d['maxstepspercheckpoint'])
        print('{0}_reporting_steps:'.format(phase), reporting_steps, flush=True)

    # confirm layer training schedule
    print('confirm layer training schedule...', flush=True)
    d['training_schedule'] = sdae_design_functions.create_layer_training_schedule(d['hidden_layers'], d['pretraining_epochs'], d['finetuning_epochs'], d['last_layer_epochs'], d['use_finetuning'])
    fields = sorted(list(d['training_schedule'][0].keys()))
    for phase in d['training_schedule']:
        print(', '.join(['{0}:{1!s}'.format(field, phase[field]) for field in fields]), flush=True)

    # save design
    print('saving design...', flush=True)
    design_path = '{0}/design.json'.format(d['output_path'])
    print('design_path: {0}'.format(design_path), flush=True)
    with open(design_path, mode='wt', encoding='utf-8', errors='surrogateescape') as fw:
        json.dump(d, fw, indent=2)

    print('done finish_design.', flush=True)

    return design_path