def predict_validation(self, images, labels, phase=0, initialize_new=True):
    """
    Function predict_validation
    Predict the data in batches, as it could be too big to fit into GPU
    memory at once.

    Args:
        images (np array): Images to predict
        labels (np array): Labels for prediction
        phase (int): Input for bl_training (training/testing phase)
        initialize_new (boolean): Initialize the non-shared variables anew

    Returns:
        acc (float): Accuracy of the prediction
    """
    sess = self.sess
    loss = self.loss
    input_data = self.input_data
    y_output = self.y_output
    x = self.x
    bl_training = self.bl_training
    x_softmax = self.x_softmax
    batch_size = self.batch_size
    path = self.path
    x_images = images
    y_label = labels.reshape((-1, 1))
    if initialize_new:
        # Reinitialize only the variables that are not shared across children
        variables_to_initialize = [
            x for x in tf.global_variables()
            if not (x.name in [y.name for y in self.variables_not_initialize])
        ]
        sess.run(tf.variables_initializer(variables_to_initialize))
    # Predict batch by batch, then aggregate predictions and losses
    y_final, x_pred_final, loss_final = self.predict_batch(
        sess, [loss, x_softmax], input_data, y_output, bl_training,
        x_images, y_label, batch_size, phase)
    y_final = np.asarray(y_final)
    x_pred_final = np.asarray(x_pred_final)
    acc = np.mean(x_pred_final.reshape((-1, 1)) == y_final.reshape((-1, 1)))
    loss_final = np.mean(loss_final)
    log_to_textfile(
        self.path + 'log.txt',
        'Validation loss: ' + str(loss_final) + ' Validation acc: ' +
        str(acc) + '\n')
    return acc
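
# Illustrative sketch (not part of the original class): the core idea of
# predict_validation is to evaluate in fixed-size batches and aggregate
# accuracy afterwards, so the full validation set never has to fit into GPU
# memory at once. The helper below mirrors that pattern in pure NumPy;
# `predict_fn`, `images`, and `labels` are hypothetical stand-ins for the
# session call and the data arrays.
import numpy as np

def batched_accuracy_sketch(predict_fn, images, labels, batch_size):
    """Accumulate per-batch predictions, then compare against the labels."""
    preds = []
    for start in range(0, images.shape[0], batch_size):
        batch = images[start:start + batch_size]
        preds.append(predict_fn(batch))  # e.g. argmax of the softmax output
    preds = np.concatenate(preds).reshape((-1, 1))
    return np.mean(preds == labels.reshape((-1, 1)))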
def train(self, full_examples, batch_size, learning_rate, coach_filename):
    """
    Function train
    Train the LSTM model (one training step).

    Args:
        full_examples (list): Replay history
        batch_size (int): Batch size for training the LSTM
        learning_rate (float): Learning rate for the LSTM model
        coach_filename (str): Filename of the Coach log to which results are written

    Returns:
        before_total_loss (float): Total loss for the current batch (before the update)
        before_loss_value (float): Loss of the state value for the current batch (before the update)
        before_loss_prob (float): Loss of the probability distribution for the current batch (before the update)
    """
    # Sample a random minibatch from the replay history
    idx = np.asarray(range(len(full_examples)))
    np.random.shuffle(idx)
    idx = idx[range(min(batch_size, idx.shape[0]))]
    states = []
    probs = []
    acc = []
    perc = []
    epoch = []
    for i in idx:
        states.append(full_examples[i][0])
        probs.append(full_examples[i][1])
        acc.append(full_examples[i][2])
        perc.append(full_examples[i][3])
        epoch.append(full_examples[i][4])
    acc = np.asarray(acc).reshape(-1, 1)
    x_batch, tmp_batch_size, seq_len = self.prepare_action_sequence(
        states, perc, epoch)
    # Losses before the gradient update (used for monitoring)
    before_total_loss, before_loss_value, before_loss_prob = self.sess.run(
        [self.total_loss, self.loss_value, self.loss_prob],
        feed_dict={
            self.x: x_batch,
            self.target_pis: probs,
            self.target_v: acc,
            self.seq_len: seq_len,
            self.batch_size: tmp_batch_size,
            self.learning_rate: learning_rate
        })
    # One gradient step
    _, total_loss, loss_value, loss_prob = self.sess.run(
        [self.train_step, self.total_loss, self.loss_value, self.loss_prob],
        feed_dict={
            self.x: x_batch,
            self.target_pis: probs,
            self.target_v: acc,
            self.seq_len: seq_len,
            self.batch_size: tmp_batch_size,
            self.learning_rate: learning_rate
        })
    log_to_textfile(
        self.path + 'logs.txt',
        'B LSTM Total loss: ' + str(before_total_loss) + ' B Loss prob: ' +
        str(before_loss_prob) + ' B Loss value: ' + str(before_loss_value) + '\n')
    log_to_textfile(
        self.path + 'logs.txt',
        'LSTM Total loss: ' + str(total_loss) + ' Loss prob: ' +
        str(loss_prob) + ' Loss value: ' + str(loss_value) + '\n')
    log_to_textfile(
        coach_filename,
        'B LSTM Total loss: ' + str(before_total_loss) + ' B Loss prob: ' +
        str(before_loss_prob) + ' B Loss value: ' + str(before_loss_value) + '\n')
    log_to_textfile(
        coach_filename,
        'LSTM Total loss: ' + str(total_loss) + ' Loss prob: ' +
        str(loss_prob) + ' Loss value: ' + str(loss_value) + '\n')
    return before_total_loss, before_loss_value, before_loss_prob
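
# Illustrative sketch (an assumption, not the original API): each replay
# entry has the layout [state, target_pis, accuracy, lstm_perc, lstm_epoch],
# and train() draws a uniform minibatch without replacement. The stand-alone
# function below reproduces just that sampling step with NumPy.
import numpy as np

def sample_replay_minibatch_sketch(full_examples, batch_size):
    """Return the columns of a random minibatch from the replay history."""
    n = len(full_examples)
    idx = np.random.permutation(n)[:min(batch_size, n)]
    states = [full_examples[i][0] for i in idx]
    probs = [full_examples[i][1] for i in idx]
    acc = np.asarray([full_examples[i][2] for i in idx]).reshape(-1, 1)
    perc = [full_examples[i][3] for i in idx]
    epoch = [full_examples[i][4] for i in idx]
    return states, probs, acc, perc, epoch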
def _apply_convcell(self, convcell, prev_cells, idx_nomberofconv,
                    idx_convcell, list_trainable_weights, childname,
                    bl_training, GLOBAL_WEIGHTS):
    """
    Function _apply_convcell
    Applies the convolutional cell to the neural network.

    Args:
        convcell (list): A list of operations for the convolutional cell
        prev_cells (list): A list of output tensors of previous convolutional
            cells, which can be used as input
        idx_nomberofconv (int): Index of the current convolutional cell batch
            (not used anymore)
        idx_convcell (int): Index of the current convolutional cell
        list_trainable_weights (list): List of trainable weights
        childname (str): The ChildModel name
        bl_training (tf placeholder): Defines the training/testing phase
        GLOBAL_WEIGHTS (dict): Dictionary with shared TensorFlow weights

    Returns:
        final_out (tensorflow tensor): Output tensor
        list_trainable_weights (list): List of trainable weights
        bl_x1_used (boolean): True if previous cell t-1 was used as input
        bl_x2_used (boolean): True if previous cell t-2 was used as input
    """
    # Indices of block outputs that are not consumed by a later block;
    # they are concatenated to form the cell output.
    not_used_prev_cells = [x + 2 for x in range(len(convcell))]
    bl_x1_used = False
    bl_x2_used = False
    print('-' * 10)
    print(idx_nomberofconv, idx_convcell)
    idx = 0
    for convconfig in convcell:
        with tf.variable_scope('incell_' + str(idx)):
            idx += 1
            log_to_textfile(self.path + 'log.txt', str(convconfig) + '\n')
            log_to_textfile(self.path + 'log.txt',
                            str(not_used_prev_cells) + '\n')
            if convconfig[0] == 0:
                bl_x1_used = True
            if convconfig[1] == 1:
                bl_x2_used = True
            with tf.variable_scope('incell_block1'):
                if 'conv_' in convconfig[2]:
                    # The shared weights exist in two variants, matching the
                    # two possible channel counts of the input tensor.
                    if (prev_cells[convconfig[0]].get_shape()[3].value ==
                            self.no_channels_start):
                        h_1 = tf.nn.conv2d(
                            prev_cells[convconfig[0]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[2]][0],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    if (prev_cells[convconfig[0]].get_shape()[3].value ==
                            2 * self.no_channels_start):
                        h_1 = tf.nn.conv2d(
                            prev_cells[convconfig[0]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[2]][1],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    h_1 = tf.layers.batch_normalization(h_1,
                                                        training=bl_training)
                    h_1 = tf.nn.relu(h_1)
                if 'convsep_' in convconfig[2]:
                    if (prev_cells[convconfig[0]].get_shape()[3].value ==
                            self.no_channels_start):
                        h_1 = tf.nn.separable_conv2d(
                            prev_cells[convconfig[0]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[2]][0],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[2]][1],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    if (prev_cells[convconfig[0]].get_shape()[3].value ==
                            2 * self.no_channels_start):
                        h_1 = tf.nn.separable_conv2d(
                            prev_cells[convconfig[0]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[2]][2],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[2]][3],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    h_1 = tf.layers.batch_normalization(h_1,
                                                        training=bl_training)
                    h_1 = tf.nn.relu(h_1)
                if 'id_' in convconfig[2]:
                    h_1 = prev_cells[convconfig[0]]
                if 'maxpool_3x3' == convconfig[2]:
                    h_1 = tf.layers.max_pooling2d(
                        prev_cells[convconfig[0]], pool_size=(3, 3),
                        strides=(1, 1), padding='same')
                if 'maxpool_5x5' == convconfig[2]:
                    h_1 = tf.layers.max_pooling2d(
                        prev_cells[convconfig[0]], pool_size=(5, 5),
                        strides=(1, 1), padding='same')
                if 'maxpool_7x7' == convconfig[2]:
                    h_1 = tf.layers.max_pooling2d(
                        prev_cells[convconfig[0]], pool_size=(7, 7),
                        strides=(1, 1), padding='same')
                if 'avgpool_3x3' == convconfig[2]:
                    h_1 = tf.layers.average_pooling2d(
                        prev_cells[convconfig[0]], pool_size=(3, 3),
                        strides=(1, 1), padding='same')
                if 'avgpool_5x5' == convconfig[2]:
                    h_1 = tf.layers.average_pooling2d(
                        prev_cells[convconfig[0]], pool_size=(5, 5),
                        strides=(1, 1), padding='same')
                if 'avgpool_7x7' == convconfig[2]:
                    h_1 = tf.layers.average_pooling2d(
                        prev_cells[convconfig[0]], pool_size=(7, 7),
                        strides=(1, 1), padding='same')
            with tf.variable_scope('incell_block2'):
                if 'convsep_' in convconfig[3]:
                    if (prev_cells[convconfig[1]].get_shape()[3].value ==
                            self.no_channels_start):
                        h_2 = tf.nn.separable_conv2d(
                            prev_cells[convconfig[1]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[3]][0],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[3]][1],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    if (prev_cells[convconfig[1]].get_shape()[3].value ==
                            2 * self.no_channels_start):
                        h_2 = tf.nn.separable_conv2d(
                            prev_cells[convconfig[1]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[3]][2],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[3]][3],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    h_2 = tf.layers.batch_normalization(
                        h_2, training=bl_training,
                        name=str(idx_convcell) + '_' + str(idx) + '_batch2' +
                        childname)
                    h_2 = tf.nn.relu(h_2)
                if 'conv_' in convconfig[3]:
                    if (prev_cells[convconfig[1]].get_shape()[3].value ==
                            self.no_channels_start):
                        h_2 = tf.nn.conv2d(
                            prev_cells[convconfig[1]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[3]][0],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    if (prev_cells[convconfig[1]].get_shape()[3].value ==
                            2 * self.no_channels_start):
                        h_2 = tf.nn.conv2d(
                            prev_cells[convconfig[1]],
                            GLOBAL_WEIGHTS[0][1][0][convconfig[3]][1],
                            [1, 1, 1, 1], "SAME", data_format="NHWC")
                    h_2 = tf.layers.batch_normalization(h_2,
                                                        training=bl_training)
                    h_2 = tf.nn.relu(h_2)
                if 'id_' in convconfig[3]:
                    h_2 = prev_cells[convconfig[1]]
                if 'maxpool_3x3' == convconfig[3]:
                    h_2 = tf.layers.max_pooling2d(
                        prev_cells[convconfig[1]], pool_size=(3, 3),
                        strides=(1, 1), padding='same')
                if 'maxpool_5x5' == convconfig[3]:
                    h_2 = tf.layers.max_pooling2d(
                        prev_cells[convconfig[1]], pool_size=(5, 5),
                        strides=(1, 1), padding='same')
                if 'maxpool_7x7' == convconfig[3]:
                    h_2 = tf.layers.max_pooling2d(
                        prev_cells[convconfig[1]], pool_size=(7, 7),
                        strides=(1, 1), padding='same')
                if 'avgpool_3x3' == convconfig[3]:
                    h_2 = tf.layers.average_pooling2d(
                        prev_cells[convconfig[1]], pool_size=(3, 3),
                        strides=(1, 1), padding='same')
                if 'avgpool_5x5' == convconfig[3]:
                    h_2 = tf.layers.average_pooling2d(
                        prev_cells[convconfig[1]], pool_size=(5, 5),
                        strides=(1, 1), padding='same')
                if 'avgpool_7x7' == convconfig[3]:
                    h_2 = tf.layers.average_pooling2d(
                        prev_cells[convconfig[1]], pool_size=(7, 7),
                        strides=(1, 1), padding='same')
            if convconfig[4] == 'add':
                # 'add' requires both inputs to have the same channel count;
                # project with a 1x1 convolution where necessary.
                if h_1.get_shape()[3].value != self.no_channels_start:
                    w = get_weights(
                        childname + '_' + str('tmpid1_') +
                        str(idx_nomberofconv) + '_' + str(idx_convcell),
                        [1, 1, h_1.get_shape()[3].value,
                         self.no_channels_start])
                    list_trainable_weights.append(w)
                    h_1 = tf.nn.conv2d(h_1, w, [1, 1, 1, 1], "SAME")
                    h_1 = tf.layers.batch_normalization(h_1,
                                                        training=bl_training)
                    h_1 = tf.nn.relu(h_1)
                if h_2.get_shape()[3].value != self.no_channels_start:
                    w = get_weights(
                        childname + '_' + str('tmpid2_') +
                        str(idx_nomberofconv) + '_' + str(idx_convcell),
                        [1, 1, h_2.get_shape()[3].value,
                         self.no_channels_start])
                    list_trainable_weights.append(w)
                    h_2 = tf.nn.conv2d(h_2, w, [1, 1, 1, 1], "SAME")
                    h_2 = tf.layers.batch_normalization(h_2,
                                                        training=bl_training)
                    h_2 = tf.nn.relu(h_2)
                h_out = tf.add(h_1, h_2)
            elif convconfig[4] == 'concat':
                h_out = tf.concat([h_1, h_2], axis=-1)
            else:
                print('Error')
            prev_cells.append(h_out)
            # Only real convolution ops carry shared trainable weights
            if not ('id_' in convconfig[2]) and not ('pool' in convconfig[2]):
                list_trainable_weights.append(
                    GLOBAL_WEIGHTS[0][1][0][convconfig[2]])
            if not ('id_' in convconfig[3]) and not ('pool' in convconfig[3]):
                list_trainable_weights.append(
                    GLOBAL_WEIGHTS[0][1][0][convconfig[3]])
            # Inputs consumed by this block can no longer be leaf outputs
            if convconfig[0] in not_used_prev_cells:
                not_used_prev_cells.remove(convconfig[0])
            if convconfig[1] in not_used_prev_cells:
                not_used_prev_cells.remove(convconfig[1])
    with tf.variable_scope('celloutput_' + str(idx_convcell)):
        # Concatenate all leaf outputs and project to the target width
        final_out = tf.concat([prev_cells[i] for i in not_used_prev_cells],
                              axis=-1)
        shp_in = final_out.get_shape()[3].value
        w = get_weights(
            childname + '_' + str('final_') + str(idx_nomberofconv) + '_' +
            str(idx_convcell), [1, 1, shp_in, self.global_param[4]])
        list_trainable_weights.append(w)
        final_out = tf.nn.conv2d(final_out, w, [1, 1, 1, 1], "SAME")
        final_out = tf.layers.batch_normalization(final_out,
                                                  training=bl_training)
        final_out = tf.nn.relu(final_out)
    return final_out, list_trainable_weights, bl_x1_used, bl_x2_used
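
# Illustrative sketch (pure Python, hypothetical data): each block of a cell
# is encoded as (input_1, input_2, op_1, op_2, combine). Block outputs get
# indices 2, 3, ... behind the two cell inputs (indices 0 and 1); any block
# output that is never consumed as an input by a later block stays a "leaf"
# and is concatenated into the cell output, exactly as in _apply_convcell.
def leaf_outputs_sketch(convcell):
    """Return the indices of block outputs that form the cell output."""
    not_used = [b + 2 for b in range(len(convcell))]
    for config in convcell:
        for inp in (config[0], config[1]):
            if inp in not_used:
                not_used.remove(inp)
    return not_used

# Example: block 0 reads the two cell inputs, block 1 reads block 0's output
# (index 2), so only index 3 remains a leaf.
assert leaf_outputs_sketch([(0, 1, 'conv_3x3', 'id_', 'add'),
                            (2, 0, 'maxpool_3x3', 'id_', 'concat')]) == [3]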
def couch_train(self, images, labels, max_noimprovements, max_iteration,
                lr_iteration_step, max_epochs, no_global_variables=False,
                safe_model=False):
    """
    Function couch_train
    Train the ChildModel until the maximum number of epochs/iterations is
    reached or the validation accuracy stops improving.

    Args:
        images (dict): Images used for training the ChildModel
        labels (dict): Labels used for training the ChildModel
        max_noimprovements (int): Stop training the ChildModel if it does not
            improve over this number of epochs
        max_iteration (int): Maximal number of training steps for the ChildModel
        lr_iteration_step (list): Iterations at which the learning rate is
            decayed in the ChildModel
        max_epochs (int): Number of epochs for training the ChildModel
        no_global_variables (boolean): If True, no global variables are
            trained in the ChildModel
        safe_model (boolean): If True, the ChildModel weights are saved
    """
    # Initialize / store Child attributes in local variables
    sess = self.sess
    loss = self.loss
    input_data = self.input_data
    y_output = self.y_output
    x = self.x
    bl_training = self.bl_training
    x_softmax = self.x_softmax
    batch_size = self.batch_size
    lr = 0.1
    learning_rate = self.learning_rate
    path = self.path
    list_trainable_weights = self.list_trainable_weights
    createPath(path)
    createPath(path + '/model')
    x_train = images['train']
    y_train = labels['train'].reshape((-1, 1))
    x_val = images['valid']
    y_val = labels['valid'].reshape((-1, 1))
    iteration = 0
    # Exclude shared (global) variables from training if requested
    if no_global_variables:
        list_trainable_weights = [
            x for x in list_trainable_weights
            if not (x.name in [y.name for y in self.variables_not_initialize])
        ]
    # Create the optimizer; batch-norm statistics are updated via UPDATE_OPS
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.MomentumOptimizer(
            learning_rate, 0.9, use_nesterov=True).minimize(
                loss, var_list=list_trainable_weights)
    variables_to_initialize = [
        x for x in tf.global_variables()
        if not (x.name in [y.name for y in self.variables_not_initialize])
    ]
    best_acc = 0
    best_epoch = 0
    bl_break = False
    saver = tf.train.Saver()
    # Initialize only new (non-shared) variables
    sess.run(tf.variables_initializer(variables_to_initialize))
    createPath(self.path + "/log/")
    createPath(self.path + "/model/")
    # Create a writer for TensorBoard
    writer = tf.summary.FileWriter(
        self.path + "/log/{}".format(self.childname), sess.graph)
    idx = np.asarray(range(x_train.shape[0]))
    counter_run_noimprovement = 0
    # Epoch iterations
    for e in range(max_epochs):
        if bl_break:
            break
        start = timer()
        no_batches = idx.shape[0] // batch_size + 1
        np.random.shuffle(idx)
        # One training loop over the epoch
        for batch in range(no_batches - 1):
            batch_idx = idx[(batch * batch_size):min(
                (1 + batch) * batch_size, x_train.shape[0])]
            x_train_batch = random_crop_and_flip(x_train[batch_idx])
            y_train_batch = y_train[batch_idx]
            o_loss, o_optimizer, o_x = sess.run(
                [loss, optimizer, x],
                feed_dict={
                    input_data: x_train_batch,
                    y_output: y_train_batch,
                    bl_training: 1,
                    learning_rate: lr
                })
            if iteration == max_iteration:
                bl_break = True
            iteration += 1
            # Learning rate decay
            if iteration in lr_iteration_step:
                lr = 0.1 * lr
                log_to_textfile(self.path + 'log.txt',
                                'New learning rate: ' + str(lr) + '\n')
        # Predict the validation set
        y_val_final, x_val_pred_final, val_o_loss_final = self.predict_batch(
            sess, [loss, x_softmax], input_data, y_output, bl_training,
            x_val, y_val, batch_size)
        y_val_final = np.asarray(y_val_final)
        x_val_pred_final = np.asarray(x_val_pred_final)
        val_acc = np.mean(
            x_val_pred_final.reshape((-1, 1)) == y_val_final.reshape((-1, 1)))
        val_o_loss_mean = np.mean(val_o_loss_final)
        # Save the best model
        if best_acc < val_acc:
            log_to_textfile(self.path + 'log.txt', 'Save best model' + '\n')
            best_acc = val_acc
            best_epoch = e
            counter_run_noimprovement = 0
            if safe_model:
                saver.save(sess, self.path + 'model/{}'.format(self.childname))
        else:
            counter_run_noimprovement = counter_run_noimprovement + 1
        end = timer()
        total_time = end - start
        log_to_textfile(
            self.path + 'log.txt',
            'Time: ' + str(total_time) + ' Epoch: ' + str(e) +
            ' Iteration: ' + str(iteration) + ' No Improv: ' +
            str(counter_run_noimprovement) + ' Val Loss: ' +
            str(val_o_loss_mean) + ' Best Acc: ' + str(best_acc) +
            ' Val Acc: ' + str(val_acc) + '\n')
        # Stop training if the model does not improve over a certain number
        # of epochs
        if counter_run_noimprovement > max_noimprovements:
            bl_break = True
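
# Illustrative sketch of the learning-rate schedule in couch_train: the rate
# starts at 0.1 and is multiplied by 0.1 each time the global iteration
# counter passes an entry of lr_iteration_step. The cumulative form below is
# a stand-alone equivalent (the step values are hypothetical examples).
def decayed_lr_sketch(iteration, lr_iteration_step, base_lr=0.1):
    """Learning rate in effect after all decay steps up to `iteration`."""
    lr = base_lr
    for step in lr_iteration_step:
        if iteration >= step:
            lr = 0.1 * lr
    return lr

assert decayed_lr_sketch(0, [100, 200]) == 0.1
assert abs(decayed_lr_sketch(150, [100, 200]) - 0.01) < 1e-12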
def build_model(self, GLOBAL_WEIGHTS):
    """
    Function build_model
    Build the ChildModel graph.

    Args:
        GLOBAL_WEIGHTS (dict): Dictionary with shared TensorFlow weights
    """
    input_data = tf.placeholder(shape=[None, 32, 32, 3],
                                dtype=tf.float32,
                                name='x_input')
    y_output = tf.placeholder(shape=[None, 1],
                              dtype=tf.float32,
                              name='y_output')
    bl_training = tf.placeholder(tf.bool, name='training')
    learning_rate = tf.placeholder(tf.float32, shape=[])
    N_numberofconv = self.global_param[0]
    N_convcells = self.global_param[1]
    B = self.global_param[2]
    action_space = self.global_param[3]
    no_channels_start = self.global_param[4]
    childname = self.childname
    convcell = self.convcell
    list_trainable_weights = []
    list_concat = []
    # Stem: first convolution with shared weights
    x_1 = tf.nn.conv2d(input_data, GLOBAL_WEIGHTS[-1][0], [1, 1, 1, 1],
                       "SAME", name=childname + '_first_conv')
    x_1 = tf.layers.batch_normalization(x_1,
                                        training=bl_training,
                                        name=childname + '_first_batch')
    x_1 = tf.nn.relu(x_1, name=childname + '_first_relu')
    x_2 = x_1
    list_trainable_weights.append(GLOBAL_WEIGHTS[-1][0])
    # Stack the convolutional cells; x_1 and x_2 hold the outputs of cells
    # t-1 and t-2
    for i in range(N_convcells):
        with tf.variable_scope('cell_' + str(i)):
            final_out, list_trainable_weights, x_1_used, x_2_used = \
                self._apply_convcell(convcell, [x_1, x_2], 1, i,
                                     list_trainable_weights, childname,
                                     bl_training, GLOBAL_WEIGHTS)
            log_to_textfile(self.path + 'log.txt', str(x_1_used) + '\n')
            log_to_textfile(self.path + 'log.txt', str(x_2_used) + '\n')
            # Cell inputs that were never consumed are concatenated before
            # the classification head
            if not x_1_used:
                list_concat.append(x_1)
            if not x_2_used:
                list_concat.append(x_2)
            x_2 = x_1
            x_1 = final_out
    list_concat.append(final_out)
    log_to_textfile(self.path + 'log.txt', str(list_concat) + '\n')
    if len(list_concat) > 1:
        x = tf.concat(list_concat, axis=-1)
    else:
        x = final_out
    # Global average pooling followed by the classification layer
    x = tf.reduce_mean(x, axis=[1, 2])
    log_to_textfile(self.path + 'log.txt', str(x) + '\n')
    x = tf.layers.dense(x, 10, kernel_regularizer=self.regularizer)
    x_softmax = tf.nn.softmax(x)
    y = tf.one_hot(tf.cast(y_output, tf.int32), self.num_classes)
    l2_loss = tf.losses.get_regularization_loss()
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=x, labels=y))
    loss += l2_loss
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    list_trainable_weights += [
        x for x in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        if 'dense' in str(x.name)
    ]
    self.loss = loss
    self.input_data = input_data
    self.y_output = y_output
    self.x = x
    self.bl_training = bl_training
    self.x_softmax = x_softmax
    self.learning_rate = learning_rate
    self.list_trainable_weights = list_trainable_weights
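
# Illustrative sketch (hypothetical string tensors): build_model feeds every
# cell the outputs of the two preceding cells. After each cell the
# bookkeeping is simply x_2 <- x_1 and x_1 <- cell output, so cell i sees
# (output of cell i-1, output of cell i-2).
def cell_input_rotation_sketch(n_cells):
    x_1, x_2 = 'stem', 'stem'
    inputs_per_cell = []
    for i in range(n_cells):
        inputs_per_cell.append((x_1, x_2))
        final_out = 'cell_' + str(i)
        x_2 = x_1
        x_1 = final_out
    return inputs_per_cell

assert cell_input_rotation_sketch(3) == [('stem', 'stem'),
                                         ('cell_0', 'stem'),
                                         ('cell_1', 'cell_0')]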
def search(self, state, ChildModel, sess, images, labels, child_output,
           batch_size, global_ops, global_param, childname,
           variables_not_initialize, GLOBAL_WEIGHTS, max_noimprovements,
           max_iteration, lr_iteration_step, max_epochs, no_global_variables,
           lstm_perc, lstm_epoch, lstm_model, use_uniform, alphax_version,
           debug_no_trainig):
    """
    Function search
    Executes an action search for a given state based on MCTS and updates
    all values.

    Args:
        state (tuple): Starting state for the MCTS search
        ChildModel (class ChildModel): Class of the ChildModel used for
            training a full state
        sess (tensorflow session): TensorFlow session used for training the ChildModel
        images (dict): Images used for training the ChildModel
        labels (dict): Labels used for training the ChildModel
        child_output (str): Child output path
        batch_size (int): Batch size for training the child
        global_ops (list): Not used anymore
        global_param (list): List with some global parameters
        childname (str): Name of the ChildModel
        variables_not_initialize (list): List of global variables which
            should not be initialized
        GLOBAL_WEIGHTS (dict): Dictionary with shared TensorFlow weights
        max_noimprovements (int): Stop training the ChildModel if it does not
            improve over this number of epochs
        max_iteration (int): Maximal number of training steps for the ChildModel
        lr_iteration_step (list): Iterations at which the learning rate is
            decayed in the ChildModel
        max_epochs (int): Number of epochs for training the ChildModel
        no_global_variables (boolean): If True, no global variables are
            trained in the ChildModel
        lstm_perc (int): Percentage of data used for the child model
        lstm_epoch (int): Number of epochs for the child model
        lstm_model (object of class LSTMModel): LSTM model for predicting v and Ps
        use_uniform (boolean): If True, a uniform distribution is used for
            initializing Ps
        alphax_version (boolean): If True, the alternative MCTS formula
            (based on the AlphaX paper) is used
        debug_no_trainig (boolean): If True, no ChildModels are trained and
            only a dummy value is returned

    Returns:
        v (float): Value of the requested state s
        bl_trained (boolean): True if a new ChildModel was trained
        tmp_s (tuple): Selected action sequence
    """
    s = state
    if len(s) // 5 == self.B:
        # Full state sequence is discovered
        if s not in self.Es:
            # s hasn't been discovered yet - train a ChildModel
            convcell = s_to_convcell(s, self.B, self.action_space,
                                     self.combine_op)
            createPath(child_output)
            log_to_textfile(
                self.filename, 'MCTS State: ' + str(s) + ' MCTS cell: ' +
                str(convcell) + '\n')
            if debug_no_trainig:
                return (0.2, True, s)
            model = ChildModel(sess, images, labels, child_output,
                               batch_size, convcell, global_ops,
                               global_param, childname,
                               variables_not_initialize,
                               self.no_channels_start)
            model.build_model(GLOBAL_WEIGHTS)
            model.couch_train(images, labels, max_noimprovements,
                              max_iteration, lr_iteration_step, max_epochs,
                              no_global_variables)
            acc = model.predict_validation(images['valid'], labels['valid'],
                                           0, initialize_new=False)
            self.Es[s] = acc
            return acc, True, s
        if self.Es[s] != 0:
            # s was already discovered
            return self.Es[s], False, s
    if s not in self.Ps:
        # Initialize the policy by the neural network
        ps, v = lstm_model.pred_action_sequence([s], [lstm_perc],
                                                [lstm_epoch])
        ps = ps[0]
        v = v[0]
        # Use a uniform distribution instead of the LSTM
        if use_uniform:
            ps = np.asarray(ps)
            ps = np.ones_like(ps)
        self.Ps[s] = ps
        # Mask out invalid actions
        valids = legal_action_from_seq(s, self.B, self.num_ops,
                                       self.num_combine, self.action_size)
        valids_hotn = np.sum(one_hot(np.asarray(valids), self.action_size),
                             axis=0)
        self.Ps[s] = self.Ps[s] * valids_hotn
        sum_Ps_s = np.sum(self.Ps[s])
        if sum_Ps_s > 0:
            # Renormalize
            self.Ps[s] /= sum_Ps_s
        else:
            print("All valid moves were masked, do workaround.")
        self.Vs[s] = valids_hotn
        self.Ns[s] = 0
        return v, False, s
    valids = self.Vs[s]
    cur_best = -float('inf')
    best_act = -1
    # Select the action with the highest upper confidence bound based on the
    # MCTS formula
    for a in range(self.action_size):
        if valids[a]:
            if (s, a) in self.Qsa:
                if alphax_version:
                    # AlphaX rule
                    u = self.Qsa[(s, a)] / self.Nsa[(s, a)] + \
                        2 * 200 * math.sqrt(2 * math.log10(self.Ns[s]) /
                                            (1 + self.Nsa[(s, a)]))
                else:
                    # AlphaGo rule
                    u = self.Qsa[(s, a)] + 5 * self.Ps[s][a] * math.sqrt(
                        self.Ns[s]) / (1 + self.Nsa[(s, a)])
            else:
                if alphax_version:
                    # AlphaX rule
                    u = 0 / self.Nsa[(s, a)] + 2 * 200 * math.sqrt(
                        2 * math.log10(self.Ns[s]) / (1 + self.Nsa[(s, a)]))
                else:
                    # AlphaGo rule (Q = 0 for unvisited pairs)
                    u = 5 * self.Ps[s][a] * math.sqrt(self.Ns[s] + EPS)
            if u > cur_best:
                # Keep the best action
                cur_best = u
                best_act = a
    a = best_act
    next_s = s + (a, )
    # Find the value of the best action a in state s
    v, bl_trained, tmp_s = self.search(
        next_s, ChildModel, sess, images, labels, child_output, batch_size,
        global_ops, global_param, childname, variables_not_initialize,
        GLOBAL_WEIGHTS, max_noimprovements, max_iteration, lr_iteration_step,
        max_epochs, no_global_variables, lstm_perc, lstm_epoch, lstm_model,
        use_uniform, alphax_version, debug_no_trainig)
    # Update the Q value of the state-action pair (s, a)
    if (s, a) in self.Qsa:
        if alphax_version:
            self.Qsa[(s, a)] = self.Qsa[(s, a)] + v
        else:
            self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] + v) / \
                (self.Nsa[(s, a)] + 1)
        self.Nsa[(s, a)] += 1
    else:
        self.Qsa[(s, a)] = v
        self.Nsa[(s, a)] = 1
    self.Ns[s] += 1
    return v, bl_trained, tmp_s
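
# Illustrative sketch of the two selection rules used in search(), written
# as stand-alone functions. q, n_sa, n_s, and p stand for the pair's value,
# its visit count, the parent visit count, and the prior; the constants
# (5 and 2 * 200) are taken from the code above.
import math

def alphago_u_sketch(q, p, n_s, n_sa):
    """PUCT-style bound: exploitation term plus prior-weighted exploration."""
    return q + 5 * p * math.sqrt(n_s) / (1 + n_sa)

def alphax_u_sketch(q_sum, n_s, n_sa):
    """UCB-style bound from the AlphaX paper; Qsa holds a running sum here,
    so it is divided by the visit count to obtain the mean value."""
    return q_sum / n_sa + 2 * 200 * math.sqrt(
        2 * math.log10(n_s) / (1 + n_sa))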
def train(self):
    """
    Function train
    Trains the Coach, the MCTS, and the ChildModels.
    """
    lstm_perc = 0.05
    lstm_epoch = 2
    unique_idx = 0
    total_results = []
    no_mcts = 0
    # First initialization of the MCTS tree
    mcts = MCTS(self.num_actions, self.B, len(self.action_space.keys()),
                len(self.combine_op.keys()), self.action_space,
                self.combine_op, self.path + '/Coach/' + 'logs.txt',
                self.no_channels_start)
    # Iteration of searches
    for i in range(self.num_learning_iteration):
        print(i)
        use_uniform = False
        if self.no_for_uniform > i:
            use_uniform = True
        #if i % (int(self.num_learning_iteration / len(range(self.N_convcells_range[1] - self.N_convcells_range[0])))) == 0:
        #    self.current_nconv = min(self.current_nconv + 1, self.N_convcells_range[1])
        self.global_param[1] = self.current_nconv
        #lstm_perc = random.uniform(self.search_perc_range[0], self.search_perc_range[1])
        #lstm_epoch = random.randint(self.search_epoch_range[0], self.search_epoch_range[1])
        lstm_perc = self.search_perc_range[0]
        lstm_epoch = self.search_epoch_range[0]
        # Only use a fraction lstm_perc of the data for training ChildModels
        N = self.images_total['train'].shape[0]
        idx = np.asarray(range(N))
        np.random.shuffle(idx)
        images = self.images_total.copy()
        labels = self.labels_total.copy()
        images['train'] = self.images_total['train'][
            idx[0:int(lstm_perc * N)], :, :, :].copy()
        labels['train'] = self.labels_total['train'][
            idx[0:int(lstm_perc * N)]].copy()
        # Logging to file
        log_to_textfile(
            self.path + '/Coach/' + 'logs.txt',
            '###################################### New Search ###################################### \n')
        log_to_textfile(
            self.path + '/Coach/' + 'logs.txt',
            'Trained final: ' + str(self.no_trained_final) +
            ' Trained in search:' + str(self.no_trained_in_search) + ' \n')
        log_to_textfile(
            self.path + '/Coach/' + 'logs.txt',
            'LSTM percentage: ' + str(lstm_perc) + ' LSTM Epoch:' +
            str(lstm_epoch) + ' Size:' + str(images['train'].shape) + ' \n')
        log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                        'NConvcell: ' + str(self.current_nconv) + ' \n')
        if (i > 0) and (i % self.new_mcts_every_i == 0) and \
                not self.alphax_version:
            # Reinitialize the MCTS tree after new_mcts_every_i iterations
            log_to_textfile(self.path + '/Coach/' + 'logs.txt', 'New MCTS \n')
            mcts = MCTS(self.num_actions, self.B,
                        len(self.action_space.keys()),
                        len(self.combine_op.keys()), self.action_space,
                        self.combine_op, self.path + '/Coach/' + 'logs.txt',
                        self.no_channels_start)
            no_mcts = no_mcts + 1
        # Logging to dictionary
        tmp123_result = {}
        tmp123_result['i'] = i
        tmp123_result['no_trained_final'] = self.no_trained_final
        tmp123_result['no_trained_search'] = self.no_trained_in_search
        tmp123_result['lstm_perc'] = lstm_perc
        tmp123_result['lstm_epoch'] = lstm_epoch
        tmp123_result['nconvcell'] = self.current_nconv
        tmp123_result['no_mcts'] = no_mcts
        # Initial state
        s = (0, 0, )
        examples = []
        tmp123_result['in_search'] = []
        log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                        'Use uniform: ' + str(use_uniform) + '\n')
        # After the MCTS is initialized, the first search requires two
        # expansions
        tmp_num_expansions = self.num_expansions
        if (i == 0) and (self.num_expansions == 1):
            tmp_num_expansions = 2
        # Run the search until a full architecture is found
        while len(s) // 5 != self.B:
            tmp_tmp_result = {}
            tmp_tmp_result['s'] = s
            # Number of expansions for the next move
            for j in range(tmp_num_expansions):
                v, bl_trained, tmp_s = mcts.search(
                    s, ChildModel, self.sess, self.images, self.labels,
                    self.path + '/Children/Insearchtrain/anychild_' +
                    str(i) + '_' + str(j) + '_' + str(unique_idx) + '/',
                    64, self.global_ops, self.global_param,
                    'anychild_' + str(i) + '_' + str(j) + '_' +
                    str(unique_idx), self.variables_not_initialize,
                    self.GLOBAL_WEIGHTS, self.max_noimprovements,
                    self.max_iteration, self.lr_iteration_step, 1, False,
                    lstm_perc, lstm_epoch, self.lstm_model, use_uniform,
                    self.alphax_version, self.debug_no_trainig)
                if bl_trained:
                    # A ChildModel was trained in mcts.search
                    unique_idx = unique_idx + 1
                    log_to_textfile(
                        self.path + '/Coach/' + 'logs.txt',
                        'In search iteration: ' + str(j) + ' v value: ' +
                        str(v) + ' for ' + str(s) + ' \n')
                    convcell = s_to_convcell(tmp_s, self.B,
                                             self.action_space,
                                             self.combine_op)
                    log_to_textfile(
                        self.path + '/Coach/' + 'logs.txt',
                        'Full state: ' + str(tmp_s) + ' convcell: ' +
                        str(convcell) + ' \n')
                    if not self.debug_no_trainig:
                        self.loader.save(
                            self.sess,
                            self.path + '/Coach/global_weights/global_weight')
                        self._reset_graph(False)
                    self.no_trained_in_search = self.no_trained_in_search + 1
            # Get the next action based on the MCTS probabilities
            probs = mcts.get_prob(s)
            # Keep track in the replay history (accuracy filled in later)
            examples.append([s, probs, None, lstm_perc, lstm_epoch])
            action = np.random.choice(len(probs), p=probs)
            # Logging to dictionary
            tmp_tmp_nsa = {}
            for nsa_key in mcts.Nsa.keys():
                if nsa_key[0] == s:
                    tmp_tmp_nsa[nsa_key] = mcts.Nsa[nsa_key]
            tmp_tmp_result['Nsa'] = tmp_tmp_nsa
            tmp_tmp_result['Ns'] = mcts.Ns[s]
            tmp_tmp_result['Ps'] = mcts.Ps[s]
            tmp_tmp_result['probs'] = probs
            tmp_tmp_result['action'] = action
            tmp123_result['in_search'].append(tmp_tmp_result)
            # Combine the state with the selected action
            s = s + (action, )
        tmp123_result['use_uniform'] = use_uniform
        pickle.dump(
            mcts,
            open(self.path + '/Coach/' + 'mcts' + str(no_mcts) + '.pickle',
                 "wb"))
        log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                        'Final action sequence: ' + str(s) + ' \n')
        convcell = s_to_convcell(s, self.B, self.action_space,
                                 self.combine_op)
        log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                        'Final convcell: ' + str(convcell) + ' \n')
        createPath(self.path + '/Children/Finaltrain/finalchild_' + str(i))
        tmp123_result['final_seq'] = s
        tmp123_result['final_convcell'] = convcell
        # Train the selected architecture for more epochs and report the
        # accuracy
        if not self.debug_no_trainig:
            model = ChildModel(
                self.sess, self.images, self.labels,
                self.path + '/Children/Finaltrain/finalchild_' + str(i) +
                '/', 64, convcell, self.global_ops, self.global_param,
                'finalchild_' + str(i), self.variables_not_initialize,
                self.no_channels_start)
            model.build_model(self.GLOBAL_WEIGHTS)
            model.couch_train(self.images, self.labels,
                              self.max_noimprovements, self.max_iteration,
                              self.lr_iteration_step, lstm_epoch,
                              self.no_global_variables)
            acc = model.predict_validation(self.images['valid'],
                                           self.labels['valid'], 0,
                                           initialize_new=False)
        else:
            acc = 0.2
        log_to_textfile(
            self.path + '/Coach/' + 'logs.txt',
            'Final training validation accuracy: ' + str(acc) + ' \n')
        # Add the accuracy to the temporary replay history
        for e in examples:
            e[2] = acc
        self.no_trained_final = self.no_trained_final + 1
        self.loader.save(self.sess,
                         self.path + '/Coach/global_weights/global_weight')
        self._reset_graph(False)
        tmp123_result['final_convcell_acc'] = acc
        tmp123_result['total_before_total_loss'] = 0
        tmp123_result['total_before_loss_value'] = 0
        tmp123_result['total_before_loss_prob'] = 0
        if i > 0:
            # Add the temporary replay buffer to the bounded total replay
            # buffer, then train the LSTM
            while len(self.full_examples) > self.max_replay_size:
                self.full_examples.pop(0)
            self.full_examples = self.full_examples + examples
            total_before_total_loss = []
            total_before_loss_value = []
            total_before_loss_prob = []
            for k in range((len(self.full_examples) //
                            self.lstm_batchsize) + 1):
                before_total_loss, before_loss_value, before_loss_prob = \
                    self.lstm_model.train(self.full_examples,
                                          self.lstm_batchsize,
                                          self.lstm_learning_rate,
                                          self.path + '/Coach/' + 'logs.txt')
                total_before_total_loss.append(before_total_loss)
                total_before_loss_value.append(before_loss_value)
                total_before_loss_prob.append(before_loss_prob)
            tmp123_result['total_before_total_loss'] = np.mean(
                total_before_total_loss)
            tmp123_result['total_before_loss_value'] = np.mean(
                total_before_loss_value)
            tmp123_result['total_before_loss_prob'] = np.mean(
                total_before_loss_prob)
        # Save dictionary log files to disk
        total_results.append(tmp123_result)
        pickle.dump(
            self.full_examples,
            open(self.path + '/Coach/' + 'full_examples.pickle', "wb"))
        pickle.dump(
            total_results,
            open(self.path + '/Coach/' + 'total_results.pickle', "wb"))
    self.mcts = mcts
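
# Illustrative sketch of the replay-buffer maintenance at the end of train():
# the buffer is trimmed to max_replay_size by dropping the oldest entries
# before the fresh examples of this iteration are appended. The data below
# is a hypothetical example.
def update_replay_buffer_sketch(full_examples, new_examples, max_replay_size):
    while len(full_examples) > max_replay_size:
        full_examples.pop(0)
    return full_examples + new_examples

buf = update_replay_buffer_sketch(list(range(5)), ['new'], max_replay_size=3)
assert buf == [2, 3, 4, 'new']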