def predict_validation(self, images, labels, phase=0, initialize_new=True):
        """
        Function predict_validation

        Predict the data in batches as it could be too big to fit in the GPU memory, at once.

        Args:
            images (np array): Images to predict
            labels (np array): Labels for prediction
            phase (int): Input for bl_training (training/testing phase)
            initialize_new (boolean): Initialize not shared variables new
        
        Attributes:
            acc (float): Accuracy of prediction
        """
        sess = self.sess
        loss = self.loss
        input_data = self.input_data
        y_output = self.y_output
        x = self.x
        bl_training = self.bl_training
        x_softmax = self.x_softmax
        batch_size = self.batch_size
        path = self.path

        x_images = images
        y_label = labels
        y_label = y_label.reshape((-1, 1))

        if initialize_new:
            variables_to_initialize = [
                x for x in tf.global_variables() if not (
                    x.name in [y.name for y in self.variables_not_initialize])
            ]
            sess.run(tf.variables_initializer(variables_to_initialize))

        y_final, x_pred_final, loss_final = self.predict_batch(
            sess, [loss, x_softmax], input_data, y_output, bl_training,
            x_images, y_label, batch_size, phase)

        y_final = np.asarray(y_final)
        x_pred_final = np.asarray(x_pred_final)
        acc = np.mean(
            x_pred_final.reshape((-1, 1)) == y_final.reshape((-1, 1)))
        loss_final = np.mean(loss_final)
        log_to_textfile(
            self.path + 'log.txt', 'Validation loss: ' + str(loss_final) +
            ' Validation acc: ' + str(acc) + '\n')
        return acc
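
    # A minimal usage sketch, assuming a built and trained ChildModel
    # instance `model` and held-out arrays `x_val`, `y_val` (this mirrors
    # the call the Coach makes after final training):
    #
    #     acc = model.predict_validation(x_val, y_val, 0,
    #                                    initialize_new=False)
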
    def train(self, full_examples, batch_size, learning_rate, coach_filename):
        """
        Function train

        Train the LSTM model (one training step)

        Args:
            full_examples (list): Replay history
            batch_size (int): Batch size for LSTM to train
            learning_rate (float): Learning rate for LSTM model 
            coach_filename (str): Filename of Coach model to log results
            
        Return:
            before_total_loss (float): Total loss for the current batch
            before_loss_value  (float): Loss for state value for the current batch
            before_loss_prob  (float): Loss for the probability distribution for the current batch
        """
        idx = np.asarray(range(len(full_examples)))
        np.random.shuffle(idx)
        idx = idx[range(min(batch_size, idx.shape[0]))]
        states = []
        probs = []
        acc = []
        perc = []
        epoch = []
        for i in idx:
            states.append(full_examples[i][0])
            probs.append(full_examples[i][1])
            acc.append(full_examples[i][2])
            perc.append(full_examples[i][3])
            epoch.append(full_examples[i][4])
        
        acc = np.asarray(acc).reshape(-1, 1)
        x_batch, tmp_batch_size, seq_len = self.prepare_action_sequence(
            states, perc, epoch)
        feed_dict = {
            self.x: x_batch,
            self.target_pis: probs,
            self.target_v: acc,
            self.seq_len: seq_len,
            self.batch_size: tmp_batch_size,
            self.learning_rate: learning_rate
        }
        # Evaluate the losses before the update step (for logging)
        before_total_loss, before_loss_value, before_loss_prob = self.sess.run(
            [self.total_loss, self.loss_value, self.loss_prob],
            feed_dict=feed_dict)
        # Apply one optimizer step and evaluate the losses after the update
        _, total_loss, loss_value, loss_prob = self.sess.run(
            [self.train_step, self.total_loss, self.loss_value,
             self.loss_prob],
            feed_dict=feed_dict)
        log_to_textfile(
            self.path + 'logs.txt',
            'B LSTM Total loss: ' + str(before_total_loss) + ' B Loss prob: ' +
            str(before_loss_prob) + ' B Loss value: ' +
            str(before_loss_value) + '\n')
        log_to_textfile(
            self.path + 'logs.txt',
            'LSTM Total loss: ' + str(total_loss) + ' Loss prob: ' +
            str(loss_prob) + ' Loss value: ' + str(loss_value) + '\n')
        log_to_textfile(
            coach_filename,
            'B LSTM Total loss: ' + str(before_total_loss) + ' B Loss prob: ' +
            str(before_loss_prob) + ' B Loss value: ' +
            str(before_loss_value) + '\n')
        log_to_textfile(
            coach_filename,
            'LSTM Total loss: ' + str(total_loss) + ' Loss prob: ' +
            str(loss_prob) + ' Loss value: ' + str(loss_value) + '\n')
        return before_total_loss, before_loss_value, before_loss_prob
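
    # A sketch of the replay-history layout consumed above (derived from the
    # indexing into full_examples): each entry is a list
    #
    #     [state, probs, acc, perc, epoch]
    #
    # i.e. the action sequence, the MCTS move probabilities, the achieved
    # validation accuracy, and the data percentage / epoch budget used for the
    # child training (Coach.train appends [s, probs, None, lstm_perc,
    # lstm_epoch] and fills in the accuracy afterwards).
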
    def _apply_convcell(self, convcell, prev_cells, idx_nomberofconv,
                        idx_convcell, list_trainable_weights, childname,
                        bl_training, GLOBAL_WEIGHTS):
        """
        Function _apply_convcell

        Applies the convcell to the neural network

        Args:
            convcell (list): A list of operation for the convolutional cell
            prev_cells (list): A list of output tensor of previous convolutional cells, which can be used as input
            idx_nomberofconv (int): Index of current convolutional cell batch (Not used anymore) 
            idx_convcell (int): Index of current convolutional cell
            list_trainable_weights (list): List of trainable weights
            childname (str): The ChildModel name
            bl_training (tf placeholder): Defines training/testing phase  
            GLOBAL_WEIGHTS (dict): Dictonary with shared tensorflow weights
        
        Attributes:
            final_out (tensorflow tensor): Output tensor
            list_trainable_weights (list): List of trainable weights
            bl_x1_used (boolean): If previous cell t-1 was used as input, then true 
            bl_x2_used (boolean): If previous cell t-2 was used as input, then true 
        """
        not_used_prev_cells = [x + 2 for x in range(len(convcell))]
        bl_x1_used = False
        bl_x2_used = False
        print('-' * 10)
        print(idx_nomberofconv, idx_convcell)
        idx = 0
        for convconfig in convcell:
            with tf.variable_scope('incell_' + str(idx)):
                idx += 1
                log_to_textfile(self.path + 'log.txt', str(convconfig) + '\n')
                log_to_textfile(self.path + 'log.txt',
                                str(not_used_prev_cells) + '\n')
                if convconfig[0] == 0:
                    bl_x1_used = True
                if convconfig[1] == 1:
                    bl_x2_used = True
                with tf.variable_scope('incell_block1'):
                    if 'conv_' in convconfig[2]:
                        if prev_cells[convconfig[0]].get_shape(
                        )[3].value == self.no_channels_start:
                            h_1 = tf.nn.conv2d(
                                prev_cells[convconfig[0]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[2]][0],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        if prev_cells[convconfig[0]].get_shape(
                        )[3].value == 2 * self.no_channels_start:
                            h_1 = tf.nn.conv2d(
                                prev_cells[convconfig[0]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[2]][1],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        h_1 = tf.layers.batch_normalization(
                            h_1, training=bl_training)
                        h_1 = tf.nn.relu(h_1)
                    if 'convsep_' in convconfig[2]:
                        if prev_cells[convconfig[0]].get_shape(
                        )[3].value == self.no_channels_start:
                            h_1 = tf.nn.separable_conv2d(
                                prev_cells[convconfig[0]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[2]][0],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[2]][1],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        if prev_cells[convconfig[0]].get_shape(
                        )[3].value == 2 * self.no_channels_start:
                            h_1 = tf.nn.separable_conv2d(
                                prev_cells[convconfig[0]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[2]][2],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[2]][3],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        h_1 = tf.layers.batch_normalization(
                            h_1, training=bl_training)
                        h_1 = tf.nn.relu(h_1)
                    if 'id_' in convconfig[2]:
                        h_1 = prev_cells[convconfig[0]]
                    if 'maxpool_3x3' == convconfig[2]:
                        h_1 = tf.layers.max_pooling2d(
                            prev_cells[convconfig[0]],
                            pool_size=(3, 3),
                            strides=(1, 1),
                            padding='same')
                    if 'maxpool_5x5' == convconfig[2]:
                        h_1 = tf.layers.max_pooling2d(
                            prev_cells[convconfig[0]],
                            pool_size=(5, 5),
                            strides=(1, 1),
                            padding='same')
                    if 'maxpool_7x7' == convconfig[2]:
                        h_1 = tf.layers.max_pooling2d(
                            prev_cells[convconfig[0]],
                            pool_size=(7, 7),
                            strides=(1, 1),
                            padding='same')
                    if 'avgpool_3x3' == convconfig[2]:
                        h_1 = tf.layers.average_pooling2d(
                            prev_cells[convconfig[0]],
                            pool_size=(3, 3),
                            strides=(1, 1),
                            padding='same')
                    if 'avgpool_5x5' == convconfig[2]:
                        h_1 = tf.layers.average_pooling2d(
                            prev_cells[convconfig[0]],
                            pool_size=(5, 5),
                            strides=(1, 1),
                            padding='same')
                    if 'avgpool_7x7' == convconfig[2]:
                        h_1 = tf.layers.average_pooling2d(
                            prev_cells[convconfig[0]],
                            pool_size=(7, 7),
                            strides=(1, 1),
                            padding='same')
                with tf.variable_scope('incell_block2'):
                    if 'convsep_' in convconfig[3]:
                        if prev_cells[convconfig[1]].get_shape(
                        )[3].value == self.no_channels_start:
                            h_2 = tf.nn.separable_conv2d(
                                prev_cells[convconfig[1]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[3]][0],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[3]][1],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        if prev_cells[convconfig[1]].get_shape(
                        )[3].value == 2 * self.no_channels_start:
                            h_2 = tf.nn.separable_conv2d(
                                prev_cells[convconfig[1]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[3]][2],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[3]][3],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        h_2 = tf.layers.batch_normalization(
                            h_2,
                            training=bl_training,
                            name=str(idx_convcell) + '_' + str(idx) +
                            '_batch2' + childname)
                        h_2 = tf.nn.relu(h_2)
                    if 'conv_' in convconfig[3]:
                        if prev_cells[convconfig[1]].get_shape(
                        )[3].value == self.no_channels_start:
                            h_2 = tf.nn.conv2d(
                                prev_cells[convconfig[1]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[3]][0],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        if prev_cells[convconfig[1]].get_shape(
                        )[3].value == 2 * self.no_channels_start:
                            h_2 = tf.nn.conv2d(
                                prev_cells[convconfig[1]],
                                GLOBAL_WEIGHTS[0][1][0][convconfig[3]][1],
                                [1, 1, 1, 1],
                                "SAME",
                                data_format="NHWC")
                        h_2 = tf.layers.batch_normalization(
                            h_2, training=bl_training)
                        h_2 = tf.nn.relu(h_2)
                    if 'id_' in convconfig[3]:
                        h_2 = prev_cells[convconfig[1]]
                    if 'maxpool_3x3' == convconfig[3]:
                        h_2 = tf.layers.max_pooling2d(
                            prev_cells[convconfig[1]],
                            pool_size=(3, 3),
                            strides=(1, 1),
                            padding='same')
                    if 'maxpool_5x5' == convconfig[3]:
                        h_2 = tf.layers.max_pooling2d(
                            prev_cells[convconfig[1]],
                            pool_size=(5, 5),
                            strides=(1, 1),
                            padding='same')
                    if 'maxpool_7x7' == convconfig[3]:
                        h_2 = tf.layers.max_pooling2d(
                            prev_cells[convconfig[1]],
                            pool_size=(7, 7),
                            strides=(1, 1),
                            padding='same')
                    if 'avgpool_3x3' == convconfig[3]:
                        h_2 = tf.layers.average_pooling2d(
                            prev_cells[convconfig[1]],
                            pool_size=(3, 3),
                            strides=(1, 1),
                            padding='same')
                    if 'avgpool_5x5' == convconfig[3]:
                        h_2 = tf.layers.average_pooling2d(
                            prev_cells[convconfig[1]],
                            pool_size=(5, 5),
                            strides=(1, 1),
                            padding='same')
                    if 'avgpool_7x7' == convconfig[3]:
                        h_2 = tf.layers.average_pooling2d(
                            prev_cells[convconfig[1]],
                            pool_size=(7, 7),
                            strides=(1, 1),
                            padding='same')
                if convconfig[4] == 'add':
                    if h_1.get_shape()[3].value != self.no_channels_start:
                        w = get_weights(
                            childname + '_' + str('tmpid1_') +
                            str(idx_nomberofconv) + '_' + str(idx_convcell), [
                                1, 1,
                                h_1.get_shape()[3].value,
                                self.no_channels_start
                            ])
                        list_trainable_weights.append(w)
                        h_1 = tf.nn.conv2d(h_1, w, [1, 1, 1, 1], "SAME")
                        h_1 = tf.layers.batch_normalization(
                            h_1, training=bl_training)
                        h_1 = tf.nn.relu(h_1)
                    if h_2.get_shape()[3].value != self.no_channels_start:
                        w = get_weights(
                            childname + '_' + str('tmpid2_') +
                            str(idx_nomberofconv) + '_' + str(idx_convcell), [
                                1, 1,
                                h_2.get_shape()[3].value,
                                self.no_channels_start
                            ])
                        list_trainable_weights.append(w)
                        h_2 = tf.nn.conv2d(h_2, w, [1, 1, 1, 1], "SAME")
                        h_2 = tf.layers.batch_normalization(
                            h_2, training=bl_training)
                        h_2 = tf.nn.relu(h_2)
                    h_out = tf.add(h_1, h_2)
                elif convconfig[4] == 'concat':
                    h_out = tf.concat([h_1, h_2], axis=-1)
                else:
                    # Fail loudly instead of leaving h_out undefined
                    raise ValueError('Unknown combine operation: ' +
                                     str(convconfig[4]))
                prev_cells.append(h_out)
                if not ('id_' in convconfig[2]) and not ('pool'
                                                         in convconfig[2]):
                    list_trainable_weights.append(
                        GLOBAL_WEIGHTS[0][1][0][convconfig[2]])
                if not ('id_' in convconfig[3]) and not ('pool'
                                                         in convconfig[3]):
                    list_trainable_weights.append(
                        GLOBAL_WEIGHTS[0][1][0][convconfig[3]])
                if convconfig[0] in not_used_prev_cells:
                    not_used_prev_cells.remove(convconfig[0])
                if convconfig[1] in not_used_prev_cells:
                    not_used_prev_cells.remove(convconfig[1])

        with tf.variable_scope('celloutput_' + str(idx_convcell)):
            final_out = tf.concat([prev_cells[i] for i in not_used_prev_cells],
                                  axis=-1)
            shp_in = final_out.get_shape()[3].value
            w = get_weights(
                childname + '_' + str('final_') + str(idx_nomberofconv) + '_' +
                str(idx_convcell), [1, 1, shp_in, self.global_param[4]])
            list_trainable_weights.append(w)
            final_out = tf.nn.conv2d(final_out, w, [1, 1, 1, 1], "SAME")
            final_out = tf.layers.batch_normalization(final_out,
                                                      training=bl_training)
            final_out = tf.nn.relu(final_out)
        return (final_out, list_trainable_weights, bl_x1_used, bl_x2_used)
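
    # A sketch of the convcell encoding consumed by _apply_convcell (derived
    # from the indexing of convconfig): each entry of the cell is a
    # 5-element list
    #
    #     [input_1, input_2, op_1, op_2, combine]
    #
    # where input_1 and input_2 index prev_cells, op_1 and op_2 are operation
    # names matched by the substring checks above (e.g. 'conv_', 'convsep_',
    # 'id_', 'maxpool_3x3', 'avgpool_5x5') and combine is 'add' or 'concat'.
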
    def couch_train(self,
                    images,
                    labels,
                    max_noimprovements,
                    max_iteration,
                    lr_iteration_step,
                    max_epochs,
                    no_global_variables=False,
                    safe_model=False):
        """
        Function couch_train

        Predict the data in batches as it could be too big to fit in the GPU memory, at once.

        Args:
            images (dict): Images used for training ChildModel
            labels (dict): Labels used for training ChildModel 
            max_noimprovements (int): Stop training ChildModel if does not improve over number of epochs
            max_iteration (int): Number of maximal training steps in ChildModel
            lr_iteration_step (list): Epochs when learning rate is decayed in ChildModel
            max_epochs (int): Number of epochs for training ChildModel
            no_global_variables (boolean): If True, no global variables are trained in ChildModel
            safe_model (boolean): If True, then ChildModel weights are safed
        """
        # Initialize / store Child attributes in local variables
        sess = self.sess
        loss = self.loss
        input_data = self.input_data
        y_output = self.y_output
        x = self.x
        bl_training = self.bl_training
        x_softmax = self.x_softmax
        batch_size = self.batch_size
        lr = 0.1
        learning_rate = self.learning_rate
        path = self.path
        list_trainable_weights = self.list_trainable_weights

        createPath(path)
        createPath(path + '/model')
        x_train = images['train']
        y_train = labels['train']
        x_val = images['valid']
        y_val = labels['valid']
        y_train = y_train.reshape((-1, 1))
        y_val = y_val.reshape((-1, 1))

        # Counter for the number of training iterations
        iteration = 0

        # Reinitialize child specific variables
        if no_global_variables:
            list_trainable_weights = [
                x for x in list_trainable_weights if not (
                    x.name in [y.name for y in self.variables_not_initialize])
            ]

        # Create optimizer
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, 0.9,
                use_nesterov=True).minimize(loss,
                                            var_list=list_trainable_weights)

        variables_to_initialize = [
            x for x in tf.global_variables()
            if not (x.name in [y.name for y in self.variables_not_initialize])
        ]

        best_acc = 0
        best_epoch = 0
        bl_break = False

        saver = tf.train.Saver()
        # Initialize only new variables (not shared)
        sess.run(tf.variables_initializer(variables_to_initialize))
        createPath(self.path + "/log/")
        createPath(self.path + "/model/")
        # Create writer for tensorboard
        writer = tf.summary.FileWriter(
            self.path + "/log/{}".format(self.childname), sess.graph)
        idx = np.asarray(range(x_train.shape[0]))
        counter_run_noimprovement = 0
        # Epoch iterations
        for e in range(max_epochs):
            if bl_break:
                break
            start = timer()
            no_batches = idx.shape[0] // batch_size + 1
            np.random.shuffle(idx)
            # One training loop
            for batch in range(no_batches - 1):
                x_train_batch = x_train[idx[(batch * batch_size):(
                    min((1 + batch) * batch_size, x_train.shape[0]))]]
                x_train_batch = random_crop_and_flip(x_train_batch)
                y_train_batch = y_train[idx[(batch * batch_size):(
                    min((1 + batch) * batch_size, x_train.shape[0]))]]
                o_loss, o_optimizer, o_x = sess.run(
                    [loss, optimizer, x],
                    feed_dict={
                        input_data: x_train_batch,
                        y_output: y_train_batch,
                        bl_training: 1,
                        learning_rate: lr
                    })
                if iteration == max_iteration:
                    bl_break = True

            iteration += 1
            # Learning rate decay
            if iteration in lr_iteration_step:
                lr = 0.1 * lr
                log_to_textfile(self.path + 'log.txt',
                                'New learning rate: ' + str(lr) + '\n')

            # Predict validation set
            y_val_final, x_val_pred_final, val_o_loss_final = self.predict_batch(
                sess, [loss, x_softmax], input_data, y_output, bl_training,
                x_val, y_val, batch_size)

            y_val_final = np.asarray(y_val_final)
            x_val_pred_final = np.asarray(x_val_pred_final)
            val_acc = np.mean(
                x_val_pred_final.reshape((-1, 1)) == y_val_final.reshape((-1,
                                                                          1)))
            val_o_loss_mean = np.mean(val_o_loss_final)
            # Save best model
            if best_acc < val_acc:
                log_to_textfile(self.path + 'log.txt',
                                'Save best model' + '\n')
                best_acc = val_acc
                best_epoch = e
                counter_run_noimprovement = 0
                if safe_model:
                    saver.save(sess,
                               self.path + 'model/{}'.format(self.childname))
            else:
                counter_run_noimprovement = counter_run_noimprovement + 1

            end = timer()
            total_time = end - start
            log_to_textfile(
                self.path + 'log.txt', 'Time: ' + str(total_time) +
                ' Epoch: ' + str(e) + ' Iteration: ' + str(iteration) +
                ' No Improv: ' + str(counter_run_noimprovement) +
                ' Val Loss: ' + str(val_o_loss_mean) + ' Best Acc: ' +
                str(best_acc) + ' Val Acc: ' + str(val_acc) + '\n')
            # Break training if the model does not improve over certain number of epochs
            if counter_run_noimprovement > max_noimprovements:
                bl_break = True
    def build_model(self, GLOBAL_WEIGHTS):
        """
        Function build_model

        Build the ChildModel model

        Args:
            GLOBAL_WEIGHTS (dict): Dictionary with shared tensorflow weights
        """
        input_data = tf.placeholder(shape=[None, 32, 32, 3],
                                    dtype=tf.float32,
                                    name='x_input')
        y_output = tf.placeholder(shape=[None, 1],
                                  dtype=tf.float32,
                                  name='y_output')
        bl_training = tf.placeholder(tf.bool, name='training')
        learning_rate = tf.placeholder(tf.float32, shape=[])

        N_numberofconv = self.global_param[0]
        N_convcells = self.global_param[1]
        B = self.global_param[2]
        action_space = self.global_param[3]
        no_channels_start = self.global_param[4]

        childname = self.childname
        convcell = self.convcell

        list_trainable_weights = []
        list_concat = []

        x_1 = tf.nn.conv2d(input_data,
                           GLOBAL_WEIGHTS[-1][0], [1, 1, 1, 1],
                           "SAME",
                           name=childname + '_first_conv')
        x_1 = tf.layers.batch_normalization(x_1,
                                            training=bl_training,
                                            name=childname + '_first_batch')
        x_1 = tf.nn.relu(x_1, name=childname + '_first_relu')
        x_2 = x_1
        list_trainable_weights.append(GLOBAL_WEIGHTS[-1][0])

        for i in range(N_convcells):
            with tf.variable_scope('cell_' + str(i)):
                final_out, list_trainable_weights, x_1_used, x_2_used = self._apply_convcell(
                    convcell, [x_1, x_2], 1, i, list_trainable_weights,
                    childname, bl_training, GLOBAL_WEIGHTS)
                log_to_textfile(self.path + 'log.txt', str(x_1_used) + '\n')
                log_to_textfile(self.path + 'log.txt', str(x_2_used) + '\n')
                if not (x_1_used):
                    list_concat.append(x_1)
                if not (x_2_used):
                    list_concat.append(x_2)
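                # Rotate the cell inputs: the current cell output becomes
                # input t-1 and the previous t-1 becomes input t-2 for the
                # next cell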
                x_2 = x_1
                x_1 = final_out

        list_concat.append(final_out)
        log_to_textfile(self.path + 'log.txt', str(list_concat) + '\n')
        if len(list_concat) > 1:
            x = tf.concat(list_concat, axis=-1)
        else:
            x = final_out
        x = tf.reduce_mean(x, axis=[1, 2])
        log_to_textfile(self.path + 'log.txt', str(x) + '\n')
        x = tf.layers.dense(x, 10, kernel_regularizer=self.regularizer)
        x_softmax = tf.nn.softmax(x)

        # y_output has shape [None, 1]; drop the trailing dimension before the
        # one-hot encoding so the labels match the rank of the logits
        y = tf.one_hot(tf.cast(y_output[:, 0], tf.int32), self.num_classes)
        l2_loss = tf.losses.get_regularization_loss()
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=x, labels=y))
        loss += l2_loss

        list_trainable_weights += [
            x for x in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            if 'dense' in str(x.name)
        ]

        self.loss = loss
        self.input_data = input_data
        self.y_output = y_output
        self.x = x
        self.bl_training = bl_training
        self.x_softmax = x_softmax
        self.learning_rate = learning_rate
        self.list_trainable_weights = list_trainable_weights
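
    # A minimal end-to-end sketch, mirroring how the MCTS search code drives
    # a ChildModel (the argument list is taken from that call site):
    #
    #     model = ChildModel(sess, images, labels, child_output, batch_size,
    #                        convcell, global_ops, global_param, childname,
    #                        variables_not_initialize, no_channels_start)
    #     model.build_model(GLOBAL_WEIGHTS)
    #     model.couch_train(images, labels, max_noimprovements, max_iteration,
    #                       lr_iteration_step, max_epochs, no_global_variables)
    #     acc = model.predict_validation(images['valid'], labels['valid'], 0,
    #                                    initialize_new=False)
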
    def search(self, state, ChildModel, sess, images, labels, child_output,
               batch_size, global_ops, global_param, childname,
               variables_not_initialize, GLOBAL_WEIGHTS, max_noimprovements,
               max_iteration, lr_iteration_step, max_epochs,
               no_global_variables, lstm_perc, lstm_epoch, lstm_model,
               use_uniform, alphax_version, debug_no_trainig):
        """
        Function search

        Executes a action search for a given state based on MCTS and updates all values 

        Args:
            state (tuple): starting state for MCTS search
            ChildModel (class ChildModel): class of ChildModel used for training a full state
            sess (tensorflow session): Tensorflow session used for training ChildModel
            images (dict): Images used for training ChildModel
            labels (dict): Labels used for training ChildModel
            child_output (str): Child output path 
            batch_size (int): Batchsize for training child  
            global_ops (list): Not used anymore 
            global_param (list): List with some global parameters
            childname (str): Name of ChildModel
            variables_not_initialize (list): List of global variables, which should not be initialized
            GLOBAL_WEIGHTS (dict): Dictonary with shared tensorflow weights
            max_noimprovements (int): Stop training ChildModel if does not improve over number of epochs
            max_iteration (int): Number of maximal training steps in ChildModel
            lr_iteration_step (list): Epochs when learning rate is decayed in ChildModel
            max_epochs (int): Number of epochs for training ChildModel
            no_global_variables (boolean): If True, no global variables are trained in ChildModel
            lstm_perc (int): Percentage of data used for child model
            lstm_epoch (int): Number of epochs for child model
            lstm_model (object of class LSTMModel): LSTM model for prediction v, Ps
            use_uniform (boolean): If True uniform distribution is used for initializing Ps
            alphax_version (boolean): If True other MCTS formula is used (based on AlphaX paper)
            debug_no_trainig (boolean): If True no ChildModel are trained and only dummy value is returned
        
        Return: v, bl_trained, tmp_s
            _ (float): Value of requested state s
            _ (boolean): If True, then a new ChildModel was trained
            _ (tuple): Selected action
        """
        s = state

        if len(s) // 5 == self.B:
            # Full state sequence is discovered
            if s not in self.Es:
                # If s hasn't been discovered yet - train a ChildModel
                convcell = s_to_convcell(s, self.B, self.action_space,
                                         self.combine_op)
                createPath(child_output)
                log_to_textfile(
                    self.filename, 'MCTS State: ' + str(s) + ' MCTS cell: ' +
                    str(convcell) + '\n')
                if debug_no_trainig:
                    return (0.2, True, s)
                model = ChildModel(sess, images, labels, child_output,
                                   batch_size, convcell, global_ops,
                                   global_param, childname,
                                   variables_not_initialize,
                                   self.no_channels_start)
                model.build_model(GLOBAL_WEIGHTS)
                model.couch_train(images, labels, max_noimprovements,
                                  max_iteration, lr_iteration_step, max_epochs,
                                  no_global_variables)
                acc = model.predict_validation(images['valid'],
                                               labels['valid'],
                                               0,
                                               initialize_new=False)
                self.Es[s] = acc
                return acc, True, s
            if self.Es[s] != 0:
                # s was already discovered
                return self.Es[s], False, s

        if s not in self.Ps:
            # Initialize Policy by neural network
            ps, v = lstm_model.pred_action_sequence([s], [lstm_perc],
                                                    [lstm_epoch])
            ps = ps[0]
            v = v[0]
            # Use uniform distribution instead of LSTM
            if use_uniform:
                ps = np.asarray(ps)
                ps = np.ones_like(ps)
            self.Ps[s] = ps
            # Only valid actions
            valids = legal_action_from_seq(s, self.B, self.num_ops,
                                           self.num_combine, self.action_size)
            valids_hotn = np.sum(one_hot(np.asarray(valids), self.action_size),
                                 axis=0)
            self.Ps[s] = self.Ps[s] * valids_hotn
            sum_Ps_s = np.sum(self.Ps[s])
            if sum_Ps_s > 0:
                # Normalize it again
                self.Ps[s] /= sum_Ps_s
            else:
                # All valid moves were masked: fall back to a uniform
                # distribution over the valid actions
                print("All valid moves were masked, do workaround.")
                self.Ps[s] = self.Ps[s] + valids_hotn
                self.Ps[s] /= np.sum(self.Ps[s])
            self.Vs[s] = valids_hotn
            self.Ns[s] = 0
            return v, False, s

        valids = self.Vs[s]
        cur_best = -float('inf')
        best_act = -1

        # Select the action with the highest upper confidence bound based on MCTS formula
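        # Written out, the two rules as implemented below are:
        #   AlphaGo-style: u = Q(s,a) + 5 * P(s,a) * sqrt(N(s)) / (1 + N(s,a))
        #   AlphaX-style:  u = Q(s,a) / N(s,a)
        #                      + 2 * 200 * sqrt(2 * log10(N(s)) / (1 + N(s,a)))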
        for a in range(self.action_size):
            if valids[a]:
                if (s, a) in self.Qsa:
                    if alphax_version:
                        # Use AlphaX rule
                        u = self.Qsa[(s, a)] / self.Nsa[
                            (s, a)] + 2 * 200 * math.sqrt(
                                2 * math.log10(self.Ns[s]) /
                                (1 + self.Nsa[(s, a)]))
                    else:
                        # Use AlphaGo rule
                        u = self.Qsa[(s, a)] + 5 * self.Ps[s][a] * math.sqrt(
                            self.Ns[s]) / (1 + self.Nsa[(s, a)])
                else:
                    if alphax_version:
                        # Use AlphaX rule; an unvisited pair has Q = 0 and
                        # N(s,a) = 0, so the denominator is 1 (the +1 inside
                        # the log guards against log10(0) for a freshly
                        # expanded state)
                        u = 2 * 200 * math.sqrt(
                            2 * math.log10(self.Ns[s] + 1))
                    else:
                        # Use AlphaGo rule
                        u = 5 * self.Ps[s][a] * math.sqrt(
                            self.Ns[s] + EPS)  # Q = 0 ?
                if u > cur_best:
                    # Keep best action
                    cur_best = u
                    best_act = a

        a = best_act
        next_s = s + (a, )
        # Find value for best action a in state s
        v, bl_trained, tmp_s = self.search(
            next_s, ChildModel, sess, images, labels, child_output, batch_size,
            global_ops, global_param, childname, variables_not_initialize,
            GLOBAL_WEIGHTS, max_noimprovements, max_iteration,
            lr_iteration_step, max_epochs, no_global_variables, lstm_perc,
            lstm_epoch, lstm_model, use_uniform, alphax_version,
            debug_no_trainig)

        # Update Q value of state action pair (s,a)
        if (s, a) in self.Qsa:
            if alphax_version:
                self.Qsa[(s, a)] = self.Qsa[(s, a)] + v
            else:
                self.Qsa[(s, a)] = (self.Nsa[(s, a)] * self.Qsa[(s, a)] +
                                    v) / (self.Nsa[(s, a)] + 1)
            self.Nsa[(s, a)] += 1

        else:
            self.Qsa[(s, a)] = v
            self.Nsa[(s, a)] = 1

        self.Ns[s] += 1
        return v, bl_trained, tmp_s
    def train(self):
        """
        Function train

        Trains the Coach, MCTS and ChildModels

        Args:
            
        """
        lstm_perc = 0.05
        lstm_epoch = 2
        unique_idx = 0
        total_results = []
        no_mcts = 0
        # First initialization of MCTS tree
        mcts = MCTS(self.num_actions, self.B, len(self.action_space.keys()),
                    len(self.combine_op.keys()), self.action_space,
                    self.combine_op, self.path + '/Coach/' + 'logs.txt',
                    self.no_channels_start)
        # Iteration of searches
        for i in range(self.num_learning_iteration):
            print(i)
            use_uniform = False
            if self.no_for_uniform > i:
                use_uniform = True
            #if i % (int(self.num_learning_iteration / len(range(self.N_convcells_range[1]-self.N_convcells_range[0])))) == 0:
            #self.current_nconv = min(self.current_nconv+1,self.N_convcells_range[1])
            self.global_param[1] = self.current_nconv

            #lstm_perc = random.uniform(self.search_perc_range[0], self.search_perc_range[1])
            #lstm_epoch = random.randint(self.search_epoch_range[0], self.search_epoch_range[1])
            lstm_perc = self.search_perc_range[0]
            lstm_epoch = self.search_epoch_range[0]
            tmp_result = []

            # Only use X percent for training ChildModels
            N = self.images_total['train'].shape[0]
            idx = np.asarray(range(N))
            np.random.shuffle(idx)
            images = self.images_total.copy()
            labels = self.labels_total.copy()
            images['train'] = self.images_total['train'][
                idx[0:int(lstm_perc * N)], :, :, :].copy()
            labels['train'] = self.labels_total['train'][idx[0:int(lstm_perc *
                                                                   N)]].copy()

            # Logging to file
            log_to_textfile(
                self.path + '/Coach/' + 'logs.txt',
                '###################################### New Search ###################################### \n'
            )
            log_to_textfile(
                self.path + '/Coach/' + 'logs.txt',
                'Trained final: ' + str(self.no_trained_final) +
                ' Trained in search:' + str(self.no_trained_in_search) + ' \n')
            log_to_textfile(
                self.path + '/Coach/' + 'logs.txt', 'LSTM percentage: ' +
                str(lstm_perc) + ' LSTM Epoch:' + str(lstm_epoch) + ' Size:' +
                str(images['train'].shape) + ' \n')
            log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                            'NConvcell: ' + str(self.current_nconv) + ' \n')

            if (i > 0) and (i % self.new_mcts_every_i
                            == 0) and not (self.alphax_version):
                # Reinitialize MCTS tree after new_mcts_every_i iterations
                log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                                'New MCTS \n')
                mcts = MCTS(self.num_actions, self.B,
                            len(self.action_space.keys()),
                            len(self.combine_op.keys()), self.action_space,
                            self.combine_op,
                            self.path + '/Coach/' + 'logs.txt',
                            self.no_channels_start)
                no_mcts = no_mcts + 1

            # Logging to dictionary
            tmp123_result = {}
            tmp123_result['i'] = i
            tmp123_result['no_trained_final'] = self.no_trained_final
            tmp123_result['no_trained_search'] = self.no_trained_in_search
            tmp123_result['lstm_perc'] = lstm_perc
            tmp123_result['lstm_epoch'] = lstm_epoch
            tmp123_result['nconvcell'] = self.current_nconv
            tmp123_result['no_mcts'] = no_mcts

            # Initial state
            s = (0, 0)
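            # The state is a flat sequence with five entries per block (two
            # input indices, two operations, one combine op); the initial
            # s = (0, 0) apparently fixes the two input choices of the first
            # block, and the loop below appends one action at a time until
            # len(s) // 5 == B, i.e. until all B blocks are complete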
            examples = []

            # Logging to file
            # Logging to dictionary
            tmp123_result['in_search'] = []
            log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                            'Use uniform: ' + str(use_uniform) + '\n')

            # After MCTS is initialized, it requires two expansions the first time
            tmp_num_expansions = self.num_expansions
            if (i == 0) and (self.num_expansions == 1):
                tmp_num_expansions = 2

            # Running search until full architecture is found
            while (len(s) // 5 != self.B):
                tmp_tmp_result = {}
                tmp_tmp_result['s'] = s

                # Number of expansions for the next move
                for j in range(tmp_num_expansions):
                    v, bl_trained, tmp_s = mcts.search(
                        s, ChildModel, self.sess, self.images, self.labels,
                        self.path + '/Children/Insearchtrain/anychild_' +
                        str(i) + '_' + str(j) + '_' + str(unique_idx) + '/',
                        64, self.global_ops, self.global_param, 'anychild_' +
                        str(i) + '_' + str(j) + '_' + str(unique_idx),
                        self.variables_not_initialize, self.GLOBAL_WEIGHTS,
                        self.max_noimprovements, self.max_iteration,
                        self.lr_iteration_step, 1, False, lstm_perc,
                        lstm_epoch, self.lstm_model, use_uniform,
                        self.alphax_version, self.debug_no_trainig)
                    if bl_trained:
                        # If a ChildModel was trained in mcts.search
                        unique_idx = unique_idx + 1
                        log_to_textfile(
                            self.path + '/Coach/' + 'logs.txt',
                            'In search iteration: ' + str(j) + ' v value: ' +
                            str(v) + ' for ' + str(s) + ' \n')
                        convcell = s_to_convcell(tmp_s, self.B,
                                                 self.action_space,
                                                 self.combine_op)
                        log_to_textfile(
                            self.path + '/Coach/' + 'logs.txt',
                            'Full state: ' + str(tmp_s) + ' convcell: ' +
                            str(convcell) + ' \n')
                        if not (self.debug_no_trainig):
                            self.loader.save(
                                self.sess, self.path +
                                '/Coach/global_weights/global_weight')
                            self._reset_graph(False)
                        self.no_trained_in_search = self.no_trained_in_search + 1

                # Get next action based on MCTS probability
                probs = mcts.get_prob(s)
                # Keep track in replay history
                examples.append([s, probs, None, lstm_perc, lstm_epoch])
                action = np.random.choice(len(probs), p=probs)
                # Logging to dictionary
                tmp_tmp_nsa = {}
                for nsa_key in mcts.Nsa.keys():
                    if nsa_key[0] == s:
                        tmp_tmp_nsa[nsa_key] = mcts.Nsa[nsa_key]
                tmp_tmp_result['Nsa'] = tmp_tmp_nsa
                tmp_tmp_result['Ns'] = mcts.Ns[s]
                tmp_tmp_result['Ps'] = mcts.Ps[s]
                tmp_tmp_result['probs'] = probs
                tmp_tmp_result['action'] = action
                tmp123_result['in_search'].append(tmp_tmp_result)
                # Combine state with selected action
                s = s + (action, )

            tmp123_result['use_uniform'] = use_uniform
            pickle.dump(
                mcts,
                open(self.path + '/Coach/' + 'mcts' + str(no_mcts) + '.pickle',
                     "wb"))
            log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                            'Final action sequence: ' + str(s) + ' \n')
            convcell = s_to_convcell(s, self.B, self.action_space,
                                     self.combine_op)
            log_to_textfile(self.path + '/Coach/' + 'logs.txt',
                            'Final convcell: ' + str(convcell) + ' \n')
            createPath(self.path + '/Children/Finaltrain/finalchild_' + str(i))
            tmp123_result['final_seq'] = s
            tmp123_result['final_convcell'] = convcell

            # Train selected architecture for more epoch and report the accuracy
            if not (self.debug_no_trainig):
                model = ChildModel(
                    self.sess, self.images, self.labels, self.path +
                    '/Children/Finaltrain/finalchild_' + str(i) + '/', 64,
                    convcell, self.global_ops, self.global_param,
                    'finalchild_' + str(i), self.variables_not_initialize,
                    self.no_channels_start)
                model.build_model(self.GLOBAL_WEIGHTS)
                model.couch_train(self.images, self.labels,
                                  self.max_noimprovements, self.max_iteration,
                                  self.lr_iteration_step, lstm_epoch,
                                  self.no_global_variables)
                acc = model.predict_validation(self.images['valid'],
                                               self.labels['valid'],
                                               0,
                                               initialize_new=False)
            else:
                acc = 0.2
            log_to_textfile(
                self.path + '/Coach/' + 'logs.txt',
                'Final training validation accuracy: ' + str(acc) + ' \n')

            # Add the accuracy to the temporary replay history
            for e in examples:
                e[2] = acc

            self.no_trained_final = self.no_trained_final + 1
            self.loader.save(self.sess,
                             self.path + '/Coach/global_weights/global_weight')
            self._reset_graph(False)
            tmp123_result['final_convcell_acc'] = acc
            tmp123_result['total_before_total_loss'] = 0
            tmp123_result['total_before_loss_value'] = 0
            tmp123_result['total_before_loss_prob'] = 0
            if i > 0:
                # Add the temporary replay buffer to the total replay buffer,
                # trim it to the maximal size and train the LSTM
                self.full_examples = self.full_examples + examples
                while len(self.full_examples) > self.max_replay_size:
                    self.full_examples.pop(0)

                total_before_total_loss = []
                total_before_loss_value = []
                total_before_loss_prob = []
                for k in range((len(self.full_examples) //
                                self.lstm_batchsize) + 1):
                    before_total_loss, before_loss_value, before_loss_prob = self.lstm_model.train(
                        self.full_examples, self.lstm_batchsize,
                        self.lstm_learning_rate,
                        self.path + '/Coach/' + 'logs.txt')
                    total_before_total_loss.append(before_total_loss)
                    total_before_loss_value.append(before_loss_value)
                    total_before_loss_prob.append(before_loss_prob)
                tmp123_result['total_before_total_loss'] = np.mean(
                    total_before_total_loss)
                tmp123_result['total_before_loss_value'] = np.mean(
                    total_before_loss_value)
                tmp123_result['total_before_loss_prob'] = np.mean(
                    total_before_loss_prob)
            # Save dictionary log files to disk
            total_results.append(tmp123_result)
            pickle.dump(
                self.full_examples,
                open(self.path + '/Coach/' + 'full_examples.pickle', "wb"))
            pickle.dump(
                total_results,
                open(self.path + '/Coach/' + 'total_results.pickle', "wb"))
        self.mcts = mcts