def shuffle_data_samples(GraphData):
    """Shuffle the train, validation, and eval splits of *GraphData* in place.

    Each input/output pair is shuffled jointly via data.shuffle_input_output
    so that sample alignment between inputs and outputs is preserved.
    """
    for prefix in ('train', 'val', 'eval'):
        in_attr = prefix + '_input_data'
        out_attr = prefix + '_output_data'
        shuffled_in, shuffled_out = data.shuffle_input_output(
            getattr(GraphData, in_attr), getattr(GraphData, out_attr))
        setattr(GraphData, in_attr, shuffled_in)
        setattr(GraphData, out_attr, shuffled_out)
def reset_train_batches(self, batch_size=None, num_batches=None):
    '''
    This function resets the training batches

    Example: Training data set = 1000 samples, Batch size = 300

    Framework: Keras
    - The output is an array of as many training samples that fit
      within the batch size. Train_input_batches.shape = [900, data_size]

    Framework: Tensorflow
    - The Tensorflow implementation requires each mini-batch to be
      explicitly set. Train_input_batches.shape = (# batches, )
    - In each batch is a numpy array of size (batch_size, data_size)

    :param batch_size: optional new batch size; overrides self.batch_size
    :param num_batches: optional explicit batch count; otherwise derived
        from the training set size and batch size
    :return: None (results stored on self.train_input_batches /
        self.train_output_batches)
    '''
    # Adopt a new batch size when one is supplied.
    if batch_size is not None:
        self.batch_size = batch_size

    # Either take the caller's batch count or derive it from the data size.
    if num_batches is not None:
        self.num_train_batches = int(num_batches)
    else:
        self.num_train_batches = int(
            np.floor(self.train_input_data.shape[0] / self.batch_size))

    # Start from the full training set, then shuffle it jointly.
    self.train_input_batches, self.train_output_batches = data.shuffle_input_output(
        self.train_input_data, self.train_output_data)

    # Truncate to the largest sample count that fills whole batches.
    limit = self.batch_size * self.num_train_batches
    self.train_input_batches = self.train_input_batches[:limit]
    self.train_output_batches = self.train_output_batches[:limit]

    # Keras consumes the flat arrays directly; Tensorflow needs
    # explicit per-batch chunks.
    if self.frame_work == 'Keras':
        return
    if self.frame_work == 'Tensorflow':
        self.train_input_batches, self.train_output_batches = data.convert_to_tensorflow_minbatch(
            self.train_input_batches, self.train_output_batches,
            self.batch_size)
def split_train_val_eval(self, val_split=0.3, eval_split=0, shuffle=False):
    """Split all_input_data/all_output_data into train, val, and eval sets.

    The data is partitioned in order: [train | val | eval]. Sample counts
    are stored on self.train_samples / self.val_samples / self.eval_samples
    and the slices on the corresponding *_input_data / *_output_data
    attributes. A summary of the split percentages is printed.

    :param val_split: fraction of all samples used for validation
    :param eval_split: fraction of all samples used for evaluation
    :param shuffle: if True, jointly shuffle inputs/outputs before splitting
    :return: None (results stored on self)
    """
    # Idiomatic truth test instead of `== True`.
    if shuffle:
        self.all_input_data, self.all_output_data = data.shuffle_input_output(
            self.all_input_data, self.all_output_data)

    # Compute the total once and reuse it (original re-read .shape[0]
    # four times).
    total_samples = int(self.all_input_data.shape[0])
    self.eval_samples = int(total_samples * eval_split)
    self.val_samples = int(total_samples * val_split)
    self.train_samples = int(
        total_samples - self.eval_samples - self.val_samples)

    print(
        'Train Samples = {}({}%), Val Samples = {}({}%), Eval Samples = {}({}%)'
        .format(self.train_samples,
                np.round(self.train_samples / total_samples * 100, 2),
                self.val_samples,
                np.round(self.val_samples / total_samples * 100, 2),
                self.eval_samples,
                np.round(self.eval_samples / total_samples * 100, 2)))

    # Name the slice boundaries once so the three partitions cannot drift
    # out of agreement: [0, val_start) train, [val_start, eval_start) val,
    # [eval_start, total) eval.
    eval_start = total_samples - self.eval_samples
    val_start = eval_start - self.val_samples

    self.eval_input_data = self.all_input_data[eval_start:]
    self.eval_output_data = self.all_output_data[eval_start:]
    self.val_input_data = self.all_input_data[val_start:eval_start]
    self.val_output_data = self.all_output_data[val_start:eval_start]
    self.train_input_data = self.all_input_data[:val_start]
    self.train_output_data = self.all_output_data[:val_start]
def shuffle_training_only(self):
    """Shuffle only the training split; val/eval data are left untouched."""
    print('Shuffling Training Data')
    shuffled = data.shuffle_input_output(
        self.train_input_data, self.train_output_data)
    self.train_input_data, self.train_output_data = shuffled