def test_is_generator(self):
    gen = TestIterator()
    self.assertTrue(is_generator(gen))

    def generator():
        yield ([1, 2], [1, 2])

    self.assertTrue(is_generator(generator))
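# The test above relies on `TestIterator` and `is_generator`, which are not
# shown in this excerpt. Below is a minimal sketch of both, assuming
# `is_generator` accepts generator objects, generator functions, and
# iterator-like objects exposing next()/__next__(); the actual AETROS
# implementation may differ.
import inspect


class TestIterator(object):
    """Hypothetical iterator used by test_is_generator above."""

    def __next__(self):
        return ([1, 2], [1, 2])

    next = __next__  # Python 2 compatibility


def is_generator(obj):
    # True for generator objects, generator functions, and objects that
    # behave like iterators; False for plain lists, tuples, and dicts.
    return (inspect.isgenerator(obj)
            or inspect.isgeneratorfunction(obj)
            or hasattr(obj, 'next')
            or hasattr(obj, '__next__'))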
def job_start(job_backend, trainer, keras_callback):
    """
    Starts the training of a job. Needs job_prepare() first.

    :type job_backend: JobBackend
    :type trainer: Trainer
    :return:
    """
    job_backend.set_status('STARTING')

    job_model = job_backend.get_job_model()
    model_provider = job_model.get_model_provider()

    job_backend.set_status('LOAD DATA')
    datasets = job_model.get_datasets(trainer)

    print('trainer.input_shape = %s\n' % (simplejson.dumps(trainer.input_shape, default=invalid_json_values),))
    print('trainer.classes = %s\n' % (simplejson.dumps(trainer.classes, default=invalid_json_values),))

    multiple_inputs = len(datasets) > 1
    insights_x = [] if multiple_inputs else None

    for dataset_name in job_model.get_input_dataset_names():
        dataset = datasets[dataset_name]

        if is_generator(dataset['X_train']):
            # Pull one batch to get a representative input sample.
            # next() works on both Python 2 and 3 generators, unlike
            # the Python-2-only .next() method.
            batch_x, batch_y = next(dataset['X_train'])

            if multiple_inputs:
                insights_x.append(batch_x[0])
            else:
                insights_x = batch_x[0]
        else:
            if multiple_inputs:
                insights_x.append(dataset['X_train'][0])
            else:
                insights_x = dataset['X_train'][0]

    keras_callback.insights_x = insights_x
    print('Insights sample shape', keras_callback.insights_x.shape)

    keras_callback.write("Possible data keys '%s'\n" % "','".join(list(datasets.keys())))

    data_train = model_provider.get_training_data(trainer, datasets)
    data_validation = model_provider.get_validation_data(trainer, datasets)

    keras_callback.set_validation_data(data_validation, trainer.nb_val_samples)

    trainer.set_status('CONSTRUCT')
    model = model_provider.get_model(trainer)
    trainer.set_model(model)

    trainer.set_status('COMPILING')
    loss = model_provider.get_loss(trainer)
    optimizer = model_provider.get_optimizer(trainer)
    model_provider.compile(trainer, model, loss, optimizer)
    model.summary()

    trainer.callbacks.append(keras_callback)
    model_provider.train(trainer, model, data_train, data_validation)
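# job_start drives a user-supplied model provider. The interface below is
# inferred from the calls made above; it is an illustrative sketch, not the
# actual AETROS base class, and method bodies are placeholders.


class MyModelProvider(object):
    def get_training_data(self, trainer, datasets):
        # e.g. return (x_train, y_train) arrays or a batch generator
        ...

    def get_validation_data(self, trainer, datasets):
        ...

    def get_model(self, trainer):
        # build and return the (uncompiled) Keras model
        ...

    def get_loss(self, trainer):
        return 'categorical_crossentropy'

    def get_optimizer(self, trainer):
        return 'adam'

    def compile(self, trainer, model, loss, optimizer):
        model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    def train(self, trainer, model, data_train, data_validation):
        ...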
def set_validation_data(self, validation_data, validation_data_size=None):
    self.data_validation = validation_data
    self.data_validation_size = None

    if self.data_validation is None:
        return

    input_data_x = None

    # It's a dict from AETROS code generation
    if isinstance(self.data_validation, dict) and 'x' in self.data_validation:
        if is_generator(self.data_validation['x']):
            # single input
            input_data_x = self.data_validation['x']
        elif isinstance(self.data_validation['x'], dict):
            # multiple inputs, named
            input_data_x = next(six.itervalues(self.data_validation['x']))

    # Not from AETROS code generation
    else:
        if is_generator(self.data_validation):
            input_data_x = self.data_validation
        elif isinstance(self.data_validation, dict):
            input_data_x = next(six.itervalues(self.data_validation))
        elif isinstance(self.data_validation, tuple):
            input_data_x = self.data_validation[0]

    if is_generator(input_data_x):
        if validation_data_size is None:
            raise Exception('validation_data_size needs to be set when a generator is given.')
        self.data_validation_size = validation_data_size
    elif input_data_x is not None:
        self.data_validation_size = len(input_data_x)

    if self.data_validation_size is None:
        raise Exception('data_validation_size could not be determined for given validation_data. Please specify it.')
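# Usage sketch for the size-inference rules above (illustrative; assumes a
# KerasCallback instance `keras_callback` as constructed in
# test_set_validation_data below, and a hypothetical `my_generator`):
# tuple/dict inputs infer their size via len(), generators need an explicit one.

def my_generator():
    while True:
        yield ([1, 2], [1, 2])

# Size inferred from the x part of the tuple: len([1, 2, 3]) == 3.
keras_callback.set_validation_data(([1, 2, 3], [1, 2, 3]))

# A generator has no length, so the size must be passed explicitly;
# omitting it raises an Exception.
keras_callback.set_validation_data(my_generator(), validation_data_size=500)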
def test_set_validation_data(self):
    job_backend = JobBackend('test')
    job_backend.job = {'id': 'test', 'index': 1, 'modelId': 'test/model'}
    keras_callback = KerasCallback(job_backend, sys.stdout)

    keras_callback.set_validation_data(([1, 2, 3], [1, 2, 3]))
    self.assertEqual(keras_callback.data_validation_size, 3)

    with pytest.raises(Exception):
        keras_callback.set_validation_data([[1, 2], [1, 2]])

    # aetros format
    keras_callback.set_validation_data({'x': {'input': [1, 2, 3]}, 'y': {'output': [1, 2, 3]}})
    self.assertEqual(keras_callback.data_validation_size, 3)

    keras_callback.set_validation_data({'input': [1, 2, 3], 'output': [1, 2, 3]})
    self.assertEqual(keras_callback.data_validation_size, 3)

    keras_callback.set_validation_data(([1, 2, 3], [1, 2, 3]), 5)
    self.assertEqual(keras_callback.data_validation_size, 3)

    def generator():
        yield ([1, 2], [1, 2])

    self.assertTrue(is_generator(generator))

    with pytest.raises(Exception):
        keras_callback.set_validation_data(generator)

    with pytest.raises(Exception):
        keras_callback.set_validation_data((generator, generator))

    with pytest.raises(Exception):
        keras_callback.set_validation_data([generator, generator], 6)

    keras_callback.set_validation_data(generator, 5)
    self.assertEqual(keras_callback.data_validation_size, 5)

    keras_callback.set_validation_data((generator, generator), 4)
    self.assertEqual(keras_callback.data_validation_size, 4)
def build_confusion_matrix(self):
    confusion_matrix = {}

    if self.data_validation_size is None:
        return confusion_matrix

    if len(self.model.output_layers) > 1:
        return confusion_matrix

    first_input_layer = self.model.input_layers[0]
    first_output_layer = self.model.output_layers[0]

    if 'Softmax' not in str(first_output_layer.output) or len(first_output_layer.output_shape) != 2:
        return confusion_matrix

    input_data_x = None
    input_data_y = []

    # It's a dict from AETROS code generation
    if isinstance(self.data_validation, dict) and 'x' in self.data_validation:
        if is_generator(self.data_validation['x']):
            # single input
            input_data_x = self.data_validation['x']
        elif isinstance(self.data_validation['x'], dict):
            # multiple inputs, named
            input_data_x = self.data_validation['x'][first_input_layer.name]
            input_data_y = self.data_validation['y'][first_output_layer.name]

    # Not from AETROS code generation
    else:
        if is_generator(self.data_validation):
            input_data_x = self.data_validation
        elif isinstance(self.data_validation, dict):
            if len(self.model.input_layers) > 1:
                input_data_x = []
                for layer in self.model.input_layers:
                    input_data_x.append(self.data_validation[layer.name])
            input_data_y = self.data_validation[first_output_layer.name]
        elif isinstance(self.data_validation, tuple):
            input_data_x = self.data_validation[0]
            input_data_y = self.data_validation[1]

    if input_data_x is None:
        return confusion_matrix

    matrix = np.zeros((first_output_layer.output_shape[1], first_output_layer.output_shape[1]))

    if is_generator(input_data_x):
        processed_samples = 0

        while processed_samples < self.data_validation_size:
            generator_output = next(input_data_x)
            if len(generator_output) == 2:
                x, y = generator_output
                sample_weight = None
            elif len(generator_output) == 3:
                x, y, sample_weight = generator_output
            else:
                self.model._stop.set()
                raise Exception('output of generator should be a tuple '
                                '(x, y, sample_weight) '
                                'or (x, y). Found: ' + str(generator_output))

            if type(x) is list:
                nb_samples = len(x[0])
            elif type(x) is dict:
                nb_samples = len(list(x.values())[0])
            else:
                nb_samples = len(x)

            processed_samples += nb_samples

            prediction = self.model.predict_on_batch(x)
            predicted_classes = prediction.argmax(axis=-1)
            expected_classes = y.argmax(axis=-1)

            try:
                for sample_idx, predicted_class in enumerate(predicted_classes):
                    expected_class = expected_classes[sample_idx]
                    matrix[expected_class, predicted_class] += 1
            except Exception:
                pass
    else:
        batch_size = self.current['batch_size'] if 'batch_size' in self.current else 16
        prediction = self.model.predict(input_data_x, batch_size=batch_size)
        predicted_classes = prediction.argmax(axis=-1)
        expected_classes = np.array(input_data_y).argmax(axis=-1)

        try:
            for sample_idx, predicted_class in enumerate(predicted_classes):
                expected_class = expected_classes[sample_idx]
                matrix[expected_class, predicted_class] += 1
        except Exception:
            pass

    confusion_matrix[first_output_layer.name] = matrix.tolist()

    return confusion_matrix
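# Illustrative sketch of consuming the return value above: a dict mapping the
# output layer's name to a nested-list confusion matrix (rows = expected
# class, columns = predicted class). The `keras_callback` variable is an
# assumption, not from the original source.
import numpy as np

matrices = keras_callback.build_confusion_matrix()
for layer_name, rows in matrices.items():
    m = np.array(rows, dtype=int)
    total = float(m.sum())
    accuracy = m.trace() / total if total else 0.0  # diagonal = correct predictions
    print('%s: %d classes, accuracy %.3f' % (layer_name, m.shape[0], accuracy))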