def test_with_generated_data(self, size_list=None, max_nodes=100):
    # Benchmark the local and Spark implementations on generated data of
    # increasing size, then persist and plot the timings.
    size_list = size_list or []  # guard against the None default
    spark_context = pyspark.SparkContext()
    local_timers = []
    spark_timers = []
    dg = DataGenerator()
    for n in size_list:
        dg.data_as_file(n, max_nodes)
        local_timers.append(self.test_local(
            Consts.GENERATED_DATA_DEFAULT_NAME, ','
        ))
        spark_timers.append(self.test_spark(
            spark_context, Consts.GENERATED_DATA_DEFAULT_NAME, ','
        ))
    resulting_data = pd.DataFrame({
        'n': size_list,
        'local': local_timers,
        'spark': spark_timers
    })
    resulting_data.to_csv('./out/test_out/generated_out.csv')
    plt.plot(size_list, spark_timers, 'red')
    plt.plot(size_list, local_timers, 'blue')
    plt.show()
def test_should_produce_augmented_samples_given_batch_size(self):
    images = np.random.rand(20, 64, 64, 1)
    labels = np.random.rand(20)
    generator = DataGenerator().fit(images, labels)
    batch_size = 10
    batch, _ = generator.get_next_batch(batch_size)
    self.assertEqual(batch.shape[0], batch_size)
    self.assertEqual(images.shape[1:], batch.shape[1:])
def test_should_raise_error_if_model_not_fit_to_data_yet(self):
    with self.assertRaises(ValueError) as a:
        DataGenerator(time_delay=5).get_next_batch()
    with self.assertRaises(ValueError) as b:
        DataGenerator(time_delay=5).generate()
    self.assertEqual("Model is not fit to any data set yet", str(a.exception))
    self.assertEqual("Model is not fit to any data set yet", str(b.exception))
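# Hedged sketch of the "not fit yet" guard the test above exercises (a
# hypothetical implementation, not necessarily the library's code):
# get_next_batch() and generate() refuse to run until fit() has stored
# samples and labels. generate() performs the check eagerly and returns an
# inner generator so the error surfaces at call time, as the test expects.
class DataGenerator:
    def __init__(self, time_delay=None):
        self.time_delay = time_delay
        self.samples = None
        self.labels = None

    def fit(self, samples, labels):
        # Remember the data set so batches can be drawn from it later.
        self.samples = samples
        self.labels = labels
        return self

    def _assert_fit(self):
        if self.samples is None or self.labels is None:
            raise ValueError("Model is not fit to any data set yet")

    def get_next_batch(self, batch_size=10, target_dimensions=None):
        self._assert_fit()
        # Minimal behaviour: return the first batch_size samples and labels.
        return self.samples[:batch_size], self.labels[:batch_size]

    def generate(self, target_dimensions=None, batch_size=10):
        self._assert_fit()

        def _batches():
            # Minimal behaviour: yield fixed-size batches indefinitely.
            while True:
                yield self.samples[:batch_size], self.labels[:batch_size]

        return _batches()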
def train(self):
    ip = ImageProcessing(self.data_path,
                         test_size=self.run_parameters.get('test_size'))
    network_factory = NetworkFactory(
        network_parameters=self.run_parameters.get('network'),
        n_classes=ip.n_classes,
        input_shape=tuple(self.run_parameters.get('input_shape')))
    model = network_factory.build_cnn()
    train_generator = DataGenerator(
        ip.X_train, ip.y_train,
        batch_size=self.run_parameters.get('batch_size'),
        run_parameters=self.run_parameters)
    valid_generator = DataGenerator(
        ip.X_test, ip.y_test,
        batch_size=self.run_parameters.get('batch_size'),
        run_parameters=self.run_parameters,
        validation=True)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                       restore_best_weights=True)
    model.compile(loss="categorical_crossentropy", optimizer="adam",
                  metrics=["accuracy"])
    K.clear_session()
    history = model.fit_generator(generator=train_generator,
                                  validation_data=valid_generator,
                                  epochs=self.run_parameters.get('epochs'),
                                  callbacks=[es])
    if not os.path.exists(self.model_path):
        os.makedirs(self.model_path)
    model.save(os.path.join(self.model_path, f"model-{self.run_number}.h5"))
    data = {
        'history': history.history,
        'epochs': len(history.history.get('loss'))
    }
    return data
def fit(self, curve_file_suffix=None):
    logging.info("Fitting model...")
    self.model.summary()
    config = self.config.get('fit')
    train_generator = DataGenerator(text=self.data.train_qs,
                                    labels=self.data.train_labels,
                                    text_mapper=self.text_mapper,
                                    batch_size=self.batch_size)
    val_generator = DataGenerator(text=self.data.val_qs,
                                  labels=self.data.val_labels,
                                  text_mapper=self.text_mapper,
                                  batch_size=self.batch_size)
    callbacks = self._get_callbacks(config.get('epochs'), config.get('batch_size'))
    # batch_size = [32, 64, 128, 256]
    # todo: write this in a for loop and change batch size, learning rate,
    #       and epsilon (K.set_epsilon(1e-2))
    # for i in range(4):
    #     train_generator.batch_size = batch_size[i]
    self.model.fit_generator(
        generator=train_generator,
        epochs=5,
        verbose=1,
        callbacks=callbacks,
        validation_data=val_generator,
        max_queue_size=10,  # why not make this >>>
        workers=1,
        use_multiprocessing=False,
        shuffle=True)
    # self.history = self.model.fit(x=train_x,
    #                               y=train_y,
    #                               epochs=2,
    #                               batch_size=32,
    #                               validation_data=(val_x, val_y),
    #                               callbacks=callbacks)
    if config.get('save_curve'):
        if self.lr_finder:
            self.lr_finder.plot_schedule(
                filename="lr_schedule_" + str(self.name) + ".png")
        filename = 'training_curve'
        if self.name:
            filename += '_' + self.name
        if curve_file_suffix:
            filename += '_' + curve_file_suffix
        filename += '.png'
def test_should_raise_error_when_time_delay_was_not_set_and_input_is_time_series(
        self):
    images = np.random.rand(10, 2, 64, 64, 3)
    labels = np.random.rand(10)
    with self.assertRaises(ValueError) as e:
        DataGenerator().fit(images, labels)
    self.assertEqual(
        "Images have time axis length 2 but time_delay parameter was set to None",
        str(e.exception))
def test_should_raise_error_when_time_delay_parameter_is_set_and_input_is_simple_images(
        self):
    images = np.random.rand(10, 64, 64, 3)
    labels = np.random.rand(10)
    with self.assertRaises(ValueError) as e:
        DataGenerator(time_delay=4).fit(images, labels)
    self.assertEqual(
        "Time_delay parameter was set but Images say otherwise",
        str(e.exception))
def test_should_raise_error_if_time_delay_is_not_matching_input_time_axis(
        self):
    images = np.random.rand(10, 4, 64, 64, 3)
    labels = np.random.rand(10)
    with self.assertRaises(ValueError) as e:
        DataGenerator(time_delay=5).fit(images, labels)
    self.assertEqual(
        "Images have time axis length 4 but time_delay parameter was set to 5",
        str(e.exception))
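# Hedged sketch of the shape/time_delay validation the three tests above imply
# (a hypothetical helper, not necessarily the library's actual code): 5-D input
# is treated as a time series (N, T, H, W, C) whose time axis must equal
# time_delay, while 4-D input (N, H, W, C) must not have time_delay set.
def _validate_time_delay(images, time_delay):
    if images.ndim == 5:
        time_axis_length = images.shape[1]
        if time_axis_length != time_delay:
            raise ValueError(
                "Images have time axis length %d but time_delay parameter "
                "was set to %s" % (time_axis_length, time_delay))
    elif images.ndim == 4 and time_delay is not None:
        raise ValueError("Time_delay parameter was set but Images say otherwise")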
def test_sine(self):
    default_ts = DataGenerator.generate_sine()
    expected_len = 10000
    self.assertEqual(default_ts.shape[0], expected_len)
    self.assertAlmostEqual(default_ts[0], 0.0)
    self.assertAlmostEqual(default_ts[-1], 0.0)
    middle = expected_len // 2
    self.assertAlmostEqual(default_ts[middle], 1.0)
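# Hedged sketch of a generate_sine consistent with test_sine above (hypothetical,
# not necessarily the library's implementation): one half sine period sampled at
# 10000 points starts at 0.0, peaks at ~1.0 at the midpoint, and ends near 0.0.
import numpy as np


def generate_sine(length=10000):
    # sin over [0, pi]: value ~0 at both endpoints, ~1 at the midpoint.
    return np.sin(np.linspace(0.0, np.pi, length))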
def get_generators(model_name):
    def get_main_params(train_test):
        dataset = DataSet()
        list_IDs, targets = dataset.get_partition(train_test, balanced=True)
        return list_IDs, targets, train_test

    params = {
        'dim': (96, 96),
        'batch_size': 128,
        'n_channels': 1,
        'shuffle': True
    }

    # Generators
    train_generator = DataGenerator(*get_main_params("train"), **params)
    valid_generator = DataGenerator(*get_main_params("test"), **params)
    return train_generator, valid_generator
def predict_subset(self, subset='train'):
    if subset == 'train':
        questions = self.data.train_qs
    elif subset == 'val':
        questions = self.data.val_qs
    elif subset == 'test':
        questions = self.data.get_questions(subset)
    # input_x = self.prepare_model_inputs(questions)
    # preds = self.predict(input_x)
    data_gen = DataGenerator(text=questions, text_mapper=self.text_mapper,
                             shuffle=False)
    preds = self.model.predict_generator(data_gen, workers=1,
                                         use_multiprocessing=True,
                                         max_queue_size=10)
    return preds
print('Loading data...')
csv_file_path = "image_data/sample.csv"
data_loader = CSVDataLoader(target_emotion_map=fer_dataset_label_map,
                            datapath=csv_file_path,
                            validation_split=validation_split,
                            image_dimensions=raw_dimensions,
                            csv_label_col=0,
                            csv_image_col=1,
                            out_channels=3)
dataset = data_loader.load_data()

if verbose:
    dataset.print_data_details()

print('Creating training/testing data...')
train_images, train_labels = dataset.get_training_data()
train_gen = DataGenerator().fit(train_images, train_labels)
test_images, test_labels = dataset.get_test_data()
test_gen = DataGenerator().fit(test_images, test_labels)

print('Initializing neural network with InceptionV3 base model...')
model = TransferLearningNN(model_name=model_name,
                           emotion_map=dataset.get_emotion_index_map())

print('Training model...')
model.fit_generator(train_gen.generate(target_dimensions, 10),
                    test_gen.generate(target_dimensions, 10),
                    epochs=10)

# Save model configuration
# model.export_model('output/transfer_learning_model.json','output/transfer_learning_weights.h5',"output/transfer_learning_emotion_map.json", emotion_map)
model.compile(loss=custom_loss, optimizer=opt)

print('Checking input directory...')
Files = image_files_from_folder(train_dir)

Data = []
for file in Files:
    labfile = splitext(file)[0] + '.txt'
    if isfile(labfile):
        L = readShapes(labfile)
        if L:
            I = cv2.imread(file)
            Data.append([I, L[0]])
Data = np.array(Data)

train_data, validation_data = train_test_split(Data, test_size=0.2,
                                               random_state=42, shuffle=True)
print('%d images with labels found' % len(Data))

train_data_generator = DataGenerator(data=train_data, batch_size=4, dim=dim,
                                     model_stride=model_stride)
validation_data_generator = DataGenerator(data=validation_data, batch_size=4,
                                          dim=dim, model_stride=model_stride)

model_path_final = '%s' % (model_path)
model.fit(x=train_data_generator, validation_data=validation_data_generator,
          epochs=100, callbacks=get_callbacks())

print('Stopping data generator')
print('Saving model (%s)' % model_path_final)
# save_model(model, model_path_final)
# Example ID Datasets
validation_split_point = int(dataset_size * (1 - VALIDATION_SET_RATIO - TEST_SET_RATIO))
test_split_point = int(dataset_size * (1 - TEST_SET_RATIO))

train_ids = all_ids[0:validation_split_point]
validation_ids = all_ids[validation_split_point:test_split_point]
test_ids = all_ids[test_split_point:dataset_size]

title('Initialisation')
log(f'TRAIN set size: {len(train_ids)}')
log(f'VALIDATION set size: {len(validation_ids)}')
log(f'TEST set size: {len(test_ids)}\n')

# Data Generators
training_generator = DataGenerator(available_ids=train_ids, **params)
validation_generator = DataGenerator(available_ids=validation_ids, **params)
test_generator = DataGenerator(available_ids=test_ids, **params)

# Create keras model
model = create_model(len(class_names))
epochs_count = len(training_generator)
training_logger = TrainingLogger(epochs_count)

# Print model architecture
print(model.summary())
sys.stdout.flush()

# Train model on dataset
title('Training Model')
model.fit_generator(generator=training_generator,
                                  datapath=directory_path,
                                  time_steps=time_delay)
image_data, labels, emotion_map = data_loader.get_data()

if verbose:
    print('raw image data shape: ' + str(image_data.shape))
label_count = len(labels[0])

print('Training net...')
validation_split = 0.15
X_train, X_test, y_train, y_test = train_test_split(image_data, labels,
                                                    test_size=validation_split,
                                                    random_state=42,
                                                    stratify=labels)
train_gen = DataGenerator(time_delay=time_delay).fit(X_train, y_train)
test_gen = DataGenerator(time_delay=time_delay).fit(X_test, y_test)
model = ConvolutionalLstmNN(target_dimensions, channels, emotion_map,
                            time_delay=time_delay)
model.fit_generator(train_gen.generate(target_dimensions, batch_size=5),
                    test_gen.generate(target_dimensions, batch_size=5),
                    epochs=10)

## if you want to save a graph of your model layers.
model.save_model_graph()

# Save model configuration
# model.export_model('output/conv_lstm_model.json','output/conv_lstm_weights.h5',"output/conv_lstm_emotion_map.json", emotion_map)
                            image_dimensions=raw_dimensions,
                            csv_label_col=0,
                            csv_image_col=1,
                            out_channels=3)
images, labels, emotion_map = data_loader.get_data()

if verbose:
    print('raw image shape: ' + str(images.shape))

print('Creating training/testing data...')
validation_split = 0.15
X_train, X_test, y_train, y_test = train_test_split(images, labels,
                                                    test_size=validation_split,
                                                    random_state=42,
                                                    stratify=labels)
train_gen = DataGenerator().fit(X_train, y_train)
test_gen = DataGenerator().fit(X_test, y_test)

print('--------------- Inception-V3 Model -------------------')
print('Initializing neural network with InceptionV3 base model...')
model = TransferLearningNN(model_name=model_name, emotion_map=emotion_map)

print('Training model...')
print('numLayers: ' + str(len(model.model.layers)))
model.fit_generator(train_gen.generate(target_dimensions, 10),
                    test_gen.generate(target_dimensions, 10),
                    epochs=10)

# Save model configuration
# model.export_model('output/transfer_learning_model.json','output/transfer_learning_weights.h5',"output/transfer_learning_emotion_map.json", emotion_map)
def __init__(self):
    self.gen = DataGenerator()
class CSVGenerator():

    def __init__(self):
        self.gen = DataGenerator()

    def go(self, clients, commerces, transactions):
        self.__generate_and_save_clients(clients)
        self.__generate_and_save_commerces(commerces)
        self.__generate_and_save_transactions(transactions)

    def __generate_and_save_transactions(self, transactions):
        print("Generating Transactions")
        self.gen.generate_random_transaction_list(transactions)
        print("Saving Transactions")
        headers = self.__get_transaction_header_order()
        rows = []
        for transaction in self.gen.transaction_list:
            rows.append(self.__document_to_row(transaction, headers))
        self.__clean_and_fill_csv(TRANSACTION_CSV, headers, rows)

    def __generate_and_save_clients(self, quantity):
        print("Generating Clients")
        self.gen.generate_client_list(quantity)
        print("Saving Clients")
        headers = self.__get_client_header_order()
        rows = []
        for client in self.gen.client_list:
            rows.append(self.__document_to_row(client, headers))
        self.__clean_and_fill_csv(CLIENTS_CSV, headers, rows)

    def __generate_and_save_commerces(self, commerces):
        print("Generating Commerces")
        self.gen.generate_random_commerce_list(commerces)
        print("Saving Commerces")
        headers = self.__get_commerce_header_order()
        rows = []
        for commerce in self.gen.commerce_list:
            rows.append(self.__document_to_row(commerce, headers))
        self.__clean_and_fill_csv(COMMERCE_CSV, headers, rows)

    def __clean_and_fill_csv(self, file_path, headers, data_list):
        # Truncate the file, then write the header row and all data rows with
        # a single tab-delimited writer.
        with open(file_path, 'wb') as myfile:
            wr = unicodecsv.writer(myfile, quoting=csv.QUOTE_NONE, delimiter='\t')
            wr.writerow(headers)
            wr.writerows(data_list)

    ##=========================================================================
    ## DOCUMENT TO ROW CONVERSION
    ##=========================================================================

    def __document_to_row(self, document, header_order_list):
        row = []
        for header in header_order_list:
            row.append(document[header])
        return row

    def __get_client_header_order(self):
        return [
            "_id", "credit_card", "account_iban", "country", "contry_name",
            "birth_day", "address", "name", "last_name"
        ]

    def __get_commerce_header_order(self):
        return [
            "tpv", "account_iban", "country", "contry_name", "_id", "url",
            "email"
        ]

    def __get_transaction_header_order(self):
        return [
            "_id", "client_country", "client_id", "commerce_tpv",
            "client_credit_card", "transaction_ammount", "commerce_id",
            "client_country_name", "commerce_country", "commerce_contry_name",
            "commerce_account_iban", "transaction_datetime", "client_name",
            "client_last_name"
        ]
target_dimensions = (64, 64)
channels = 1
verbose = True

print('--------------- Convolutional Model -------------------')
print('Loading data...')
directory_path = "image_data/sample_image_directory"
data_loader = DirectoryDataLoader(datapath=directory_path,
                                  validation_split=validation_split)
dataset = data_loader.load_data()

if verbose:
    dataset.print_data_details()

print('Preparing training/testing data...')
train_images, train_labels = dataset.get_training_data()
train_gen = DataGenerator().fit(train_images, train_labels)
test_images, test_labels = dataset.get_test_data()
test_gen = DataGenerator().fit(test_images, test_labels)

print('Training net...')
model = ConvolutionalNN(target_dimensions, channels,
                        dataset.get_emotion_index_map(), verbose=True)
model.fit_generator(train_gen.generate(target_dimensions, batch_size=5),
                    test_gen.generate(target_dimensions, batch_size=5),
                    epochs=5)

# Save model configuration
# model.export_model('output/conv2d_model.json','output/conv2d_weights.h5',"output/conv2d_emotion_map.json", emotion_map)
@classmethod
def setUpClass(cls):
    """Set up the test class with a shared DataGenerator instance."""
    cls.dg = DataGenerator(DATA_PATH, 8, True)
u1 = layers.Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(c3)
u1 = layers.concatenate([u1, c1])
c4 = layers.Conv2D(16, (3, 3), activation='elu', padding='same')(u1)
c4 = layers.Conv2D(16, (3, 3), activation='elu', padding='same')(c4)
outputs = layers.Conv2D(4, (1, 1), activation='sigmoid')(c4)

model = models.Model(inputs, outputs)
model.compile(optimizer='rmsprop', loss='binary_crossentropy',
              metrics=[dice_coef])

train_data = pd.read_csv('data/train.csv')
image_ids = get_ids(train_data)
train_gen = DataGenerator(image_ids[:1000], batch_size=20)
valid_gen = DataGenerator(image_ids[1000:2000], batch_size=20, shuffle=True)

checkpoint = ModelCheckpoint('models/model.h5', monitor='val_dice_coef',
                             verbose=0, save_best_only=True,
                             save_weights_only=False, mode='auto')
history = model.fit_generator(train_gen,
                              validation_data=valid_gen,
                              callbacks=[checkpoint],
                              epochs=10)
raw_dimensions = (48, 48)
target_dimensions = (64, 64)
channels = 1
verbose = True

print('--------------- Convolutional LSTM Model -------------------')
print('Loading data...')
directory_path = "image_data/sample_image_series_directory"
data_loader = DirectoryDataLoader(datapath=directory_path,
                                  validation_split=validation_split,
                                  time_delay=2)
dataset = data_loader.load_data()

if verbose:
    dataset.print_data_details()

print('Preparing training/testing data...')
train_images, train_labels = dataset.get_training_data()
train_gen = DataGenerator(time_delay=dataset.get_time_delay()).fit(train_images,
                                                                   train_labels)
test_images, test_labels = dataset.get_test_data()
test_gen = DataGenerator(time_delay=dataset.get_time_delay()).fit(test_images,
                                                                  test_labels)

print('Training net...')
model = ConvolutionalLstmNN(target_dimensions, channels,
                            dataset.get_emotion_index_map(),
                            time_delay=dataset.get_time_delay())
model.fit_generator(train_gen.generate(target_dimensions, batch_size=5),
                    test_gen.generate(target_dimensions, batch_size=5),
                    epochs=5)

## if you want to save a graph of your model layers.
model.save_model_graph()

# Save model configuration
# model.export_model('output/conv_lstm_model.json','output/conv_lstm_weights.h5',"output/conv_lstm_emotion_map.json", emotion_map)
def test_should_resize_images_to_given_target_dimension(self):
    images = np.random.rand(20, 64, 64, 3)
    labels = np.random.rand(20)
    generator = DataGenerator().fit(images, labels)
    batch, _ = generator.get_next_batch(10, target_dimensions=(28, 28))
    self.assertEqual(batch.shape, (10, 28, 28, 3))
def test_should_raise_error_when_labels_and_samples_are_mis_matched(self):
    images = np.random.rand(20, 64, 64)
    with self.assertRaises(ValueError) as e:
        DataGenerator().fit(images, [1])
    self.assertEqual("Samples are not labeled properly", str(e.exception))
def test_should_raise_error_when_channel_axis_is_not_present(self):
    images = np.random.rand(20, 64, 64)
    labels = np.random.rand(20)
    with self.assertRaises(ValueError) as e:
        DataGenerator().fit(images, labels)
    self.assertEqual("Channel Axis should have vale", str(e.exception))
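# Hedged sketch of the two fit-time checks the last two tests exercise (a
# hypothetical helper, not necessarily the library's code): labels must line up
# with samples (checked first, as the mismatch test implies), and 3-D image
# input without a channel axis is rejected. The error strings are reproduced
# exactly as the tests expect them.
def _validate_samples_and_labels(images, labels):
    if len(images) != len(labels):
        raise ValueError("Samples are not labeled properly")
    if images.ndim == 3:  # (N, H, W) without a channel axis
        raise ValueError("Channel Axis should have vale")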
Files = image_files_from_folder(train_dir)

Data = []
for file in Files:
    labfile = splitext(file)[0] + '.txt'
    if isfile(labfile):
        L = readShapes(labfile)
        I = cv2.imread(file)
        Data.append([I, L[0]])

print('%d images with labels found' % len(Data))

dg = DataGenerator(data=Data,
                   process_data_item_func=lambda x: process_data_item(x, dim, model_stride),
                   xshape=xshape,
                   yshape=(yshape[0], yshape[1], yshape[2] + 1),
                   nthreads=2,
                   pool_size=1000,
                   min_nsamples=100)
dg.start()

Xtrain = np.empty((batch_size, dim, dim, 3), dtype='single')
Ytrain = np.empty((int(batch_size), int(dim / model_stride),
                   int(dim / model_stride), int(2 * 4 + 1)))

model_path_backup = '%s/%s_backup' % (outdir, netname)
model_path_final = '%s/%s_final' % (outdir, netname)

for it in range(iterations):
    print('Iter. %d (of %d)' % (it + 1, iterations))
    conv10 = Convolution2D(1, 1, name="segmentation")(conv9)

    # define model and compile
    model = Model(inputs=inputs, outputs=[conv10])

    # compile
    model.compile(
        loss=binary_crossentropy,
        optimizer=SGD(
            lr=float(LEARN_RATE),
            decay=float(DECAY),
            momentum=MOMENTUM,
        ),
    )
    return model


# load u net
u_net = get_2d_u_net_segmentation()

# get data gen object
data_gen = DataGenerator(DATA_PATH, BATCH_SIZE)

# fit model
u_net.fit_generator(
    generator=data_gen.data_generator(),
    steps_per_epoch=data_gen.get_num_steps(),
    epochs=EPOCHS,
)