def build_mlp(IMG_SIZE):
    """Build and compile the whole-image Multi Layer Perceptron.

    The network flattens an ``IMG_SIZE x IMG_SIZE x 3`` input, passes it
    through one 2048-unit ReLU layer and ends in an 8-way softmax. It is
    compiled with categorical cross-entropy, SGD and an accuracy metric.
    A diagram of the model is written under ``dump/models``.

    Args:
        IMG_SIZE: side length (in pixels) of the square input images.

    Returns:
        The compiled ``Sequential`` model.
    """
    colorprint(Color.BLUE, 'Building MLP model...\n')

    model = Sequential()
    model.add(
        Reshape((IMG_SIZE * IMG_SIZE * 3, ),
                input_shape=(IMG_SIZE, IMG_SIZE, 3),
                name='first'))
    model.add(Dense(units=2048, activation='relu', name='second'))
    model.add(Dense(units=8, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.summary()

    # makedirs (not mkdir) so that a missing parent 'dump' directory is
    # created as well instead of raising.
    plot_dir = 'dump/models'
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    # NOTE(review): hash() of a str is salted per interpreter run on
    # Python 3 unless PYTHONHASHSEED is fixed, so this file name may differ
    # between runs for the same architecture.
    plot_model(model,
               to_file=plot_dir + '/' + str(hash(str(model.get_config()))) +
               '.png',
               show_shapes=True,
               show_layer_names=True)

    colorprint(Color.BLUE, 'Done!\n')
    return model
def build_patch_mlp(PATCH_SIZE, phase='TRAIN'):
    """Build and compile the patch-based Multi Layer Perceptron.

    The network flattens a ``PATCH_SIZE x PATCH_SIZE x 3`` patch and passes
    it through one 2048-unit ReLU layer. The 8-unit output layer depends on
    ``phase``: softmax for training, linear for testing, because in the test
    phase the softmax is applied to the output averaged over the image
    patches. A diagram of the model is written under ``dump/patch_models``.

    Args:
        PATCH_SIZE: side length (in pixels) of the square input patches.
        phase: ``'TEST'`` (any letter case) selects the linear output layer;
            every other value selects the softmax output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    colorprint(Color.BLUE, 'Building MLP model...\n')

    model = Sequential()
    model.add(
        Reshape((PATCH_SIZE * PATCH_SIZE * 3, ),
                input_shape=(PATCH_SIZE, PATCH_SIZE, 3)))
    model.add(Dense(units=2048, activation='relu'))

    # str.capitalize() yields 'Test', which never equals 'TEST', so the
    # original comparison made this branch unreachable; upper() is what
    # a case-insensitive match against 'TEST' needs.
    if phase.upper() == 'TEST':
        # In test phase we softmax the average output over the image patches
        output_activation = 'linear'
    else:
        output_activation = 'softmax'
    model.add(Dense(units=8, activation=output_activation))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])
    model.summary()

    # makedirs (not mkdir) so that a missing parent 'dump' directory is
    # created as well instead of raising.
    plot_dir = 'dump/patch_models'
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)

    plot_model(model,
               to_file=plot_dir + '/' + str(hash(str(model.get_config()))) +
               '.png',
               show_shapes=True,
               show_layer_names=True)

    colorprint(Color.BLUE, 'Done!\n')
    return model
def test_config_deserialisation(self):
    """Round-trip a small model through ``get_config`` / ``from_config``.

    A ``Sequential`` model is assembled from a convolution, a max-pool, a
    dropout and three dense layers, serialised with ``get_config`` and
    rebuilt with ``Sequential.from_config``. Only the input shape of the
    first layer is compared between the two models; whole-model equality
    is not asserted.
    """
    seed = 1234
    tf.random.set_seed(seed)

    model = Sequential()
    add_layer = model.add

    # Convolutional front end.
    add_layer(
        Conv2D(filters=64,
               kernel_size=(3, 3),
               padding='Same',
               activation='relu'))
    add_layer(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    add_layer(Dropout(0.25, seed=seed))

    # Dense head.
    add_layer(
        Dense(12,
              input_dim=8,
              activation='relu',
              kernel_initializer='glorot_uniform',
              seed=seed))
    add_layer(Dense(8, activation='relu', seed=seed))
    add_layer(Dense(1, activation='sigmoid', seed=seed))

    # Serialise, then rebuild from the serialised form.
    cfg = model.get_config()
    restored = Sequential.from_config(cfg)

    original_shape = model.layers[0].input.shape
    restored_shape = restored.layers[0].input.shape
    self.assertEqual(original_shape, restored_shape)
class CNNmodel7:
    """Randomly parameterised CNN classifier with an 8-way softmax output.

    The architecture is three ``Conv2D`` + ``MaxPooling2D`` stages followed
    by two ReLU dense layers and the softmax classifier. Filter counts,
    kernel sizes, strides, pooling sizes and dense widths are sampled with
    ``np.random`` each time an instance is created. The model is compiled
    with categorical cross-entropy, Adam and an accuracy metric.

    Attributes set by ``__init__``:
        model: the compiled ``Sequential`` model.
        born_time: UTC creation timestamp formatted as ``%Y%m%d%H%M%S``.
        identifier: ``str(hash(...))`` of the stringified model config.
        dump_path: directory ``<dump_path>/<born_time>_<identifier>`` that
            receives plots, history pickles and weights.
        input_img_size: the ``img_size`` passed to the constructor.

    NOTE(review): nothing here checks that the sampled kernel, stride and
    pooling sizes are compatible with ``img_size``; confirm how callers deal
    with draws the backend rejects.
    """

    # Per-channel statistics applied by _preprocess_input.
    _CHANNEL_MEAN = (109.07621812, 115.45609435, 114.70990406)
    _CHANNEL_STD = (56.91689916, 55.4694083, 59.14847488)

    def __init__(self, img_size=(256, 256), dump_path='dump/'):
        """Sample an architecture, compile it and prepare the dump folder.

        Args:
            img_size: ``(height, width)`` pair; the model input shape is
                ``(img_size[0], img_size[1], 3)`` and the data generators
                use it as ``target_size``.
            dump_path: parent directory under which this model's own dump
                directory is created.
        """
        hp = self._random_hyperparameters()

        # Model architecture
        self.model = Sequential()
        for stage in (1, 2, 3):
            tag = str(stage)
            kernel = hp['conv' + tag + '_kernel']
            stride = hp['conv' + tag + '_strides']
            pool = hp['maxpool' + tag + '_size']

            conv_kwargs = dict(filters=hp['conv' + tag + '_filters'],
                               kernel_size=(kernel, kernel),
                               strides=(stride, stride),
                               activation='relu',
                               name='conv' + tag)
            if stage == 1:
                # Only the first layer declares the input shape.
                conv_kwargs['input_shape'] = (img_size[0], img_size[1], 3)

            self.model.add(Conv2D(**conv_kwargs))
            self.model.add(
                MaxPooling2D(pool_size=(pool, pool),
                             strides=None,
                             name='maxpool' + tag))

        self.model.add(Flatten())
        self.model.add(
            Dense(units=hp['fc1_units'], activation='relu', name='fc1'))
        self.model.add(
            Dense(units=hp['fc2_units'], activation='relu', name='fc2'))
        self.model.add(Dense(units=8, activation='softmax', name='classif'))

        # Compile
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=Adam(),
                           metrics=['accuracy'])

        # Parameters
        self.born_time = time.strftime('%Y%m%d%H%M%S', time.gmtime())
        self.identifier = str(hash(str(self.model.get_config())))
        self.dump_path = os.path.join(
            dump_path,
            str(self.born_time) + '_' + self.identifier)
        self.input_img_size = img_size

        # Print
        if not os.path.exists(self.dump_path):
            os.makedirs(self.dump_path)
        self.model.summary()
        print('Current model: ' + self.identifier)
        plot_model(self.model,
                   show_shapes=True,
                   show_layer_names=True,
                   to_file=os.path.join(self.dump_path,
                                        self.identifier + '.png'))

    @staticmethod
    def _random_hyperparameters():
        """Sample one set of architecture hyper-parameters.

        The draws are made in a fixed order (filters, kernels, strides,
        pooling sizes, dense widths) so that a given ``np.random`` seed
        always yields the same architecture.

        Returns:
            dict with integer values under the keys ``conv{1,2,3}_filters``
            (1..64), ``conv{1,2,3}_kernel`` (2..9), ``conv{1,2,3}_strides``
            (1..kernel // 2), ``maxpool{1,2,3}_size`` (2..7) and
            ``fc{1,2}_units`` (a power of two from 64 to 1024).
        """
        draw = np.random.randint
        stages = (1, 2, 3)
        hp = {}

        for stage in stages:
            hp['conv%d_filters' % stage] = draw(1, 65)
        for stage in stages:
            hp['conv%d_kernel' % stage] = draw(2, 10)
        for stage in stages:
            # Floor division keeps the (exclusive) upper bound an integer
            # even when '/' means true division.
            hp['conv%d_strides' % stage] = draw(
                1, hp['conv%d_kernel' % stage] // 2 + 1)
        for stage in stages:
            hp['maxpool%d_size' % stage] = draw(2, 8)

        hp['fc1_units'] = 2**draw(6, 11)
        hp['fc2_units'] = 2**draw(6, 11)
        return hp

    def _train_generator(self, path, batch_size):
        """Return a directory iterator for training images under ``path``.

        Every augmentation option is set to its neutral value, so only
        ``_preprocess_input`` is applied to the images.
        """
        datagen = ImageDataGenerator(
            preprocessing_function=self._preprocess_input,
            rotation_range=0,
            width_shift_range=0.,
            height_shift_range=0.,
            shear_range=0.,
            zoom_range=0.,
            channel_shift_range=0.,
            fill_mode='reflect',
            cval=0.,
            horizontal_flip=False,
            vertical_flip=False)
        return datagen.flow_from_directory(path,
                                           target_size=self.input_img_size,
                                           batch_size=batch_size,
                                           class_mode='categorical')

    def _test_val_generator(self, path, batch_size):
        """Return an unshuffled directory iterator for evaluation images."""
        datagen = ImageDataGenerator(
            preprocessing_function=self._preprocess_input)
        return datagen.flow_from_directory(path,
                                           target_size=self.input_img_size,
                                           batch_size=batch_size,
                                           class_mode='categorical',
                                           shuffle=False)

    def fit_directory(self,
                      path,
                      batch_size,
                      epochs,
                      val_path=None,
                      save_weights=False):
        """Train on the images under ``path`` and dump the results.

        The training curves are plotted with ``utils.plot_history``, and
        the history is pickled together with the model config, into
        ``self.dump_path``. All file names share the prefix
        ``e<epochs>_b<batch_size>``.

        Args:
            path: directory of training images, one sub-folder per class.
            batch_size: batch size for the training/validation generators.
            epochs: number of training epochs.
            val_path: optional directory of validation images; validation
                is skipped when ``None``.
            save_weights: when true, the trained weights are also written
                to ``<prefix>_weights.h5``.

        Returns:
            The history object returned by ``fit_generator``.
        """
        train_generator = self._train_generator(path, batch_size)

        if val_path is None:
            validation_generator = None
            validation_steps = None
        else:
            validation_generator = self._test_val_generator(
                val_path, batch_size)
            validation_steps = validation_generator.samples / batch_size

        # NOTE(review): the step counts are left exactly as written; under
        # true division they are floats - confirm the Keras version in use
        # accepts that.
        history = self.model.fit_generator(
            train_generator,
            steps_per_epoch=train_generator.samples / batch_size,
            epochs=epochs,
            validation_data=validation_generator,
            validation_steps=validation_steps)

        # One shared prefix for every artefact of this run.
        run_tag = 'e' + str(epochs) + '_b' + str(batch_size)

        utils.plot_history(history, self.dump_path, identifier=run_tag)

        history_file = os.path.join(self.dump_path, run_tag + '_history.pklz')
        with open(history_file, 'wb') as f:
            cPickle.dump(
                (history.epoch, history.history, history.params,
                 history.validation_data, self.model.get_config()), f,
                cPickle.HIGHEST_PROTOCOL)

        if save_weights:
            self.model.save_weights(
                os.path.join(self.dump_path, run_tag + '_weights.h5'))

        return history

    def evaluate(self, path, batch_size=32):
        """Evaluate the model on the images under ``path``.

        Args:
            path: directory of evaluation images, one sub-folder per class.
            batch_size: batch size of the evaluation generator (default 32,
                the value that used to be hard-coded).

        Returns:
            Whatever ``evaluate_generator`` returns for the compiled model.
        """
        test_generator = self._test_val_generator(path,
                                                  batch_size=batch_size)
        return self.model.evaluate_generator(test_generator)

    def _preprocess_input(self, x, dim_ordering='default'):
        """Standardise a single image in place, channel by channel.

        Each of the three channels has the class-level mean subtracted and
        is then divided by the class-level standard deviation.

        Args:
            x: 3-D image array, modified in place. Channels are on the
                first axis for ``'th'`` ordering and on the last axis for
                ``'tf'`` ordering.
            dim_ordering: ``'tf'``, ``'th'`` or ``'default'`` (ask the
                Keras backend via ``K.image_dim_ordering()``).

        Returns:
            The same array object ``x``.
        """
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        assert dim_ordering in {'tf', 'th'}

        everything = slice(None)
        for channel in range(3):
            if dim_ordering == 'th':
                index = (channel, everything, everything)
            else:
                index = (everything, everything, channel)
            # Zero-center by mean pixel, then normalize by std.
            x[index] -= self._CHANNEL_MEAN[channel]
            x[index] /= self._CHANNEL_STD[channel]
        return x
def main():
    """Train a two-stage dense classifier and store both stages on disk.

    Stage one builds a Dense(128)/Dropout(0.4)/Dense(64)/Dense(1) network
    with ``build_model`` and trains it for 5 epochs on
    ``BacteriaAndVirusKMers`` data. Stage two starts from a new
    ``Sequential`` holding the first three layers of the stage-one model,
    passes it to ``build_model`` with a Dense(1, sigmoid) layer spec and
    trains it for 5 epochs on ``BacteriaAndVirusGenomeKMers`` data.
    Each model's architecture is written to ``<name>.json`` and its
    weights to ``<name>.h5``.
    """

    def store_model(model, model_name):
        # Architecture as JSON, weights as HDF5, both named after the model.
        with open(model_name + '.json', 'wt') as model_json:
            model_json.write(model.to_json())
        model.save_weights(filepath=model_name + '.h5', overwrite=True)

    # ---- first stage -----------------------------------------------------
    first_stage_layers = (
        ('Dense', {
            'units': 128,
            'activation': 'relu'
        }),
        ('Dropout', {
            'rate': 0.4
        }),
        ('Dense', {
            'units': 64,
            'activation': 'relu'
        }),
        ('Dense', {
            'units': 1,
            'activation': 'sigmoid'
        }),
    )

    first_stage_data = BacteriaAndVirusKMers(
        fp=
        '/home/jklynch/host/project/viral-learning/data/perm_training_testing.h5',
        training_sample_count=100000,
        development_sample_count=1000,
        half_batch_size=50)

    base_name, first_stage_model = build_model(
        model=Sequential(),
        input_dim=first_stage_data.get_input_dim(),
        layers=first_stage_layers)
    first_stage_model_name = 'first_stage_' + base_name

    training_metrics_df, dev_metrics_df = train_and_evaluate(
        model=first_stage_model,
        model_name=first_stage_model_name,
        training_epochs=5,
        the_data=first_stage_data)

    pprint(first_stage_model.get_config())
    store_model(first_stage_model, first_stage_model_name)

    # ---- second stage ----------------------------------------------------
    # Reuse every first-stage layer except its final sigmoid output.
    reused_layers = Sequential()
    for layer_index in range(3):
        reused_layers.add(first_stage_model.get_layer(index=layer_index))

    second_stage_layers = (('Dense', {
        'units': 1,
        'activation': 'sigmoid'
    }), )

    base_name, second_stage_model = build_model(model=reused_layers,
                                                layers=second_stage_layers)
    second_stage_model_name = 'second_stage_' + base_name

    second_stage_data = BacteriaAndVirusGenomeKMers(
        fp=
        '/home/jklynch/host/project/viral-learning/data/riveal_refseq_prok_phage_500pb_kmers8.h5',
        pb=500,
        k=8,
        training_sample_count=100000,
        development_sample_count=1000,
        half_batch_size=50)

    pprint(second_stage_model.get_config())

    genomic_training_metrics_df, genomic_dev_metrics_df = train_and_evaluate(
        model=second_stage_model,
        model_name=second_stage_model_name,
        training_epochs=5,
        the_data=second_stage_data)

    store_model(second_stage_model, second_stage_model_name)