def build_mlp(IMG_SIZE):
    colorprint(Color.BLUE, 'Building MLP model...\n')

    # Build the Multi Layer Perceptron model
    model = Sequential()
    model.add(
        Reshape((IMG_SIZE * IMG_SIZE * 3, ),
                input_shape=(IMG_SIZE, IMG_SIZE, 3),
                name='first'))
    model.add(Dense(units=2048, activation='relu', name='second'))
    #model.add(Dense(units=1024, activation='relu', name='third'))
    model.add(Dense(units=8, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    model.summary()
    if not os.path.exists('dump/models'):
        os.makedirs('dump/models')  # makedirs also creates the parent 'dump/'
    # note: hash() of a str is salted per process in Python 3, so this file
    # name is only stable within a single run
    plot_model(model,
               to_file='dump/models/' + str(hash(str(model.get_config()))) +
               '.png',
               show_shapes=True,
               show_layer_names=True)

    colorprint(Color.BLUE, 'Done!\n')
    return model
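
# A minimal usage sketch (not from the original source). IMG_SIZE=32 and the
# random batch below are assumptions for illustration: the Reshape layer
# flattens each (32, 32, 3) image into a 3072-vector before the Dense layers.
def _demo_build_mlp():
    import numpy as np
    mlp = build_mlp(IMG_SIZE=32)
    dummy_batch = np.random.rand(4, 32, 32, 3)  # four random RGB images
    predictions = mlp.predict(dummy_batch)
    print(predictions.shape)  # (4, 8): one softmax distribution per image
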
def build_patch_mlp(PATCH_SIZE, phase='TRAIN'):
    colorprint(Color.BLUE, 'Building MLP model...\n')

    model = Sequential()
    model.add(
        Reshape((PATCH_SIZE * PATCH_SIZE * 3, ),
                input_shape=(PATCH_SIZE, PATCH_SIZE, 3)))
    model.add(Dense(units=2048, activation='relu'))
    # model.add(Dense(units=1024, activation='relu'))
    if phase.upper() == 'TEST':  # .capitalize() would yield 'Test' and never match
        model.add(
            Dense(units=8, activation='linear')
        )  # in the test phase we softmax the average output over the image patches
    else:
        model.add(Dense(units=8, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    model.summary()
    if not os.path.exists('dump/patch_models'):
        os.makedirs('dump/patch_models')  # makedirs also creates 'dump/'
    plot_model(model,
               to_file='dump/patch_models/' +
               str(hash(str(model.get_config()))) + '.png',
               show_shapes=True,
               show_layer_names=True)

    colorprint(Color.BLUE, 'Done!\n')
    return model
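
# A hedged sketch (not in the original) of the test-time aggregation the
# comment above refers to: the 'TEST' model emits linear scores per patch,
# which are averaged over all patches of an image before the softmax. The
# function name and its arguments are illustrative assumptions.
def predict_image_from_patches(patch_model, patches):
    import numpy as np
    scores = patch_model.predict(patches)  # (n_patches, 8) linear outputs
    mean_scores = scores.mean(axis=0)  # average the scores over the patches
    exp_scores = np.exp(mean_scores - mean_scores.max())  # stable softmax
    return exp_scores / exp_scores.sum()
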
class ModelConfigTest(unittest.TestCase):  # enclosing class restored; name and unittest base assumed
    def test_config_deserialisation(self):
        # Keras models do not implement __eq__, so asserting equality of the
        # original and deserialised model objects directly (an earlier draft
        # sketched a custom __eq__ class for this) is not meaningful; instead
        # we compare observable properties such as the input shape below.

        seed = 1234

        tf.random.set_seed(seed)
        model = Sequential()
        model.add(
            Conv2D(filters=64,
                   kernel_size=(3, 3),
                   padding='same',
                   activation='relu',
                   # an arbitrary example shape; without an input shape the
                   # model is never built and layers[0].input below would fail
                   input_shape=(28, 28, 1)))
        model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model.add(Dropout(0.25, seed=seed))

        # Dense accepts no `seed` argument and `input_dim` is ignored on
        # non-first layers; determinism comes from tf.random.set_seed above.
        model.add(Dense(12,
                        activation='relu',
                        kernel_initializer='glorot_uniform'))
        model.add(Dense(8, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        config = model.get_config()
        deserialized_model = Sequential.from_config(config)
        # self.assertEqual(model, deserialized_model)
        self.assertEqual(model.layers[0].input.shape,
                         deserialized_model.layers[0].input.shape)
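
    # A hedged companion sketch (not in the original): from_config() rebuilds
    # the architecture with freshly initialised weights, so a full round trip
    # also needs get_weights()/set_weights() to carry the parameters over.
    def test_weights_survive_explicit_copy(self):
        model = Sequential()
        model.add(Dense(4, activation='relu', input_shape=(3, )))
        clone = Sequential.from_config(model.get_config())
        clone.set_weights(model.get_weights())  # copy parameters explicitly
        for original, copied in zip(model.get_weights(), clone.get_weights()):
            self.assertTrue((original == copied).all())
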
class CNNmodel7:
    def __init__(self, img_size=(256, 256), dump_path='dump/'):
        # Random parameters
        conv1_filters = np.random.randint(1, 65)
        conv2_filters = np.random.randint(1, 65)
        conv3_filters = np.random.randint(1, 65)
        conv1_kernel = np.random.randint(2, 10)
        conv2_kernel = np.random.randint(2, 10)
        conv3_kernel = np.random.randint(2, 10)
        conv1_strides = np.random.randint(1, conv1_kernel // 2 + 1)
        conv2_strides = np.random.randint(1, conv2_kernel // 2 + 1)
        conv3_strides = np.random.randint(1, conv3_kernel // 2 + 1)
        maxpool1_size = np.random.randint(2, 8)
        maxpool2_size = np.random.randint(2, 8)
        maxpool3_size = np.random.randint(2, 8)
        fc1_units = 2**np.random.randint(6, 11)
        fc2_units = 2**np.random.randint(6, 11)

        # Model architecture
        self.model = Sequential()
        self.model.add(
            Conv2D(filters=conv1_filters,
                   kernel_size=(conv1_kernel, conv1_kernel),
                   strides=(conv1_strides, conv1_strides),
                   activation='relu',
                   input_shape=(img_size[0], img_size[1], 3),
                   name='conv1'))
        self.model.add(
            MaxPooling2D(pool_size=(maxpool1_size, maxpool1_size),
                         strides=None,
                         name='maxpool1'))
        self.model.add(
            Conv2D(filters=conv2_filters,
                   kernel_size=(conv2_kernel, conv2_kernel),
                   strides=(conv2_strides, conv2_strides),
                   activation='relu',
                   name='conv2'))
        self.model.add(
            MaxPooling2D(pool_size=(maxpool2_size, maxpool2_size),
                         strides=None,
                         name='maxpool2'))
        self.model.add(
            Conv2D(filters=conv3_filters,
                   kernel_size=(conv3_kernel, conv3_kernel),
                   strides=(conv3_strides, conv3_strides),
                   activation='relu',
                   name='conv3'))
        self.model.add(
            MaxPooling2D(pool_size=(maxpool3_size, maxpool3_size),
                         strides=None,
                         name='maxpool3'))
        self.model.add(Flatten())
        self.model.add(Dense(units=fc1_units, activation='relu', name='fc1'))
        self.model.add(Dense(units=fc2_units, activation='relu', name='fc2'))
        self.model.add(Dense(units=8, activation='softmax', name='classif'))

        # Optimizer
        optimizer = Adam()

        # Compile
        self.model.compile(loss='categorical_crossentropy',
                           optimizer=optimizer,
                           metrics=['accuracy'])
        # Parameters
        self.born_time = time.strftime('%Y%m%d%H%M%S', time.gmtime())
        self.identifier = str(hash(str(self.model.get_config())))
        self.dump_path = os.path.join(
            dump_path,
            str(self.born_time) + '_' + self.identifier)
        self.input_img_size = img_size

        # Print
        if not os.path.exists(self.dump_path):
            os.makedirs(self.dump_path)
        self.model.summary()
        print('Current model: ' + self.identifier)
        plot_model(self.model,
                   show_shapes=True,
                   show_layer_names=True,
                   to_file=os.path.join(self.dump_path,
                                        self.identifier + '.png'))

    def _train_generator(self, path, batch_size):
        # all augmentation parameters are zeroed, so this generator currently
        # only applies the preprocessing function
        datagen = ImageDataGenerator(
            preprocessing_function=self._preprocess_input,
            rotation_range=0,
            width_shift_range=0.,
            height_shift_range=0.,
            shear_range=0.,
            zoom_range=0.,
            channel_shift_range=0.,
            fill_mode='reflect',
            cval=0.,
            horizontal_flip=False,
            vertical_flip=False)
        return datagen.flow_from_directory(path,
                                           target_size=self.input_img_size,
                                           batch_size=batch_size,
                                           class_mode='categorical')

    def _test_val_generator(self, path, batch_size):
        datagen = ImageDataGenerator(
            preprocessing_function=self._preprocess_input)
        return datagen.flow_from_directory(path,
                                           target_size=self.input_img_size,
                                           batch_size=batch_size,
                                           class_mode='categorical',
                                           shuffle=False)

    def fit_directory(self,
                      path,
                      batch_size,
                      epochs,
                      val_path=None,
                      save_weights=False):
        train_generator = self._train_generator(path, batch_size)
        if val_path is None:
            validation_generator = None
            validation_steps = None
        else:
            validation_generator = self._test_val_generator(
                val_path, batch_size)
            validation_steps = int(
                np.ceil(validation_generator.samples / float(batch_size)))

        history = self.model.fit_generator(
            train_generator,
            # steps must be integers; round up so every sample is seen
            steps_per_epoch=int(
                np.ceil(train_generator.samples / float(batch_size))),
            epochs=epochs,
            validation_data=validation_generator,
            validation_steps=validation_steps)
        utils.plot_history(history,
                           self.dump_path,
                           identifier='e' + str(epochs) + '_b' +
                           str(batch_size))
        with open(
                os.path.join(
                    self.dump_path, 'e' + str(epochs) + '_b' +
                    str(batch_size) + '_history.pklz'), 'wb') as f:
            cPickle.dump((history.epoch, history.history, history.params,
                          history.validation_data, self.model.get_config()), f,
                         cPickle.HIGHEST_PROTOCOL)
        if save_weights:
            self.model.save_weights(
                os.path.join(
                    self.dump_path, 'e' + str(epochs) + '_b' +
                    str(batch_size) + '_weights.h5'))
        return history
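
    # A hedged helper sketch (not in the original class): despite the .pklz
    # suffix, the history file written above is a plain pickle, so it can be
    # read back with a single load. The method name is an assumption.
    def load_history(self, history_path):
        with open(history_path, 'rb') as f:
            return cPickle.load(f)  # (epoch, history, params, val_data, config)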

    def evaluate(self, path):
        test_generator = self._test_val_generator(path, batch_size=32)
        return self.model.evaluate_generator(test_generator)

    def _preprocess_input(self, x, dim_ordering='default'):
        if dim_ordering == 'default':
            dim_ordering = K.image_dim_ordering()
        assert dim_ordering in {'tf', 'th'}

        mean = [109.07621812, 115.45609435, 114.70990406]
        std = [56.91689916, 55.4694083, 59.14847488]
        if dim_ordering == 'th':
            # Zero-center by mean pixel
            x[0, :, :] -= mean[0]
            x[1, :, :] -= mean[1]
            x[2, :, :] -= mean[2]
            # Normalize by std
            x[0, :, :] /= std[0]
            x[1, :, :] /= std[1]
            x[2, :, :] /= std[2]
        else:
            # Zero-center by mean pixel
            x[:, :, 0] -= mean[0]
            x[:, :, 1] -= mean[1]
            x[:, :, 2] -= mean[2]
            # Normalize by std
            x[:, :, 0] /= std[0]
            x[:, :, 1] /= std[1]
            x[:, :, 2] /= std[2]
        return x
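
    # A hedged equivalent (not in the original): for the channels-last branch
    # the per-channel loop above collapses to one vectorised expression,
    # assuming x is a float array of shape (height, width, 3).
    def _preprocess_input_vectorised(self, x):
        mean = np.array([109.07621812, 115.45609435, 114.70990406])
        std = np.array([56.91689916, 55.4694083, 59.14847488])
        return (x - mean) / std  # broadcasts over the trailing channel axis


# A hedged usage sketch for CNNmodel7 (not in the original); the dataset
# directory paths are hypothetical placeholders.
def _demo_cnnmodel7():
    cnn = CNNmodel7(img_size=(256, 256), dump_path='dump/')
    cnn.fit_directory('data/train', batch_size=32, epochs=10,
                      val_path='data/val', save_weights=True)
    print(cnn.evaluate('data/test'))
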
def main():
    first_stage_network_depths = (
        (('Dense', {'units': 128, 'activation': 'relu'}),
         ('Dropout', {'rate': 0.4}),
         ('Dense', {'units': 64, 'activation': 'relu'}),
         ('Dense', {'units': 1, 'activation': 'sigmoid'})),
    )

    first_stage_data = BacteriaAndVirusKMers(
        fp='/home/jklynch/host/project/viral-learning/data/perm_training_testing.h5',
        training_sample_count=100000,
        development_sample_count=1000,
        half_batch_size=50)

    first_stage_model_name, first_stage_model = build_model(
        model=Sequential(),
        input_dim=first_stage_data.get_input_dim(),
        layers=first_stage_network_depths[0])

    first_stage_model_name = 'first_stage_' + first_stage_model_name

    training_metrics_df, dev_metrics_df = train_and_evaluate(
        model=first_stage_model,
        model_name=first_stage_model_name,
        training_epochs=5,
        the_data=first_stage_data)

    pprint(first_stage_model.get_config())

    # store the model
    with open(first_stage_model_name + '.json', 'wt') as model_json:
        model_json.write(first_stage_model.to_json())
    first_stage_model.save_weights(filepath=first_stage_model_name + '.h5',
                                   overwrite=True)

    second_stage_model = Sequential()
    second_stage_model.add(first_stage_model.get_layer(index=0))
    second_stage_model.add(first_stage_model.get_layer(index=1))
    second_stage_model.add(first_stage_model.get_layer(index=2))
    second_stage_layers = (
        (
            # ('Dense', {'units': 64, 'activation': 'relu'}),
            ('Dense', {'units': 1, 'activation': 'sigmoid'}),
        ),
    )
    second_stage_model_name, second_stage_model = build_model(
        model=second_stage_model, layers=second_stage_layers[0])

    second_stage_model_name = 'second_stage_' + second_stage_model_name

    second_stage_data = BacteriaAndVirusGenomeKMers(
        fp='/home/jklynch/host/project/viral-learning/data/riveal_refseq_prok_phage_500pb_kmers8.h5',
        pb=500,
        k=8,
        training_sample_count=100000,
        development_sample_count=1000,
        half_batch_size=50)

    pprint(second_stage_model.get_config())

    genomic_training_metrics_df, genomic_dev_metrics_df = train_and_evaluate(
        model=second_stage_model,
        model_name=second_stage_model_name,
        training_epochs=5,
        the_data=second_stage_data)

    # store the model
    with open(second_stage_model_name + '.json', 'wt') as model_json:
        model_json.write(second_stage_model.to_json())
    second_stage_model.save_weights(filepath=second_stage_model_name + '.h5',
                                    overwrite=True)