Example #1
def build_model(self):
        s_input = Input(self.obs_shape)
        prob_old_input = Input([])
        action_old_input = Input([], dtype='int32')
        gae_input = Input([])
        v_target_input = Input([])

        feature = Feature()
        x = feature(s_input)
        policy_dense = Dense(self.act_n, activation='softmax')
        value_dense = Dense(1)
        prob = policy_dense(x)
        v = value_dense(x)
        policy = Model(inputs=s_input, outputs=prob)
        value = Model(inputs=s_input, outputs=v)

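        # Probability the current policy assigns to the previously taken
        # actions, and the PPO clipped surrogate: the new/old probability
        # ratio is clipped to [1 - eps_clip, 1 + eps_clip] so a single update
        # cannot move the policy too far from the data-collecting policy.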
        prob_cur = tf.gather(prob, action_old_input, batch_dims=1)
        ratio = prob_cur / (prob_old_input + 1e-3)
        surr1 = ratio * gae_input
        surr2 = K.clip(ratio, 1 - self.eps_clip, 1 + self.eps_clip) * gae_input

        # The second term is the entropy bonus: since actions were sampled
        # according to the policy probabilities, there is no need to weight by
        # the probability again; only the log-probability of the taken action
        # is required.
        policy_loss = -K.mean(K.minimum(surr1, surr2)) + K.mean(
            K.log(prob_cur + 1e-3)) * self.entropy_coef

        value_loss = K.mean((v[:, 0] - v_target_input)**2)
        loss = policy_loss + value_loss
        train_model = Model(inputs=[
            s_input, prob_old_input, action_old_input, gae_input,
            v_target_input
        ],
                            outputs=loss)
        train_model.add_loss(loss)
        train_model.compile(tf.keras.optimizers.Adam(self.lr))
        return policy, value, train_model
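All the snippets on this page follow the same pattern: build the loss as a tensor, attach it with add_loss, compile without a loss argument, and fit without targets. A minimal self-contained sketch of that pattern under TF 2.x tf.keras (every name below is illustrative, not taken from the snippet above):

import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense

inp = Input((4,))
out = Dense(1)(inp)
model = Model(inp, out)
# The loss is a symbolic tensor built from the model's graph; compile()
# then needs no `loss` argument and fit() needs no target array.
model.add_loss(tf.reduce_mean(tf.square(out)))
model.compile(optimizer='adam')
model.fit(tf.random.normal((32, 4)), epochs=1, verbose=0)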
Example #2
    def build(self, hidden_layers=[16], activations=['relu'], dropout=0.5, learning_rate=0.01, l2_norm=5e-4, p1=1.4, p2=0.7, epsilon=0.01):

        with self.device:

            x = Input(batch_shape=[self.n_nodes, self.n_features], dtype=tf.float32, name='features')
            adj = Input(batch_shape=[self.n_nodes, self.n_nodes], dtype=tf.float32, sparse=True, name='adj_matrix')
            index = Input(batch_shape=[None],  dtype=tf.int32, name='index')

            self.GCN_layers = [GraphConvolution(hidden_layers[0], activation=activations[0], 
                                                kernel_regularizer=regularizers.l2(l2_norm)),
                               GraphConvolution(self.n_classes)]
            self.dropout_layer = Dropout(rate=dropout)
            logit = self.propagation(x, adj)
            logit = tf.ensure_shape(logit, (self.n_nodes, self.n_classes))
            output = tf.gather(logit, index)
            output = Softmax()(output)
            model = Model(inputs=[x, adj, index], outputs=output)
            model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(lr=learning_rate), metrics=['accuracy'])

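            # Unsupervised regularizers stacked on top of the supervised loss:
            # conditional-entropy minimization plus virtual adversarial
            # training (VAT) smoothing, weighted by p2 and p1 respectively.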
            entropy_loss = entropy_y_x(logit)
            vat_loss = self.virtual_adversarial_loss(x, adj, logit, epsilon)
            model.add_loss(p1 * vat_loss + p2 * entropy_loss)

            self.model = model
            self.adv_optimizer = Adam(lr=learning_rate/10)
            self.built = True
Example #3
def build_model(hp):

    params = hp.copy()
    params['e_dim'] = params['dim']
    params['r_dim'] = params['dim']
    params['name'] = 'embedding_model'

    embedding_model = models[params['embedding_model']]
    embedding_model = embedding_model(**params)
    triple = Input((3, ))
    ftriple = Input((3, ))

    inputs = [triple, ftriple]

    score = embedding_model(triple)
    fscore = embedding_model(ftriple)

    loss_function = loss_function_lookup(params['loss_function'])
    loss = loss_function(score, fscore, params['margin'] or 1, 1)

    model = Model(inputs=inputs, outputs=loss)
    model.add_loss(loss)

    model.compile(optimizer=Adam(learning_rate=ExponentialDecay(
        params['learning_rate'], decay_steps=100000, decay_rate=0.96)),
                  loss=None)

    return model
Example #4
def create_model(trainable=False):
    model = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                        include_top=False,
                        alpha=ALPHA,
                        weights="imagenet")

    for layer in model.layers:
        layer.trainable = trainable

    block = model.get_layer("block_16_project_BN").output

    x = Conv2D(112,
               padding="same",
               kernel_size=3,
               strides=1,
               activation="relu")(block)
    x = Conv2D(112, padding="same", kernel_size=3, strides=1,
               use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)

    x = Conv2D(5, padding="same", kernel_size=1, activation="sigmoid")(x)

    model = Model(inputs=model.input, outputs=x)

    # divide by 2 since d/dweight learning_rate * weight^2 = 2 * learning_rate * weight
    # see https://arxiv.org/pdf/1711.05101.pdf
    regularizer = l2(WEIGHT_DECAY / 2)
    for weight in model.trainable_weights:
        with tf.keras.backend.name_scope("weight_regularizer"):
            model.add_loss(regularizer(weight))

    return model
Example #5
    def build(self,
              hiddens=[16],
              activations=['relu'],
              dropout=0.5,
              l2_norm=5e-4,
              use_bias=False,
              lr=0.01,
              p1=1.4,
              p2=0.7):

        if self.kind == "P":
            raise RuntimeError(
                f"Currently {self.name} only supports the tensorflow backend.")

        with tf.device(self.device):

            x = Input(batch_shape=[None, self.graph.n_attrs],
                      dtype=self.floatx,
                      name='attr_matrix')
            adj = Input(batch_shape=[None, None],
                        dtype=self.floatx,
                        sparse=True,
                        name='adj_matrix')
            index = Input(batch_shape=[None],
                          dtype=self.intx,
                          name='node_index')

            GCN_layers = []
            for hidden, activation in zip(hiddens, activations):
                GCN_layers.append(
                    GraphConvolution(
                        hidden,
                        activation=activation,
                        use_bias=use_bias,
                        kernel_regularizer=regularizers.l2(l2_norm)))

            GCN_layers.append(
                GraphConvolution(self.graph.n_classes, use_bias=use_bias))
            self.GCN_layers = GCN_layers
            self.dropout = Dropout(rate=dropout)

            logit = self.forward(x, adj)
            output = Gather()([logit, index])

            model = Model(inputs=[x, adj, index], outputs=output)
            model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                          optimizer=Adam(lr=lr),
                          metrics=['accuracy'])

            self.r_vadv = tf.Variable(TruncatedNormal(stddev=0.01)(
                shape=[self.graph.n_nodes, self.graph.n_attrs]),
                                      name="r_vadv")
            entropy_loss = entropy_y_x(logit)
            vat_loss = self.virtual_adversarial_loss(x, adj, logit)
            model.add_loss(p1 * vat_loss + p2 * entropy_loss)

            self.model = model
            self.adv_optimizer = Adam(lr=lr / 10)
Example #6
    def build(self,
              hidden_layers=[64],
              activations=['relu'],
              use_bias=False,
              dropout=0.6,
              learning_rate=0.01,
              l2_norm=1e-4,
              para_kl=5e-4,
              gamma=1.0):

        x = Input(batch_shape=[self.n_nodes, self.n_features],
                  dtype=tf.float32,
                  name='features')
        adj = [
            Input(batch_shape=[self.n_nodes, self.n_nodes],
                  dtype=tf.float32,
                  sparse=True,
                  name='adj_matrix_1'),
            Input(batch_shape=[self.n_nodes, self.n_nodes],
                  dtype=tf.float32,
                  sparse=True,
                  name='adj_matrix_2')
        ]
        index = Input(batch_shape=[None], dtype=tf.int32, name='index')

        h = Dropout(rate=dropout)(x)
        h, KL_divergence = GaussionConvolution_F(
            hidden_layers[0],
            gamma=gamma,
            use_bias=use_bias,
            activation=activations[0],
            kernel_regularizer=regularizers.l2(l2_norm))([h, *adj])

        # additional layers (usually unnecessary)
        for hid, activation in zip(hidden_layers[1:], activations[1:]):
            h = Dropout(rate=dropout)(h)
            h = GaussionConvolution_D(hid,
                                      gamma=gamma,
                                      use_bias=use_bias,
                                      activation=activation)([h, *adj])

        h = Dropout(rate=dropout)(h)
        h = GaussionConvolution_D(self.n_classes,
                                  gamma=gamma,
                                  use_bias=use_bias)([h, *adj])
        h = tf.ensure_shape(h, [self.n_nodes, self.n_classes])
        h = tf.gather(h, index)
        output = Softmax()(h)

        model = Model(inputs=[x, *adj, index], outputs=output)
        model.compile(loss='sparse_categorical_crossentropy',
                      optimizer=Adam(lr=learning_rate),
                      metrics=['accuracy'])
        model.add_loss(para_kl * KL_divergence)

        self.model = model
        self.built = True
Example #7
    def _build_model(self, training=True):

        inputs = Input(shape=(self.n_features_, ))
        x = Dense(self.hidden_neurons_[0],
                  use_bias=False,
                  activation=self.hidden_activation,
                  activity_regularizer=l2(self.l2_regularizer))(inputs)
        for hidden_neurons in self.hidden_neurons_[1:-1]:
            x = Dense(hidden_neurons,
                      use_bias=False,
                      activation=self.hidden_activation,
                      activity_regularizer=l2(self.l2_regularizer))(x)
            x = Dropout(self.dropout_rate)(x)

        # add name to last hidden layer
        x = Dense(self.hidden_neurons_[-1],
                  use_bias=False,
                  activation=self.hidden_activation,
                  activity_regularizer=l2(self.l2_regularizer),
                  name='net_output')(x)

        # build distance loss
        dist = tf.math.reduce_sum((x - self.c)**2, axis=-1)
        outputs = dist
        loss = tf.math.reduce_mean(dist)

        # Instantiate Deep SVDD
        dsvd = Model(inputs, outputs)

        # Weight decay
        w_d = 1e-6 * sum([np.linalg.norm(w) for w in dsvd.get_weights()])
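        # Note: np.linalg.norm runs on concrete weight values, so w_d is a
        # plain Python float; it shifts the reported loss but contributes no
        # gradient (a differentiable penalty would use tf.nn.l2_loss on the
        # trainable variables instead).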

        # Use AutoEncoder version of DeepSVDD
        if self.use_ae:
            for reversed_neurons in self.hidden_neurons_[::-1]:
                x = Dense(reversed_neurons,
                          use_bias=False,
                          activation=self.hidden_activation,
                          activity_regularizer=l2(self.l2_regularizer))(x)
                x = Dropout(self.dropout_rate)(x)
            x = Dense(self.n_features_,
                      use_bias=False,
                      activation=self.output_activation,
                      activity_regularizer=l2(self.l2_regularizer))(x)
            dsvd.add_loss(loss +
                          tf.math.reduce_mean(tf.math.square(x - inputs)) +
                          w_d)
        else:
            dsvd.add_loss(loss + w_d)

        dsvd.compile(optimizer=self.optimizer)

        if self.verbose >= 1 and training:
            dsvd.summary()
        return dsvd
Example #8
    def build(self,
              hiddens=[16],
              activations=['relu'],
              dropout=0.,
              lr=0.01,
              l2_norm=5e-4,
              p1=1.4,
              p2=0.7,
              use_bias=False,
              epsilon=0.01):

        with tf.device(self.device):

            x = Input(batch_shape=[None, self.graph.n_attrs],
                      dtype=self.floatx,
                      name='attr_matrix')
            adj = Input(batch_shape=[None, None],
                        dtype=self.floatx,
                        sparse=True,
                        name='adj_matrix')
            index = Input(batch_shape=[None],
                          dtype=self.intx,
                          name='node_index')

            GCN_layers = []
            dropout_layers = []
            for hidden, activation in zip(hiddens, activations):
                GCN_layers.append(
                    GraphConvolution(
                        hidden,
                        activation=activation,
                        use_bias=use_bias,
                        kernel_regularizer=regularizers.l2(l2_norm)))
                dropout_layers.append(Dropout(rate=dropout))

            GCN_layers.append(
                GraphConvolution(self.graph.n_classes, use_bias=use_bias))
            self.GCN_layers = GCN_layers
            self.dropout_layers = dropout_layers

            logit = self.forward(x, adj)
            output = Gather()([logit, index])

            model = Model(inputs=[x, adj, index], outputs=output)
            model.compile(loss=SparseCategoricalCrossentropy(from_logits=True),
                          optimizer=Adam(lr=lr),
                          metrics=['accuracy'])

            entropy_loss = entropy_y_x(logit)
            vat_loss = self.virtual_adversarial_loss(x, adj, logit, epsilon)
            model.add_loss(p1 * vat_loss + p2 * entropy_loss)

            self.model = model
            self.adv_optimizer = Adam(lr=lr / 10)
Example #9
def VAE_2():
    # inputs = Input(shape=(28 * 28), name='encoder_input', batch_size=_BatchSize)  # specify batch_size when using method 3
    inputs = Input(shape=(28 * 28), name='encoder_input')
    x = layers.Dense(128, activation='relu')(inputs)
    z_mean = layers.Dense(2, name='z_mean')(x)
    z_log_var = layers.Dense(2, name='z_log_var')(x)

    # Method 1: embed the sampling directly into the model!
    # 1. Draw epsilon from a standard normal distribution
    eps = tf.random.normal((tf.shape(z_mean)[0], tf.shape(z_mean)[1]))
    # 2. Recover the standard deviation from the log-variance
    std = tf.exp(0.5 * z_log_var)  # 0.5 because z_log_var is a log-variance
    # 3. Sample via element-wise multiplication (reparameterization trick)
    Sample_Z = layers.Add()([z_mean, layers.Multiply()([eps, std])])

    # Method 2: use a Lambda layer with a sampling function that operates on
    # every element of the layer
    # Sample_Z = layers.Lambda(sampling, name='z')([z_mean, z_log_var])

    # Method 3: a custom sampling-layer subclass; no different from embedding
    # the sampling in the model, but note that this method requires
    # batch_size=_BatchSize in both Input calls
    # Sample_Z = Sample(z_log_var)(z_mean, z_log_var)


    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, Sample_Z], name='encoder')
    # encoder.summary()

    # build decoder model
    # latent_inputs = Input(shape=(2,), name='z_sampling', batch_size=_BatchSize)  # specify batch_size when using method 3
    latent_inputs = Input(shape=(2,), name='z_sampling')
    x = layers.Dense(128, activation='relu')(latent_inputs)
    outputs = layers.Dense(28*28, activation='sigmoid')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    # decoder.summary()

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs=inputs, outputs=outputs, name='vae_mlp')

    # Attach the losses
    reconstruction_loss = tf.keras.losses.BinaryCrossentropy(
        reduction='sum', name='binary_crossentropy')(inputs, outputs)

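    # Closed form of KL(N(z_mean, exp(z_log_var)) || N(0, I)):
    # KL = -0.5 * sum(1 + log_var - mean^2 - var)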
    kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
    kl_loss = -0.5 * tf.reduce_mean(kl_loss)
    # If kl_loss = -0.5 * tf.reduce_sum(kl_loss) were used here, the same
    # error as in the first example would occur
    vae.add_loss(kl_loss)
    vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')
    vae.add_loss(reconstruction_loss)
    vae.add_metric(reconstruction_loss, name='reconstruction_loss', aggregation='mean')

    return vae, encoder, decoder
Example #10
    def ss(self, args, ripple_set):
        # super(RippleNet, self).__init__()
        # self._parse_args(args, ripple_set)
        self._build_embeddings()
        self._build_inputs()
        self._build_model()
        self._build_loss()
        print('self.user_id_shape', self.user_id.shape)
        model = Model(inputs=[self.user_id, self.item_id], outputs=self.score)
        model.add_loss(self.l2.l2 * self.l2_loss)
        model.add_loss(self.kge_weight * -self.kge_loss)
        return model
Example #11
    def build_RaGAN(self):
        def interpolating(x):
            u = K.random_uniform((K.shape(x[0])[0], ) + (1, ) *
                                 (K.ndim(x[0]) - 1))
            return x[0] * u + x[1] * (1 - u)

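        # Relativistic average GAN (RaGAN) logits: each batch's logit is
        # measured relative to the mean logit of the opposite batch.
        # (interpolating above is left over from a gradient-penalty variant
        # and is never called below.)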
        def compute_loss(x):
            real, fake = x
            fake_logit = fake - K.mean(real)
            #            fake_logit = K.sigmoid(fake - K.mean(real))
            real_logit = real - K.mean(fake)
            #            real_logit = K.sigmoid(real - K.mean(fake))
            return [fake_logit, real_logit]

        # Input HR images
        imgs_hr = Input(self.shape_hr)
        generated_hr = Input(self.shape_hr)

        # Discriminator logits for the real and the generated HR images
        real_discriminator_logits = self.discriminator(imgs_hr)
        fake_discriminator_logits = self.discriminator(generated_hr)

        total_loss = Lambda(compute_loss, name='compute_loss')(
            [real_discriminator_logits, fake_discriminator_logits])
        # Output tensors to a Model must be the output of a Keras `Layer`
        fake_logit = Lambda(lambda x: x, name='fake_logit')(total_loss[0])
        real_logit = Lambda(lambda x: x, name='real_logit')(total_loss[1])

        #        dis_loss = K.mean(K.binary_crossentropy(K.zeros_like(fake_logit), fake_logit) +
        #                          K.binary_crossentropy(K.ones_like(real_logit), real_logit))

        epsilon = 0.000001
        dis_loss = -(K.mean(K.log(K.sigmoid(real_logit) + epsilon)) +
                     K.mean(K.log(1 - K.sigmoid(fake_logit) + epsilon)))
        # dis_loss = tf.reduce_mean(
        #     tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(fake_logit), logits=fake_logit) +
        #     tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_likes(real_logit), logits=real_logit))
        # dis_loss = K.mean(- (real_logit - fake_logit)) + 10 * K.mean((grad_norms - 1) ** 2)

        model = Model(inputs=[imgs_hr, generated_hr],
                      outputs=[fake_logit, real_logit])

        model.add_loss(dis_loss)
        model.compile(optimizer=Adam(self.dis_lr))

        model.metrics_names.append('dis_loss')
        model.metrics_tensors.append(dis_loss)

        #        model.summary()
        return model
Example #12
def build_model(batch_size=1, lr=1e-4):
    # Input Layers
    input_o = Input(shape=num_rooms, dtype=tf.int32, batch_size=batch_size)
    input_p = Input(shape=num_edges, dtype=tf.float32, batch_size=batch_size)
    input_t = Input(shape=input_size_t, dtype=tf.int32, batch_size=batch_size)

    box_gt = Input(shape=(num_rooms, 4),
                   dtype=tf.float32,
                   batch_size=batch_size)
    mask_gt = Input(shape=(num_rooms, mask_size, mask_size),
                    dtype=tf.int32,
                    batch_size=batch_size)

    # Embeddings
    embedding_o = Embedding(input_dim=num_objects,
                            output_dim=embed_dim,
                            input_length=num_rooms,
                            mask_zero=True)(input_o)
    embedding_p = Embedding(input_dim=num_relation,
                            output_dim=embed_dim,
                            input_length=num_edges,
                            mask_zero=True)(input_p)

    # Graph Convolutions
    new_s_obj, new_p_obj = GraphTripleConvNet(input_dim=Din,
                                              hidden_dim=H,
                                              batch_size=batch_size)(
                                                  embedding_o, embedding_p,
                                                  input_t)

    # Box and Mask Regression Nets
    output_box = box_net(gconv_dim=Dout)(new_s_obj)
    output_mask = Mask_regression(num_chan=Dout,
                                  mask_size=mask_size)(new_s_obj)

    output_rel = rel_aux_net(gconv_out=Dout,
                             gconv_hidden_dim=H,
                             out_dim=num_relation,
                             batch_size=batch_size)(embedding_o, output_box,
                                                    input_t)

    # Model
    model = Model([input_o, input_p, input_t, box_gt, mask_gt],
                  [output_box, output_mask, output_rel])
    model.add_loss(
        total_loss(box_gt, mask_gt, input_p, output_box, output_mask,
                   output_rel))
    model.compile(optimizer=optimizers.Adam(learning_rate=lr))

    return model
Example #13
    def _build_compile(self, model_input):
        z_mean, z_log_var, z = self.encoder(model_input)
        surved_y_output = self.decoder_y(z)
        surved = Model(model_input, surved_y_output, name='SurVED')

        kl_loss_orig = -0.5 * tf.reduce_mean(z_log_var - tf.square(z_mean) -
                                             tf.exp(z_log_var) + 1)
        kl_loss = kl_loss_orig * self.kl_loss_weight
        surved.add_loss(K.mean(kl_loss))
        surved.add_metric(kl_loss_orig, name='kl_loss', aggregation='mean')
        opt = Adam(lr=self.surved_lr)
        surved.compile(loss=self._get_loss(),
                       optimizer=opt,
                       metrics=[self.cindex, self.surv_mse_loss])
        return surved
Example #14
def create_autoencoder(block, reencode=False):
    encoder = create_encoder(block)
    decoder = create_decoder(block)

    image = encoder.input
    encoded, *masks = encoder(image)
    decoded = decoder([encoded, *masks])
    outputs = [decoded]

    if reencode:
        encoder2 = create_encoder(block)
        reencoded, *_masks = encoder2(decoded)  # use the freshly built encoder

    autoencoder = Model(image, outputs=outputs)
    if reencode:
        autoencoder.add_loss(
            content_feature_loss(image, encoded, decoded, reencoded))
    return autoencoder
Example #15
def FirstStageModel(input_shape,
                    latent_dim,
                    base_dim=32,
                    fc_dim=512,
                    kernel_size=3,
                    num_scale=3,
                    block_per_scale=1,
                    depth_per_block=2):
    # base_dim refers to channels; they are doubled at each downscaling
    desired_scale = input_shape[1]
    scales, dims = [], []
    current_scale, current_dim = 4, base_dim
    while current_scale <= desired_scale:
        scales.append(current_scale)
        dims.append(current_dim)
        current_scale *= 2
        current_dim = min(current_dim * 2, 1024)
    assert scales[-1] == desired_scale
    dims = list(reversed(dims))
    print(dims, scales)

    encoder1 = Encoder1(input_shape, base_dim, kernel_size, num_scale,
                        block_per_scale, depth_per_block, fc_dim, latent_dim)
    decoder1 = Decoder1(input_shape, latent_dim, dims, scales, kernel_size,
                        block_per_scale, depth_per_block)

    x = Input(shape=input_shape)
    gamma = Input(shape=())  # adaptive gamma parameter
    z_mean, z_log_var, z = encoder1(x)
    x_hat = decoder1(z)
    vae1 = Model([x, gamma], x_hat)

    #loss
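    # k rescales the KL term by (2 * pixels_per_side / latent_dim)^2 to
    # balance it against the summed per-pixel reconstruction error
    # (apparently following the Two-Stage VAE setup this code mirrors).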
    k = (2 * input_shape[1] / latent_dim)**2
    L_rec = 0.5 * K.sum(K.square(x - x_hat), axis=[1, 2, 3]) / gamma
    L_KL = 0.5 * K.sum(K.square(z_mean) + K.exp(z_log_var) - 1 - z_log_var,
                       axis=-1)
    L_tot = K.mean(L_rec + k * L_KL)

    vae1.add_loss(L_tot)

    return (vae1, encoder1, decoder1)
Example #16
    def build_model(self, n_links, params):
        self.day_hour_dim = params.get('dayHourDim', 4)
        self.lr = params.get('lr', 0.01)
        self.dropout_prop = params.get('dropoutProp', 0.1)
        in_day_time = Input(shape=(1,), name='day_time')
        in_y_true = Input(shape=(n_links,), name='y_true')
        in_mask = Input(shape=(n_links,), name='mask')
        

        l_day_time = Embedding(7 * 24, self.day_hour_dim, name='embed_day_time')(in_day_time)
        l_day_time = Reshape((self.day_hour_dim,), name='reshape_day_time')(l_day_time)
        out_run = Dense(n_links // 3, name='fc_1', activation='relu')(l_day_time)
        out_run = Dropout(self.dropout_prop, name='dropout')(out_run)
        out_run = Dense(n_links, name='fc_out', activation='relu')(out_run)

        opt = tf.keras.optimizers.RMSprop(learning_rate=self.lr)
        model = Model(inputs=[in_day_time, in_y_true, in_mask], outputs=[out_run])
        model.add_loss(self.loss_fcn(in_y_true, out_run, in_mask))
        model.compile(opt)
        return model
Example #17
def init_detection_model(model_conf, trainable=False):
    """
    :param trainable: using pretrained model configuration or not
    :param model_conf: loaded from json configuration model scheme
    :return:
    """

    IMAGE_SIZE = model_conf['IMAGE_SIZE']
    WEIGHT_DECAY = model_conf['WEIGHT_DECAY']
    ALPHA = model_conf['ALPHA']
    model = MobileNetV2(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                        include_top=False,
                        alpha=ALPHA,
                        weights="imagenet")

    for layer in model.layers:
        layer.trainable = trainable

    block = model.get_layer("block_16_project_BN").output

    x = Conv2D(112,
               padding="same",
               kernel_size=3,
               strides=1,
               activation="relu")(block)
    x = Conv2D(112, padding="same", kernel_size=3, strides=1,
               use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Conv2D(5, padding="same", kernel_size=1, activation="sigmoid")(x)

    model = Model(inputs=model.input, outputs=x)

    # divide by 2 since d/dweight learning_rate * weight^2 = 2 * learning_rate * weight
    # see https://arxiv.org/pdf/1711.05101.pdf
    regularizer = l2(WEIGHT_DECAY / 2)
    for weight in model.trainable_weights:
        with tf.keras.backend.name_scope("weight_regularizer"):
            model.add_loss(regularizer(weight))

    return model
Example #18
def build(
    latent_dim,
    input_shape,
    repeat=1,
    use_inception=True,
    batch_size=1,
    learning_rate=1e-4,
):
    encoder_input, encoder = _build_encoder(
        input_shape, latent_dim, repeat, use_inception
    )
    decoder_input, decoder = _build_decoder(
        latent_dim, input_shape, repeat, use_inception
    )
    z_mean, z_log_var, z = encoder(encoder_input)
    decoder_output = decoder(z)
    model = Model(encoder_input, decoder_output, name="vae")

    print(f"Encoder input: {encoder_input.shape}")
    print(f"Decoder output: {decoder_output.shape}")
    encoder_input.shape.assert_is_compatible_with(decoder_output.shape)
    #     assert encoder_input.shape.as_list() == decoder_output.shape.as_list()

    reconstruction_loss = ReconstructionLoss(mean=True)([encoder_input, decoder_output])
    #     reconstruction_loss = tf.losses.mse(encoder_input, decoder_output)
    #     reconstruction_loss = tf.reduce_sum(reconstruction_loss, axis=[1, 2])
    kl_loss = KLLoss(mean=True)([z, z_mean, z_log_var])
    #     logpz = log_normal_pdf(z, 0.0, 0.0)
    #     logqz_x = log_normal_pdf(z, z_mean, z_log_var)
    #     kl_loss = logqz_x - logpz
    vae_loss = reconstruction_loss + kl_loss
    model.add_loss(vae_loss)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate))
    # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate), loss=lambda yt, yp: vae_loss)

    model.add_metric(
        reconstruction_loss, aggregation="mean", name="reconstruction_loss"
    )
    model.add_metric(kl_loss, aggregation="mean", name="kl_loss")
    return model, encoder, decoder
Example #19
def make_backbone(num_states,
                  hidden_units,
                  num_actions,
                  dropout_reg=1e-5,
                  wd=1e-3):
    """
    Build a tensorflow keras backbone model utilizing concrete dropout layers.
    """
    losses: list = []
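    # Each ConcreteDropout wrapper returns (output, regularization_loss);
    # the per-layer regularization terms are collected here and attached
    # to the model with add_loss at the end.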
    inp = Input(shape=(num_states, ))
    x = inp

    for i in hidden_units:
        x, loss = ConcreteDropout(Dense(i, activation='relu'),
                                  weight_regularizer=wd,
                                  dropout_regularizer=dropout_reg)(x)
        losses.append(loss)

    x = Dense(100, activation='relu')(x)
    out = Dense(num_actions, activation='linear')(x)
    model = Model(inp, out)
    model.add_loss(losses)

    return model
Example #20
class VAE(object):
    def __init__(self,
                 original_dimension=784,
                 encoding_dimension=512,
                 latent_dimension=2):
        self.original_dimension = original_dimension
        self.encoding_dimension = encoding_dimension
        self.latent_dimension = latent_dimension

        self.z_log_var = None
        self.z_mean = None

        self.inputs = None
        self.outputs = None

        self.encoder = None
        self.decoder = None
        self.vae = None

    def build_vae(self):
        # Build encoder
        self.inputs = Input(shape=(self.original_dimension, ))
        x = Dense(self.encoding_dimension)(self.inputs)
        x = ReLU()(x)
        self.z_mean = Dense(self.latent_dimension)(x)
        self.z_log_var = Dense(self.latent_dimension)(x)

        z = Lambda(sampling)([self.z_mean, self.z_log_var])

        self.encoder = Model(self.inputs, [self.z_mean, self.z_log_var, z])

        # Build decoder
        latent_inputs = Input(shape=(self.latent_dimension, ))
        x = Dense(self.encoding_dimension)(latent_inputs)
        x = ReLU()(x)
        self.outputs = Dense(self.original_dimension)(x)
        self.outputs = Activation('sigmoid')(self.outputs)
        self.decoder = Model(latent_inputs, self.outputs)

        # Build end-to-end VAE.
        self.outputs = self.encoder(self.inputs)[2]
        self.outputs = self.decoder(self.outputs)
        self.vae = Model(self.inputs, self.outputs)

    # Note: not decorated with @tf.function; compile() and fit() build their
    # own graphs and raise an error when called inside a tf.function trace.
    def train(self, X_train, X_test, epochs=50, batch_size=64):
        reconstruction_loss = mse(self.inputs, self.outputs)
        reconstruction_loss *= self.original_dimension

        kl_loss = (1 + self.z_log_var - K.square(self.z_mean) -
                   K.exp(self.z_log_var))
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5

        vae_loss = K.mean(reconstruction_loss + kl_loss)

        self.vae.add_loss(vae_loss)
        self.vae.compile(optimizer=Adam(lr=1e-3))
        self.vae.fit(X_train,
                     epochs=epochs,
                     batch_size=batch_size,
                     validation_data=(X_test, None))

        return self.encoder, self.decoder, self.vae
Example #21
    def vae(self, config):

        enc_units = config["enc_units"]
        encoder_layers = len(enc_units)
        dec_units = config["dec_units"]
        decoder_layers = len(dec_units)
        interm_dim = config["interm_dim"]
        latent_dim = config["latent_dim"]
        activation = config["activation"]
        kernel_initializer = config["kernel_initializer"]
        kernel_regularizer = config["kernel_regularizer"]

        org_inputs = Input(shape=self.image_size[0] * self.image_size[1] *
                           self.image_size[2])
        x = Dense(
            enc_units[0] * 2,
            activation=activation,
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer,
        )(org_inputs)

        for i in range(encoder_layers):
            x = Dense(
                enc_units[i],
                activation=activation,
                kernel_initializer=kernel_initializer,
                kernel_regularizer=kernel_regularizer,
            )(x)

        x = Dense(
            interm_dim,
            activation=activation,
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer,
        )(x)

        z_mean = Dense(latent_dim)(x)
        z_var = Dense(latent_dim)(x)

        # Sampling from the intermediate dimension to get a probability density
        z = Lambda(self.sampling, output_shape=(latent_dim, ))([z_mean, z_var])

        # Encoder model
        enc_model = Model(org_inputs, [z_mean, z_var])

        latent_inputs = Input(shape=(latent_dim, ))
        outputs = Dense(
            dec_units[0] // 2,
            activation=activation,
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer,
        )(latent_inputs)

        for i in range(decoder_layers):
            outputs = Dense(
                dec_units[i],
                activation=activation,
                kernel_initializer=kernel_initializer,
                kernel_regularizer=kernel_regularizer,
            )(outputs)

        final_outputs = Dense(self.image_size[0] * self.image_size[1] *
                              self.image_size[2],
                              activation="sigmoid")(outputs)

        # Decoder model
        dec_model = Model(latent_inputs, final_outputs)

        out = dec_model(z)
        model = Model(org_inputs, out)

        kl_loss = -0.5 * tf.math.reduce_mean(z_var - tf.math.square(z_mean) -
                                             tf.math.exp(z_var) + 1)
        model.add_loss(kl_loss)

        return model
Example #22
def build_model(hp):

    params = hp.values.copy()

    params1 = {k.replace('1', ''): params[k] for k in params if '2' not in k}
    params2 = {k.replace('2', ''): params[k] for k in params if '1' not in k}

    params1['e_dim'], params1['r_dim'] = params1['dim'], params1['dim']
    params2['e_dim'], params2['r_dim'] = params2['dim'], params2['dim']

    m1 = models[params1['embedding_model']]
    m2 = models[params2['embedding_model']]

    embedding_model1 = m1(**params1)
    embedding_model2 = m2(**params2)

    triple1 = Input((3, ))
    triple2 = Input((3, ))
    ci = Input((1, ))
    si = Input((1, ))
    conc = Input((1, ))
    inputs = [triple1, triple2, ci, si, conc]

    _, l1 = embedding_model1(triple1)
    _, l2 = embedding_model2(triple2)

    c = embedding_model1.entity_embedding(ci)
    s = embedding_model2.entity_embedding(si)
    c = tf.squeeze(c, axis=1)
    s = tf.squeeze(s, axis=1)

    for i in range(params['branching_num_layers_chemical']):
        c = Dense(params['branching_units_chemical_' + str(i + 1)],
                  activation='relu')(c)
        c = Dropout(0.2)(c)

    for i in range(params['branching_num_layers_species']):
        s = Dense(params['branching_units_species_' + str(i + 1)],
                  activation='relu')(s)
        s = Dropout(0.2)(s)

    for i in range(hp.Int('branching_num_layers_conc', 0, 3, default=1)):
        conc = Dense(params['branching_units_conc_' + str(i + 1)],
                     activation='relu')(conc)
        conc = Dropout(0.2)(conc)

    x = Concatenate(axis=-1)([c, s, conc])

    for i in range(hp.Int('num_layers', 0, 3, default=1)):
        x = Dense(params['units_' + str(i + 1)], activation='relu')(x)
        x = Dropout(0.2)(x)

    x = Dense(params['output_dim'], activation='sigmoid', name='output_1')(x)

    model = Model(inputs=inputs, outputs=[x])
    model.add_loss(params1['loss_weight'] * l1 / 2 +
                   params2['loss_weight'] * l2 / 2)

    model.compile(
        optimizer=Adam(learning_rate=params['learning_rate']),
        loss={'output_1': 'binary_crossentropy'},
        loss_weights={'output_1': params['classification_loss_weight']},
        metrics=['acc', f1, f2, Precision(),
                 Recall(), AUC()])

    return model
Example #23
def build_model(hp, norm_params=None):

    params = hp.copy()

    params1 = {k.replace('1', ''): params[k] for k in params if '2' not in k}
    params2 = {k.replace('2', ''): params[k] for k in params if '1' not in k}

    params1['e_dim'], params1['r_dim'] = params1['dim'], params1['dim']
    params2['e_dim'], params2['r_dim'] = params2['dim'], params2['dim']
    params1['name'] = 'chemical_embedding_model'
    params2['name'] = 'species_embedding_model'

    m1 = models[params1['embedding_model']]
    m2 = models[params2['embedding_model']]

    embedding_model1 = m1(**params1)
    embedding_model2 = m2(**params2)

    triple1 = Input((3, ))
    triple2 = Input((3, ))
    ftriple1 = Input((3, ))
    ftriple2 = Input((3, ))

    ci = Input((1, ))
    si = Input((1, ))
    conc = Input((1, ))
    inputs = [triple1, ftriple1, triple2, ftriple2, ci, si, conc]

    score1 = embedding_model1(triple1)
    fscore1 = embedding_model1(ftriple1)
    loss_function1 = loss_function_lookup(params1['loss_function'])
    loss1 = loss_function1(score1, fscore1, params1['margin'] or 1, 1)

    score2 = embedding_model2(triple2)
    fscore2 = embedding_model2(ftriple2)
    loss_function2 = loss_function_lookup(params2['loss_function'])
    loss2 = loss_function2(score2, fscore2, params2['margin'] or 1, 1)

    c = embedding_model1.entity_embedding(ci)
    s = embedding_model2.entity_embedding(si)
    c = tf.squeeze(c, axis=1)
    s = tf.squeeze(s, axis=1)

    c = LayerNormalization(axis=-1)(c)
    s = LayerNormalization(axis=-1)(s)

    x = base_model(c, s, conc, params)

    model = Model(inputs=inputs, outputs=[x])
    model.add_loss(params1['loss_weight'] * loss1 +
                   params2['loss_weight'] * loss2)

    if params['use_pretrained']:
        for layer in embedding_model1.layers:
            if isinstance(layer, Embedding):
                layer.trainable = False
        for layer in embedding_model2.layers:
            if isinstance(layer, Embedding):
                layer.trainable = False

    compile_model(model, hp)

    return model
Example #24
def build_model():

    encoder_input = Input(shape=(time_step, input_dim), name='encoder_input')

    rnn1 = Bidirectional(GRU(rnn_dim, return_sequences=True),
                         name='rnn1')(encoder_input)
    rnn2 = Bidirectional(GRU(rnn_dim), name='rnn2')(rnn1)

    z_mean = Dense(z_dim, name='z_mean')(rnn2)
    z_log_var = Dense(z_dim, name='z_log_var')(rnn2)

    def sampling(args):
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        # by default, random_normal has mean=0 and std=1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    z = Lambda(sampling, output_shape=(z_dim, ), name='z')([z_mean, z_log_var])

    class kl_beta(tf.keras.layers.Layer):
        def __init__(self):
            super(kl_beta, self).__init__()

            # non-trainable weight so beta can be annealed from outside
            self.beta = tf.Variable(0.0, trainable=False, dtype=tf.float32)

        def call(self, inputs, **kwargs):
            # scale (and negate) the incoming KL term by beta
            return -self.beta * inputs

    beta = kl_beta()
    encoder = Model(encoder_input, z, name='encoder')

    # decoder

    decoder_latent_input = Input(shape=z_dim, name='z_sampling')

    repeated_z = RepeatVector(time_step,
                              name='repeated_z_tension')(decoder_latent_input)

    rnn1_output = GRU(rnn_dim, name='decoder_rnn1',
                      return_sequences=True)(repeated_z)

    rnn2_output = GRU(rnn_dim, name='decoder_rnn2',
                      return_sequences=True)(rnn1_output)

    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss = tf.reduce_mean(kl_loss)

    kl_loss = 0.5 * kl_loss

    kl_loss = beta(kl_loss)
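    # kl_beta negates its input, so the value attached via add_loss below is
    # +beta * KL(q(z|x) || N(0, I)); beta can be annealed during training.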
    tensile_middle_output = TimeDistributed(
        Dense(tension_middle_dim, activation='elu'),
        name='tensile_strain_dense1')(rnn2_output)

    tensile_output = TimeDistributed(
        Dense(tension_output_dim, activation='elu'),
        name='tensile_strain_dense2')(tensile_middle_output)

    diameter_middle_output = TimeDistributed(
        Dense(tension_middle_dim, activation='elu'),
        name='diameter_strain_dense1')(rnn2_output)

    diameter_output = TimeDistributed(
        Dense(tension_output_dim, activation='elu'),
        name='diameter_strain_dense2')(diameter_middle_output)

    melody_rhythm_1 = TimeDistributed(Dense(start_middle_dim,
                                            activation='elu'),
                                      name='melody_start_dense1')(rnn2_output)
    melody_rhythm_output = TimeDistributed(
        Dense(melody_note_start_dim, activation='sigmoid'),
        name='melody_start_dense2')(melody_rhythm_1)

    melody_pitch_1 = TimeDistributed(Dense(melody_bass_dense_1_dim,
                                           activation='elu'),
                                     name='melody_pitch_dense1')(rnn2_output)

    melody_pitch_output = TimeDistributed(
        Dense(melody_output_dim, activation='softmax'),
        name='melody_pitch_dense2')(melody_pitch_1)

    bass_rhythm_1 = TimeDistributed(Dense(start_middle_dim, activation='elu'),
                                    name='bass_start_dense1')(rnn2_output)

    bass_rhythm_output = TimeDistributed(
        Dense(bass_note_start_dim, activation='sigmoid'),
        name='bass_start_dense2')(bass_rhythm_1)

    bass_pitch_1 = TimeDistributed(Dense(melody_bass_dense_1_dim,
                                         activation='elu'),
                                   name='bass_pitch_dense1')(rnn2_output)
    bass_pitch_output = TimeDistributed(Dense(bass_output_dim,
                                              activation='softmax'),
                                        name='bass_pitch_dense2')(bass_pitch_1)

    decoder_output = [
        melody_pitch_output, melody_rhythm_output, bass_pitch_output,
        bass_rhythm_output, tensile_output, diameter_output
    ]

    decoder = Model(decoder_latent_input, decoder_output, name='decoder')

    model_input = encoder_input

    vae = Model(model_input,
                decoder(encoder(model_input)),
                name='encoder_decoder')

    vae.add_loss(kl_loss)

    vae.add_metric(kl_loss, name='kl_loss', aggregation='mean')

    optimizer = keras.optimizers.Adam()

    vae.compile(optimizer=optimizer,
                loss=[
                    'categorical_crossentropy', 'binary_crossentropy',
                    'categorical_crossentropy', 'binary_crossentropy', 'mse',
                    'mse'
                ],
                metrics=[[keras.metrics.CategoricalAccuracy()],
                         [keras.metrics.BinaryAccuracy()],
                         [keras.metrics.CategoricalAccuracy()],
                         [keras.metrics.BinaryAccuracy()],
                         [keras.metrics.MeanSquaredError()],
                         [keras.metrics.MeanSquaredError()]])

    return vae
Example #25
    def build_model(self, n_links, freq, lags, preds, params):
        # Parse parameters
        day_hour_dim = params.get('dayHourDim', 2)
        lr = params.get('lr', 0.01)
        dropout_prop = params.get('dropoutProp', 0.1)
        rnn_hidden_state = params.get('rnnHiddenState', 10)

        time_steps_per_day = int(
            pd.to_timedelta('24H') / pd.to_timedelta(freq))

        # Inputs
        in_dow_tod = Input(shape=(lags + preds, ), name='dow_tod')
        in_lags = Input(shape=(
            lags,
            n_links,
        ), name='lags')
        in_y_true = Input(shape=(
            preds,
            n_links,
        ), name='y_true')
        in_mask = Input(shape=(
            preds,
            n_links,
        ), name='mask')

        # We feed the entire sequence of time stamps through the same embedding layer for coherent learning
        time_embedding = Embedding(7 * time_steps_per_day,
                                   day_hour_dim,
                                   name='time_embedding')(in_dow_tod)
        time_embedding_lags = tf.keras.layers.Lambda(lambda x: x[:, :lags, :])(
            time_embedding)
        time_embedding_preds = tf.keras.layers.Lambda(
            lambda x: x[:, -preds:, :])(time_embedding)

        # Pre-processing, encoder input
        bn_lags = tf.keras.layers.BatchNormalization(name='bn_1')(in_lags)
        concat_lags_time = tf.keras.layers.Concatenate(
            name='concat_lags_time')([time_embedding_lags, bn_lags])

        # Encoder
        rnn_1 = tf.keras.layers.GRU(rnn_hidden_state,
                                    return_sequences=True,
                                    return_state=True,
                                    unroll=True,
                                    name='encoder',
                                    activation='tanh',
                                    dropout=dropout_prop,
                                    recurrent_dropout=dropout_prop)
        out_rnn, encoder_state = rnn_1(concat_lags_time)
        out_rnn = tf.keras.layers.Lambda(lambda x: x[:, -preds:, :],
                                         name='preds')(out_rnn)
        out_rnn = tf.keras.layers.BatchNormalization(name='bn_preds')(out_rnn)

        # Decoder
        rnn_2 = tf.keras.layers.GRU(rnn_hidden_state,
                                    return_sequences=True,
                                    return_state=False,
                                    unroll=True,
                                    name='decoder',
                                    activation='tanh',
                                    dropout=dropout_prop,
                                    recurrent_dropout=dropout_prop)
        out_rnn = rnn_2(out_rnn, initial_state=encoder_state)
        out_rnn = tf.keras.layers.Concatenate(name='concat_rnn_time')(
            [time_embedding_preds, out_rnn])

        # Dense
        out_rnn = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(n_links // 3, name='fc_1',
                                  activation='relu'),
            name='time_distributed_fc_1')(out_rnn)
        out_rnn = Dropout(dropout_prop, name='dropout')(out_rnn)
        out = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(n_links, name='fc_out', activation='linear'),
            name='time_distributed_output')(out_rnn)

        opt = tf.keras.optimizers.RMSprop(learning_rate=lr)
        model = Model(inputs=[in_lags, in_dow_tod, in_y_true, in_mask],
                      outputs=[out])
        model.add_loss(self.loss_fcn(in_y_true, out, in_mask))
        model.compile(opt)
        return model
Example #26
class WANN(object):
    """
    WANN: Weighting Adversarial Neural Network is an instance-based domain adaptation
    method suited for regression tasks. It assumes the supervised setting where some
    labeled target data are available.

    The goal of WANN is to compute a source-instance reweighting which corrects
    "shifts" between the source and target domains. This is done by minimizing the
    Y-discrepancy distance between the source and target distributions.
    
    WANN involves three networks:
        - the weighting network which learns the source weights.
        - the task network which learns the task.
        - the discrepancy network which is used to estimate a distance 
          between the reweighted source and target distributions: the Y-discrepancy
    
    Parameters
    ----------
    get_base_model: callable, optional
        Constructor for the two networks: task and discrepancer.
        The constructor should take the following four
        arguments:
        - shape: the input shape
        - C: the projecting constant
        - activation: the last layer activation function
        - name: the model name
        If None, get_default_model is used.
        
    get_weighting_model: callable, optional
        Constructor for the weighting network.
        The constructor should take the same arguments 
        as get_base_model.
        If None, get_base_model is used.
        
    C: float, optional (default=1.)
        Projecting constant: networks should be
        regularized by projecting the weights of each layer
        on the ball of radius C.
        
    C_w: float, optional (default=None)
        Projecting constant of the weighting network.
        If None C_w = C.
        
    optimizer: tf.keras Optimizer, optional (default="adam")
        Optimizer of WANN
        
    save_hist: boolean, optional (default=False)
        Whether to save the predicted weights and labels
        at each epoch or not
    """
    
    def __init__(self, get_base_model=None, get_weighting_model=None,
                 C=1., C_w=None, optimizer='adam', save_hist=False):
        
        self.get_base_model = get_base_model
        if self.get_base_model is None:
            self.get_base_model = _get_default_model
        
        self.get_weighting_model = get_weighting_model
        if self.get_weighting_model is None:
            self.get_weighting_model = self.get_base_model
        
        self.C = C
        self.C_w = C_w
        if self.C_w is None:
            self.C_w = C
        
        self.save_hist = save_hist
        self.optimizer = optimizer
        

    def fit(self, X, y, index=None, weights_target=None, **fit_params):
        """
        Fit WANN
        
        Parameters
        ----------
        X, y: numpy arrays
            Input data
            
        index: iterable
            Index should contain 2 lists or 1D-arrays
            corresponding to:
            index[0]: indexes of source labeled data in X, y
            index[1]: indexes of target labeled data in X, y
            
        weights_target: numpy array, optional (default=None)
            Weights for target sample.
            If None, all weights are set to 1.
            
        fit_params: key, value arguments
            Arguments to pass to the fit method (epochs, batch_size...)
            
        Returns
        -------
        self 
        """
        self.fit_params = fit_params
        assert hasattr(index, "__iter__"), "index should be iterable"
        assert len(index) == 2, "index length should be 2"
        src_index = index[0]
        tgt_index = index[1]
        self._fit(X, y, src_index, tgt_index, weights_target)        
        return self


    def _fit(self, X, y, src_index, tgt_index, weights_target):
        # Resize source and target index to the same length
        max_size = max((len(src_index), len(tgt_index)))
        resize_src_ind = np.array([src_index[i%len(src_index)]
                                   for i in range(max_size)])
        resize_tgt_ind = np.array([tgt_index[i%len(tgt_index)]
                                   for i in range(max_size)])
        
        # If no target weights are given, all are set to one
        if weights_target is None:
            resize_weights_target = np.ones(max_size)
        else:
            resize_weights_target = np.array([weights_target[i % len(weights_target)]
                                              for i in range(max_size)])
                     
        # Create WANN model
        if not hasattr(self, "model"):
            self._create_wann(shape=X.shape[1])

        # Callback to save predicted weights and labels
        callbacks = []
        if "callbacks" in self.fit_params:
            callbacks = self.fit_params["callbacks"]
            del self.fit_params["callbacks"]
            
        # Initialize weighting network
        self.weights_predictor.compile(optimizer=copy.deepcopy(self.optimizer), loss="mse")
        self.weights_predictor.fit(X[src_index], np.ones(len(src_index)), **self.fit_params)
        
        # Fit
        self.model.fit([X[resize_src_ind], X[resize_tgt_ind],
                        y[resize_src_ind], y[resize_tgt_ind],
                        resize_weights_target],
                       callbacks=callbacks,
                       **self.fit_params)
        return self
            
            
    def _create_wann(self, shape):
        # Build task, weights_predictor and discrepancer network
        # Weights_predictor should end with a relu activation
        self.weights_predictor = self.get_weighting_model(
                shape, activation='relu', C=self.C_w, name="weights")
        self.task = self.get_base_model(
                shape, activation=None, C=self.C, name="task")
        self.discrepancer = self.get_base_model(
                shape, activation=None, C=self.C, name="discrepancer")
        
        # Create input layers for Xs, Xt, ys, yt and target weights
        input_source = Input(shape=(shape,))
        input_target = Input(shape=(shape,))
        output_source = Input(shape=(1,))
        output_target = Input(shape=(1,))
        weights_target = Input(shape=(1,))
        Flip = _GradReverse()
        
        # Get networks output for both source and target
        weights_source = self.weights_predictor(input_source)      
        output_task_s = self.task(input_source)
        output_task_t = self.task(input_target)
        output_disc_s = self.discrepancer(input_source)
        output_disc_t = self.discrepancer(input_target)
        
        # Reversal layer at the end of discrepancer
        output_disc_s = Flip(output_disc_s)
        output_disc_t = Flip(output_disc_t)

        # Create model and define loss
        self.model = Model([input_source, input_target, output_source, output_target, weights_target],
                           [output_task_s, output_task_t, output_disc_s, output_disc_t, weights_source],
                           name='WANN')
            
        loss_task_s = K.mean(multiply([weights_source, K.square(output_source - output_task_s)]))
        loss_task_t = K.mean(multiply([weights_target, K.square(output_target - output_task_t)]))
            
        loss_disc_s = K.mean(multiply([weights_source, K.square(output_source - output_disc_s)]))
        loss_disc_t = K.mean(multiply([weights_target, K.square(output_target - output_disc_t)]))
            
        loss_task = loss_task_s #+ loss_task_t
        loss_disc = loss_disc_t - loss_disc_s
                         
        loss = loss_task + loss_disc
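        # The gradient-reversal layer (_GradReverse) on the discrepancer
        # outputs makes loss_disc adversarial: the discrepancer maximizes the
        # Y-discrepancy while the weighting and task networks minimize it.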
   
        self.model.add_loss(loss)
        self.model.add_metric(tf.reduce_sum(K.mean(weights_source)), name="weights", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_task_s), name="task_s", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_task_t), name="task_t", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_disc), name="disc", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_disc_s), name="disc_s", aggregation="mean")
        self.model.add_metric(tf.reduce_sum(loss_disc_t), name="disc_t", aggregation="mean")
        self.model.compile(optimizer=self.optimizer)
        return self
    
    
    def predict(self, X):
        """
        Predict method: return the prediction of task network
        
        Parameters
        ----------
        X: array
            input data
            
        Returns
        -------
        y_pred: array
            prediction of task network
        """
        return self.task.predict(X)
    
    
    def get_weight(self, X):
        """
        Return the predictions of weighting network
        
        Parameters
        ----------
        X: array
            input data
            
        Returns
        -------
        array:
            weights
        """
        return self.weights_predictor.predict(X)
    
    
    def save(self, path):
        """
        Save task network
        
        Parameters
        ----------
        path: str
            path where to save the model
        """
        self.task.save(path)
        self.weights_predictor.save(path + "_weights")
        return self
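The docstring above specifies the constructor signature expected by get_base_model and get_weighting_model. A minimal sketch of a compatible constructor, assuming a max-norm kernel constraint as a stand-in for the projection onto the radius-C ball (the layer size and the helper's name are illustrative):

from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.constraints import MaxNorm

def get_custom_model(shape, C=1., activation=None, name="model"):
    # Hypothetical constructor matching the documented signature.
    inputs = Input(shape=(shape,))
    x = Dense(100, activation='relu', kernel_constraint=MaxNorm(C))(inputs)
    outputs = Dense(1, activation=activation,
                    kernel_constraint=MaxNorm(C))(x)
    return Model(inputs, outputs, name=name)

wann = WANN(get_base_model=get_custom_model, C=1.)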
Example #27
class ADDA:
    """
    ADDA: Adversarial Discriminative Domain Adaptation

    ADDA is a feature-based domain adaptation method.
    
    The purpose of ADDA is to build a new feature representation
    in which source and target data could not be distinguished by
    any **discriminator** network. This feature representation is
    built with two **encoder** networks:
    
    - a **source encoder** trained to provide good features in order
      to learn the task on the source domain. The task is learned
      through a **task** network trained with the **source encoder**.
    - a **target encoder** trained to fool a **discriminator** network
      which tries to classify source and target data in the encoded space.
      The **target encoder** and the **discriminator** are trained
      in an adversarial fashion in the same way as GAN.
      
    The parameters of the four networks are optimized in a two stage
    algorithm where **source encoder** and **task** networks are first
    fitted according to the following optimization problem:
    
    .. math::
    
        \min_{\phi_S, F} \mathcal{L}_{task}(F(\phi_S(X_S)), y_S)
    
    In the second stage, **target encoder** and **discriminator**
    networks are fitted according to:
    
    .. math::
    
        \max_{\phi_T} \min_{D} \mathcal{L}_{01}(D(\phi_S(X_S)), \\textbf{0})
        + \mathcal{L}_{01}(D(\phi_T(X_T)), \\textbf{1})
    
    Where:
    
    - :math:`(X_S, y_S), (X_T)` are respectively the labeled source data
      and the unlabeled target data.
    - :math:`\phi_S, \phi_T, F, D` are respectively the **source encoder**,
      the **target encoder**, the **task** and the **discriminator** networks.
    
    The method was originally introduced for **unsupervised**
    classification DA, but it can be extended straightforwardly
    to other tasks and to **supervised** DA.
    
    Parameters
    ----------
    get_src_encoder : callable, optional (default=None)
        Constructor for source encoder networks.
        The constructor should return a tensorflow compiled Model.
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, shallow networks with 10 neurons are used
        as encoder networks.
        
    get_tgt_encoder : callable, optional (default=None)
        Constructor for target encoder networks.
        The constructor should return a tensorflow compiled Model.
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, shallow networks with 10 neurons are used
        as encoder networks.
        
    get_task : callable, optional (default=None)
        Constructor for task network.
        The constructor should return a tensorflow compiled Model. 
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network and an ``output_shape``
        argument giving the shape of the last layer.
        If ``None``, a linear network is used as task network.
        
    get_discriminator : callable, optional (default=None)
        Constructor for discriminator network.
        The constructor should return a tensorflow compiled Model. 
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, a linear network is used as discriminator
        network.
    
    src_enc_params : dict, optional (default=None)
        Additional arguments for ``get_src_encoder``.
        
    tgt_enc_params : dict, optional (default=None)
        Additional arguments for ``get_tgt_encoder``.
        
    task_params : dict, optional (default=None)
        Additional arguments for ``get_task``.
        
    disc_params : dict, optional (default=None)
        Additional arguments for ``get_discriminator``.
        
    compil_params : key, value arguments, optional
        Additional arguments for network compiler
        (loss, optimizer...).
        If none, loss is set to ``"binary_crossentropy"``
        and optimizer to ``"adam"``.

    Attributes
    ----------
    src_encoder_ : tensorflow Model
        Fitted source encoder network.
        
    tgt_encoder_ : tensorflow Model
        Fitted target encoder network.
        
    task_ : tensorflow Model
        Fitted task network.
        
    discriminator_ : tensorflow Model
        Fitted discriminator network.
    
    src_model_ : tensorflow Model
        Fitted source model: the union of
        source encoder and task networks.
        
    tgt_model_ : tensorflow Model
        Fitted target model: the union of
        the target encoder and discriminator networks.

    References
    ----------
    .. [1] `[1] <https://arxiv.org/pdf/1702.05464.pdf>`_ E. Tzeng, J. Hoffman, \
K. Saenko, and T. Darrell. "Adversarial discriminative domain adaptation". \
In CVPR, 2017.
    """
    def __init__(self,
                 get_src_encoder=None,
                 get_tgt_encoder=None,
                 get_task=None,
                 get_discriminator=None,
                 src_enc_params=None,
                 tgt_enc_params=None,
                 task_params=None,
                 disc_params=None,
                 **compil_params):
        self.get_src_encoder = get_src_encoder
        self.get_tgt_encoder = get_tgt_encoder
        self.get_task = get_task
        self.get_discriminator = get_discriminator
        self.src_enc_params = src_enc_params
        self.tgt_enc_params = tgt_enc_params
        self.task_params = task_params
        self.disc_params = disc_params
        self.compil_params = compil_params

        if self.get_src_encoder is None:
            self.get_src_encoder = get_default_encoder
        if self.get_tgt_encoder is None:
            self.get_tgt_encoder = get_default_encoder
        if self.get_task is None:
            self.get_task = get_default_task
        if self.get_discriminator is None:
            self.get_discriminator = get_default_task

        if self.src_enc_params is None:
            self.src_enc_params = {}
        if self.tgt_enc_params is None:
            self.tgt_enc_params = {}
        if self.task_params is None:
            self.task_params = {}
        if self.disc_params is None:
            self.disc_params = {}

    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit ADDA.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.
            
        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit process of source encoder
            and task networks (epochs, batch_size...).
            If None, ``fit_params_src = fit_params_tgt``
        
        fit_params_tgt : key, value arguments
            Arguments given to the fit method of the ADDA model,
            i.e. fitting of target encoder and discriminator.
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if fit_params_src is None:
            fit_params_src = fit_params_tgt

        if tgt_index_labeled is None:
            src_index_bis = src_index
        else:
            src_index_bis = np.concatenate((src_index, tgt_index_labeled))

        self._create_model(X.shape[1:], y.shape[1:])

        max_size = max(len(src_index_bis), len(tgt_index))
        resize_tgt_ind = np.resize(tgt_index, max_size)
        resize_src_ind = np.resize(src_index_bis, max_size)

        self.src_model_.fit(X[src_index_bis], y[src_index_bis],
                            **fit_params_src)

        self.tgt_model_.fit(
            [self.src_encoder_.predict(X[resize_src_ind]), X[resize_tgt_ind]],
            **fit_params_tgt)
        return self

    def _create_model(self, shape_X, shape_y):

        compil_params = copy.deepcopy(self.compil_params)
        if not "loss" in compil_params:
            compil_params["loss"] = "binary_crossentropy"
        if not "optimizer" in compil_params:
            compil_params["optimizer"] = "adam"

        self.src_encoder_ = check_network(self.get_src_encoder,
                                          "get_src_encoder",
                                          input_shape=shape_X,
                                          **self.src_enc_params)
        self.tgt_encoder_ = check_network(self.get_tgt_encoder,
                                          "get_tgt_encoder",
                                          input_shape=shape_X,
                                          **self.tgt_enc_params)

        if self.src_encoder_.output_shape != self.tgt_encoder_.output_shape:
            raise ValueError("Target encoder output shape does not match "
                             "the one of source encoder.")

        self.task_ = check_network(
            self.get_task,
            "get_task",
            input_shape=self.src_encoder_.output_shape[1:],
            output_shape=shape_y,
            **self.task_params)
        self.discriminator_ = check_network(
            self.get_discriminator,
            "get_discriminator",
            input_shape=self.src_encoder_.output_shape[1:],
            **self.disc_params)

        input_task = Input(shape_X)
        encoded_source = self.src_encoder_(input_task)
        tasked = self.task_(encoded_source)
        self.src_model_ = Model(input_task, tasked, name="ModelSource")
        self.src_model_.compile(**compil_params)

        input_source = Input(self.src_encoder_.output_shape[1:])
        input_target = Input(shape_X)
        encoded_target = self.tgt_encoder_(input_target)
        discrimined_target = GradientReversal()(encoded_target)
        discrimined_target = self.discriminator_(discrimined_target)
        discrimined_source = self.discriminator_(input_source)

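        # GAN-style discriminator loss: minimizing it pushes D towards 1 on
        # encoded target data and 0 on encoded source data; GradientReversal
        # flips the gradient into the target encoder so it is trained
        # adversarially to fool the discriminator.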
        loss = (-K.mean(K.log(discrimined_target + K.epsilon())) -
                K.mean(K.log(1 - discrimined_source + K.epsilon())))

        self.tgt_model_ = Model([input_source, input_target],
                                [discrimined_source, discrimined_target],
                                name="ModelTarget")
        self.tgt_model_.add_loss(loss)

        compil_params.pop("loss")
        self.tgt_model_.compile(**compil_params)
        return self

    def predict(self, X, domain="target"):
        """
        Return the predictions of task network on the encoded feature space.

        ``domain`` arguments specify how features from ``X``
        will be considered: as ``"source"`` or ``"target"`` features.
        If ``"source"``, source encoder will be used. 
        If ``"target"``, target encoder will be used.

        Parameters
        ----------
        X : array
            Input data.

        domain : str, optional (default="target")
            Choose between ``"source"`` and ``"target"`` encoder.

        Returns
        -------
        y_pred : array
            Prediction of task network.

        Notes
        -----
        As ADDA is an anti-symmetric feature-based method, one should
        indicate the domain of ``X`` in order to apply the appropriate
        feature transformation.
        """
        if domain == "target":
            X = self.tgt_encoder_.predict(X)
        elif domain == "source":
            X = self.src_encoder_.predict(X)
        else:
            raise ValueError("Choose between source or target for domain name")
        return self.task_.predict(X)
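# Usage sketch (hypothetical data; X, y are numpy arrays, src_idx and tgt_idx
# index the labeled source and unlabeled target rows of X):
#
#   adda = ADDA()
#   adda.fit(X, y, src_idx, tgt_idx,
#            fit_params_src=dict(epochs=30, batch_size=64, verbose=0),
#            epochs=30, batch_size=64, verbose=0)
#   y_tgt = adda.predict(X[tgt_idx], domain="target")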
Example #28
class CVAE():

    def __init__(self, 
                 x_input_size,
                 b_input_size,
                 lb_input_size,
                 sf_input_size = 1,
                 enc = (256, 256, 128),
                 dec = (128, 256, 256),
                 latent_k = 30,
                 alpha = 0.01,
                 input_dropout = 0.,
                 encoder_dropout = 0.1,
                 nonmissing_indicator = None,
                 init = tf.keras.initializers.Orthogonal(),
                 optimizer = None,
                 lr = 0.001,
                 clipvalue = 5,
                 clipnorm = 1,
                 theta_min = 1e-6,
                 theta_max = 1e2):

        self.x_input_size = x_input_size
        self.b_input_size = b_input_size
        self.lb_input_size = lb_input_size
        self.z_input_size = latent_k
        self.sf_input_size = sf_input_size
        self.disp_input_size = b_input_size
        self.enc = enc
        self.dec = dec
        self.latent_k = latent_k
        self.alpha = alpha
        self.input_dropout = input_dropout
        self.encoder_dropout = encoder_dropout
        self.init = init
        self.lr = lr
        self.clipvalue = clipvalue
        self.clipnorm = clipnorm
        self.theta_min = theta_min
        self.theta_max = theta_max



        if optimizer is None:
            self.optimizer = tf.keras.optimizers.Adam(learning_rate = lr, 
                                                  clipnorm = clipnorm, clipvalue = clipvalue)
        else:
            self.optimizer = optimizer

        
        self.extra_models = {}
        self.model = None


    
    def build(self, print_model = False):


        """ Inputs. """
        self.x_input = Input(shape = (self.x_input_size, ), name = 'x_input')
        self.b_input = Input(shape = (self.b_input_size, ), name = 'B')
        self.sf_input = Input(shape = (self.sf_input_size, ), name = 'sf_input')
        self.z_input = Input(shape = (self.z_input_size, ), name = 'z_input')
        self.disp_input = Input(shape = (self.disp_input_size, ), name = 'nb_input')
        self.x_raw_input = Input(shape = (self.x_input_size, ), name = 'x_raw_input')
        self.lb_input = Input(shape = (self.lb_input_size, ), name = 'lb_input')



        """ Build the encoder. """
        self.z = keras.layers.concatenate([self.x_input, self.b_input])

        for i, hid_size in enumerate(self.enc):
            dense_layer_name = 'e%s' % i
            bn_layer_name = 'be%s' % i
            self.z = Dense(hid_size, activation = None, use_bias = True, 
                        kernel_initializer = self.init, name = dense_layer_name)(self.z)
            self.z = LeakyReLU(alpha = 0.01)(self.z)
            self.z = BatchNormalization(center = False, scale = True, name = bn_layer_name)(self.z)
            if i == 0:
                self.z = Dropout(self.encoder_dropout)(self.z)
            
        self.z_mean = Dense(self.latent_k, activation = None, use_bias = True, 
                            kernel_initializer = self.init, name = 'z_mean_dense')(self.z)
        self.z_mean = LeakyReLU(alpha = 0.01, name = 'z_mean_act')(self.z_mean)
        self.z_mean = BatchNormalization(center = False, scale = True, name = 'bz')(self.z_mean)
        self.z_log_var = Dense(self.latent_k, activation = None, use_bias = True, 
                            kernel_initializer = tf.keras.initializers.Orthogonal(gain = 0.01), 
                            name = 'z_log_var')(self.z)

        # Sampling latent space
        self.z_out = Lambda(sample_z, output_shape = (self.latent_k, ))([self.z_mean, self.z_log_var])

        self.extra_models['mean_out'] = Model([self.x_input, self.b_input], self.z_mean, name = 'mean_out')
        self.extra_models['var_out'] = Model([self.x_input, self.b_input], self.z_log_var, name = 'var_out')
        self.extra_models['samp_out'] = Model([self.x_input, self.b_input], self.z_out, name = 'samp_out')


        """ Build the prediction network. """
        self.lb_pred = Dense(self.latent_k, activation = 'sigmoid', use_bias = True, 
                            kernel_initializer = self.init, name = 'pred_sigmoid')(self.z_mean)
        self.lb_pred = BatchNormalization(center = False, scale = True, name = 'lz1')(self.lb_pred)
        self.lb_pred = Dense(int(0.5*self.latent_k), activation = 'sigmoid', use_bias = True, 
                            kernel_initializer = self.init, name = 'pred_sigmoid2')(self.lb_pred)
        self.lb_pred = BatchNormalization(center = False, scale = True, name = 'lz2')(self.lb_pred)
        self.lb_pred = Dense(self.lb_input_size, activation = 'softmax', use_bias = True, 
                            kernel_initializer = self.init, name = 'pred_softmax')(self.lb_pred)
        self.extra_models['lb_pred'] = Model([self.x_input, self.b_input], self.lb_pred, name = 'lb_pred')


        """ Build the decoder. """
        #### decoder network
        self.decoder_dense_layers = []
        self.decoder_leaky_layers = []
        for i, hid_size in enumerate(self.dec):
            dense_layer_name = 'd%s' % i
            self.decoder_dense_layers.append ( Dense(hid_size, activation = None, use_bias = True, 
                                                kernel_initializer = self.init, name = dense_layer_name) )
            self.decoder_leaky_layers.append ( LeakyReLU(alpha = 0.01) )
        self.last_layer_mu = Dense(self.x_input_size, activation = None, use_bias = True, 
                                kernel_initializer = self.init, name = 'mu_out')


        #### start from sampled latent values
        self.decoder11 = keras.layers.concatenate([self.z_out, self.b_input])
        for i, hid_size in enumerate(self.dec):
            self.decoder11 = self.decoder_dense_layers[i](self.decoder11)
            self.decoder11 = self.decoder_leaky_layers[i](self.decoder11)
        self.mu_hat = self.last_layer_mu(self.decoder11)
        self.mu_hat_sf = AddLayer(name = 'mu_hat_sf')([self.mu_hat, self.sf_input])
        self.mu_hat_exp_sf = ExpLayer(name = 'mu_hat_exp_sf')(self.mu_hat_sf)
        self.mu_hat_exp = ExpLayer(name = 'mu_hat_exp')(self.mu_hat)


        #### start from zeroed latent values
        self.decoder12_mean = keras.layers.concatenate([self.z_input, self.b_input])
        for i, hid_size in enumerate(self.dec):
            self.decoder12_mean = self.decoder_dense_layers[i](self.decoder12_mean)
            self.decoder12_mean = self.decoder_leaky_layers[i](self.decoder12_mean)
        self.mu_hat_mean = self.last_layer_mu(self.decoder12_mean)
        self.mu_hat_mean_sf = AddLayer(name = 'mu_hat_mean_sf')([self.mu_hat_mean, self.sf_input])
        self.mu_hat_mean_exp_sf = ExpLayer(name = 'mu_hat_mean_exp_sf')(self.mu_hat_mean_sf)
        self.mu_hat_mean_exp = ExpLayer(name = 'mu_hat_mean_exp')(self.mu_hat_mean)

        self.extra_models['decoder_mean'] = Model([self.z_input, self.b_input], [self.mu_hat_mean_exp], name = 'decoder_mean')



        """ Build the dispersion network. """
        self.last_layer_theta = Dense(self.x_input_size, activation = None, use_bias = True, 
                                      kernel_initializer = self.init, name = 'theta_out')

        #### dispersion for the sampled-latent branch (batch input only)
        self.theta_hat = self.last_layer_theta(self.disp_input)
        self.theta_hat = ClipLayer(name = 'clip_theta_hat')(self.theta_hat)
        self.theta_hat_exp = ExpLayer(name = 'theta_hat_exp')(self.theta_hat)

        #### dispersion for the zeroed-latent branch (same shared layer and input)
        self.theta_hat_mean = self.last_layer_theta(self.disp_input)
        self.theta_hat_mean = ClipLayer(name = 'clip_theta_hat_mean')(self.theta_hat_mean)
        self.theta_hat_mean_exp = ExpLayer(name = 'theta_hat_mean_exp')(self.theta_hat_mean)

        self.extra_models['disp_model'] = Model(self.disp_input, self.theta_hat_mean_exp, name = 'disp_model')



        """ Build the whole network. """
        # decoder output
        self.out_hat = keras.layers.concatenate([self.mu_hat_sf, self.theta_hat], name = 'out')
        self.out_hat_mean = keras.layers.concatenate([self.mu_hat_mean_sf, self.theta_hat_mean], name = 'out_mean')
        # the whole model
        self.model = Model(inputs = [self.z_input, self.x_input, self.b_input, self.sf_input, self.disp_input, self.x_raw_input, self.lb_input], 
                           outputs = [self.out_hat, self.out_hat_mean, self.lb_pred], 
                           name = 'model')

        if print_model:
            self.model.summary()


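        # Training losses: label-prediction cross-entropy, KL divergence of the
        # latent Gaussian from the unit prior, and a reconstruction term mixing
        # the NB likelihood of the sampled-latent decoder with that of the
        # zeroed-latent ("mean") decoder, weighted by alpha.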
        self.pred_loss = K.sum( tf.keras.losses.categorical_crossentropy(self.lb_input, self.lb_pred), axis = -1)
        self.kl_loss = -0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis = -1)
        self.recon_loss = ((1 - self.alpha) * self.nb_loss_func(self.x_raw_input, self.mu_hat_exp_sf) 
                            + self.alpha * self.nb_loss0_func(self.x_raw_input, self.mu_hat_mean_exp_sf))
        


    def add_loss(self, pred_weight, kl_weight=1):
        self.final_loss = kl_weight * self.kl_loss + self.recon_loss + pred_weight * self.pred_loss
        self.model.add_loss(self.final_loss)
        self.model.add_metric(self.pred_loss, name='pred_loss')
        self.model.add_metric(self.kl_loss, name='kl_loss')
        self.model.add_metric(self.recon_loss, name='recon_loss')



    def compile_model(self, pred_weight, kl_weight=1, optimizer = None):

        self.add_loss(pred_weight, kl_weight)

        if optimizer is not None:
            self.optimizer = optimizer

        self.model.compile(optimizer = self.optimizer)



    def kl_loss_func(self):

        kl_loss = -0.5 * K.sum(1 + self.z_log_var - K.square(self.z_mean) - K.exp(self.z_log_var), axis = -1)

        return kl_loss



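    # The two NB losses below evaluate the negative binomial negative
    # log-likelihood, summed over genes:
    #   -log NB(y; mu, theta) = -[ lgamma(y + theta) - lgamma(theta)
    #                              - lgamma(y + 1) + theta * log(theta)
    #                              + y * log(mu) - (y + theta) * log(theta + mu) ]
    # `y_pred` is unused: mu and theta are read from tensors captured in the
    # model graph at build time.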
    def nb_loss_func(self, y_true, y_pred):
    
        log_mu = self.mu_hat_sf
        log_theta = self.theta_hat
        mu = self.mu_hat_exp_sf
        theta = self.theta_hat_exp
        f0 = -1 * tf.math.lgamma(y_true + 1)
        f1 = -1 * tf.math.lgamma(theta)
        f2 = tf.math.lgamma(y_true + theta)
        f3 = - (y_true + theta) * tf.math.log(theta + mu)
        f4 = theta * log_theta
        f5 = y_true * log_mu
        final = - K.sum(f0 + f1 + f2 + f3 + f4 + f5, axis = 1)

        return final



    def nb_loss0_func(self, y_true, y_pred):
        
        log_mu = self.mu_hat_mean_sf
        log_theta = self.theta_hat_mean
        mu = self.mu_hat_mean_exp_sf
        theta = self.theta_hat_mean_exp
        f0 = -1 * tf.math.lgamma(y_true + 1)
        f1 = -1 * tf.math.lgamma(theta)
        f2 = tf.math.lgamma(y_true + theta)
        f3 = - (y_true + theta) * tf.math.log(theta + mu)
        f4 = theta * log_theta
        f5 = y_true * log_mu
        final = - K.sum(f0 + f1 + f2 + f3 + f4 + f5, axis = 1)

        return final


    def load_weights(self, filename):

        self.model.load_weights(filename)


    def save_weights(self, filename, save_extra = False, extra_filenames = None):

        self.model.save_weights(filename)

        if save_extra:
            self.extra_models['mean_out'].save_weights(extra_filenames["mean_out"])
            self.extra_models['var_out'].save_weights(extra_filenames["var_out"])
            self.extra_models['samp_out'].save_weights(extra_filenames["samp_out"])
            self.extra_models['disp_model'].save_weights(extra_filenames["disp_model"])
            self.extra_models['decoder_mean'].save_weights(extra_filenames["decoder_mean"])


    def predict_latent(self, X, B):

        latent_mean = self.extra_models['mean_out'].predict([X, B])

        return latent_mean


    
    def predict_beta(self, X, B, sf):

        zmean = self.extra_models['mean_out'].predict([X, B])
        X_lambda = self.extra_models['decoder_mean'].predict([zmean, B])
        X_theta = self.extra_models['disp_model'].predict(B)
        X_lambda = (X_lambda.T * sf).T

        return X_lambda, X_theta



    def model_initialize(self, adata, 
                         epochs=300, batch_size=64, 
                         validation_split=0.1, 
                         shuffle=True, fit_verbose=1, 
                         lr_patience=1, 
                         lr_factor=0.1, 
                         lr_verbose=True,
                         es_patience=2,
                         es_verbose=True):

        callbacks = []
        lr_cb = ReduceLROnPlateau(monitor='val_pred_loss', patience=lr_patience, factor=lr_factor, verbose=lr_verbose)
        callbacks.append(lr_cb)
        es_cb = EarlyStopping(monitor='val_pred_loss', patience=es_patience, verbose=es_verbose)
        callbacks.append(es_cb)

        z_blank = np.zeros((adata.n_obs, self.latent_k), dtype=np.float32)
        inputs = [z_blank, 
                  adata.X, 
                  adata.obsm['saver_batch'], 
                  np.log(adata.obs.size_factors), 
                  adata.obsm['saver_batch'], 
                  adata.raw.X, 
                  adata.obsm['saver_targetL']]
        outputs = [adata.raw.X, 
                  adata.raw.X, 
                  adata.obsm['saver_targetL']]

        loss = self.model.fit(inputs, outputs,
                              epochs=epochs,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              callbacks=callbacks,
                              validation_split=validation_split,
                              verbose=fit_verbose)


        return loss




    def model_finetune(self, adata, 
                         epochs=300, batch_size=64, 
                         validation_split=0.1, 
                         shuffle=True, fit_verbose=1, 
                         lr_patience=4, 
                         lr_factor=0.1, 
                         lr_verbose=True,
                         es_patience=6,
                         es_verbose=True):

        callbacks = []
        lr_cb = ReduceLROnPlateau(monitor='val_loss', patience=lr_patience, factor=lr_factor, verbose=lr_verbose)
        callbacks.append(lr_cb)
        es_cb = EarlyStopping(monitor='val_loss', patience=es_patience, verbose=es_verbose)
        callbacks.append(es_cb)

        z_blank = np.zeros((adata.n_obs, self.latent_k), dtype=np.float32)
        inputs = [z_blank, 
                  adata.X, 
                  adata.obsm['saver_batch'], 
                  np.log(adata.obs.size_factors), 
                  adata.obsm['saver_batch'], 
                  adata.raw.X, 
                  adata.obsm['saver_targetL']]
        outputs = [adata.raw.X, 
                  adata.raw.X, 
                  adata.obsm['saver_targetL']]

        loss = self.model.fit(inputs, outputs,
                              epochs=epochs,
                              batch_size=batch_size,
                              shuffle=shuffle,
                              callbacks=callbacks,
                              validation_split=validation_split,
                              verbose=fit_verbose)

        return loss
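# Usage sketch (hypothetical AnnData object `adata` carrying the fields used
# above: adata.X, adata.raw.X, adata.obs.size_factors, adata.obsm['saver_batch']
# and adata.obsm['saver_targetL']):
#
#   cvae = CVAE(x_input_size=adata.n_vars,
#               b_input_size=adata.obsm['saver_batch'].shape[1],
#               lb_input_size=adata.obsm['saver_targetL'].shape[1])
#   cvae.build()
#   cvae.compile_model(pred_weight=10.0)
#   history = cvae.model_initialize(adata, epochs=100)
#   z_mean = cvae.predict_latent(adata.X, adata.obsm['saver_batch'])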
Example #29
class DeepCORAL:
    """
    DeepCORAL: Deep CORrelation ALignment
    
    DeepCORAL is an extension of CORAL method. It learns a nonlinear
    transformation which aligns correlations of layer activations in
    deep neural networks.
    
    The method consists in training both an **encoder** and a **task**
    network. The **encoder** network maps input features into new
    encoded ones on which the **task** network is trained.
    
    The parameters of the two networks are optimized in order to
    minimize the following loss function:
    
    .. math::
    
        \mathcal{L} = \mathcal{L}_{task} + \\lambda ||C_S - C_T||_F^2
        
    Where:
    
    - :math:`\mathcal{L}_{task}` is the task loss computed with
      source and labeled target data.
    - :math:`C_S` is the correlation matrix of source data in the
      encoded feature space.
    - :math:`C_T` is the correlation matrix of target data in the
      encoded feature space.
    - :math:`||.||_F` is the Frobenius norm.
    - :math:`\\lambda` is a trade-off parameter.
    
    Thus the **encoder** network learns a new feature representation
    on which the correlation matrices of source and target data are
    "close" and where a **task** network is able to learn the task
    with source labeled data.
    
    Notice that DeepCORAL only uses labeled source and unlabeled target
    data. It therefore belongs to "unsupervised" domain adaptation methods.
    However, labeled target data can be added to the training process
    straightforwardly.
    
    Parameters
    ----------
    get_encoder: callable, optional (default=None)
        Constructor for encoder network.
        The constructor should return a tensorflow compiled Model. 
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network and an ``output_shape``
        argument giving the shape of the last layer.
        If ``None``, a shallow network with 10 neurons is used
        as encoder network.
        
    get_task: callable, optional (default=None)
        Constructor for task network.
        The constructor should return a tensorflow compiled Model. 
        It should also take at least an ``input_shape`` argument
        giving the input shape of the network.
        If ``None``, a linear network is used as task network.
        
    lambdap : float, optional (default=1.0)
        Trade-Off parameter.
               
    enc_params: dict, optional (default=None)
        Additional arguments for ``get_encoder``
        
    task_params: dict, optional (default=None)
        Additional arguments for ``get_task``
        
    compil_params: key, value arguments, optional
        Additional arguments for network compiler
        (loss, optimizer...).
        If none, loss is set to ``"mean_squared_error"``
        and optimizer to ``"adam"``.

    Attributes
    ----------
    encoder_ : tensorflow Model
        Fitted encoder network.
        
    task_ : tensorflow Model
        Fitted task network.
    
    model_ : tensorflow Model
        Fitted model: the union of
        encoder and task networks.
        
    See also
    --------
    CORAL

    References
    ----------
    .. [1] `[1] <https://arxiv.org/pdf/1607.01719.pdf>`_ Sun B. and Saenko K. \
"Deep CORAL: correlation alignment for deep domain adaptation." In ICCV, 2016.
    """
    def __init__(self,
                 get_encoder=None,
                 get_task=None,
                 lambdap=1.0,
                 enc_params=None,
                 task_params=None,
                 **compil_params):
        self.get_encoder = get_encoder
        self.get_task = get_task
        self.lambdap = lambdap
        self.enc_params = enc_params
        self.task_params = task_params
        self.compil_params = compil_params

        if self.get_encoder is None:
            self.get_encoder = get_default_encoder
        if self.get_task is None:
            self.get_task = get_default_task

        if self.enc_params is None:
            self.enc_params = {}
        if self.task_params is None:
            self.task_params = {}

    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            sample_weight=None,
            **fit_params):
        """
        Fit encoder and task networks. 
        
        Source data and unlabeled target data are used for the correlation
        alignment in the encoded space.
        
        Source data and labeled target data are used to learn the task.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.
            
        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        sample_weight : numpy array, optional (default=None)
            Individual weights for each sample.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        self._create_model(X.shape[1:], y.shape[1:])

        if tgt_index_labeled is None:
            task_index = src_index
        else:
            task_index = np.concatenate((src_index, tgt_index_labeled))

        max_size = max((len(src_index), len(tgt_index), len(task_index)))
        resized_src_ind = np.resize(src_index, max_size)
        resized_tgt_ind = np.resize(tgt_index, max_size)
        resized_task_ind = np.resize(task_index, max_size)

        self.model_.fit([
            X[resized_src_ind], X[resized_tgt_ind], X[resized_task_ind],
            y[resized_task_ind],
            np.ones(max_size)
        ], **fit_params)
        return self

    def predict(self, X):
        """
        Return the prediction of task network
        on the encoded features.
        
        Parameters
        ----------
        X: array
            input data
            
        Returns
        -------
        y_pred: array
            prediction of task network
        """
        return self.task_.predict(self.encoder_.predict(X))

    def _create_model(self, shape_X, shape_y):

        self.encoder_ = self.get_encoder(input_shape=shape_X,
                                         **self.enc_params)
        self.task_ = self.get_task(input_shape=self.encoder_.output_shape[1:],
                                   output_shape=shape_y,
                                   **self.task_params)

        input_src = Input(shape_X)
        input_tgt = Input(shape_X)
        input_task = Input(shape_X)
        output_src = Input(shape_y)
        input_ones = Input((1, ))

        encoded_src = self.encoder_(input_src)
        encoded_tgt = self.encoder_(input_tgt)
        encoded_task = self.encoder_(input_task)

        tasked = self.task_(encoded_task)

        compil_params = copy.deepcopy(self.compil_params)
        if "loss" in compil_params:
            task_loss = K.mean(self.compil_params["loss"](output_src, tasked))
            compil_params.pop('loss')
        else:
            task_loss = K.mean(losses.mean_squared_error(output_src, tasked))

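        # Empirical covariances via the ones-vector trick, keeping the batch
        # size n = K.sum(input_ones) symbolic:
        #   C = (X^T X - (1/n) (1^T X)^T (1^T X)) / (n - 1)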
        ones_dot_encoded_src = K.dot(K.transpose(input_ones), encoded_src)
        corr_src = (1 / (K.sum(input_ones) - 1)) * (
            K.dot(K.transpose(encoded_src), encoded_src) -
            (1 / K.sum(input_ones)) *
            K.dot(K.transpose(ones_dot_encoded_src), ones_dot_encoded_src))
        ones_dot_encoded_tgt = K.dot(K.transpose(input_ones), encoded_tgt)
        corr_tgt = (1 / (K.sum(input_ones) - 1)) * (
            K.dot(K.transpose(encoded_tgt), encoded_tgt) -
            (1 / K.sum(input_ones)) *
            K.dot(K.transpose(ones_dot_encoded_tgt), ones_dot_encoded_tgt))

        corr_loss = (1. / 4.) * K.mean(K.square(corr_src - corr_tgt))

        loss = task_loss + self.lambdap * corr_loss

        self.model_ = Model(
            [input_src, input_tgt, input_task, output_src, input_ones],
            [encoded_src, encoded_tgt, tasked],
            name="DeepCORAL")
        self.model_.add_loss(loss)

        if not "optimizer" in compil_params:
            compil_params["optimizer"] = "adam"

        self.model_.compile(**compil_params)

        return self
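# Usage sketch (hypothetical data; X, y are numpy arrays, src_idx and tgt_idx
# index the labeled source and unlabeled target rows of X):
#
#   dcoral = DeepCORAL(lambdap=1.0)
#   dcoral.fit(X, y, src_idx, tgt_idx, epochs=50, batch_size=64, verbose=0)
#   y_tgt = dcoral.predict(X[tgt_idx])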
Example #30
def create_model(x_train, y_train, x_test, y_test, encoding_dim,
                 intermediate_dim, epochs):
    # ---------------------
    # Run Random Forest Classifier
    # ---------------------
    def run_rf(train_img, y_train, test_img, y_test, msg):
        clf = RandomForestClassifier(50)
        clf.fit(train_img, np.squeeze(y_train))

        print(msg + 'Score')
        print(clf.score(test_img, np.squeeze(y_test)))

        return clf.score(test_img, np.squeeze(y_test))

    def sampling(args):
        """Reparameterization trick: sample from an isotropic unit Gaussian.

        # Arguments:
            args (tensor): mean and log of variance of Q(z|X)
        # Returns:
            z (tensor): sampled latent vector
        """

        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        # by default, random_normal has mean=0 and std=1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    original_dim = x_train.shape[1]
    input_shape = (original_dim, )
    batch_size = 32
    latent_dim = encoding_dim
    act_fncs = elu
    learn_rate = 0.0026607621768993824
    lr_decay = 0.0021721614264192577

    # network parameters
    # VAE model = encoder + decoder
    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    x = Dense(intermediate_dim, activation=act_fncs)(inputs)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim, ),
               name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    # plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim, ), name='z_sampling')
    x = Dense(intermediate_dim, activation=act_fncs)(latent_inputs)
    outputs = Dense(original_dim, activation=act_fncs)(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    # plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae_mlp')

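    # Negative ELBO: binary cross-entropy summed over features (hence the
    # original_dim rescaling, since binary_crossentropy averages over the
    # last axis) plus the analytic KL divergence of the diagonal Gaussian
    # posterior from the unit Gaussian prior.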
    reconstruction_loss = binary_crossentropy(inputs, outputs)
    reconstruction_loss *= original_dim
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)

    # no compiled loss or targets: training uses the add_loss term alone
    vae.compile(optimizer=Adam(lr=learn_rate, decay=lr_decay))
    vae.summary()
    # snapshot the initial (untrained) weights so runs can be reset
    vae.save_weights('VAE_weights.h5')


    result = vae.fit(x_train,
                     shuffle=True,
                     epochs=epochs,
                     verbose=1,
                     batch_size=batch_size,
                     validation_data=(x_test, None))

    encoder = Model(inputs, z_mean)
    z_test = encoder.predict(x_test, batch_size=batch_size)
    train_img = encoder.predict(x_train, batch_size=batch_size)
    # evaluate the learned latent space with a downstream random forest
    ok = run_rf(train_img, y_train, z_test, y_test, "VAE-RF ")

    return ok, z_test, train_img, result.history['loss'], result.history[
        'val_loss']
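# Usage sketch (hypothetical: x_train/x_test are 2-D float arrays scaled to
# [0, 1], y_train/y_test are the matching label vectors):
#
#   score, z_test, z_train, loss_hist, val_loss_hist = create_model(
#       x_train, y_train, x_test, y_test,
#       encoding_dim=16, intermediate_dim=64, epochs=20)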