def build(self, hp):
    maxnorm = hp.Choice('maxnorm', values=self.hyperparam['maxnorm'])

    resnet_model = Resnet(input_shape=(self.seqlen, self.channels), norm_max=maxnorm)
    if self.pretrained_weights is not None:
      resnet_model.set_weights(self.pretrained_weights)
    inp = Input(shape=(self.seqlen, self.channels))
    enc_inp = resnet_model(inp)

    dense_units = hp.Int('preclassification',
                         min_value=self.hyperparam['dense_units']['min'],
                         max_value=self.hyperparam['dense_units']['max'],
                         step=self.hyperparam['dense_units']['step'])
    dense_out = Dense(units=dense_units, activation='relu',
                      kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                      bias_constraint=MaxNorm(maxnorm, axis=0),
                      kernel_initializer=glorot_uniform(seed=0))(enc_inp)
    dense_out = Dropout(rate=hp.Choice('dropout', values=self.hyperparam['dropout']))(dense_out)
    output = Dense(self.num_classes, activation='softmax',
                   kernel_constraint=MaxNorm(maxnorm, axis=[0, 1]),
                   bias_constraint=MaxNorm(maxnorm, axis=0),
                   kernel_initializer=glorot_uniform(seed=0))(dense_out)
    model = Model(inputs=inp, outputs=output)

    model.compile(optimizer=Adam(lr=hp.Choice('lr', values=self.hyperparam['lr'])),
                  loss=focal_loss(), metrics=['accuracy', macro_f1])

    return model
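
The focal_loss() factory used in model.compile above is not shown in this snippet. A minimal sketch of a categorical focal loss with the same zero-argument call pattern (a hypothetical stand-in with illustrative defaults, not the project's actual implementation) could be:

import tensorflow.keras.backend as K

def focal_loss(gamma=2.0, alpha=0.25):
    # Returns a Keras-compatible loss; y_true is assumed to be one-hot encoded.
    def loss_fn(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
        cross_entropy = -y_true * K.log(y_pred)
        # Down-weight easy examples with the modulating factor (1 - p_t)^gamma.
        weight = alpha * K.pow(1.0 - y_pred, gamma)
        return K.sum(weight * cross_entropy, axis=-1)
    return loss_fn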
Example 2
    def loss(self):
        ######### -*- Softmax Loss -*- #########
        self.softmax_b1, self.ce1 = pw_softmaxwithloss_2d(self.Y, self.b1)
        self.softmax_b2, self.ce2 = pw_softmaxwithloss_2d(self.Y, self.b2)
        self.softmax_b3, self.ce3 = pw_softmaxwithloss_2d(self.Y, self.b3)
        self.softmax_b4, self.ce4 = pw_softmaxwithloss_2d(self.Y, self.b4)
        self.softmax_fuse, self.cefuse = pw_softmaxwithloss_2d(
            self.Y, self.fuse)
        self.total_ce = self.ce1 + self.ce2 + self.ce3 + self.ce4 + self.cefuse

        ######### -*- Focal Loss -*- #########
        self.fl = focal_loss(self.Y,
                             self.o,
                             alpha=self.alpha,
                             gamma=self.gamma)

        ######### -*- Total Loss -*- #########
        self.total_loss = self.total_ce + self.fl_weight * self.fl
Example 3
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dropout(0.2))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))

model.summary()

model.compile(loss=lambda y, y_hat: focal_loss(y, y_hat, gamma),
              optimizer=RMSprop(),
              metrics=['accuracy'])

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
def compile_model(model, num_classes, metrics, loss, lr):
    from keras.losses import binary_crossentropy
    from keras.losses import categorical_crossentropy

    from keras.metrics import binary_accuracy
    from keras.metrics import categorical_accuracy

    from keras.optimizers import Adam

    from metrics import dice_coeff
    from metrics import jaccard_index
    from metrics import class_jaccard_index
    from metrics import pixelwise_precision
    from metrics import pixelwise_sensitivity
    from metrics import pixelwise_specificity
    from metrics import pixelwise_recall

    from losses import focal_loss

    if isinstance(loss, str):
        if loss in {'ce', 'crossentropy'}:
            if num_classes == 1:
                loss = binary_crossentropy
            else:
                loss = categorical_crossentropy
        elif loss in {'focal', 'focal_loss'}:
            loss = focal_loss(num_classes)
        else:
            raise ValueError('unknown loss %s' % loss)

    if isinstance(metrics, str):
        metrics = [metrics, ]

    for i, metric in enumerate(metrics):
        if not isinstance(metric, str):
            continue
        elif metric == 'acc':
            metrics[i] = binary_accuracy if num_classes == 1 else categorical_accuracy
        elif metric == 'jaccard_index':
            metrics[i] = jaccard_index(num_classes)
        elif metric == 'jaccard_index0':
            metrics[i] = class_jaccard_index(0)
        elif metric == 'jaccard_index1':
            metrics[i] = class_jaccard_index(1)
        elif metric == 'jaccard_index2':
            metrics[i] = class_jaccard_index(2)
        elif metric == 'jaccard_index3':
            metrics[i] = class_jaccard_index(3)
        elif metric == 'jaccard_index4':
            metrics[i] = class_jaccard_index(4)
        elif metric == 'jaccard_index5':
            metrics[i] = class_jaccard_index(5)
        elif metric == 'dice_coeff':
            metrics[i] = dice_coeff(num_classes)
        elif metric == 'pixelwise_precision':
            metrics[i] = pixelwise_precision(num_classes)
        elif metric == 'pixelwise_sensitivity':
            metrics[i] = pixelwise_sensitivity(num_classes)
        elif metric == 'pixelwise_specificity':
            metrics[i] = pixelwise_specificity(num_classes)
        elif metric == 'pixelwise_recall':
            metrics[i] = pixelwise_recall(num_classes)
        else:
            raise ValueError('metric %s not recognized' % metric)

    model.compile(optimizer=Adam(lr=lr),
                  loss=loss,
                  metrics=metrics)
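
For reference, a typical call might look like this (the model object, class count, and learning rate are illustrative placeholders; the loss and metric strings are among the shortcuts handled above):

compile_model(segmentation_model,
              num_classes=5,
              metrics=['acc', 'jaccard_index', 'dice_coeff'],
              loss='focal',
              lr=1e-4)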
def build_model(lr, l2, img_shape, activation='sigmoid'):
    ##############
    # BRANCH MODEL
    ##############
    regul = regularizers.l2(l2)
    optim = Adam(lr=lr)
    kwargs = {'padding': 'same', 'kernel_regularizer': regul}

    inp = Input(shape=img_shape)  # 384x384x1
    x = Conv2D(64, (9, 9), strides=2, activation='relu',
               **kwargs)(inp)  # 192x192x64

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 96x96x64
    for _ in range(2):
        x = BatchNormalization()(x)
        x = Conv2D(64, (3, 3), activation='relu', **kwargs)(x)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 48x48x64
    x = BatchNormalization()(x)
    x = Conv2D(128, (1, 1), activation='relu', **kwargs)(x)  # 48x48x128
    for _ in range(4):
        x = subblock(x, 64, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 24x24x128
    x = BatchNormalization()(x)
    x = Conv2D(256, (1, 1), activation='relu', **kwargs)(x)  # 24x24x256
    for _ in range(4):
        x = subblock(x, 64, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 12x12x256
    x = BatchNormalization()(x)
    x = Conv2D(384, (1, 1), activation='relu', **kwargs)(x)  # 12x12x384
    for _ in range(4):
        x = subblock(x, 96, **kwargs)

    x = MaxPooling2D((2, 2), strides=(2, 2))(x)  # 6x6x384
    x = BatchNormalization()(x)
    x = Conv2D(512, (1, 1), activation='relu', **kwargs)(x)  # 6x6x512
    for _ in range(4):
        x = subblock(x, 128, **kwargs)

    x = GlobalMaxPooling2D()(x)  # 512
    branch_model = Model(inp, x)

    ############
    # HEAD MODEL
    ############
    mid = 32
    xa_inp = Input(shape=branch_model.output_shape[1:])
    xb_inp = Input(shape=branch_model.output_shape[1:])
    x1 = Lambda(lambda x: x[0] * x[1])([xa_inp, xb_inp])
    x2 = Lambda(lambda x: x[0] + x[1])([xa_inp, xb_inp])
    x3 = Lambda(lambda x: K.abs(x[0] - x[1]))([xa_inp, xb_inp])
    x4 = Lambda(lambda x: K.square(x))(x3)
    x = Concatenate()([x1, x2, x3, x4])  # ?x2048
    x = Reshape((4, branch_model.output_shape[1], 1),
                name='reshape1')(x)  # ?x4x512x1

    # Per feature NN with shared weight is implemented using CONV2D with appropriate stride.
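    # After the reshape above, each of the 512 branch features occupies one column of
    # height 4 (its x1, x2, x3, x4 values); the (4, 1) valid convolution below applies
    # the same mid-unit transform to every column, i.e. a shared-weight per-feature MLP.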
    x = Conv2D(mid, (4, 1), activation='relu',
               padding='valid')(x)  # ?x1x512xmid
    x = Reshape((branch_model.output_shape[1], mid, 1))(x)  # ?x512xmidx1
    x = Conv2D(1, (1, mid), activation='linear',
               padding='valid')(x)  # ?x512x1x1
    x = Flatten(name='flatten')(x)  # ?x512

    # Weighted sum implemented as a Dense layer.
    x = Dense(1, use_bias=True, activation=activation,
              name='weighted-average')(x)  # ?x1
    head_model = Model([xa_inp, xb_inp], x, name='head')

    ########################
    # SIAMESE NEURAL NETWORK
    ########################
    # Complete model is constructed by calling the branch model on each input image,
    # and then the head model on the resulting 512-vectors.
    img_a = Input(shape=img_shape)
    img_b = Input(shape=img_shape)
    xa = branch_model(img_a)
    xb = branch_model(img_b)
    x = head_model([xa, xb])
    model = Model([img_a, img_b], x)
    model.compile(optim,
                  loss=focal_loss(gamma=2., alpha=.5),
                  metrics=['binary_crossentropy', 'acc'])
    # model.compile(optim, loss='binary_crossentropy', metrics=['binary_crossentropy', 'acc'])
    print(f'loss_functions is : {model.loss_functions}')
    return model, branch_model, head_model
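
For reference, the three models can be obtained with a call such as the following (the learning rate and L2 strength are placeholders, not the original training settings; the input shape matches the 384x384x1 comment above):

model, branch_model, head_model = build_model(lr=1e-4,
                                              l2=2e-4,
                                              img_shape=(384, 384, 1),
                                              activation='sigmoid')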
Example 6
def main(argv):
    indir = args.indir
    mode = args.mode  # binary or multiclass or nonwear
    outdir = args.outdir

    if mode == 'multiclass':
        states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM', 'Wake_ext']
    elif mode == 'binary':
        states = ['Wake', 'Sleep', 'Wake_ext']
        collate_states = ['NREM 1', 'NREM 2', 'NREM 3', 'REM']
    elif mode == 'nonwear':
        states = ['Wear', 'Nonwear']
        collate_states = ['Wake', 'NREM 1', 'NREM 2', 'NREM 3', 'REM']

    valid_states = [state for state in states if state != 'Wake_ext']
    num_classes = len(valid_states)

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    resultdir = os.path.join(outdir, mode, 'models')
    if not os.path.exists(resultdir):
        os.makedirs(resultdir)

    # Read data from disk
    data = pd.read_csv(os.path.join(indir, 'features_30.0s.csv'))
    labels = data['label'].values
    users = data['user'].values
    if mode == 'binary':
        labels = np.array(
            ['Sleep' if lbl in collate_states else lbl for lbl in labels])
    elif mode == 'nonwear':
        labels = np.array(
            ['Wear' if lbl in collate_states else lbl for lbl in labels])

    # Read raw data
    shape_df = pd.read_csv(os.path.join(indir, 'datashape_30.0s.csv'))
    num_samples = shape_df['num_samples'].values[0]
    seqlen = shape_df['num_timesteps'].values[0]
    n_channels = shape_df['num_channels'].values[0]
    raw_data = np.memmap(os.path.join(indir, 'rawdata_30.0s.npz'),
                         dtype='float32',
                         mode='r',
                         shape=(num_samples, seqlen, n_channels))

    # Hyperparameters
    lr = args.lr  # learning rate
    num_epochs = args.num_epochs
    batch_size = args.batchsize
    max_seqlen = 1504
    num_channels = args.num_channels  # number of raw data channels
    feat_channels = args.feat_channels  # Add ENMO, z-angle and LIDS as additional channels

    # Use nested cross-validation based on users
    # Outer CV
    unique_users = list(set(users))
    random.shuffle(unique_users)
    cv_splits = 5
    user_cnt = Counter(users[np.isin(labels, valid_states)]).most_common()
    samp_per_fold = len(users) // cv_splits

    # Get users to be used in test for each fold such that each fold has similar
    # number of samples
    fold_users = [[] for i in range(cv_splits)]
    fold_cnt = [[] for i in range(cv_splits)]
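    # Greedy balancing: users are taken in decreasing order of sample count and each is
    # assigned to the fold that is currently furthest below the per-fold target
    # (samp_per_fold), so that folds end up with roughly equal numbers of samples.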
    for user, cnt in user_cnt:
        idx = -1
        maxdiff = 0
        for j in range(cv_splits):
            if (samp_per_fold - sum(fold_cnt[j])) > maxdiff:
                maxdiff = samp_per_fold - sum(fold_cnt[j])
                idx = j
        fold_users[idx].append(user)
        fold_cnt[idx].append(cnt)

    predictions = []
    if mode != 'nonwear':
        wake_idx = states.index('Wake')
        wake_ext_idx = states.index('Wake_ext')
    for fold in range(cv_splits):
        print('Evaluating fold %d' % (fold + 1))
        test_users = fold_users[fold]
        trainval_users = [(key, val) for key, val in user_cnt
                          if key not in test_users]
        random.shuffle(trainval_users)
        # validation data is approximately 10% of total samples
        val_samp = 0.1 * sum([tup[1] for tup in user_cnt])
        nval = 0
        val_sum = 0
        while (val_sum < val_samp):
            val_sum += trainval_users[nval][1]
            nval += 1
        val_users = [key for key, val in trainval_users[:nval]]
        train_users = [key for key, val in trainval_users[nval:]]
        print('#users: Train = {:d}, Val = {:d}, Test = {:d}'.format(
            len(train_users), len(val_users), len(test_users)))

        # Create partitions
        # make a copy to change wake_ext for this fold
        fold_labels = np.array(
            [states.index(lbl) if lbl in states else -1 for lbl in labels])
        train_indices = get_partition(raw_data,
                                      fold_labels,
                                      users,
                                      train_users,
                                      states,
                                      mode,
                                      is_train=True)
        val_indices = get_partition(raw_data, fold_labels, users, val_users,
                                    states, mode)
        test_indices = get_partition(raw_data, fold_labels, users, test_users,
                                     states, mode)
        nsamples = len(train_indices) + len(val_indices) + len(test_indices)
        print('Train: {:0.2f}%, Val: {:0.2f}%, Test: {:0.2f}%'\
                .format(len(train_indices)*100.0/nsamples, len(val_indices)*100.0/nsamples,\
                        len(test_indices)*100.0/nsamples))

        if mode != 'nonwear':
            chosen_indices = train_indices[
                fold_labels[train_indices] != wake_ext_idx]
        else:
            chosen_indices = train_indices
        class_wts = class_weight.compute_class_weight(
            class_weight='balanced',
            classes=np.unique(fold_labels[chosen_indices]),
            y=fold_labels[chosen_indices])

        # Rename wake_ext as wake for training samples
        if mode != 'nonwear':
            rename_indices = train_indices[fold_labels[train_indices] ==
                                           wake_ext_idx]
            fold_labels[rename_indices] = wake_idx

        print('Train', Counter(np.array(fold_labels)[train_indices]))
        print('Val', Counter(np.array(fold_labels)[val_indices]))
        print('Test', Counter(np.array(fold_labels)[test_indices]))

        # Data generators for computing statistics
        stat_gen = DataGenerator(train_indices, raw_data, fold_labels, valid_states, partition='stat',\
                                  batch_size=batch_size, seqlen=seqlen, n_channels=num_channels, feat_channels=feat_channels,\
                                  n_classes=num_classes, shuffle=True)
        mean, std = stat_gen.fit()
        np.savez(os.path.join(resultdir, 'Fold' + str(fold + 1) + '_stats'),
                 mean=mean,
                 std=std)

        # Data generators for train/val/test
        train_gen = DataGenerator(train_indices, raw_data, fold_labels, valid_states, partition='train',\
                                  batch_size=batch_size, seqlen=seqlen, n_channels=num_channels, feat_channels=feat_channels,\
                                  n_classes=num_classes, shuffle=True, augment=True, aug_factor=0.75, balance=True,
                                  mean=mean, std=std)
        val_gen = DataGenerator(val_indices, raw_data, fold_labels, valid_states, partition='val',\
                                batch_size=batch_size, seqlen=seqlen, n_channels=num_channels, feat_channels=feat_channels,\
                                n_classes=num_classes, mean=mean, std=std)
        test_gen = DataGenerator(test_indices, raw_data, fold_labels, valid_states, partition='test',\
                                 batch_size=batch_size, seqlen=seqlen, n_channels=num_channels, feat_channels=feat_channels,\
                                 n_classes=num_classes, mean=mean, std=std)

        # Create model
        # Use batchnorm as first step since computing mean and std
        # across entire dataset is time-consuming
        model = FCN(input_shape=(seqlen, num_channels + feat_channels),
                    max_seqlen=max_seqlen,
                    num_classes=len(valid_states),
                    norm_max=args.maxnorm)
        #print(model.summary())
        model.compile(optimizer=Adam(lr=lr),
                      loss=focal_loss(),
                      metrics=['accuracy', macro_f1])

        # Train model
        # Use callback to compute F-scores over entire validation data
        metrics_cb = Metrics(val_data=val_gen, batch_size=batch_size)
        # Use early stopping and model checkpoints to handle overfitting and save best model
        model_checkpt = ModelCheckpoint(os.path.join(resultdir,'fold'+str(fold+1)+'_'+mode+'-{epoch:02d}-{val_f1:.4f}.h5'),\
                                                     monitor='val_f1',\
                                                     mode='max', save_best_only=True)
        batch_renorm_cb = BatchRenormScheduler(len(train_gen))
        history = model.fit(
            train_gen,
            epochs=num_epochs,
            validation_data=val_gen,
            verbose=1,
            shuffle=False,
            callbacks=[batch_renorm_cb, metrics_cb, model_checkpt],
            workers=2,
            max_queue_size=20,
            use_multiprocessing=False)

        # Plot training history
        plot_results(fold+1, history.history['loss'], history.history['val_loss'],\
                     os.path.join(resultdir,'Fold'+str(fold+1)+'_'+mode+'_loss.jpg'), metric='Loss')
        plot_results(fold+1, history.history['accuracy'], history.history['val_accuracy'],\
                     os.path.join(resultdir,'Fold'+str(fold+1)+'_'+mode+'_accuracy.jpg'), metric='Accuracy')
        plot_results(fold+1, history.history['macro_f1'], metrics_cb.val_f1,\
                     os.path.join(resultdir,'Fold'+str(fold+1)+'_'+mode+'_macro_f1.jpg'), metric='Macro F1')

        # Predict probability on validation data using best model
        best_model_file, epoch, val_f1 = get_best_model(resultdir, fold + 1)
        print('Predicting with model saved at Epoch={:d} with val_f1={:0.4f}'.
              format(epoch, val_f1))
        model.load_weights(os.path.join(resultdir, best_model_file))
        probs = model.predict(test_gen)
        y_pred = probs.argmax(axis=1)
        y_true = fold_labels[test_indices]
        predictions.append(
            (users[test_indices], data.iloc[test_indices]['timestamp'],
             data.iloc[test_indices]['filename'], test_indices, y_true, probs))

        # Save user report
        cv_save_classification_result(
            predictions,
            valid_states,
            os.path.join(
                resultdir, 'fold' + str(fold + 1) + '_deeplearning_' + mode +
                '_results.csv'),
            method='dl')
        cv_get_classification_report(predictions,
                                     mode,
                                     valid_states,
                                     method='dl')

    cv_get_classification_report(predictions, mode, valid_states, method='dl')

    # Save user report
    cv_save_classification_result(predictions,
                                  valid_states,
                                  os.path.join(
                                      resultdir,
                                      'deeplearning_' + mode + '_results.csv'),
                                  method='dl')
Example 7
        x = Dropout(dr)(x)
        x = Dense(d, activation=Mish())(x)
        x = LayerNormalization()(x)
    outputs = Dense(n_class, activation="softmax")(x)
    model = Model(inputs, outputs)
    return model


model = dense_model(**model_pars)

cosine = cb.CosineAnnealingScheduler(T_max=50,
                                     eta_max=1e-3,
                                     eta_min=1e-5,
                                     verbose=1,
                                     epoch_start=5)
loss = l.focal_loss(gamma=3., alpha=6.)
model.compile(Ranger(learning_rate=1e-3), loss=loss, metrics=["accuracy"])

print(model.summary())

model.fit(
    train,
    epochs=55,
    validation_data=validation,
    callbacks=[
        ModelCheckpoint(
            "main.h5",
            monitor="val_loss",
            save_best_only=True,
            save_weights_only=False,
        ),
Example 8
    def build(self):
        """
        feature_size: N
        field_size: F
        embedding_size: K
        batch_size:  None
        """
        self.feat_index = tf.placeholder(tf.int32,
                                         shape=[None, None],
                                         name='feature_index')
        self.feat_value = tf.placeholder(tf.float32,
                                         shape=[None, None],
                                         name='feature_value')
        self.label = tf.placeholder(tf.float32, shape=[None, 1], name='label')
        self.keep_prob = tf.placeholder(tf.float32, shape=[],
                                        name='keep_prob')  # scalar
        self.is_training = tf.placeholder(tf.bool,
                                          shape=[],
                                          name='is_training')

        #1. ----------------------- Define the weights -----------------------
        # Weights for the first-order (linear) terms of the FM part
        self.weight['first_order'] = tf.Variable(
            tf.random_normal([self.feature_size, 1], 0.0, 0.05),  # N * 1
            name='first_order')
        # Weights between the one-hot encoded input layer and the dense embedding layer, i.e. the DNN input embeddings
        self.weight['embedding_weight'] = tf.Variable(
            tf.random_normal([self.feature_size, self.embedding_size], 0.0,
                             0.05),  # N*K
            name='embedding_weight')
        # Weights and biases of the deep part; its initial input dimension is input_size = F*K
        num_layer = len(self.deep_layers)
        input_size = self.field_size * self.embedding_size
        # glorot_normal = np.sqrt(2.0 / (input_size + self.deep_layers[0])) # for sigmoid
        he_normal = np.sqrt(2.0 / input_size)  # for relu

        self.weight['layer_0'] = tf.Variable(np.random.normal(
            loc=0, scale=he_normal, size=(input_size, self.deep_layers[0])),
                                             dtype=np.float32)
        self.weight['bias_0'] = tf.Variable(np.random.normal(
            loc=0, scale=he_normal, size=(1, self.deep_layers[0])),
                                            dtype=np.float32)

        # Create the weight and bias for every layer of the deep network
        for i in range(1, num_layer):
            he_normal = np.sqrt(2.0 / (self.deep_layers[i - 1]))
            self.weight['layer_' + str(i)] = tf.Variable(np.random.normal(
                loc=0,
                scale=he_normal,
                size=(self.deep_layers[i - 1], self.deep_layers[i])),
                                                         dtype=np.float32)
            self.weight['bias_' + str(i)] = tf.Variable(np.random.normal(
                loc=0, scale=he_normal, size=(1, self.deep_layers[i])),
                                                        dtype=np.float32)

        # deep part output_size + first-order output_size + second-order output_size
        last_layer_size = (self.deep_layers[-1] + self.field_size +
                           self.embedding_size)
        glorot_normal = np.sqrt(2.0 / (last_layer_size + 1))
        # Create the weight and bias of the final layer
        self.weight['last_layer'] = tf.Variable(np.random.normal(
            loc=0, scale=glorot_normal, size=(last_layer_size, 1)),
                                                dtype=np.float32)
        self.weight['last_bias'] = tf.Variable(tf.constant(0.0),
                                               dtype=np.float32)

        #2. ----------------------- Forward pass -----------------------
        # None*F*K
        self.embedding_index = tf.nn.embedding_lookup(
            self.weight['embedding_weight'], self.feat_index)
        # [None*F*K] .*[None*F*1] = None*F*K
        self.embedding_part = tf.multiply(
            self.embedding_index,
            tf.reshape(self.feat_value, [-1, self.field_size, 1]))

        # First-order features of the FM part
        # None * F*1
        self.embedding_first = tf.nn.embedding_lookup(
            self.weight['first_order'], self.feat_index)
        #[None*F*1].*[None*F*1] = None*F*1
        self.embedding_first = tf.multiply(
            self.embedding_first,
            tf.reshape(self.feat_value, [-1, self.field_size, 1]))
        # None*F
        self.first_order = tf.reduce_sum(self.embedding_first, 2)

        # Second-order features, None*K
        self.sum_second_order = tf.reduce_sum(self.embedding_part, 1)
        self.sum_second_order_square = tf.square(self.sum_second_order)
        self.square_second_order = tf.square(self.embedding_part)
        self.square_second_order_sum = tf.reduce_sum(self.square_second_order,
                                                     1)
        # 1/2*((a+b)^2 - a^2 - b^2)=ab
        # None*K
        self.second_order = 0.5 * tf.subtract(self.sum_second_order_square,
                                              self.square_second_order_sum)
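        # This is the standard O(K) FM identity: for each embedding dimension k,
        # sum_{i<j} (v_ik x_i)(v_jk x_j) = 0.5 * [(sum_i v_ik x_i)^2 - sum_i (v_ik x_i)^2],
        # i.e. 0.5 * (sum_second_order_square - square_second_order_sum) computed above.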

        # Output of the FM part, None*(F+K)
        self.fm_part = tf.concat([self.first_order, self.second_order], axis=1)

        # DNN part
        # None*(F*K)
        self.deep_embedding = tf.reshape(
            self.embedding_part, [-1, self.field_size * self.embedding_size])

        # Fully connected layers
        for i in range(0, len(self.deep_layers)):
            self.deep_embedding = tf.add(
                tf.matmul(self.deep_embedding, self.weight["layer_%d" % i]),
                self.weight["bias_%d" % i])
            # self.deep_embedding =tf.matmul(self.deep_embedding, self.weight["layer_%d" % i])
            self.bn_out = tf.layers.batch_normalization(
                self.deep_embedding, training=self.is_training)
            # self.bn_out = tf.layers.dropout(self.deep_embedding, rate=self.keep_prob,training=self.is_training)
            self.deep_embedding = self.activate(self.bn_out)
            self.deep_embedding = tf.layers.dropout(self.deep_embedding,
                                                    rate=1.0 - self.keep_prob,
                                                    training=self.is_training)

        # Concatenate FM output with DNN output, None*(F+K+deep_layers[-1])
        din_all = tf.concat([self.fm_part, self.deep_embedding], axis=1)
        #None*1
        self.out = tf.add(tf.matmul(din_all, self.weight['last_layer']),
                          self.weight['last_bias'])

        #3. ----------------------- Define the loss -----------------------
        # Loss part, None*1
        self.prob = tf.nn.sigmoid(self.out)
        # self.entropy_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels= self.label, logits= self.out))
        # self.entropy_loss = -tf.reduce_mean(
        #     self.label * tf.log(tf.clip_by_value(self.prob, 1e-10, 1.0))+ (1 - self.label)* tf.log(tf.clip_by_value(1-self.prob,1e-10,1.0)))
        self.entropy_loss = focal_loss(self.prob,
                                       self.label,
                                       alpha=0.5,
                                       gamma=2)
        # self.entropy_loss = weighted_binary_crossentropy(self.prob, self.label, pos_ratio=self.pos_ratio)

        # L2 regularization: sum(w^2)/2 * l2_reg_coef

        self.reg_loss = tf.contrib.layers.l2_regularizer(self.l2_reg_coef)(
            self.weight["last_layer"])
        for i in range(len(self.deep_layers)):
            self.reg_loss += tf.contrib.layers.l2_regularizer(
                self.l2_reg_coef)(self.weight["layer_%d" % i])
            # tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(self.l2_reg_coef)(self.weight['layer_1']))
        # print(self.entropy_loss.shape.as_list(), self.reg_loss.shape.as_list())
        self.loss = self.entropy_loss + self.reg_loss

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.learning_rate = tf.train.exponential_decay(self.learning_rate,
                                                        self.global_step,
                                                        3000,
                                                        0.99,
                                                        staircase=False)
        opt = tf.train.AdamOptimizer(self.learning_rate)
        # opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        trainable_params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, trainable_params)
        clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)
        with tf.control_dependencies(update_ops):
            # self.train_op = opt.minimize(self.loss, global_step = self.global_step)
            self.train_op = opt.apply_gradients(zip(clip_gradients,
                                                    trainable_params),
                                                global_step=self.global_step)
        self.saver = tf.train.Saver(max_to_keep=3)
    loss0 = dice_coef_loss(
        roi_masks_pos0[:, :, :, 1], mask_logits0[:, :, :, 1]) + dice_coef_loss(
            roi_masks_pos1[:, :, :, 1], mask_logits1[:, :, :, 1])

with tf.name_scope("loss_Xent"):
    tf_mask0 = tf.cast(mask_logits0, tf.float32)
    tf_mask1 = tf.cast(mask_logits1, tf.float32)
    tf0 = pixel_wise_loss(tf_mask0, roi_masks_pos0, pixel_weights=None)
    tf1 = pixel_wise_loss(tf_mask1, roi_masks_pos1, pixel_weights=None)

    loss1 = (tf0 + tf1) / 3.0

with tf.name_scope("loss_focal"):
    focal = focal_loss(
        mask_logits0[:, :, :, 0], roi_masks_pos0[:, :, :, 0]) + focal_loss(
            mask_logits1[:, :, :, 0], roi_masks_pos1[:, :, :, 0])

with tf.name_scope("loss"):
    loss = loss0 + sce_weight * loss1

with tf.name_scope("train"):
    solver = tf.train.AdamOptimizer(learning_rate, epsilon=1e-8)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = solver.minimize(loss1)
    loss_op = [loss0, loss1]

tf.summary.scalar('Dice_p', -dice1)
tf.summary.scalar('Dice_pz', -dice2)