def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
    # Train (or load) a de-noising autoencoder on cells pooled from the
    # source samples and the target sample, and return it for later denoising.
    sourceX = []
    for i in np.arange(trainIndex.size-1):
        sourceIndex = np.delete(trainIndex, refSampleInd)[i]
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
        # Keep only cells with at most numZerosOK zero-valued markers.
        numZerosOK = 1
        toKeepS = np.sum((source.X == 0), axis=1) <= numZerosOK
        if i == 0:
            sourceX = source.X[toKeepS]
        else:
            sourceX = np.concatenate([sourceX, source.X[toKeepS]], axis = 0)
        
    # preProcess source
    sourceX = np.log(1 + np.abs(sourceX))
    
    numZerosOK=1
    toKeepT = np.sum((target.X==0), axis = 1) <= numZerosOK
    
    inputDim = target.X.shape[1]
    
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2
    
    if denoise:
        if loadModel:
            from keras.models import load_model
            autoencoder = load_model(os.path.join(io.DeepLearningRoot(), path + '/denoisedAE.h5'))
        else:
            # train de-noising auto encoder and save it.
            trainTarget_ae = np.concatenate([sourceX, target.X[toKeepT]],
                                            axis=0)
            trainData_ae = trainTarget_ae * np.random.binomial(n=1, p=keepProb,
                                                size = trainTarget_ae.shape)
        
            input_cell = Input(shape=(inputDim,))
            encoded = Dense(ae_encodingDim, activation='relu',
                            kernel_regularizer=l2(l2_penalty_ae))(input_cell)
            encoded1 = Dense(ae_encodingDim, activation='relu',
                             kernel_regularizer=l2(l2_penalty_ae))(encoded)
            decoded = Dense(inputDim, activation='linear',
                            kernel_regularizer=l2(l2_penalty_ae))(encoded1)
        
            autoencoder = Model(inputs=input_cell, outputs=decoded)
            autoencoder.compile(optimizer='rmsprop', loss='mse')
            autoencoder.fit(trainData_ae, trainTarget_ae, epochs=80,
                            batch_size=128, shuffle=True,
                            validation_split=0.1, verbose=0,
                            callbacks=[mn.monitor(), cb.EarlyStopping(
                                monitor='val_loss', patience=25, mode='auto')])
            autoencoder.save(os.path.join(io.DeepLearningRoot(), path + '/denoisedAE.h5'))
            del sourceX
            plt.close('all')
        
        return autoencoder
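
# Illustrative usage sketch (not part of the original source). It assumes the
# module's own helpers (dh.loadDeepCyTOFData, Sample-style objects with .X/.y)
# and uses placeholder indices and paths.
#
#   target = dh.loadDeepCyTOFData(dataPath, refSampleInd,
#                                 relevantMarkers, mode)
#   denoiseAE = trainDAE(target, dataPath, refSampleInd, trainIndex,
#                        relevantMarkers, mode, keepProb=0.8,
#                        denoise=True, loadModel=False, path='GvHD')
#   denoisedX = denoiseAE.predict(target.X)   # apply the trained DAE
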
def trainClassifier(trainSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4,
                    path = 'None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    
    # Shift labels so they start from 0.
    y_train = np.int_(y_train) - 1

    
    # Special case in GvHD: labels in these files are 0, 1, 3, 4 with no 2,
    # so close the gap to keep the classes contiguous.
    if mode == 'GvHD' and (i == 5 or i == 9 or
                           i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels to work with sparse categorical cross-entropy.
    y_train = np.expand_dims(y_train, -1)
    
    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)
#    numClasses = len(np.unique(trainSample.y)) - 1   # with 0 class
    numClasses = len(np.unique(trainSample.y))       # without 0 class
#    numClasses = 57                                   # for HMIS-2
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)
    
    net = Model(inputs = inputLayer, outputs = outputLayer)
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr = 0.0)

    net.compile(optimizer = optimizer, 
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0, 
            callbacks=[lrate, mn.monitor(),
            cb.EarlyStopping(monitor = 'val_loss',
                             patience = 25, mode = 'auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
    #plt.close('all')
    
    return net
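
# Illustrative usage sketch (not part of the original source). trainSample and
# testSample are assumed to be Sample-style objects with .X (cells x markers)
# and .y (1-based labels, 0 = unlabeled); 'GvHD' and the path are placeholders.
#
#   classifier = trainClassifier(trainSample, mode='GvHD', i=0,
#                                hiddenLayersSizes=[12, 6, 3],
#                                activation='softplus', l2_penalty=1e-4,
#                                path='GvHD')
#   predProbs = classifier.predict(testSample.X)
#   predLabel = np.argmax(predProbs, axis=1) + 1   # back to 1-based labels
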
def calibrate(target, source, sourceIndex, predLabel, path):
    # Train an MMD-ResNet that calibrates the source sample toward the
    # target distribution and return the calibrated source as a Sample.

    mmdNetLayerSizes = [25, 25]
    l2_penalty = 1e-2
    #init = lambda shape:initializers.normal(shape, scale=.1e-4)
    space_dim = target.X.shape[1]

    calibInput = Input(shape=(space_dim, ))
    block1_bn1 = BatchNormalization()(calibInput)
    block1_a1 = Activation('relu')(block1_bn1)
    block1_w1 = Dense(mmdNetLayerSizes[0],
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer='random_uniform')(block1_a1)
    block1_bn2 = BatchNormalization()(block1_w1)
    block1_a2 = Activation('relu')(block1_bn2)
    block1_w2 = Dense(space_dim,
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer='random_uniform')(block1_a2)
    block1_output = Add()([block1_w2, calibInput])
    block2_bn1 = BatchNormalization()(block1_output)
    block2_a1 = Activation('relu')(block2_bn1)
    block2_w1 = Dense(mmdNetLayerSizes[1],
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer='random_uniform')(block2_a1)
    block2_bn2 = BatchNormalization()(block2_w1)
    block2_a2 = Activation('relu')(block2_bn2)
    block2_w2 = Dense(space_dim,
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer='random_uniform')(block2_a2)
    block2_output = Add()([block2_w2, block1_output])
    block3_bn1 = BatchNormalization()(block2_output)
    block3_a1 = Activation('relu')(block3_bn1)
    block3_w1 = Dense(mmdNetLayerSizes[1],
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer='random_uniform')(block3_a1)
    block3_bn2 = BatchNormalization()(block3_w1)
    block3_a2 = Activation('relu')(block3_bn2)
    block3_w2 = Dense(space_dim,
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer='random_uniform')(block3_a2)
    block3_output = Add()([block3_w2, block2_output])

    calibMMDNet = Model(inputs=calibInput, outputs=block3_output)

    # Use a random 20% subsample of the target cells for the MMD estimate.
    n = target.X.shape[0]
    p = np.random.permutation(n)
    toTake = p[:int(.2 * n)]
    targetXMMD = target.X[toTake]
    targetYMMD = target.y[toTake]

    targetXMMD = targetXMMD[targetYMMD != 0]
    targetYMMD = targetYMMD[targetYMMD != 0]

    targetYMMD = np.reshape(targetYMMD, (-1, 1))

    # Use a random 20% subsample of the source cells for the MMD estimate.
    n = source.X.shape[0]
    p = np.random.permutation(n)
    toTake = p[:int(.2 * n)]
    sourceXMMD = source.X[toTake]
    sourceYMMD = predLabel[toTake]

    sourceXMMD = sourceXMMD[sourceYMMD != 0]
    sourceYMMD = sourceYMMD[sourceYMMD != 0]

    sourceYMMD = np.reshape(sourceYMMD, (-1, 1))

    lrate = LearningRateScheduler(step_decay)
    optimizer = opt.RMSprop(lr=0.0)
    calibMMDNet.compile(
        optimizer=optimizer,
        loss=lambda y_true, y_pred: cf.MMD(
            block3_output, targetXMMD, MMDTargetValidation_split=0.1).
        KerasCost(y_true, y_pred))

    # Dummy labels; the MMD cost only uses the network outputs.
    sourceLabels = np.zeros(sourceXMMD.shape[0])

    calibMMDNet.fit(sourceXMMD,
                    sourceLabels,
                    epochs=500,
                    batch_size=1000,
                    validation_split=0.1,
                    verbose=0,
                    callbacks=[
                        lrate,
                        mn.monitorMMD(sourceXMMD, sourceYMMD, targetXMMD,
                                      targetYMMD, calibMMDNet.predict),
                        cb.EarlyStopping(monitor='val_loss',
                                         patience=20,
                                         mode='auto')
                    ])
    plt.close('all')
    calibMMDNet.save_weights(
        os.path.join(
            io.DeepLearningRoot(),
            'savemodels/' + path + '/ResNet' + str(sourceIndex) + '.h5'))
    calibrateSource = Sample(calibMMDNet.predict(source.X), source.y)
    calibMMDNet = None
    return calibrateSource
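
# Illustrative usage sketch (not part of the original source). predLabel is
# assumed to be the 1-based label vector predicted for source.X (e.g. from the
# classifier sketch above); sourceIndex and path are placeholders.
#
#   calibratedSource = calibrate(target, source, sourceIndex=1,
#                                predLabel=predLabel, path='GvHD')
#   # calibratedSource.X holds the source cells after MMD-ResNet calibration.
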
def plotHidden(trainSample, testSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4,
                    path = 'None'):
    # Train the same feed-forward classifier as trainClassifier and visualize
    # the labeled test cells in the 3-D space of its third hidden layer.
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    x_test = testSample.X[testSample.y != 0]
    y_test = testSample.y[testSample.y != 0]
    
    # Shift labels so they start from 0.
    y_train = np.int_(y_train) - 1
    y_test = np.int_(y_test) - 1

    
    # Special case in GvHD: labels in these files are 0, 1, 3, 4 with no 2,
    # so close the gap to keep the classes contiguous.
    if mode == 'GvHD' and (i == 5 or i == 9 or
                           i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels to work with sparse categorical cross-entropy.
    y_train = np.expand_dims(y_train, -1)
    y_test = np.expand_dims(y_test, -1)
    
    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)
    numClasses = len(np.unique(trainSample.y)) - 1
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)
    
    encoder = Model(inputs = inputLayer, outputs = hidden3)
    # plot data in the 3rd hidden layer
    h3_data = encoder.predict(x_test, verbose = 0)
    #fig, (ax1) = plt1.subplots(1,1, subplot_kw={'projection':'3d'})
    #ax1.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20, c = np.squeeze(y_test))
    
    fig = plt1.figure()
    ax = fig.add_subplot(111, projection = '3d')
    ax.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20, c = np.squeeze(y_test))
    #ax1.set_title('data in 3rd hidden layer')
    plt1.show()
    
    net = Model(inputs = inputLayer, outputs = outputLayer)
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr = 0.0)

    net.compile(optimizer = optimizer, 
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0, 
            callbacks=[lrate, mn.monitor(),
            cb.EarlyStopping(monitor = 'val_loss',
                             patience = 25, mode = 'auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
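
# Illustrative usage sketch (not part of the original source): scatter the
# labeled test cells in the 3-D space of the third hidden layer. Argument
# values are placeholders.
#
#   plotHidden(trainSample, testSample, mode='GvHD', i=0,
#              hiddenLayersSizes=[12, 6, 3], activation='softplus',
#              l2_penalty=1e-4, path='GvHD')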