Example #1
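# NOTE: this snippet assumes the repository's usual imports: os, numpy as np,
# matplotlib.pyplot as plt, keras.callbacks as cb, Input/Dense/Model from
# keras, l2 from keras.regularizers, and the project modules dh, io and mn.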
def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
    sourceX = []
    for i in np.arange(trainIndex.size-1):
        sourceIndex = np.delete(trainIndex, refSampleInd)[i]
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
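        # Keep only cells with at most numZerosOK zero-valued markers.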
        numZerosOK=1
        toKeepS = np.sum((source.X==0), axis = 1) <= numZerosOK
        if i == 0:
            sourceX = source.X[toKeepS]
        else:
            sourceX = np.concatenate([sourceX, source.X[toKeepS]], axis = 0)
        
    # Pre-process source: compress the dynamic range with log(1 + |x|).
    sourceX = np.log(1 + np.abs(sourceX))
    
    numZerosOK=1
    toKeepT = np.sum((target.X==0), axis = 1) <= numZerosOK
    
    inputDim = target.X.shape[1]
    
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2
    
    if denoise:
        if loadModel:
            from keras.models import load_model
            autoencoder = load_model(os.path.join(io.DeepLearningRoot(),
                                                  'savemodels/' + path + '/denoisedAE.h5'))
        else:
            # train de-noising auto encoder and save it.
            trainTarget_ae = np.concatenate([sourceX, target.X[toKeepT]],
                                            axis=0)
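            # Corrupt the clean data with multiplicative Bernoulli (dropout)
            # noise; the autoencoder is trained to reconstruct the clean values.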
            trainData_ae = trainTarget_ae * np.random.binomial(
                n=1, p=keepProb, size=trainTarget_ae.shape)
        
            input_cell = Input(shape=(inputDim,))
            encoded = Dense(ae_encodingDim, activation='relu',
                            kernel_regularizer=l2(l2_penalty_ae))(input_cell)
            encoded1 = Dense(ae_encodingDim, activation='relu',
                             kernel_regularizer=l2(l2_penalty_ae))(encoded)
            decoded = Dense(inputDim, activation='linear',
                            kernel_regularizer=l2(l2_penalty_ae))(encoded1)
        
            autoencoder = Model(inputs=input_cell, outputs=decoded)
            autoencoder.compile(optimizer='rmsprop', loss='mse')
            autoencoder.fit(trainData_ae, trainTarget_ae, epochs=80,
                            batch_size=128, shuffle=True,
                            validation_split=0.1, verbose=0,
                            callbacks=[mn.monitor(), cb.EarlyStopping(
                                monitor='val_loss', patience=25, mode='auto')])
            autoencoder.save(os.path.join(io.DeepLearningRoot(),
                                          'savemodels/' + path + '/denoisedAE.h5'))
            del sourceX
            plt.close('all')
        
        return autoencoder
Example #2
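# NOTE: assumes Input, Dense, Activation, BatchNormalization and add from
# keras.layers, Model from keras.models, l2 from keras.regularizers,
# initializers from keras, plus os and the project module io.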
def loadModel(target, source, sourceIndex, predLabel, path):
    mmdNetLayerSizes = [25, 25]
    l2_penalty = 1e-2
    # Keras 2 initializer (replaces the Keras 1 lambda-style init).
    init = initializers.RandomNormal(stddev=.1e-4)
    space_dim = target.X.shape[1]

    calibInput = Input(shape=(space_dim, ))
    block1_bn1 = BatchNormalization()(calibInput)
    block1_a1 = Activation('relu')(block1_bn1)
    block1_w1 = Dense(mmdNetLayerSizes[0],
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block1_a1)
    block1_bn2 = BatchNormalization()(block1_w1)
    block1_a2 = Activation('relu')(block1_bn2)
    block1_w2 = Dense(space_dim,
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block1_a2)
    block1_output = add([block1_w2, calibInput])
    block2_bn1 = BatchNormalization()(block1_output)
    block2_a1 = Activation('relu')(block2_bn1)
    block2_w1 = Dense(mmdNetLayerSizes[1],
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block2_a1)
    block2_bn2 = BatchNormalization()(block2_w1)
    block2_a2 = Activation('relu')(block2_bn2)
    block2_w2 = Dense(space_dim,
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block2_a2)
    block2_output = add([block2_w2, block1_output])
    block3_bn1 = BatchNormalization()(block2_output)
    block3_a1 = Activation('relu')(block3_bn1)
    block3_w1 = Dense(mmdNetLayerSizes[1],
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block3_a1)
    block3_bn2 = BatchNormalization()(block3_w1)
    block3_a2 = Activation('relu')(block3_bn2)
    block3_w2 = Dense(space_dim,
                      activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block3_a2)
    block3_output = add([block3_w2, block2_output])

    calibMMDNet = Model(inputs=calibInput, outputs=block3_output)

    calibMMDNet.load_weights(
        os.path.join(
            io.DeepLearningRoot(),
            'savemodels/' + path + '/ResNet' + str(sourceIndex) + '.h5'))

    return calibMMDNet
Example #3
def loadDeepCyTOFData(dataPath,
                      dataIndex,
                      relevantMarkers,
                      mode,
                      skip_header=0):
    if mode == 'CSV.GZ':
        data_filename = dataPath + "/" + str(
            dataIndex)  # I'm just going to give it the file name
        X = pd.read_csv(os.path.join(io.DeepLearningRoot(),
                                     data_filename)).to_numpy()
        # print(np.shape(X))
        actual = pd.read_csv(
            os.path.join(io.DeepLearningRoot(),
                         data_filename.replace("/x/", "/y/")))
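        # Convert the one-hot label columns to integer class labels (1..K);
        # rows with no positive column keep the 0 (unlabeled) label.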
        labels = pd.DataFrame([0] * len(actual))
        for aci in range(len(actual.columns)):
            labels[actual[actual.columns[aci]] == 1] = aci + 1
        labels = [
            item for sublist in labels.values.tolist() for item in sublist
        ]

    else:
        if mode == 'CSV':
            data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
            X = genfromtxt(os.path.join(io.DeepLearningRoot(), data_filename),
                           delimiter=',',
                           skip_header=skip_header)
        if mode == 'FCS':
            data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
            _, X = fcsparser.parse(os.path.join(io.DeepLearningRoot(),
                                                data_filename),
                                   reformat_meta=True)
            X = X.to_numpy()  # as_matrix() was removed in pandas 1.0
        label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
        labels = genfromtxt(os.path.join(io.DeepLearningRoot(),
                                         label_filename),
                            delimiter=',')
    labels = np.int_(labels)

    X = X[:, relevantMarkers]
    sample = Sample(X, labels)

    return sample
Example #4
def trainClassifier(trainSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4,
                    path = 'None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    
    # Shift labels so classes start from 0.
    y_train = np.int_(y_train) - 1

    
    # Special case in GvHD: labels in these files are 0, 1, 3, 4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or 
                           i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)
    
    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)
#    numClasses = len(np.unique(trainSample.y)) - 1   # with 0 class
    numClasses = len(np.unique(trainSample.y))       # without 0 class
#    numClasses = 57                                   # for HMIS-2
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)
    
    net = Model(inputs = inputLayer, outputs = outputLayer)
    lrate = LearningRateScheduler(step_decay)
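    # lr = 0.0 is a placeholder; the LearningRateScheduler(step_decay)
    # callback sets the actual learning rate each epoch.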
    optimizer = keras.optimizers.RMSprop(lr = 0.0)

    net.compile(optimizer = optimizer, 
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0, 
            callbacks=[lrate, mn.monitor(),
            cb.EarlyStopping(monitor = 'val_loss',
                             patience = 25, mode = 'auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
    #plt.close('all')
    
    return net
Example #5
def loadDeepCyTOFData(dataPath,
                      dataIndex,
                      relevantMarkers,
                      mode,
                      skip_header=0):
    if mode == 'CSV':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.csv'
        X = genfromtxt(os.path.join(io.DeepLearningRoot(), data_filename),
                       delimiter=',',
                       skip_header=skip_header)
    if mode == 'FCS':
        data_filename = dataPath + '/sample' + str(dataIndex) + '.fcs'
        _, X = fcsparser.parse(os.path.join(io.DeepLearningRoot(),
                                            data_filename),
                               reformat_meta=True)
        X = X.to_numpy()
    X = X[:, relevantMarkers]
    label_filename = dataPath + '/labels' + str(dataIndex) + '.csv'
    labels = genfromtxt(os.path.join(io.DeepLearningRoot(), label_filename),
                        delimiter=',')
    labels = np.int_(labels)
    sample = Sample(X, labels)

    return sample
Example #6
print('Train the de-noising auto encoder.')
start = tm.time()
DAE = dae.trainDAE(target, dataPath, refSampleInd, trainIndex,
                   relevantMarkers, mode, keepProb, denoise,
                   loadModel, dataSet[choice])
denoiseTarget = dae.predictDAE(target, DAE, denoise)

'''
Train the feed-forward classifier on (de-noised) target.
'''
denoiseTarget, preprocessor = dh.standard_scale(denoiseTarget,
                                                preprocessor = None)

if loadModel:
    from keras.models import load_model
    cellClassifier = load_model(os.path.join(io.DeepLearningRoot(),
                                'savemodels/' + dataSet[choice] +
                                '/cellClassifier.h5'))
else:
    print('Train the classifier on de-noised Target')
    cellClassifier = net.trainClassifier(denoiseTarget, mode, refSampleInd,
                                         hiddenLayersSizes,
                                         activation,
                                         l2_penalty,
                                         dataSet[choice])
end = tm.time()
print('Training time: ' + str(end - start))
    
'''
Test the performance with and without calibration.
'''
Example #7
def calibrate(target, source, sourceIndex, predLabel, path):
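    # NOTE: assumes the same Keras/project imports as the snippets above,
    # with initializers from keras, opt = keras.optimizers, and cf
    # supplying the MMD cost function (cf.MMD).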
    
    mmdNetLayerSizes = [25, 25]
    l2_penalty = 1e-2
    init = initializers.RandomNormal(stddev=.1e-4)  # Keras 2 form of the old init lambda
    space_dim = target.X.shape[1]
    
    calibInput = Input(shape=(space_dim,))
    block1_bn1 = BatchNormalization()(calibInput)
    block1_a1 = Activation('relu')(block1_bn1)
    block1_w1 = Dense(mmdNetLayerSizes[0], activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block1_a1)
    block1_bn2 = BatchNormalization()(block1_w1)
    block1_a2 = Activation('relu')(block1_bn2)
    block1_w2 = Dense(space_dim, activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block1_a2)
    block1_output = add([block1_w2, calibInput])
    block2_bn1 = BatchNormalization()(block1_output)
    block2_a1 = Activation('relu')(block2_bn1)
    block2_w1 = Dense(mmdNetLayerSizes[1], activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block2_a1)
    block2_bn2 = BatchNormalization()(block2_w1)
    block2_a2 = Activation('relu')(block2_bn2)
    block2_w2 = Dense(space_dim, activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block2_a2)
    block2_output = add([block2_w2, block1_output])
    block3_bn1 = BatchNormalization()(block2_output)
    block3_a1 = Activation('relu')(block3_bn1)
    block3_w1 = Dense(mmdNetLayerSizes[1], activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block3_a1)
    block3_bn2 = BatchNormalization()(block3_w1)
    block3_a2 = Activation('relu')(block3_bn2)
    block3_w2 = Dense(space_dim, activation='linear',
                      kernel_regularizer=l2(l2_penalty),
                      kernel_initializer=init)(block3_a2)
    block3_output = add([block3_w2, block2_output])
    
    calibMMDNet = Model(inputs=calibInput, outputs=block3_output)

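    # Subsample 20% of the target cells for the MMD loss and drop the
    # unlabeled (y == 0) cells; the source is subsampled the same way below.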
    n = target.X.shape[0]
    p = np.random.permutation(n)
    toTake = p[range(int(.2*n))] 
    targetXMMD = target.X[toTake]
    targetYMMD = target.y[toTake]
    
    targetXMMD = targetXMMD[targetYMMD!=0]
    targetYMMD = targetYMMD[targetYMMD!=0]
    
    targetYMMD = np.reshape(targetYMMD, (-1, 1))

    n = source.X.shape[0]
    p = np.random.permutation(n)
    toTake = p[range(int(.2*n))] 
    sourceXMMD = source.X[toTake]
    sourceYMMD = predLabel[toTake]
    
    sourceXMMD = sourceXMMD[sourceYMMD!=0]
    sourceYMMD = sourceYMMD[sourceYMMD!=0]
    
    sourceYMMD = np.reshape(sourceYMMD, (-1, 1))

    lrate = LearningRateScheduler(step_decay)
    optimizer = opt.RMSprop(lr=0.0)
    calibMMDNet.compile(optimizer=optimizer,
                        loss=lambda y_true, y_pred:
                        cf.MMD(block3_output, targetXMMD,
                               MMDTargetValidation_split=0.1).KerasCost(y_true, y_pred))

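    # Dummy zero labels: the MMD loss ignores y_true, but Keras requires a
    # target array to fit against.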
    sourceLabels = np.zeros(sourceXMMD.shape[0])

    calibMMDNet.fit(sourceXMMD, sourceLabels, epochs=500,
                    batch_size=1000, validation_split=0.1, verbose=0,
                    callbacks=[lrate,
                               mn.monitorMMD(sourceXMMD, sourceYMMD,
                                             targetXMMD, targetYMMD,
                                             calibMMDNet.predict),
                               cb.EarlyStopping(monitor='val_loss',
                                                patience=20, mode='auto')])
    plt.close('all')
    calibMMDNet.save_weights(os.path.join(
        io.DeepLearningRoot(),
        'savemodels/' + path + '/ResNet' + str(sourceIndex) + '.h5'))
    calibrateSource = Sample(calibMMDNet.predict(source.X), source.y)
    calibMMDNet = None
    return calibrateSource
Example #8
def plotHidden(trainSample, testSample, mode = 'None', i = 0,
                    hiddenLayersSizes = [12, 6, 3],
                    activation = 'softplus', l2_penalty = 1e-4,
                    path = 'None'):
    # Remove unlabeled cells for training.
    x_train = trainSample.X[trainSample.y != 0]
    y_train = trainSample.y[trainSample.y != 0]
    x_test = testSample.X[testSample.y != 0]
    y_test = testSample.y[testSample.y != 0]
    
    # Shift labels so classes start from 0.
    y_train = np.int_(y_train) - 1
    y_test = np.int_(y_test) - 1

    
    # Special case in GvHD: labels in these files are 0, 1, 3, 4 with no 2.
    if mode == 'GvHD' and (i == 5 or i == 9 or 
                           i == 10 or i == 11):
        y_train[y_train != 0] = y_train[y_train != 0] - 1

    # Expand labels, to work with sparse categorical cross entropy.
    y_train = np.expand_dims(y_train, -1)
    y_test = np.expand_dims(y_test, -1)
    
    # Construct a feed-forward neural network.
    inputLayer = Input(shape = (x_train.shape[1],))
    hidden1 = Dense(hiddenLayersSizes[0], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(inputLayer)
    hidden2 = Dense(hiddenLayersSizes[1], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden1)
    hidden3 = Dense(hiddenLayersSizes[2], activation = activation,
                    kernel_regularizer = l2(l2_penalty))(hidden2)
    numClasses = len(np.unique(trainSample.y)) - 1
    outputLayer = Dense(numClasses, activation = 'softmax')(hidden3)
    
    encoder = Model(inputs = inputLayer, outputs = hidden3)
    # plot data in the 3rd hidden layer
    h3_data = encoder.predict(x_test, verbose = 0)
    #fig, (ax1) = plt1.subplots(1,1, subplot_kw={'projection':'3d'})
    #ax1.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20, c = np.squeeze(y_test))
    
    fig = plt1.figure()
    ax = fig.add_subplot(111, projection = '3d')
    ax.scatter(h3_data[:,0], h3_data[:,1], h3_data[:,2], s = 20, c = np.squeeze(y_test))
    #ax1.set_title('data in 3rd hidden layer')
    plt1.show()
    
    net = Model(inputs = inputLayer, outputs = outputLayer)
    lrate = LearningRateScheduler(step_decay)
    optimizer = keras.optimizers.RMSprop(lr = 0.0)

    net.compile(optimizer = optimizer, 
                loss = 'sparse_categorical_crossentropy')
    net.fit(x_train, y_train, epochs = 80, batch_size = 128, shuffle = True,
            validation_split = 0.1, verbose = 0, 
            callbacks=[lrate, mn.monitor(),
            cb.EarlyStopping(monitor = 'val_loss',
                             patience = 25, mode = 'auto')])
    try:
        net.save(os.path.join(io.DeepLearningRoot(),
                              'savemodels/' + path + '/cellClassifier.h5'))
    except OSError:
        pass
Example #9
'''
Train the de-noising auto encoder.
'''
print('Train the de-noising auto encoder.')
DAE = dae.trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers,
                   mode, keepProb, denoise, loadModel, dataSet[choice])
denoiseTarget = dae.predictDAE(target, DAE, denoise)
'''
Train the feed-forward classifier on (de-noised) target.
'''
denoiseTarget, preprocessor = dh.standard_scale(denoiseTarget,
                                                preprocessor=None)

if loadModel:
    from keras.models import load_model
    cellClassifier = load_model(
        os.path.join(io.DeepLearningRoot(),
                     'savemodels/' + dataSet[choice] + '/cellClassifier.h5'))
else:
    print('Train the classifier on de-noised Target')
    cellClassifier = net.trainClassifier(denoiseTarget, mode, refSampleInd,
                                         hiddenLayersSizes, activation,
                                         l2_penalty, dataSet[choice])
'''
Test the performance with and without calibration.
'''
# Generate the output table.
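# Two columns (before / after calibration) when isCalibrate is set,
# otherwise a single column.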
dim = 2 if isCalibrate else 1
acc = np.zeros((testIndex.size, dim), np.float16)
F1 = np.zeros((testIndex.size, dim), np.float16)
mmd_before = np.zeros(testIndex.size)
mmd_after = np.zeros(testIndex.size)
Example #10
activation = 'softplus'
l2_penalty = 1e-4
'''
The user needs to specify the data set to run the cell classifier.

Make your choice here - an integer from 0 to 4.
0: NDD
1: CFSE
2: StemCell
3: Lymph
4: GvHD
'''
choice = 4

# Generate the path of the chosen data set.
dataPath = os.path.join(io.DeepLearningRoot(), 'Data/FlowCAP-I/',
                        dataSet[choice])

# Generate the output table.
acc = np.zeros(numSample[choice])
F1 = np.zeros(numSample[choice])
'''
For each sample of the chosen data set, train a feed-forward neural
network classifier on 25% of the cells and test its performance on the
remaining 75% of cells.
'''
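# Hypothetical sketch of the 25%/75% split described above (helper names
# assumed from the loader snippets; not part of the original script):
#   perm = np.random.permutation(sample.X.shape[0])
#   cut = int(0.25 * perm.size)
#   trainSample = Sample(sample.X[perm[:cut]], sample.y[perm[:cut]])
#   testSample = Sample(sample.X[perm[cut:]], sample.y[perm[cut:]])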
print('Data set name: ', dataSet[choice])
for i in range(numSample[choice]):
    # Load sample.
    print('Load sample ', str(i + 1))
    sample = dh.loadDeepCyTOFData(dataPath,