def trainDAE(target, dataPath, refSampleInd, trainIndex, relevantMarkers, mode,
             keepProb, denoise, loadModel, path):
    """Train (or load from disk) a de-noising autoencoder (DAE).

    The DAE is fit on cells pooled from every training sample except the
    reference one, plus the reference (target) sample itself.  Cells with
    more than one zero-valued marker are discarded before training.

    Parameters
    ----------
    target : sample object
        Reference sample; only its ``X`` (cells x markers) matrix is used.
    dataPath : str
        Directory the raw samples are loaded from.
    refSampleInd : int
        Position of the reference sample inside ``trainIndex``.
    trainIndex : np.ndarray
        Indices of all training samples.
    relevantMarkers : array-like
        Marker columns to load for every sample.
    mode : str
        File format forwarded to ``dh.loadDeepCyTOFData`` (e.g. 'CSV').
    keepProb : float
        Per-entry keep probability used to corrupt the DAE input.
    denoise : bool
        If False, nothing is trained/loaded and None is returned.
    loadModel : bool
        If True, load a previously saved model instead of training one.
    path : str
        Sub-directory under 'savemodels/' used to load/save the model.

    Returns
    -------
    keras Model or None
        The trained/loaded autoencoder, or None when ``denoise`` is False.
    """
    # Pool all source samples (every training sample except the reference),
    # keeping only cells with at most numZerosOK zero-valued markers.
    numZerosOK = 1
    sourceIndices = np.delete(trainIndex, refSampleInd)  # hoisted out of loop
    chunks = []
    for sourceIndex in sourceIndices:
        source = dh.loadDeepCyTOFData(dataPath, sourceIndex,
                                      relevantMarkers, mode)
        toKeepS = np.sum(source.X == 0, axis=1) <= numZerosOK
        chunks.append(source.X[toKeepS])
    # Single concatenate instead of growing the array each iteration;
    # falls back to an empty array when there are no source samples.
    sourceX = np.concatenate(chunks, axis=0) if chunks else np.asarray([])

    # Pre-process the pooled source cells (log transform).
    sourceX = np.log(1 + np.abs(sourceX))

    # Apply the same zero-filter to the target (reference) sample.
    toKeepT = np.sum(target.X == 0, axis=1) <= numZerosOK

    inputDim = target.X.shape[1]
    ae_encodingDim = 25
    l2_penalty_ae = 1e-2

    # BUGFIX: the original left ``autoencoder`` unbound when ``denoise`` was
    # False, so the final ``return`` raised UnboundLocalError.  Return None
    # explicitly in that case instead.
    autoencoder = None
    if denoise:
        if loadModel:
            from keras.models import load_model
            autoencoder = load_model(
                os.path.join(io.DeepLearningRoot(),
                             'savemodels/' + path + '/denoisedAE.h5'))
        else:
            # Train the DAE: the clean data is the regression target and the
            # input is the same data corrupted by Bernoulli(keepProb) noise.
            trainTarget_ae = np.concatenate([sourceX, target.X[toKeepT]],
                                            axis=0)
            trainData_ae = trainTarget_ae * np.random.binomial(
                n=1, p=keepProb, size=trainTarget_ae.shape)

            # NOTE(review): this file uses the legacy Keras 1.x API
            # (W_regularizer, input=/output=, nb_epoch) — kept as-is for
            # consistency with the installed Keras version.
            input_cell = Input(shape=(inputDim,))
            encoded = Dense(ae_encodingDim, activation='relu',
                            W_regularizer=l2(l2_penalty_ae))(input_cell)
            encoded1 = Dense(ae_encodingDim, activation='relu',
                            W_regularizer=l2(l2_penalty_ae))(encoded)
            decoded = Dense(inputDim, activation='linear',
                            W_regularizer=l2(l2_penalty_ae))(encoded1)
            autoencoder = Model(input=input_cell, output=decoded)
            autoencoder.compile(optimizer='rmsprop', loss='mse')
            autoencoder.fit(trainData_ae, trainTarget_ae, nb_epoch=80,
                            batch_size=128, shuffle=True,
                            validation_split=0.1, verbose=0,
                            callbacks=[mn.monitor(),
                                       cb.EarlyStopping(monitor='val_loss',
                                                        patience=25,
                                                        mode='auto')])
            autoencoder.save(os.path.join(io.DeepLearningRoot(),
                                          'savemodels/' + path +
                                          '/denoisedAE.h5'))

    # Free the pooled matrix and close any figures opened by the monitor
    # callback during training.
    del sourceX
    plt.close('all')
    return autoencoder
# One shared index set: every sample serves for both training and testing.
testIndex = dataIndex
trainIndex = dataIndex

# Zero-based column indices of the 28 relevant markers (files are 1-based).
relevantMarkers = np.arange(28)

mode = 'CSV'
numClasses = 57
keepProb = .8

# Choose the reference sample among the training samples, timing the search.
print('Choose the reference sample between ' + str(trainIndex))
t_begin = tm.time()
refSampleInd = dh.chooseReferenceSample(dataPath, trainIndex,
                                        relevantMarkers, mode, choice)
t_end = tm.time()
print('reference sample selection time: ' + str(t_end - t_begin))

# Load the chosen reference sample as the target.
print('Load the target ' + str(trainIndex[refSampleInd]))
target = dh.loadDeepCyTOFData(dataPath, trainIndex[refSampleInd],
                              relevantMarkers, mode)

# Pre-process the target — skipped for choice 5 (presumably that data set is
# already pre-processed; verify against the loader).
if choice != 5:
    target = dh.preProcessSamplesCyTOFData(target)

# Train the de-noising auto encoder.
# Generate the output table. acc = np.zeros(numSample[choice]) F1 = np.zeros(numSample[choice]) ''' For each single sample of the chosen data set, train a feed-forward neural net classifier using 25% of cells, and test the performance using the rest 75% of cells. ''' print('Data set name: ', dataSet[choice]) for i in range(numSample[choice]): # Load sample. print('Load sample ', str(i + 1)) sample = dh.loadDeepCyTOFData(dataPath, i + 1, range(relevantMarkers[choice]), 'CSV', skip_header=1) # Pre-process sample. print('Pre-process sample ', str(i + 1)) sample = dh.preProcessSamplesCyTOFData(sample) sample, preprocessor = dh.standard_scale(sample, preprocessor=None) # Split data into training and testing. print('Split data into training and testing.') trainSample, testSample = dh.splitData(sample, test_size=.75) # Train a feed-forward neural net classifier on the training data. print('Train a feed-forward neural net classifier on the training data.') classifier = net.trainClassifier(trainSample,