# Assumed module-level imports (present in the surrounding project, not shown
# in this excerpt): gc, numpy, theano, theano.tensor as T,
# functools.partial, ICHISeqDataReader, and the hmmlearn `hmm` module.

def pretrain_sda_cg(sda, train_names, read_window, read_algo, read_rank,
                    window_size, pretraining_epochs, corruption_levels):
    ## Pre-train layer-wise
    print '... getting the pretraining functions'
    import scipy.optimize
    for i in xrange(sda.n_layers):
        train_reader = ICHISeqDataReader(train_names)
        n_train_patients = len(train_names)
        for patients in xrange(n_train_patients):
            train_set_x, train_set_y = train_reader.read_next_doc(
                algo=read_algo,
                window=read_window,
                rank=read_rank
            )
            pretraining_fn, pretraining_update = pretraining_functions_sda_cg(
                sda=sda,
                train_set_x=train_set_x,
                window_size=window_size,
                corruption_levels=corruption_levels
            )
            print '... pre-training the model'
            # using scipy conjugate gradient optimizer
            print("Optimizing using scipy.optimize.fmin_cg...")
            best_w_b = scipy.optimize.fmin_cg(
                f=partial(pretraining_fn, da_index=i),
                x0=numpy.zeros(
                    (sda.dA_layers[i].n_visible + 1) * sda.dA_layers[i].n_hidden,
                    dtype=sda.dA_layers[i].input.dtype
                ),
                fprime=partial(pretraining_update, da_index=i),
                maxiter=pretraining_epochs
            )
    return sda
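# The x0 size used above, (n_visible + 1) * n_hidden, comes from flattening
# the dA's weight matrix W (n_visible x n_hidden) together with its hidden
# bias b (n_hidden,) into the single 1-D parameter vector that
# scipy.optimize.fmin_cg expects. A minimal sketch of that packing; the
# helper names below are illustrative and not part of this project:

def pack_da_params(W, b):
    # flatten weights and bias into one vector for the scipy optimizer
    return numpy.concatenate([W.ravel(), b.ravel()])

def unpack_da_params(theta, n_visible, n_hidden):
    # recover W and b from the flat vector of size (n_visible + 1) * n_hidden
    W = theta[:n_visible * n_hidden].reshape((n_visible, n_hidden))
    b = theta[n_visible * n_hidden:]
    return W, b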
def test_hmm(gen_hmm, test_names, read_window, read_algo, read_rank):
    test_reader = ICHISeqDataReader(test_names)
    n_test_patients = len(test_names)
    error_array = []
    for i in xrange(n_test_patients):
        # get data divided into sequences with respect to labels
        test_x, test_y = test_reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window,
            divide=False
        )
        # compute mean error value for one patient in the test set
        patient_error = mean_error(
            gen_hmm=gen_hmm,
            obs_seq=test_x.get_value(),
            actual_states=test_y.eval()
        )
        error_array.append(patient_error)
        print(patient_error, ' error for patient ' + str(test_names[i]))
        gc.collect()
    return error_array
def create_hmm(train_data_names, n_hidden, n_visible,
               read_algo, read_rank, read_window):
    train_reader = ICHISeqDataReader(train_data_names)
    n_train_patients = len(train_data_names)

    pi_values = numpy.zeros((n_hidden,))
    a_values = numpy.zeros((n_hidden, n_hidden))
    b_values = numpy.zeros((n_hidden, n_visible))
    array_from_hidden = numpy.zeros((n_hidden,))

    for train_patient in xrange(n_train_patients):
        # get data divided into sequences with respect to labels
        train_set_x, train_set_y = train_reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window
        )
        pi_values, a_values, b_values, array_from_hidden = update_params_on_patient(
            pi_values=pi_values,
            a_values=a_values,
            b_values=b_values,
            array_from_hidden=array_from_hidden,
            hiddens_patient=train_set_y.eval(),
            visibles_patient=train_set_x.eval(),
            n_hidden=n_hidden
        )
        gc.collect()

    pi_values, a_values, b_values = finish_training(
        pi_values=pi_values,
        a_values=a_values,
        b_values=b_values,
        array_from_hidden=array_from_hidden,
        n_hidden=n_hidden,
        n_patients=n_train_patients
    )

    # use the standard hmmlearn MultinomialHMM with the estimated parameters
    hmm_model = hmm.MultinomialHMM(n_components=n_hidden)
    hmm_model.startprob_ = pi_values
    hmm_model.transmat_ = a_values
    hmm_model.n_symbols = n_visible
    hmm_model.emissionprob_ = b_values
    gc.collect()
    return hmm_model
def test(hmm_model, valid_data, read_algo, read_window, read_rank,
         predict_algo):
    valid_reader = ICHISeqDataReader(valid_data)
    patient_errors = []
    for valid_patient in valid_data:
        # get data divided into sequences with respect to labels
        valid_set_x, valid_set_y = valid_reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window
        )
        patient_error = get_error_on_patient(
            model=hmm_model,
            visible_set=valid_set_x.eval(),
            hidden_set=valid_set_y.eval(),
            algo=predict_algo,
            pat=valid_patient,
            all_labels=True
        )
        patient_errors.append(patient_error)
        gc.collect()
    # return one error value per patient
    return patient_errors
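# A hedged usage sketch tying create_hmm and test together. The patient
# names appear in this project's data lists; the algo/rank/window settings
# and n_visible are illustrative assumptions, not recommended values.

def example_hmm_pipeline():
    hmm_model = create_hmm(
        train_data_names=['p002', 'p003', 'p005'],
        n_hidden=7,       # seven hidden states, as used in finetune_hmm2
        n_visible=12,     # illustrative; must match the reader's encoding
        read_algo='filter+avg_disp',
        read_rank=1,
        read_window=10
    )
    errors = test(
        hmm_model=hmm_model,
        valid_data=['p007'],
        read_algo='filter+avg_disp',
        read_window=10,
        read_rank=1,
        predict_algo='viterbi'
    )
    return errors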
def validate_model(sda, valid_names, read_window, read_algo, read_rank,
                   window_size):
    valid_reader = ICHISeqDataReader(valid_names)
    valid_errors = []
    for i in xrange(len(valid_names)):
        valid_x, valid_y = valid_reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window,
            divide=False
        )
        valid_x = valid_x.get_value()
        valid_y = valid_y.eval()
        n_valid_times = valid_x.shape[0] - window_size + 1

        # pass each window through the stacked dA layers
        new_valid_x = numpy.array(
            [sda.get_da_output(
                valid_x[time: time + window_size]
            ).ravel() for time in xrange(n_valid_times)]
        )
        # align labels with the centre of each window
        half_window_size = int(window_size / 2)
        new_valid_y = valid_y[half_window_size: n_valid_times + half_window_size]

        # compute mean error value for one patient in the validation set
        pat_error = mean_error(
            gen_hmm=sda.hmm1,
            obs_seq=new_valid_x,
            actual_states=new_valid_y
        )
        valid_errors.append(pat_error)
    return numpy.mean(valid_errors)
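# The half-window offset above pairs each window of observations with the
# label at the window's centre. A self-contained numpy illustration of that
# indexing (toy values):

def window_alignment_demo():
    signal = numpy.arange(10)        # 10 time steps
    labels = numpy.arange(10) * 10   # one label per time step
    window_size = 5
    n_times = signal.shape[0] - window_size + 1   # 6 windows
    windows = numpy.array(
        [signal[t: t + window_size] for t in xrange(n_times)]
    )
    half = int(window_size / 2)                   # 2
    aligned = labels[half: n_times + half]        # centre label per window
    return windows, aligned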
def test_da_params(corruption_level):
    learning_rates = [0.001, 0.003, 0.005, 0.007, 0.009, 0.011, 0.013, 0.015]
    window_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

    train_data = ['p10a', 'p011', 'p013', 'p014', 'p020', 'p022', 'p040',
                  'p045', 'p048']
    valid_data = ['p09b', 'p023', 'p035', 'p038']
    test_data = ['p09a', 'p033']

    train_reader = ICHISeqDataReader(train_data)
    train_set, train_labels = train_reader.read_all()

    valid_reader = ICHISeqDataReader(valid_data)
    valid_set, valid_labels = valid_reader.read_all()

    test_reader = ICHISeqDataReader(test_data)
    test_set, test_labels = test_reader.read_all()

    output_folder = ('[%s], [%s], [%s]') % (",".join(train_data),
                                            ",".join(valid_data),
                                            ",".join(test_data))

    for lr in learning_rates:
        for ws in window_sizes:
            train_dA(
                learning_rate=lr,
                training_epochs=1,
                window_size=ws,
                corruption_level=corruption_level,
                n_hidden=ws * 2,
                train_set=train_set,
                output_folder=output_folder
            )
def train(self, train_names, valid_names, read_window, read_algo, read_rank,
          train_epochs):
    for epoch in xrange(train_epochs):
        train_reader = ICHISeqDataReader(train_names)
        n_train_patients = len(train_names)
        # train hmms on data of each patient
        for train_patient in xrange(n_train_patients):
            # get data divided into sequences with respect to labels
            train_set = train_reader.read_next_doc(
                algo=read_algo,
                rank=read_rank,
                window=read_window,
                divide=True
            )
            for label in xrange(self.n_hmms):
                train_for_fit = train_set[label].eval().reshape(-1, 1)
                # fit only when there are more samples than hidden states
                if train_for_fit.shape[0] > self.hmm_models[label].n_components:
                    self.hmm_models[label].fit(numpy.array(train_for_fit))
                    self.isFitted[label] = True
            error_cur_epoch = self.validate_model(
                valid_names=valid_names,
                read_window=read_window,
                read_algo=read_algo,
                read_rank=read_rank
            )
            self.valid_error_array.append([])
            self.valid_error_array[-1].append(epoch)
            self.valid_error_array[-1].append(train_patient)
            self.valid_error_array[-1].append(error_cur_epoch)
            gc.collect()
def test_log_reg(test_names, read_algo, read_window, read_rank, classifier,
                 window_size=1):
    test_reader = ICHISeqDataReader(test_names)
    index = T.lscalar()
    y = T.iscalar('y')
    test_error_array = []

    for pat_num in xrange(len(test_names)):
        test_set_x, test_set_y = test_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        n_test_samples = test_set_x.get_value(borrow=True).shape[0] - window_size + 1

        # compile a Theano function that computes the mistakes the model
        # makes on one window of the test set
        test_model = theano.function(
            inputs=[index],
            outputs=[classifier.errors(y), classifier.predict(), y],
            givens={
                classifier.x: test_set_x[index: index + window_size],
                y: test_set_y[index + window_size - 1]
            }
        )
        test_result = [test_model(i) for i in xrange(n_test_samples)]
        test_result = numpy.asarray(test_result)
        test_losses = test_result[:, 0]
        test_score = float(numpy.mean(test_losses)) * 100
        test_error_array.append(test_score)
    return test_error_array
def train_separately():
    all_train = ['p002', 'p003', 'p005', 'p007', 'p08a', 'p08b', 'p09a', 'p09b',
                 'p10a', 'p011', 'p012', 'p013', 'p014', 'p15a', 'p15b', 'p016',
                 'p017', 'p018', 'p019', 'p020', 'p021', 'p022', 'p023', 'p025',
                 'p026', 'p027', 'p028', 'p029', 'p030', 'p031', 'p032', 'p033',
                 'p034', 'p035', 'p036', 'p037', 'p038', 'p040', 'p042', 'p043',
                 'p044', 'p045', 'p047', 'p048', 'p049', 'p050', 'p051']

    valid_data = all_train
    valid_reader = ICHISeqDataReader(valid_data)
    for valid_patient in valid_data:
        # get data divided into sequences with respect to labels
        valid_set_x, valid_set_y = valid_reader.read_next_doc()
        patient_error = get_error_on_patient(
            hidden_set=valid_set_y.eval()
        )
        print(patient_error, ' error for patient ' + valid_patient)
        gc.collect()
def validate_model(self, valid_names, read_window, read_algo, read_rank):
    valid_reader = ICHISeqDataReader(valid_names)
    valid_errors = []
    for i in xrange(len(valid_names)):
        valid_x, valid_y = valid_reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window,
            divide=False
        )
        # compute mean error value for one patient in the validation set
        pat_error = mean_error(
            gen_hmm=self,
            obs_seq=valid_x.get_value(),
            actual_states=valid_y.eval()
        )
        valid_errors.append(pat_error)
    return numpy.mean(valid_errors)
def finetune_hmm1(sda, n_hiddens, n_hmms, train_names, valid_names,
                  global_epochs, read_rank, read_window, read_algo,
                  window_size, posttrain_algo, posttrain_rank,
                  posttrain_window):
    # set hmm1 layer on sda
    sda.set_hmm1(
        hmm1=GeneralHMM(
            n_hiddens=n_hiddens,
            n_hmms=n_hmms
        )
    )
    for epoch in xrange(global_epochs):
        train_reader = ICHISeqDataReader(train_names)
        n_train_patients = len(train_names)
        # train hmms on data of each patient
        for train_patient in xrange(n_train_patients):
            # get data divided into sequences with respect to labels
            train_set = train_reader.read_next_doc(
                algo=read_algo,
                rank=read_rank,
                window=read_window,
                divide=True
            )
            for label in xrange(n_hmms):
                # keep the label's sequence as an array (a set is unordered
                # and cannot be sliced into windows)
                seq_for_label = train_set[label].eval()
                if seq_for_label.shape[0] >= window_size:
                    n_train_times = seq_for_label.shape[0] - window_size + 1
                    train_after_sda = numpy.array(
                        [sda.get_da_output(
                            seq_for_label[time: time + window_size]
                        ).ravel() for time in xrange(n_train_times)]
                    )
                    if train_after_sda.shape[0] > 0:
                        sda.hmm1.hmm_models[label].fit(
                            [train_after_sda.reshape((-1, 1))]
                        )
            error_cur_epoch = validate_model(
                sda=sda,
                valid_names=valid_names,
                read_window=read_window,
                read_algo=read_algo,
                read_rank=read_rank,
                window_size=window_size
            )
            sda.hmm1.valid_error_array.append([])
            sda.hmm1.valid_error_array[-1].append(
                epoch * n_train_patients + train_patient
            )
            sda.hmm1.valid_error_array[-1].append(error_cur_epoch)
            gc.collect()
    return sda
def train_logistic_sgd(read_algo, read_window, read_rank, learning_rate,
                       n_epochs, train_names, valid_names, classifier,
                       output_folder, base_folder, window_size=1):
    # read the datasets
    train_reader = ICHISeqDataReader(train_names)
    valid_reader = ICHISeqDataReader(valid_names)

    # early-stopping parameters
    patience_increase = 25         # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    best_validation_loss = numpy.inf
    done_looping = False
    iter = 0

    classifier.train_cost_array = []
    classifier.train_error_array = []
    classifier.valid_error_array = []

    for pat_num in xrange(len(train_names)):
        pat_epoch = 0
        # go through the training set
        train_set_x, train_set_y = train_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        valid_set_x, valid_set_y = valid_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1
        n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1

        patience = n_train_samples * 2  # look at this many examples regardless
        validation_frequency = patience / 4

        train_model, validate_model = training_functions_log_reg_sgd(
            classifier=classifier,
            window_size=window_size
        )

        done_looping = False
        while (pat_epoch < n_epochs) and (not done_looping):
            cur_train_cost = []
            cur_train_error = []
            for index in xrange(n_train_samples):
                sample_cost, sample_error, cur_pred, cur_actual = train_model(
                    index=index,
                    train_set_x=train_set_x.get_value(borrow=True),
                    train_set_y=train_set_y.eval(),
                    lr=learning_rate
                )
                # iteration number
                iter = pat_epoch * n_train_samples + index
                cur_train_cost.append(sample_cost)
                cur_train_error.append(sample_error)

                if (iter + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = []
                    for i in xrange(n_valid_samples):
                        validation_loss, cur_pred, cur_actual = validate_model(
                            index=i,
                            valid_set_x=valid_set_x.get_value(borrow=True),
                            valid_set_y=valid_set_y.eval()
                        )
                        validation_losses.append(validation_loss)
                    this_validation_loss = float(numpy.mean(validation_losses)) * 100
                    classifier.valid_error_array.append([])
                    classifier.valid_error_array[-1].append(
                        classifier.epoch + float(iter) / n_train_samples
                    )
                    classifier.valid_error_array[-1].append(this_validation_loss)

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss

                if patience * 4 <= iter:
                    done_looping = True
                    break

            classifier.train_cost_array.append([])
            classifier.train_cost_array[-1].append(
                classifier.epoch + float(iter) / n_train_samples
            )
            classifier.train_cost_array[-1].append(float(numpy.mean(cur_train_cost)))
            cur_train_cost = []

            classifier.train_error_array.append([])
            classifier.train_error_array[-1].append(
                classifier.epoch + float(iter) / n_train_samples
            )
            classifier.train_error_array[-1].append(float(numpy.mean(cur_train_error) * 100))
            cur_train_error = []

            classifier.epoch = classifier.epoch + 1
            pat_epoch = pat_epoch + 1

        gc.collect()
    return classifier
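# The patience logic above follows the classic Theano-tutorial early-stopping
# heuristic. A stripped-down sketch with toy loss values (illustrative only),
# keeping the same stopping test, patience * 4 <= iter, as the code above:

def patience_demo(losses):
    patience = 10
    patience_increase = 25
    improvement_threshold = 0.995
    best_loss = numpy.inf
    for it, loss in enumerate(losses):
        if loss < best_loss:
            if loss < best_loss * improvement_threshold:
                # significant improvement: allow training to run longer
                patience = max(patience, it * patience_increase)
            best_loss = loss
        if patience * 4 <= it:
            return it        # stop early at this iteration
    return len(losses)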
def finetune_log_layer_sgd(sda, train_names, valid_names, read_algo,
                           read_window, read_rank, window_size, finetune_lr,
                           global_epochs, pat_epochs, output_folder):
    ########################
    # FINETUNING THE MODEL #
    ########################
    # get the training and validation functions for the model
    train_fn, validate_model = build_finetune_functions(
        sda=sda,
        window_size=window_size,
        learning_rate=finetune_lr
    )

    train_reader = ICHISeqDataReader(train_names)

    # early-stopping parameters
    patience_increase = 25         # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    best_valid = numpy.inf
    cur_train_cost = []
    cur_train_error = []
    iter = 0

    for global_epoch in xrange(global_epochs):
        for pat_num in xrange(len(train_names)):
            done_looping = False
            # go through the training set
            train_set_x, train_set_y = train_reader.read_next_doc(
                algo=read_algo,
                window=read_window,
                rank=read_rank
            )
            n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1

            patience = n_train_samples * 2       # look at this many examples regardless
            validation_frequency = patience / 2  # check the network on the
                                                 # validation set after this
                                                 # many samples
            pat_epoch = 0
            while (pat_epoch < pat_epochs) and (not done_looping):
                for index in xrange(n_train_samples):
                    sample_cost, sample_error, cur_pred, cur_actual = train_fn(
                        index=index,
                        train_set_x=train_set_x.get_value(borrow=True),
                        train_set_y=train_set_y.eval()
                    )
                    # iteration number
                    iter = iter + 1
                    cur_train_cost.append(sample_cost)
                    cur_train_error.append(sample_error)

                    if (iter + 1) % validation_frequency == 0:
                        valid_reader = ICHISeqDataReader(valid_names)
                        valid_array = []
                        for valid_pat in xrange(len(valid_names)):
                            valid_set_x, valid_set_y = valid_reader.read_next_doc(
                                algo=read_algo,
                                window=read_window,
                                rank=read_rank
                            )
                            n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1
                            validation_losses = [
                                validate_model(
                                    index=i,
                                    valid_set_x=valid_set_x.get_value(borrow=True),
                                    valid_set_y=valid_set_y.eval()
                                ) for i in xrange(n_valid_samples)
                            ]
                            this_validation_loss = float(numpy.mean(validation_losses)) * 100
                            valid_array.append(this_validation_loss)
                        valid_mean_error = numpy.mean(valid_array)
                        sda.logLayer.valid_error_array.append([])
                        sda.logLayer.valid_error_array[-1].append(
                            sda.logLayer.epoch + float(index) / n_train_samples
                        )
                        sda.logLayer.valid_error_array[-1].append(valid_mean_error)

                        # if we got the best validation score until now
                        if valid_mean_error < best_valid:
                            # improve patience if loss improvement is good enough
                            if this_validation_loss < best_valid * improvement_threshold:
                                patience = max(patience, iter * patience_increase)
                            best_valid = valid_mean_error

                    if patience * 4 <= iter:
                        done_looping = True
                        break

                sda.logLayer.train_cost_array.append([])
                sda.logLayer.train_cost_array[-1].append(sda.logLayer.epoch)
                sda.logLayer.train_cost_array[-1].append(numpy.mean(cur_train_cost))
                cur_train_cost = []

                sda.logLayer.train_error_array.append([])
                sda.logLayer.train_error_array[-1].append(sda.logLayer.epoch)
                sda.logLayer.train_error_array[-1].append(numpy.mean(cur_train_error) * 100)
                cur_train_error = []

                sda.logLayer.epoch = sda.logLayer.epoch + 1
                pat_epoch = pat_epoch + 1

            gc.collect()

    visualize_finetuning(
        train_cost=sda.logLayer.train_cost_array,
        train_error=sda.logLayer.train_error_array,
        valid_error=sda.logLayer.valid_error_array,
        window_size=window_size,
        learning_rate=0,
        datasets_folder=output_folder,
        base_folder='finetune_log_reg'
    )
    return sda
def pretrain_sda_sgd(sda, train_names, valid_names, read_window, read_algo,
                     read_rank, window_size, pretrain_lr, corruption_levels,
                     global_epochs, pat_epochs):
    # compute number of examples given in training set
    n_train_patients = len(train_names)

    pretraining_fns, valid_fns = pretraining_functions_sda_sgd(
        sda=sda,
        window_size=window_size
    )

    ## Pre-train layer-wise
    for i in xrange(sda.n_layers):
        cur_dA = sda.dA_layers[i]
        cur_dA.train_cost_array = []
        iter = 0
        for global_epoch in xrange(global_epochs):
            train_reader = ICHISeqDataReader(train_names)
            for patients in xrange(n_train_patients):
                # go through the training set
                train_set_x, train_set_y = train_reader.read_next_doc(
                    algo=read_algo,
                    window=read_window,
                    rank=read_rank
                )
                n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1

                patience = n_train_samples * 2       # look at this many examples regardless
                validation_frequency = patience / 2  # check the network on the
                                                     # validation set after this
                                                     # many samples

                # go through pretraining epochs
                for pat_epoch in xrange(pat_epochs):
                    cur_epoch_cost = []
                    for index in xrange(n_train_samples):
                        # iteration number
                        big_epoch = (global_epoch * n_train_patients + patients) * pat_epochs + pat_epoch
                        iter = iter + 1
                        cur_epoch_cost.append(pretraining_fns[i](
                            index=index,
                            train_set=train_set_x.get_value(borrow=True),
                            corruption=corruption_levels[i],
                            lr=pretrain_lr
                        ))
                        # test on the validation set
                        if (iter + 1) % validation_frequency == 0:
                            valid_reader = ICHISeqDataReader(valid_names)
                            valid_array = []
                            for valid_pat in xrange(len(valid_names)):
                                valid_set_x, valid_set_y = valid_reader.read_next_doc(
                                    algo=read_algo,
                                    window=read_window,
                                    rank=read_rank
                                )
                                n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1
                                # use a separate loop variable: Python 2 list
                                # comprehensions leak their variable, which
                                # would clobber the training index
                                validation_losses = [
                                    valid_fns[i](
                                        index=v_index,
                                        valid_set=valid_set_x.get_value(borrow=True)
                                    ) for v_index in xrange(n_valid_samples)
                                ]
                                this_validation_loss = float(numpy.mean(validation_losses)) * 100
                                valid_array.append(this_validation_loss)
                            valid_mean_error = numpy.mean(valid_array)
                            cur_dA.valid_error_array.append([])
                            cur_dA.valid_error_array[-1].append(
                                big_epoch + float(index) / n_train_samples
                            )
                            cur_dA.valid_error_array[-1].append(valid_mean_error)
                    cur_dA.train_cost_array.append([])
                    cur_dA.train_cost_array[-1].append(big_epoch)
                    cur_dA.train_cost_array[-1].append(numpy.mean(cur_epoch_cost))
                gc.collect()
    return sda
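# corruption_levels[i] controls the masking noise applied to layer i's input
# during denoising pre-training. A minimal numpy sketch of masking noise,
# assuming the dA uses the standard zero-masking (binomial) corruption:

def masking_noise_demo(corruption_level=0.3):
    rng = numpy.random.RandomState(0)
    x = rng.rand(8)    # a toy input window
    mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
    return x * mask    # on average, corruption_level of the inputs are zeroed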
def finetune_hmm2(sda, read_window, read_algo, read_rank, posttrain_rank,
                  posttrain_algo, window_size, train_names):
    n_train_patients = len(train_names)
    n_visible = pow(10, posttrain_rank) + 2 - read_window  # input of sda
    n_visible = n_visible - window_size + 1                # output of sda
    n_hidden = 7
    posttrain_window = sda.da_layers_output_size
    train_reader = ICHISeqDataReader(train_names)

    # these algorithms enlarge the visible alphabet, so adjust n_visible
    # before allocating the emission matrix
    if (posttrain_algo == "avg_disp" or posttrain_algo == "filter+avg_disp"):
        n_visible *= 10

    # create matrices for params of HMM layer
    pi_values = numpy.zeros((n_hidden,))
    a_values = numpy.zeros((n_hidden, n_hidden))
    b_values = numpy.zeros((n_hidden, n_visible))
    array_from_hidden = numpy.zeros((n_hidden,))

    for train_patient in xrange(n_train_patients):
        train_set_x, train_set_y = train_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        train_set_x = train_set_x.get_value()
        train_set_y = train_set_y.eval()
        n_train_times = train_set_x.shape[0] - window_size + 1

        # pass each window through the stacked dA layers
        train_visible_after_sda = numpy.array(
            [sda.get_da_output(
                train_set_x[time: time + window_size]
            ).ravel() for time in xrange(n_train_times)]
        )
        new_train_visible = create_labels_after_das(
            da_output_matrix=train_visible_after_sda,
            algo=posttrain_algo,
            rank=posttrain_rank,
            window=posttrain_window
        )
        n_patient_samples = len(new_train_visible)
        half_window_size = int(window_size / 2)
        new_train_hidden = train_set_y[half_window_size: n_patient_samples + half_window_size]

        pi_values, a_values, b_values, array_from_hidden = update_params_on_patient(
            pi_values=pi_values,
            a_values=a_values,
            b_values=b_values,
            array_from_hidden=array_from_hidden,
            hiddens_patient=new_train_hidden,
            visibles_patient=new_train_visible,
            n_hidden=n_hidden
        )
        gc.collect()

    pi_values, a_values, b_values = finish_training(
        pi_values=pi_values,
        a_values=a_values,
        b_values=b_values,
        array_from_hidden=array_from_hidden,
        n_hidden=n_hidden,
        n_patients=n_train_patients
    )

    hmm_model = hmm.MultinomialHMM(
        n_components=n_hidden,
        startprob=pi_values,
        transmat=a_values
    )
    hmm_model.n_symbols = n_visible
    hmm_model.emissionprob_ = b_values
    gc.collect()
    print('MultinomialHMM created')
    sda.set_hmm2(hmm2=hmm_model)
    return sda
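# A hedged sketch of querying the resulting hmm2 layer. predict and score
# are standard hmmlearn methods; depending on the hmmlearn version, the
# observations may need to be a 1-D sequence instead of an (n, 1) column:

def hmm2_query_demo(hmm_model):
    obs = numpy.array([0, 1, 1, 2]).reshape((-1, 1))  # encoded visible symbols
    states = hmm_model.predict(obs)   # most likely (Viterbi) state sequence
    log_prob = hmm_model.score(obs)   # log-likelihood of the observations
    return states, log_prob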
def finetune_hmm1(sda, n_components, n_hmms, train_names, valid_names,
                  global_epochs, read_rank, read_window, read_algo,
                  window_size, posttrain_algo, posttrain_rank,
                  posttrain_window, output_folder):
    # set hmm1 layer on sda
    sda.set_hmm1(
        hmm1=GeneralHMM(
            n_components=n_components,
            n_hmms=n_hmms
        )
    )
    for epoch in xrange(global_epochs):
        train_reader = ICHISeqDataReader(train_names)
        n_train_patients = len(train_names)
        # train hmms on data of each patient
        for train_patient in xrange(n_train_patients):
            # get data divided into sequences with respect to labels
            train_set = train_reader.read_next_doc(
                algo=read_algo,
                rank=read_rank,
                window=read_window,
                divide=True
            )
            for label in xrange(n_hmms):
                train_for_label = train_set[label].eval()
                # skip labels with too little data to form even one window
                if train_for_label.shape[0] >= window_size:
                    n_train_times = train_for_label.shape[0] - window_size + 1
                    train_after_sda = numpy.array(
                        [sda.get_da_output(
                            train_for_label[time: time + window_size]
                        ).ravel() for time in xrange(n_train_times)]
                    )
                    if train_after_sda.shape[0] > sda.hmm1.hmm_models[label].n_components:
                        sda.hmm1.hmm_models[label].fit(
                            train_after_sda.reshape((-1, 1))
                        )
                        sda.hmm1.isFitted[label] = True
            error_cur_epoch = validate_model(
                sda=sda,
                valid_names=valid_names,
                read_window=read_window,
                read_algo=read_algo,
                read_rank=read_rank,
                window_size=window_size
            )
            sda.hmm1.valid_error_array.append([])
            sda.hmm1.valid_error_array[-1].append(
                epoch * n_train_patients + train_patient
            )
            sda.hmm1.valid_error_array[-1].append(error_cur_epoch)
            gc.collect()

    visualize_validating(
        valid_error=sda.hmm1.valid_error_array,
        window_size=window_size,
        datasets_folder=output_folder,
        base_folder='finetune_hmm1'
    )
    return sda
def train_logistic_cg(read_algo, read_window, read_rank, train_names,
                      valid_names, window_size, n_epochs, classifier):
    # read the datasets
    train_reader = ICHISeqDataReader(train_names)
    valid_reader = ICHISeqDataReader(valid_names)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()

    # generate symbolic variables for input
    x = classifier.x    # data, presented as a window of samples
    y = T.iscalar('y')  # labels, presented as int label

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    for pat_num in xrange(len(train_names)):
        # go through the training set
        train_set_x, train_set_y = train_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        valid_set_x, valid_set_y = valid_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1
        n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1

        validate_model = theano.function(
            [index],
            classifier.errors(y),
            givens={
                x: valid_set_x[index: index + window_size],
                y: valid_set_y[index + window_size - 1]
            },
            name="validate"
        )

        # compile a theano function that returns the cost
        conj_cost = theano.function(
            inputs=[index],
            outputs=[cost, classifier.errors(y), classifier.predict(), y],
            givens={
                x: train_set_x[index: index + window_size],
                y: train_set_y[index + window_size - 1]
            },
            name="conj_cost"
        )

        # compile a theano function that returns the gradient with respect to theta
        conj_grad = theano.function(
            [index],
            T.grad(cost, classifier.theta),
            givens={
                x: train_set_x[index: index + window_size],
                y: train_set_y[index + window_size - 1]
            },
            name="conj_grad"
        )

        train_confusion_matrix = numpy.zeros((7, 7))

        # creates a function that computes the average cost on the training set
        def train_fn(theta_value):
            classifier.theta.set_value(theta_value, borrow=True)
            cur_train_cost = []
            cur_train_error = []
            for i in xrange(n_train_samples):
                sample_cost, sample_error, cur_pred, cur_actual = conj_cost(i)
                cur_train_cost.append(sample_cost)
                cur_train_error.append(sample_error)
                train_confusion_matrix[cur_actual][cur_pred] += 1
            this_train_loss = float(numpy.mean(cur_train_cost))
            classifier.train_cost_array.append([])
            classifier.train_cost_array[-1].append(classifier.epoch)
            classifier.train_cost_array[-1].append(this_train_loss)
            classifier.train_error_array.append([])
            classifier.train_error_array[-1].append(classifier.epoch)
            classifier.train_error_array[-1].append(float(numpy.mean(cur_train_error) * 100))
            classifier.epoch += 1
            return this_train_loss

        # creates a function that computes the average gradient of cost with
        # respect to theta
        def train_fn_grad(theta_value):
            classifier.theta.set_value(theta_value, borrow=True)
            grad = conj_grad(0)
            for i in xrange(1, n_train_samples):
                grad += conj_grad(i)
            return grad / n_train_samples

        # creates the validation function
        def callback(theta_value):
            classifier.theta.set_value(theta_value, borrow=True)
            # compute the validation loss
            validation_losses = [validate_model(i) for i in xrange(n_valid_samples)]
            this_validation_loss = float(numpy.mean(validation_losses) * 100.)
            print('validation error %f %%' % this_validation_loss)
            classifier.valid_error_array.append([])
            classifier.valid_error_array[-1].append(classifier.epoch)
            classifier.valid_error_array[-1].append(this_validation_loss)

        ###############
        # TRAIN MODEL #
        ###############
        # using scipy conjugate gradient optimizer
        print("Optimizing using scipy.optimize.fmin_cg...")
        best_theta = scipy.optimize.fmin_cg(
            f=train_fn,
            x0=numpy.zeros((classifier.n_in + 1) * classifier.n_out, dtype=x.dtype),
            fprime=train_fn_grad,
            callback=callback,
            disp=0,
            maxiter=n_epochs
        )
    return classifier
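# For reference, the fmin_cg interface used above, on a self-contained toy
# quadratic (independent of the classifier):

def fmin_cg_demo():
    import scipy.optimize

    def f(theta):
        return float(numpy.sum((theta - 3.0) ** 2))   # minimum at theta == 3

    def fprime(theta):
        return 2.0 * (theta - 3.0)

    best = scipy.optimize.fmin_cg(
        f=f,
        x0=numpy.zeros(2),
        fprime=fprime,
        callback=lambda theta: None,  # invoked once per iteration, as above
        disp=0,
        maxiter=50
    )
    return best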
def test_sda(sda, test_names, read_window, read_algo, read_rank, window_size,
             posttrain_rank, posttrain_algo, predict_algo='viterbi'):
    test_reader = ICHISeqDataReader(test_names)
    posttrain_window = sda.da_layers_output_size

    index = T.lscalar('index')
    test_set_x = T.vector('test_set_x')
    test_set_y = T.ivector('test_set_y')
    y = T.iscalar('y')  # labels, presented as int label

    hmm1_error_array = []
    hmm2_error_array = []
    log_reg_errors = []

    test_log_reg = theano.function(
        inputs=[index, test_set_x, test_set_y],
        outputs=[sda.logLayer.errors(y), sda.logLayer.predict(), y],
        givens={
            sda.x: test_set_x[index: index + window_size],
            y: test_set_y[index + window_size - 1]
        }
    )

    for test_patient in test_names:
        # the Python names test_set_x/test_set_y are rebound to concrete
        # arrays here; the compiled function keeps its own references to
        # the symbolic variables
        test_set_x, test_set_y = test_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        test_set_x = test_set_x.get_value(borrow=True)
        test_set_y = test_set_y.eval()
        n_test_times = test_set_x.shape[0] - window_size + 1

        # error of the logistic regression layer
        test_result = [test_log_reg(
            index=i,
            test_set_x=test_set_x,
            test_set_y=test_set_y
        ) for i in xrange(n_test_times)]
        test_result = numpy.asarray(test_result)
        test_losses = test_result[:, 0]
        test_score = float(numpy.mean(test_losses)) * 100
        log_reg_errors.append(test_score)

        # pass each window through the stacked dA layers
        test_visible_after_sda = numpy.array(
            [sda.get_da_output(
                test_set_x[time: time + window_size]
            ).ravel() for time in xrange(n_test_times)]
        )
        half_window_size = int(window_size / 2)
        test_y_after_sda = test_set_y[half_window_size: n_test_times + half_window_size]

        # compute mean error value for one patient on hmm1
        pat_error = mean_error(
            gen_hmm=sda.hmm1,
            obs_seq=test_visible_after_sda,
            actual_states=test_y_after_sda
        )
        hmm1_error_array.append(pat_error)

        new_test_visible = create_labels_after_das(
            da_output_matrix=test_visible_after_sda,
            algo=posttrain_algo,
            rank=posttrain_rank,
            window=posttrain_window
        )
        n_patient_samples = len(new_test_visible)
        new_test_hidden = test_set_y[half_window_size: n_patient_samples + half_window_size]

        patient_error = get_error_on_patient(
            model=sda.hmm2,
            visible_set=new_test_visible,
            hidden_set=new_test_hidden,
            algo=predict_algo,
            pat=test_patient,
            all_labels=True
        )
        hmm2_error_array.append(patient_error)

        print(patient_error, ' error (hmm) for patient ' + test_patient)
        print(test_score, ' error (log_reg) for patient ' + test_patient)
        gc.collect()
    return hmm1_error_array, hmm2_error_array, log_reg_errors