def validate_model(sda, valid_names, read_window, read_algo, read_rank,
                   window_size):
    """Return the mean ``sda.hmm1`` error over the validation documents.

    One document is read from an ``ICHISeqDataReader`` per entry of
    *valid_names*.  Every run of *window_size* consecutive samples is fed
    through ``sda.get_da_output`` and flattened, giving one observation per
    window position.  Each observation is paired with the label found
    ``int(window_size/2)`` samples after the start of its window, and the
    resulting sequence is scored with ``mean_error``.

    Args:
        sda: model exposing ``get_da_output`` and ``hmm1``.
        valid_names: names of the validation documents; only their count
            drives the loop, the reader hands documents out in its own order.
        read_window: forwarded to ``read_next_doc`` as ``window``.
        read_algo: forwarded to ``read_next_doc`` as ``algo``.
        read_rank: forwarded to ``read_next_doc`` as ``rank``.
        window_size: number of consecutive samples given to the model at once.

    Returns:
        ``numpy.mean`` of the per-document errors.
    """
    reader = ICHISeqDataReader(valid_names)
    per_document_errors = []

    for _ in valid_names:
        shared_x, shared_y = reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window,
            divide=False
        )
        # The reader returns objects that must be materialised before
        # they can be sliced like arrays.
        signal = shared_x.get_value()
        labels = shared_y.eval()

        # Number of positions at which a full window fits in the signal.
        n_valid_times = signal.shape[0] - window_size + 1

        encoded_windows = []
        for start in xrange(n_valid_times):
            window = signal[start: start + window_size]
            encoded_windows.append(sda.get_da_output(window).ravel())
        observations = numpy.array(encoded_windows)

        # Shift the labels so that each window is matched with the label
        # located half a window after its first sample.
        label_offset = int(window_size/2)
        states = labels[label_offset: n_valid_times + label_offset]

        per_document_errors.append(
            mean_error(
                gen_hmm=sda.hmm1,
                obs_seq=observations,
                actual_states=states
            )
        )

    return numpy.mean(per_document_errors)
def test_sda(sda, test_names, read_window, read_algo, read_rank,
             window_size, posttrain_rank, posttrain_algo,
             predict_algo='viterbi'):
    """Score the model's three predictors on every test document.

    For each name in *test_names* one document is read and evaluated with:

    * the logistic-regression layer (``sda.logLayer``), through a compiled
      Theano function applied at every window position; the recorded score
      is the mean of its error output multiplied by 100;
    * ``sda.hmm1``, scored by ``mean_error`` on the flattened
      ``sda.get_da_output`` of every window;
    * ``sda.hmm2``, scored by ``get_error_on_patient`` on the result of
      ``create_labels_after_das`` applied to those same outputs.

    The two HMM evaluations use labels shifted by ``int(window_size/2)``,
    whereas the logistic-regression function uses the label of the last
    sample of each window.

    Args:
        sda: model exposing ``x``, ``logLayer``, ``get_da_output``,
            ``da_layers_output_size``, ``hmm1`` and ``hmm2``.
        test_names: names of the test documents; also used in the printed
            report and passed to ``get_error_on_patient`` as ``pat``.
        read_window: forwarded to ``read_next_doc`` as ``window``.
        read_algo: forwarded to ``read_next_doc`` as ``algo``.
        read_rank: forwarded to ``read_next_doc`` as ``rank``.
        window_size: number of consecutive samples given to the model at once.
        posttrain_rank: forwarded to ``create_labels_after_das`` as ``rank``.
        posttrain_algo: forwarded to ``create_labels_after_das`` as ``algo``.
        predict_algo: forwarded to ``get_error_on_patient`` as ``algo``.

    Returns:
        A tuple ``(hmm1_errors, hmm2_errors, log_reg_errors)`` of lists with
        one entry per test document.
    """
    reader = ICHISeqDataReader(test_names)
    posttrain_window = sda.da_layers_output_size

    # Symbolic placeholders.  The string names are what the compiled
    # function accepts as keyword arguments, so they must stay unchanged.
    sym_index = T.lscalar('index')
    sym_signal = T.vector('test_set_x')
    sym_labels = T.ivector('test_set_y')
    sym_target = T.iscalar('y')  # labels, presented as int label

    hmm1_error_array = []
    hmm2_error_array = []
    log_reg_errors = []

    outputs = [
        sda.logLayer.errors(sym_target),
        sda.logLayer.predict(),
        sym_target
    ]
    # Feed the model one window starting at `index`; the target is the
    # label of the last sample inside that window.
    givens = {
        sda.x: sym_signal[sym_index: sym_index + window_size],
        sym_target: sym_labels[sym_index + window_size - 1]
    }
    test_log_reg = theano.function(
        inputs=[sym_index, sym_signal, sym_labels],
        outputs=outputs,
        givens=givens
    )

    for test_patient in test_names:
        shared_x, shared_y = reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        signal = shared_x.get_value(borrow=True)
        labels = shared_y.eval()

        # Number of positions at which a full window fits in the signal.
        n_test_times = signal.shape[0] - window_size + 1

        # --- logistic regression -------------------------------------
        per_window_results = []
        for start in xrange(n_test_times):
            per_window_results.append(
                test_log_reg(
                    index=start,
                    test_set_x=signal,
                    test_set_y=labels
                )
            )
        # Column 0 holds the error output of every call.
        test_losses = numpy.asarray(per_window_results)[:, 0]
        test_score = float(numpy.mean(test_losses))*100
        log_reg_errors.append(test_score)

        # --- first HMM on the encoded windows ------------------------
        encoded_windows = []
        for start in xrange(n_test_times):
            window = signal[start: start + window_size]
            encoded_windows.append(sda.get_da_output(window).ravel())
        test_visible_after_sda = numpy.array(encoded_windows)

        # Match each window with the label half a window after its start.
        label_offset = int(window_size/2)
        test_y_after_sda = labels[label_offset: n_test_times + label_offset]

        # Mean error of hmm1 for this test document.
        hmm1_error_array.append(
            mean_error(
                gen_hmm=sda.hmm1,
                obs_seq=test_visible_after_sda,
                actual_states=test_y_after_sda
            )
        )

        # --- second HMM on the post-processed encodings --------------
        # NOTE(review): create_labels_after_das presumably re-encodes the
        # DA outputs as observation labels for hmm2 -- confirm against
        # its definition.
        new_test_visible = create_labels_after_das(
            da_output_matrix=test_visible_after_sda,
            algo=posttrain_algo,
            rank=posttrain_rank,
            window=posttrain_window
        )
        n_patient_samples = len(new_test_visible)
        new_test_hidden = labels[
            label_offset: n_patient_samples + label_offset
        ]

        patient_error = get_error_on_patient(
            model=sda.hmm2,
            visible_set=new_test_visible,
            hidden_set=new_test_hidden,
            algo=predict_algo,
            pat=test_patient,
            all_labels=True
        )
        hmm2_error_array.append(patient_error)

        print(patient_error, ' error (hmm) for patient ' + test_patient)
        print(test_score, ' error (log_reg) for patient ' + test_patient)
        # Release the per-document arrays before reading the next one.
        gc.collect()

    return hmm1_error_array, hmm2_error_array, log_reg_errors