Beispiel #1
0
def validate_model(sda,
                   valid_names,
                   read_window,
                   read_algo,
                   read_rank,
                   window_size):
    """Return the mean ``mean_error`` of ``sda.hmm1`` over validation documents.

    One document is pulled from an ``ICHISeqDataReader`` per entry of
    ``valid_names``. Every run of ``window_size`` consecutive rows of the
    document is passed through ``sda.get_da_output`` and flattened, giving
    one observation per window. Each observation is paired with the label
    located ``int(window_size / 2)`` positions after the window start.

    Args:
        sda: model exposing ``get_da_output`` and ``hmm1``.
        valid_names: names handed to ``ICHISeqDataReader``; its length
            determines how many documents are read.
        read_window: forwarded to ``read_next_doc`` as ``window``.
        read_algo: forwarded to ``read_next_doc`` as ``algo``.
        read_rank: forwarded to ``read_next_doc`` as ``rank``.
        window_size: number of consecutive rows fed to ``get_da_output``.

    Returns:
        ``numpy.mean`` of the per-document values returned by
        ``mean_error``. With no documents this is the mean of an empty
        list (NaN, with a NumPy warning).
    """
    valid_reader = ICHISeqDataReader(valid_names)

    # Does not depend on the document, so compute it once.
    half_window_size = int(window_size / 2)

    valid_errors = []
    # One read per name. The names themselves are not needed here; the
    # reader is advanced through read_next_doc (test_sda iterates the same
    # way).
    for _ in valid_names:
        valid_x, valid_y = valid_reader.read_next_doc(
            algo=read_algo,
            rank=read_rank,
            window=read_window,
            divide=False
        )
        # NOTE(review): presumably a Theano shared variable and a symbolic
        # expression respectively -- confirm against ICHISeqDataReader.
        valid_x = valid_x.get_value()
        valid_y = valid_y.eval()

        # Number of full windows that fit into this document.
        n_valid_times = valid_x.shape[0] - window_size + 1

        # One flattened get_da_output result per sliding window.
        new_valid_x = numpy.array(
            [sda.get_da_output(
                valid_x[time: time + window_size]
            ).ravel()
             for time in xrange(n_valid_times)]
        )

        # Label taken half a window after each window start.
        new_valid_y = valid_y[
            half_window_size: n_valid_times + half_window_size
        ]

        # Error of hmm1 on this document (semantics defined by mean_error).
        pat_error = mean_error(
            gen_hmm=sda.hmm1,
            obs_seq=new_valid_x,
            actual_states=new_valid_y
        )
        valid_errors.append(pat_error)

    return numpy.mean(valid_errors)
Beispiel #2
0
def test_sda(
    sda,
    test_names,
    read_window,
    read_algo,
    read_rank,
    window_size,
    posttrain_rank,
    posttrain_algo,
    predict_algo='viterbi'):
    """Evaluate ``sda`` on every test patient and collect three error lists.

    For each name in ``test_names`` the next document is read and three
    figures are produced:

    * a logistic-layer score: the mean of ``sda.logLayer.errors`` over all
      windows of ``window_size`` consecutive inputs, times 100, where each
      window is compared with the label at its last position;
    * the ``mean_error`` of ``sda.hmm1`` on the flattened
      ``sda.get_da_output`` of every window;
    * the ``get_error_on_patient`` result for ``sda.hmm2`` on the output of
      ``create_labels_after_das`` applied to those same features.

    Both per-patient scores are printed and ``gc.collect()`` is called after
    each patient.

    Returns:
        Tuple ``(hmm1 errors, hmm2 errors, logistic-layer scores)``, each a
        list with one entry per patient.
    """
    test_reader = ICHISeqDataReader(test_names)
    posttrain_window = sda.da_layers_output_size

    # Symbolic inputs. The Theano names are what the keyword arguments of
    # the compiled function refer to, so the strings must stay as they are.
    sym_index = T.lscalar('index')
    sym_x = T.vector('test_set_x')
    sym_y = T.ivector('test_set_y')
    sym_label = T.iscalar('y')  # labels, presented as int label

    hmm1_errors = []
    hmm2_errors = []
    log_reg_errors = []

    # Feeds one window of the input to the model and compares the
    # logistic layer with the label at the window's last position.
    window_end = sym_index + window_size
    log_reg_fn = theano.function(
        inputs=[sym_index, sym_x, sym_y],
        outputs=[
            sda.logLayer.errors(sym_label),
            sda.logLayer.predict(),
            sym_label
        ],
        givens={
            sda.x: sym_x[sym_index: window_end],
            sym_label: sym_y[window_end - 1]
        }
    )

    for test_patient in test_names:
        doc_x, doc_y = test_reader.read_next_doc(
            algo=read_algo,
            window=read_window,
            rank=read_rank
        )
        patient_x = doc_x.get_value(borrow=True)
        patient_y = doc_y.eval()

        # Number of full windows available for this patient.
        n_windows = patient_x.shape[0] - window_size + 1

        # Logistic layer: evaluate every window, average the first output.
        window_outputs = []
        for start in xrange(n_windows):
            window_outputs.append(
                log_reg_fn(
                    index=start,
                    test_set_x=patient_x,
                    test_set_y=patient_y
                )
            )
        losses = numpy.asarray(window_outputs)[:, 0]
        log_reg_score = float(numpy.mean(losses)) * 100
        log_reg_errors.append(log_reg_score)

        # Features for the HMMs: flattened get_da_output of each window.
        da_rows = [
            sda.get_da_output(patient_x[start: start + window_size]).ravel()
            for start in xrange(n_windows)
        ]
        da_features = numpy.array(da_rows)

        # Labels are taken half a window after each window start.
        half_window = int(window_size/2)
        hmm1_labels = patient_y[half_window: n_windows + half_window]

        # Error of hmm1 for this test patient.
        hmm1_errors.append(
            mean_error(
                gen_hmm=sda.hmm1,
                obs_seq=da_features,
                actual_states=hmm1_labels
            )
        )

        # Second stage: convert the features with create_labels_after_das
        # and score hmm2 on the result.
        hmm2_visible = create_labels_after_das(
            da_output_matrix=da_features,
            algo=posttrain_algo,
            rank=posttrain_rank,
            window=posttrain_window
        )
        n_visible = len(hmm2_visible)
        hmm2_hidden = patient_y[half_window: n_visible + half_window]

        hmm2_error = get_error_on_patient(
            model=sda.hmm2,
            visible_set=hmm2_visible,
            hidden_set=hmm2_hidden,
            algo=predict_algo,
            pat=test_patient,
            all_labels=True
        )
        hmm2_errors.append(hmm2_error)

        print(hmm2_error, ' error (hmm) for patient ' + test_patient)
        print(log_reg_score, ' error (log_reg) for patient ' + test_patient)
        gc.collect()

    return hmm1_errors, hmm2_errors, log_reg_errors