def test_compare_structure_base(self):
        """
        Baseline comparison using the original testing/training split
        of the full structure data.
        """
        testing = self.all_struct.contents[:543]
        training = self.all_struct.contents[543:]
        testing, training = compare.compare_structure(testing, training)
        accuracy = compare.predict_accuracy(testing, training)

        self.assertEqual(accuracy, DEFAULT_VALUE)
def _randomize(nbs, percent_for_testing, threshold):
    """
    Randomize training and testing data on the given input.

    Args:
        nbs: data source exposing ``get_training_testing(percent)``, which
            returns a dict with 'test' and 'train' entries.
        percent_for_testing: portion of the data to hold out for testing,
            forwarded to ``nbs.get_training_testing``.
        threshold: when None, run a single comparison on the full split;
            otherwise delegate to ``_get_threshold_data`` for per-threshold
            results.

    Returns:
        A list of ``(threshold, (prediction, test, word_info))`` tuples.
        With no threshold, a single entry keyed by 0 is returned.
    """
    data_dict = nbs.get_training_testing(percent_for_testing)
    test_data = data_dict['test']
    train_data = data_dict['train']
    if threshold is None:
        prediction, test = nbs_comparison.compare_structure(test_data, train_data)
        return [(0, (prediction, test, _get_word_info(test_data)))]
    # Threshold supplied: gather one result per threshold step instead.
    return _get_threshold_data(test_data, train_data, nbs, threshold)
def _eval_threshold(test_data, train_data, nbs, curr_threshold, results):
    """
    Evaluate the structure comparison at the given threshold and record it.

    Both data sets are filtered through ``_remove_columns`` using a start
    index derived from the threshold (presumably dropping columns outside
    the threshold — confirm against ``_remove_columns``), then compared.

    Args:
        test_data: testing rows to compare.
        train_data: training rows to compare against.
        nbs: data source used by ``_make_start_index`` / ``_remove_columns``.
        curr_threshold: the threshold value being evaluated.
        results: mutable list; a ``(curr_threshold, (prediction, test,
            word_info))`` tuple is appended as a side effect.

    Returns:
        None; the outcome is communicated through ``results``.
    """
    start_index = _make_start_index(nbs, curr_threshold)
    curr_test_data = _remove_columns(test_data, nbs, start_index)
    curr_train_data = _remove_columns(train_data, nbs, start_index)
    prediction, test = nbs_comparison.compare_structure(curr_test_data, curr_train_data)
    curr_word_info = _get_word_info(curr_test_data)
    results.append((curr_threshold, (prediction, test, curr_word_info)))
def _cross_validation(nbs, chunks, threshold):
    """
    Run cross validation over the input data, one fold per chunk.

    For each of ``chunks`` folds, the fold is held out as testing data and
    every other fold is concatenated into the training data; the per-fold
    comparison results are collected and returned as a list.
    """
    folds = nbs.get_cross_validation_chunks(chunks)
    results = []
    for fold_idx in range(chunks):
        print('chunk: ' + str(fold_idx))
        test_data = folds[fold_idx]
        # Training set: every row from every fold except the held-out one.
        train_data = [row
                      for other_idx, fold in enumerate(folds)
                      if other_idx != fold_idx
                      for row in fold]

        if threshold is None:
            prediction, test = nbs_comparison.compare_structure(test_data, train_data)
            results.append([(0, (prediction, test, _get_word_info(test_data)))])
        else:
            results.append(_get_threshold_data(test_data, train_data, nbs, threshold))

    return results