def train_and_val():
    training_data = dp.read_data('dataset/splice-Xtrain.dat',
                                 'dataset/splice-Ytrain.dat')
    training_set_indices, validation_set_indices = dp.read_training_val_set(
        'dataset/train.txt', 'dataset/val.txt')
    feature = Features()
    features_labels_pair = feature.amino_acid_count(training_data)
    training_set = []
    for index in training_set_indices:
        training_set.append(features_labels_pair[index])

    dp.remove_ambiguous_entry_plus(training_set)
    k_nn = KNN(training_set, 23)

    confusion_matrix = np.zeros([3, 3])
    correct = 0.0
    total = 0.0

    validation_set = []
    for index in validation_set_indices:
        validation_set.append(features_labels_pair[index])

    dp.remove_ambiguous_entry_plus(validation_set)
    for feature_vector, correct_class in validation_set:
        prediction = k_nn.predict_codon_cosine(feature_vector, k_nn.no_weight)
        total += 1
        if prediction == correct_class:
            correct += 1
        if prediction == 0 and correct_class == 0:
            confusion_matrix[0, 0] += 1
        if prediction == 0 and correct_class == 1:
            confusion_matrix[0, 1] += 1
        if prediction == 0 and correct_class == 2:
            confusion_matrix[0, 2] += 1
        if prediction == 1 and correct_class == 0:
            confusion_matrix[1, 0] += 1
        if prediction == 1 and correct_class == 1:
            confusion_matrix[1, 1] += 1
        if prediction == 1 and correct_class == 2:
            confusion_matrix[1, 2] += 1
        if prediction == 2 and correct_class == 0:
            confusion_matrix[2, 0] += 1
        if prediction == 2 and correct_class == 1:
            confusion_matrix[2, 1] += 1
        if prediction == 2 and correct_class == 2:
            confusion_matrix[2, 2] += 1
        #print prediction, correct_class
    print confusion_matrix
    print correct / total
def train_and_val():
    training_data = dp.read_data('dataset/splice-Xtrain.dat', 'dataset/splice-Ytrain.dat')
    training_set_indices, validation_set_indices = dp.read_training_val_set('dataset/train.txt', 'dataset/val.txt')
    feature = Features()
    features_labels_pair = feature.amino_acid_count(training_data)
    training_set = []
    for index in training_set_indices:
        training_set.append(features_labels_pair[index])
    
    dp.remove_ambiguous_entry_plus(training_set)
    k_nn = KNN(training_set, 23)
    
    confusion_matrix = np.zeros([3,3])
    correct = 0.0
    total = 0.0
    
    validation_set = []
    for index in validation_set_indices:
        validation_set.append(features_labels_pair[index])
    
    dp.remove_ambiguous_entry_plus(validation_set)
    for feature_vector, correct_class in validation_set: 
        prediction = k_nn.predict_codon_cosine(feature_vector, k_nn.no_weight)
        total += 1
        if prediction == correct_class:
            correct += 1
        if prediction == 0 and correct_class == 0:
            confusion_matrix[0,0] += 1
        if  prediction == 0 and correct_class == 1:
            confusion_matrix[0,1] += 1
        if  prediction == 0 and correct_class == 2:
            confusion_matrix[0,2] += 1
        if  prediction == 1 and correct_class == 0:
            confusion_matrix[1,0] += 1
        if  prediction == 1 and correct_class == 1:
            confusion_matrix[1,1] += 1
        if  prediction == 1 and correct_class == 2:
            confusion_matrix[1,2] += 1
        if  prediction == 2 and correct_class == 0:
            confusion_matrix[2,0] += 1
        if  prediction == 2 and correct_class == 1:
            confusion_matrix[2,1] += 1
        if  prediction == 2 and correct_class == 2:
            confusion_matrix[2,2] += 1  
        #print prediction, correct_class
    print confusion_matrix      
    print correct/total     
Example #3
0
def train_and_test():
    training_data = dp.read_data('dataset/splice-Xtrain.dat',
                                 'dataset/splice-Ytrain.dat')
    test_data = dp.read_data('dataset/test40.txt', 'dataset/ytest40.txt')
    feature = Features()
    dp.remove_ambiguous_entry_plus(training_data)
    training_set = feature.amino_acid_count(training_data)
    test_set = feature.amino_acid_count(test_data)

    k_nearest_neighbors = KNN(training_set, 26)

    confusion_matrix = np.zeros([3, 3])
    correct = 0.0
    total = 0.0

    for index in range(len(test_set)):
        feature_vector, correct_class = test_set[index]
        prediction = k_nearest_neighbors.predict_codon_cosine(
            feature_vector, k_nearest_neighbors.no_weight)
        total += 1
        if prediction == correct_class:
            correct += 1
        if prediction == 0 and correct_class == 0:
            confusion_matrix[0, 0] += 1
        if prediction == 0 and correct_class == 1:
            confusion_matrix[0, 1] += 1
        if prediction == 0 and correct_class == 2:
            confusion_matrix[0, 2] += 1
        if prediction == 1 and correct_class == 0:
            confusion_matrix[1, 0] += 1
        if prediction == 1 and correct_class == 1:
            confusion_matrix[1, 1] += 1
        if prediction == 1 and correct_class == 2:
            confusion_matrix[1, 2] += 1
        if prediction == 2 and correct_class == 0:
            confusion_matrix[2, 0] += 1
        if prediction == 2 and correct_class == 1:
            confusion_matrix[2, 1] += 1
        if prediction == 2 and correct_class == 2:
            confusion_matrix[2, 2] += 1

    print confusion_matrix
    print correct / total
Example #4
0
def train_and_test():
    training_data = dp.read_data("dataset/splice-Xtrain.dat", "dataset/splice-Ytrain.dat")
    test_data = dp.read_data("dataset/test40.txt", "dataset/ytest40.txt")
    feature = Features()
    dp.remove_ambiguous_entry_plus(training_data)
    training_set = feature.amino_acid_count(training_data)
    test_set = feature.amino_acid_count(test_data)

    k_nearest_neighbors = KNN(training_set, 26)

    confusion_matrix = np.zeros([3, 3])
    correct = 0.0
    total = 0.0

    for index in range(len(test_set)):
        feature_vector, correct_class = test_set[index]
        prediction = k_nearest_neighbors.predict_codon_cosine(feature_vector, k_nearest_neighbors.no_weight)
        total += 1
        if prediction == correct_class:
            correct += 1
        if prediction == 0 and correct_class == 0:
            confusion_matrix[0, 0] += 1
        if prediction == 0 and correct_class == 1:
            confusion_matrix[0, 1] += 1
        if prediction == 0 and correct_class == 2:
            confusion_matrix[0, 2] += 1
        if prediction == 1 and correct_class == 0:
            confusion_matrix[1, 0] += 1
        if prediction == 1 and correct_class == 1:
            confusion_matrix[1, 1] += 1
        if prediction == 1 and correct_class == 2:
            confusion_matrix[1, 2] += 1
        if prediction == 2 and correct_class == 0:
            confusion_matrix[2, 0] += 1
        if prediction == 2 and correct_class == 1:
            confusion_matrix[2, 1] += 1
        if prediction == 2 and correct_class == 2:
            confusion_matrix[2, 2] += 1

    print confusion_matrix
    print correct / total