Exemple #1
0
def knn(train, test, k=1, randFlag=False):
    """
    Score every loop in test against the first neighbor selected for it
    from the models in train.

    Neighbors are looked up with neighbors(test_instance, train, k), or with
    rand_neighbors(test_instance, train, k) when randFlag is set.

    Returns a list holding one (neighbor_result, cmp_score) tuple per test
    instance, in the order the instances were given.  neighbor_result is the
    complete value returned by the neighbor lookup (not only its first
    element); cmp_score is the result of comparing a Model built from the
    test instance against the first element of neighbor_result.
    @author: Travis Peters
    """

    # Pass 1: pair every test instance with the result of its neighbor lookup.
    paired = [
        (
            instance,
            rand_neighbors(instance, train, k) if randFlag
            else neighbors(instance, train, k),
        )
        for instance in test
    ]

    # Pass 2: compare each test instance with the first neighbor found for it.
    results = []
    for instance, found in paired:
        top_match = found[0]
        score = Model.fromLoop(instance).compare(top_match, max_rmsd=-1)

        # The first slot carries the whole lookup result, so callers still
        # see every neighbor that was returned.
        results.append((found, score))

    return results
Exemple #2
0
def _mean_or_nan(total, count):
    # Mean of an accumulated sum, or NaN when nothing was accumulated.
    return total / count if count else float("nan")


def compute_score_naive(bin_clusters, first_only=True):
    """
    Naive self-test: check whether each loop gets classified back into the
    model it came from, and print the averaged scores.

    bin_clusters -- sized sequence of (bin_data, models) pairs.  Only models
                    is used; each model must provide get_loops(out_list),
                    which is expected to append the model's loops to out_list.
    first_only   -- when true (default) only the top-ranked candidate is
                    checked and a loop scores 1.0 or 0.0.  When false the
                    score is rank based: (len(scores) - tries) / len(scores),
                    where tries is the number of candidates ranked ahead of
                    the first one that contains the loop.

    For every loop the "structure score" is whatever the top-ranked
    candidate's compare() returns for a Model built from that loop.  Scores
    are averaged per cluster and then across clusters (a mean of means).
    In first_only mode the structure score is additionally averaged
    separately for loops that hit their own model and for loops that did not;
    otherwise those two figures are reported as nan.

    Clusters without usable loops are excluded from the averages.  The result
    is printed; nothing is returned.  An IndexError is raised if the
    classifier returns no candidates for a loop.
    """
    total_model_score = 0
    total_structure_score = 0
    # Two [sum of per-cluster means, contributing clusters] accumulators:
    # index 0 for loops whose top candidate was their own model, 1 otherwise.
    total_partial_structure_score = [[0, 0], [0, 0]]
    total_clusters = len(bin_clusters)

    for _bin_data, models in bin_clusters:
        # Collect the loops of every model that has more than two of them.
        # Smaller models are skipped as exact or near-exact matches that
        # would trivially score 100%.
        loop_set = []
        for model in models:
            temp_loops = []
            model.get_loops(temp_loops)
            if len(temp_loops) > 2:
                loop_set.extend(temp_loops)

        # Nothing to test in this cluster: leave it out of the averages.
        if not loop_set:
            total_clusters -= 1
            continue

        cluster_model_score = 0
        cluster_structure_score = 0
        # [sum of structure scores, loop count] for hits (0) and misses (1).
        cluster_partial_structure_score = [[0, 0], [0, 0]]

        for loop in loop_set:
            scores = classify_loop_seq(loop.seq, models, blosum62)
            # Highest classifier score first.
            scores = sorted(scores, key=lambda x: -x[1])
            loop_model = Model.fromLoop(loop)
            best_candidate = scores[0][0]

            if not first_only:
                structure_score = best_candidate.compare(
                    loop_model, max_rmsd=-1, verbose=False)

                # Count the candidates ranked ahead of the one that really
                # contains this loop.  If none contains it, tries ends up
                # equal to len(scores) and the model score is 0.
                tries = 0
                for entry in scores:
                    candidate_loops = []
                    entry[0].get_loops(candidate_loops)
                    if loop in candidate_loops:
                        break
                    tries += 1

                # Higher score is better; the 0.0 forces float division.
                model_score = (len(scores) - tries) / (len(scores) + 0.0)
            else:
                # Only the top-ranked candidate counts.
                candidate_loops = []
                best_candidate.get_loops(candidate_loops)
                structure_score = best_candidate.compare(
                    loop_model, max_rmsd=-1, verbose=False)

                is_hit = loop in candidate_loops
                model_score = 1.0 if is_hit else 0.0
                bucket = cluster_partial_structure_score[0 if is_hit else 1]
                bucket[0] += structure_score
                bucket[1] += 1

            cluster_model_score += model_score
            cluster_structure_score += structure_score

        cluster_model_score /= len(loop_set)
        cluster_structure_score /= len(loop_set)

        # Fold this cluster's hit/miss means into the totals.  A bucket that
        # saw no loops is skipped explicitly instead of relying on a
        # swallowed ZeroDivisionError.
        for total_bucket, (bucket_sum, bucket_count) in zip(
                total_partial_structure_score,
                cluster_partial_structure_score):
            if bucket_count:
                total_bucket[0] += bucket_sum / bucket_count
                total_bucket[1] += 1

        total_model_score += cluster_model_score
        total_structure_score += cluster_structure_score

    if total_clusters != 0:
        total_model_score /= total_clusters
        total_structure_score /= total_clusters
        ps1 = _mean_or_nan(*total_partial_structure_score[0])
        ps2 = _mean_or_nan(*total_partial_structure_score[1])
        print("Total score: (%f, %f (%f, %f))" % (total_model_score, total_structure_score, ps1, ps2))
    else:
        print("Insufficient data to compute score")