Esempio n. 1
0
    def bestInfo(self, id1, id2, alignment, coevolution):
        """Report the best coevolution scores and draw the optional plots.

        The score table ``<dirname><alignment>_<coevolution>.txt`` is handed
        to ``bestResults`` together with the ``_best.txt`` output path, the
        "best_results" parameter and whatever surface/interface information
        could be parsed.  When the "results_histogram" / "results_heatmap"
        parameters compare equal to True, ``drawHistogram`` and
        ``drawHeatmap`` are called on the same score table with the
        ``_hg.png`` and ``_hm.png`` output paths.

        Args:
            id1: identifier passed to the surface/interface parsers and to
                ``drawHeatmap``.
            id2: identifier passed to the surface parser and to
                ``drawHeatmap``.
            alignment: name fragment used to build the file names.
            coevolution: name fragment used to build the file names.
        """
        seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                             self.chain1, self.chain2, self.parameterfile,
                             self.dirname)

        histogram = LP(self.parameterfile, "results_histogram")
        heatmap = LP(self.parameterfile, "results_heatmap")
        best_info = LP(self.parameterfile, "best_results")

        # Structural annotations are best-effort: if parsing fails the empty
        # defaults are used.  Only Exception is caught so that Ctrl-C and
        # interpreter shutdown are no longer silently swallowed.
        surface1 = []
        surface2 = []
        interface = []
        try:
            surface1 = seq.parseSurfacePDB(id1)
            surface2 = seq.parseSurfacePDB(id2)
        except Exception:
            pass

        try:
            interface = seq.parseInterfacePDB(id1)
        except Exception:
            pass

        # Every consumer reads the same score table; build its path once and
        # avoid shadowing the builtin ``input``.
        prefix = self.dirname + alignment + "_" + coevolution
        scores_path = prefix + ".txt"

        bestResults(scores_path, prefix + "_best.txt", best_info, surface1,
                    surface2, interface)

        # The explicit comparison with True is kept on purpose: the type LP
        # returns is not visible here, so plain truthiness could differ.
        if histogram == True:
            drawHistogram(scores_path, prefix + "_hg.png")

        if heatmap == True:
            drawHeatmap(id1, id2, scores_path, prefix + "_hm.png")
Esempio n. 2
0
 def bestInfo(self, id1, id2, alignment, coevolution):
     """Report the best coevolution scores and draw the optional plots.

     ``bestResults`` receives the score table
     ``<dirname><alignment>_<coevolution>.txt``, the ``_best.txt`` output
     path, the "best_results" parameter and the surface/interface lists
     (empty when they could not be parsed).  ``drawHistogram`` and
     ``drawHeatmap`` run on the same table when the "results_histogram" /
     "results_heatmap" parameters compare equal to True, writing to the
     ``_hg.png`` and ``_hm.png`` paths.

     Args:
         id1: identifier passed to the surface/interface parsers and to
             ``drawHeatmap``.
         id2: identifier passed to the surface parser and to
             ``drawHeatmap``.
         alignment: name fragment used to build the file names.
         coevolution: name fragment used to build the file names.
     """
     seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                          self.chain1, self.chain2, self.parameterfile,
                          self.dirname)

     histogram = LP(self.parameterfile, "results_histogram")
     heatmap = LP(self.parameterfile, "results_heatmap")
     best_info = LP(self.parameterfile, "best_results")

     # Parsing the structures is optional; on failure the empty lists are
     # kept.  Catching Exception (not a bare except) lets KeyboardInterrupt
     # and SystemExit propagate.
     surface1 = []
     surface2 = []
     interface = []
     try:
         surface1 = seq.parseSurfacePDB(id1)
         surface2 = seq.parseSurfacePDB(id2)
     except Exception:
         pass

     try:
         interface = seq.parseInterfacePDB(id1)
     except Exception:
         pass

     # One shared path stem for the table and every derived output; the
     # builtin name ``input`` is no longer shadowed.
     stem = self.dirname + alignment + "_" + coevolution
     table = stem + ".txt"

     bestResults(table, stem + "_best.txt", best_info, surface1, surface2,
                 interface)

     # ``== True`` is deliberate: what LP returns is not visible here, so
     # the comparison is left exactly as the original had it.
     if histogram == True:
         drawHistogram(table, stem + "_hg.png")

     if heatmap == True:
         drawHeatmap(id1, id2, table, stem + "_hm.png")
Esempio n. 3
0
    def coevolAnalysis(self, file1, file2, id1, id2, chain1, chain2, alignment,
                       coevolution):
        """Score every column pair of two alignments and write the scores.

        Both alignments are cut with ``cutAlignment`` and transposed into
        columns.  Every (column of alignment 1, column of alignment 2) pair
        is scored with the method named by `coevolution`, each score is
        divided by the largest raw score, and the result is written one pair
        per line to ``<dirname><alignment>_<coevolution>.txt``.  The method
        returns nothing.

        Recognised `coevolution` values are "mi", "mie", "rcwmi", "cpvn",
        "clm", "vol", "omes", "pearson", "spearman", "mcbasc", "quartets",
        "sca" and "elsc".  Any other value leaves the score table empty, so
        an empty file is written.

        Each output line holds two position labels and the normalized score
        rounded to four decimals.  The labels come from
        ``matchResiduePosition`` when that succeeds, otherwise they are the
        1-based column indices.

        Raises:
            StandardError: if the two cut alignments differ in length.
            ValueError: from ``max`` when a recognised method is selected
                but there are no column pairs to score.
        """
        seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                             self.chain1, self.chain2, self.parameterfile,
                             self.dirname)
        aln = class_alignment(self.id1, self.id2, self.alignment,
                              self.parameterfile, self.dirname)

        alignment1 = aln.cutAlignment(file1, id1, alignment)
        alignment2 = aln.cutAlignment(file2, id2, alignment)

        # Explicit check instead of ``assert``: asserts are stripped when
        # Python runs with -O, which would silently disable the validation.
        if len(alignment1) != len(alignment2):
            raise StandardError(
                "Alignments must have the same number of sequences")

        # Residue labels are best-effort; without them the output falls back
        # to column numbers.  Only Exception is caught so Ctrl-C still works.
        protein1 = []
        protein2 = []
        try:
            protein1 = seq.matchResiduePosition(id1, chain1)
            protein2 = seq.matchResiduePosition(id2, chain2)
        except Exception:
            pass

        alignment1 = list(alignment1)
        columns1 = transpose(alignment1)

        alignment2 = list(alignment2)
        columns2 = transpose(alignment2)

        info = dict()
        scorer = self._pairScorer(coevolution, alignment1, alignment2,
                                  columns1, columns2)
        if scorer is not None:
            raw = dict()
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    raw[(i, j)] = scorer(i, j)
            info = self._normalizeByMaximum(raw, len(columns1), len(columns2))

        # The comparisons against [] are kept literally (rather than using
        # truthiness) because the type returned by matchResiduePosition is
        # not visible here.
        both_mapped = protein1 != [] and protein2 != []
        first_only = protein1 != [] and protein2 == []

        output = self.dirname + alignment + "_" + coevolution + ".txt"
        # ``with`` guarantees the file is closed even if a write fails.
        with open(output, "w") as results:
            for i, j in sorted(info):
                if both_mapped:
                    labels = (protein1[i], protein2[j])
                elif first_only:
                    labels = (protein1[i], protein1[j])
                else:
                    labels = (str(i + 1), str(j + 1))
                results.write("%s %s %s\n" % (labels[0], labels[1],
                                              round(info[(i, j)], 4)))

    def _pairScorer(self, coevolution, alignment1, alignment2, columns1,
                    columns2):
        """Announce the selected method and return its per-pair scorer.

        Calls ``Flash`` with the method title, performs the one-off set-up
        the method needs, and returns a callable ``score(i, j)`` giving the
        raw score for column ``i`` of the first alignment and column ``j``
        of the second.  Returns None when `coevolution` is not a recognised
        method name.
        """
        if coevolution == "mi":
            Flash('Mutual Information')
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)
            return lambda i, j: mutualInformation(i, j, columns1, columns2,
                                                  pD1, pD2)

        if coevolution == "mie":
            Flash('Mutual Information by Pair Entropy')
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)
            return lambda i, j: miEntropy(i, j, columns1, columns2, pD1, pD2)

        if coevolution == "rcwmi":
            Flash('Row and Column Weighed Mutual Information')
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)

            # Compute each pairwise MI once and reuse it for the row sums,
            # the column sums and the weighting (the original recomputed it
            # three times).  Sums are accumulated in the same order as
            # before, so the floating-point totals are unchanged.
            mi = dict()
            i_all = dict()
            all_j = dict()
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    value = mutualInformation(i, j, columns1, columns2, pD1,
                                              pD2)
                    mi[(i, j)] = value
                    i_all[i] = i_all.get(i, 0) + value
                    all_j[j] = all_j.get(j, 0) + value

            n = len(columns1[0])
            return lambda i, j: rowColumnWeighed(mi[(i, j)], i_all[i],
                                                 all_j[j], n)

        matrix_methods = {
            "cpvn": ('Contact Preferences, Volume Normalized', "CPVN"),
            "clm": ('Contact PDB-derived Likelihood Matrix', "CLM"),
            "vol": ('Residue-residue Volume Normalized', "VOL"),
        }
        if coevolution in matrix_methods:
            title, matrix_name = matrix_methods[coevolution]
            Flash(title)
            score_matrix = mapMatrix(matrix_name)

            def reference_score(i, j):
                res1 = str(alignment1[0][i])
                res2 = str(alignment2[0][j])
                average = []
                # NOTE(review): the appended score depends only on the
                # first-row residues (res1, res2); a and b merely decide how
                # many copies are averaged.  Confirm this is intended.
                for a, b in zip(columns1[i], columns2[j]):
                    if a in aa and b in aa:
                        average.append(
                            float(matchScore(res1, res2, score_matrix)))
                return mean(average)

            return reference_score

        if coevolution in ("pearson", "spearman", "mcbasc"):
            if coevolution == "pearson":
                Flash("Pearson's correlation")
                correlate = pearsonsCorrelation
            elif coevolution == "spearman":
                Flash("Spearman's rank correlation")
                correlate = spearmansCorrelation
            else:
                Flash('McLachlan Based Substitution Correlation')
                correlate = mcbascCorrelation
            score_matrix = mapMatrix("MCLACHLAN")
            N = len(columns1[0])

            def correlation_score(i, j):
                d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
                d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
                return correlate(d_matrix1, d_matrix2, N)

            return correlation_score

        if coevolution == "omes":
            Flash('Observed Minus Expected Squared')
            return lambda i, j: covarianceOMES(columns1[i], columns2[j])

        if coevolution == "quartets":
            Flash('Quartets')
            return lambda i, j: quartetsCorrelation(columns1[i], columns2[j])

        if coevolution == "sca":
            Flash('Statistical Coupling Analysis')
            return lambda i, j: perturbationSCA(columns1[i], columns2[j],
                                                j, columns2)

        if coevolution == "elsc":
            Flash('Explicit Likelihood of Subset Covariation')
            return lambda i, j: perturbationELSC(columns1[i], columns2[j],
                                                 j, columns2)

        return None

    def _normalizeByMaximum(self, scores, n_rows, n_cols):
        """Return `scores` with every non-zero value divided by the maximum.

        `scores` maps ``(i, j)`` to a raw score for every ``i`` in
        ``range(n_rows)`` and ``j`` in ``range(n_cols)``.  Entries equal to
        0.0 stay 0.0.  Keys are visited row by row so the maximum is taken
        over the values in the same order as the original nested loops.
        """
        keys = [(i, j) for i in range(n_rows) for j in range(n_cols)]
        max_val = max([scores[key] for key in keys])

        # NOTE(review): a zero or negative maximum is not special-cased,
        # exactly as before -- confirm whether such score sets can occur.
        info = dict()
        for key in keys:
            if scores[key] != 0.0:
                info[key] = scores[key] * 1.0 / max_val
            else:
                info[key] = 0.0
        return info
Esempio n. 4
0
    def coevolAnalysis(self, file1, file2, id1, id2,
                       chain1, chain2, alignment, coevolution):
        "Returns a matrix of coevolution scores"
        
        seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                       self.chain1, self.chain2, self.parameterfile,
                       self.dirname)
        aln = class_alignment(self.id1, self.id2, self.alignment,
                              self.parameterfile, self.dirname)
        
        alignment1 = aln.cutAlignment(file1, id1, alignment)
        alignment2 = aln.cutAlignment(file2, id2, alignment)

        try:
            assert len(alignment1) == len(alignment2)
        except:
            raise StandardError, "Alignments must have the same number of sequences"
            
        protein1 = []
        protein2 = []
        try:
            protein1 = seq.matchResiduePosition(id1, chain1)
            protein2 = seq.matchResiduePosition(id2, chain2)
        except:
            pass

        info = dict()
        alignment1 = [e for e in alignment1]
        columns1 = transpose(alignment1)

        alignment2 = [e for e in alignment2]
        columns2 = transpose(alignment2)
            
        if coevolution == "mi":
            Flash('Mutual Information')
            mi = dict()
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)
         
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    mi[(i, j)] = mutualInformation(i, j, columns1, columns2, pD1, pD2)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(mi[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if mi[(i, j)] != 0.0:
                        info[(i, j)] = mi[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
        
        elif coevolution == "mie":
            Flash('Mutual Information by Pair Entropy')
            mie = dict()
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)
         
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    mie[(i, j)] = miEntropy(i, j, columns1, columns2, pD1, pD2)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(mie[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if mie[(i, j)] != 0.0:
                        info[(i, j)] = mie[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "rcwmi":
            Flash('Row and Column Weighed Mutual Information')
            rcwmi = dict()
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)
         
            i_all = dict()
            all_j = dict()
            for i in range(len(columns1)):
                v_i = 0
                for j in range(len(columns2)):
                    v_i += mutualInformation(i, j, columns1, columns2,
                                             pD1, pD2)
                    i_all[i] = v_i

            for j in range(len(columns2)):
                v_j = 0
                for i in range(len(columns1)):
                    v_j += mutualInformation(i, j, columns1, columns2,
                                             pD1, pD2)
                    all_j[j] = v_j
            
            column = columns1[0]
            n = len(column)
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    mi = mutualInformation(i, j, columns1, columns2,
                                           pD1, pD2)    
                    rcwmi[(i, j)] = rowColumnWeighed(mi,
                                                   i_all[i], all_j[j], n)
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(rcwmi[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if rcwmi[(i, j)] != 0.0:
                        info[(i, j)] = rcwmi[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "cpvn":
            Flash('Contact Preferences, Volume Normalized')
            cpvn = dict()
            score_matrix = mapMatrix("CPVN")
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    res1 = str(alignment1[0][i])
                    res2 = str(alignment2[0][j])
                    average = []
                    for a, b in zip(columns1[i], columns2[j]):
                        if a in aa and b in aa:
                            average.append(float(matchScore(res1, res2, score_matrix)))
                    cpvn[(i, j)] = mean(average)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(cpvn[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if cpvn[(i, j)] != 0.0:
                        info[(i, j)] = cpvn[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0

        elif coevolution == "clm":
            Flash('Contact PDB-derived Likelihood Matrix')
            clm = dict()
            score_matrix = mapMatrix("CLM")
            for i in range(len(alignment1[0])):
                Flash('Column ' + str(i))
                for j in range(len(alignment2[0])):
                    res1 = str(alignment1[0][i])
                    res2 = str(alignment2[0][j])
                    average = []
                    for a, b in zip(columns1[i], columns2[j]):
                        if a in aa and b in aa:
                            average.append(float(matchScore(res1, res2, score_matrix)))
                    clm[(i, j)] = mean(average)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(clm[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if clm[(i, j)] != 0.0:
                        info[(i, j)] = clm[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "vol":
            Flash('Residue-residue Volume Normalized')
            vol = dict()
            score_matrix = mapMatrix("VOL")
            for i in range(len(alignment1[0])):
                Flash('Column ' + str(i))
                for j in range(len(alignment2[0])):
                    res1 = str(alignment1[0][i])
                    res2 = str(alignment2[0][j])
                    average = []
                    for a, b in zip(columns1[i], columns2[j]):
                        if a in aa and b in aa:
                            average.append(float(matchScore(res1, res2, score_matrix)))
                    vol[(i, j)] = mean(average)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(vol[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if vol[(i, j)] != 0.0:
                        info[(i, j)] = vol[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "omes":
            Flash('Observed Minus Expected Squared')
            omes = dict()
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    omes[(i, j)] = covarianceOMES(columns1[i], columns2[j])
                    
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(omes[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if omes[(i, j)] != 0.0:
                        info[(i, j)] = omes[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "pearson":
            Flash("Pearson's correlation")
            pearson = dict()
            score_matrix = mapMatrix("MCLACHLAN")
            N = len(columns1[0])
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
                    d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
                    pearson[(i, j)] = pearsonsCorrelation(d_matrix1, d_matrix2, N)
                    
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(pearson[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if pearson[(i, j)] != 0.0:
                        info[(i, j)] = pearson[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "spearman":
            Flash("Spearman's rank correlation")
            score_matrix = mapMatrix("MCLACHLAN")
            spearman = dict()
            N = len(columns1[0])
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
                    d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
                    spearman[(i, j)] = spearmansCorrelation(d_matrix1, d_matrix2, N)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(spearman[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if spearman[(i, j)] != 0.0:
                        info[(i, j)] = spearman[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "mcbasc":
            Flash('McLachlan Based Substitution Correlation')
            mcbasc = dict()
            score_matrix = mapMatrix("MCLACHLAN")
            N = len(columns1[0])
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
                    d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
                    mcbasc[(i, j)] = mcbascCorrelation(d_matrix1, d_matrix2, N)
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(mcbasc[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if mcbasc[(i, j)] != 0.0:
                        info[(i, j)] = mcbasc[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0 
        
        elif coevolution == "quartets":
            Flash('Quartets')
            quartets = dict()
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    quartets[(i, j)] = quartetsCorrelation(columns1[i], columns2[j])
            
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(quartets[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if quartets[(i, j)] != 0.0:
                        info[(i, j)] = quartets[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                        
        elif coevolution == "sca":
            Flash('Statistical Coupling Analysis')
            sca = dict()   
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    sca[(i, j)] = perturbationSCA(columns1[i], columns2[j], \
                                                  j, columns2)
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(sca[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if sca[(i, j)] != 0.0:
                        info[(i, j)] = sca[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0
                    
        elif coevolution == "elsc":
            Flash('Explicit Likelihood of Subset Covariation') 
            elsc = dict()  
            for i in range(len(columns1)):
                Flash('Column ' + str(i))
                for j in range(len(columns2)):
                    elsc[(i, j)] = perturbationELSC(columns1[i], columns2[j], \
                                                   j, columns2)       
            max_pos = []
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    max_pos.append(elsc[(i, j)])
            max_val = max(max_pos)
                    
            for i in range(len(columns1)):
                for j in range(len(columns2)):
                    if elsc[(i, j)] != 0.0:
                        info[(i, j)] = elsc[(i, j)] * 1.0 / max_val
                    else:
                        info[(i, j)] = 0.0               
        else: pass
        
        output = self.dirname + alignment + "_" + coevolution + ".txt"
        results = open(output, "w")
        for i, j in sorted(info.keys()):
            if protein1 != [] and protein2 != []:
                print >> results, protein1[i], protein2[j], \
                round((info[(i, j)]), 4)
            elif protein1 != [] and protein2 == []:
                print >> results, protein1[i], protein1[j], \
                round((info[(i, j)]), 4)
            else:
                print >> results, str(i + 1), str(j + 1), \
                round((info[(i, j)]), 4)
        results.close()
Esempio n. 5
0
    def coevolAnalysis(self, file1, file2, id1, id2,
                       chain1, chain2, alignment, coevolution):
        """Score every column pair of two alignments and write the scores.

        Both alignments are cut with ``cutAlignment`` and transposed into
        columns. Each pair (column i of the first alignment, column j of the
        second) is scored with the method selected by ``coevolution`` and
        the scores are written to
        ``./Results/<alignment>_<coevolution>.txt``, one line per pair:
        two position labels followed by the score rounded to 4 decimals.
        The method itself returns nothing.

        coevolution -- one of "mi", "mie", "rcwmi", "cpvnmie", "cpvn",
            "clm", "vol", "omes", "pearson", "spearman", "mcbasc",
            "quartets", "sca" or "elsc". Any other value leaves the score
            table empty, so an empty results file is written.

        Scores of "omes", "spearman", "quartets", "sca" and "elsc" are
        divided by the largest score found; the other methods are written
        as computed.

        Position labels come from ``matchResiduePosition`` when that lookup
        succeeds; otherwise 1-based column indices are used.

        Raises StandardError when the two cut alignments differ in length.
        """

        seq = class_sequence(self.file1, self.file2, self.id1, self.id2,
                             self.chain1, self.chain2)
        aln = class_alignment(self.id1, self.id2, self.alignment)

        alignment1 = aln.cutAlignment(file1, id1, alignment)
        alignment2 = aln.cutAlignment(file2, id2, alignment)

        # Explicit check rather than an assert: asserts are removed when
        # Python runs with -O, which would silently disable the validation.
        if len(alignment1) != len(alignment2):
            raise StandardError(
                "Alignments must have the same number of sequences")

        # Residue labels are optional: when the lookup fails the output
        # falls back to other labels (see the writing loop below).
        protein1 = []
        protein2 = []
        try:
            protein1 = seq.matchResiduePosition(id1, chain1)
            protein2 = seq.matchResiduePosition(id2, chain2)
        except Exception:
            # Deliberately best-effort, but no longer swallows
            # KeyboardInterrupt / SystemExit as a bare "except:" would.
            pass

        alignment1 = list(alignment1)
        columns1 = transpose(alignment1)

        alignment2 = list(alignment2)
        columns2 = transpose(alignment2)

        rows = range(len(columns1))
        cols = range(len(columns2))
        # Every (i, j) column pair in row-major order.
        pairs = [(i, j) for i in rows for j in cols]

        def scaledByMax(raw):
            "Divides each raw score by the largest one; zeros stay 0.0"
            if not raw:
                # No column pairs at all: nothing to scale (max() of an
                # empty list would raise ValueError).
                return dict()
            # NOTE(review): a maximum of exactly 0 combined with negative
            # scores would divide by zero here, as in the original code.
            max_val = max([raw[pair] for pair in pairs])
            scaled = dict()
            for pair in pairs:
                if raw[pair] != 0.0:
                    scaled[pair] = raw[pair] * 1.0 / max_val
                else:
                    scaled[pair] = 0.0
            return scaled

        def substitutionScores(correlate):
            "Applies a correlation function to MCLACHLAN-mapped columns"
            score_matrix = mapMatrix("MCLACHLAN")
            N = len(columns1[0])
            scores = dict()
            for i, j in pairs:
                # Matrices are rebuilt for every pair on purpose: it is not
                # visible here whether the correlation functions modify
                # their arguments.
                d_matrix1 = twoDimensionalMatrix(columns1[i], score_matrix)
                d_matrix2 = twoDimensionalMatrix(columns2[j], score_matrix)
                scores[(i, j)] = correlate(d_matrix1, d_matrix2, N)
            return scores

        def matrixScores(matrix_name):
            "Averages the first-row residue pair score over valid rows"
            score_matrix = mapMatrix(matrix_name)
            scores = dict()
            for i, j in pairs:
                res1 = str(alignment1[0][i])
                res2 = str(alignment2[0][j])
                # NOTE(review): every collected value is the score of
                # (res1, res2); a and b only decide how many copies are
                # averaged. Confirm whether matchScore(a, b, ...) was meant.
                average = [float(matchScore(res1, res2, score_matrix))
                           for a, b in zip(columns1[i], columns2[j])
                           if a in aa and b in aa]
                scores[(i, j)] = mean(average)
            return scores

        def perturbationScores(perturbation):
            "Applies a perturbation method (SCA / ELSC) to each pair"
            scores = dict()
            for i, j in pairs:
                scores[(i, j)] = perturbation(columns1[i], columns2[j],
                                              j, columns2)
            return scores

        info = dict()

        if coevolution == "mi" or coevolution == "mie":
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)
            if coevolution == "mi":
                measure = mutualInformation
            else:
                measure = miEntropy
            for i, j in pairs:
                info[(i, j)] = measure(i, j, columns1, columns2, pD1, pD2)

        elif coevolution == "rcwmi":
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)

            # Compute each pairwise MI once and reuse it for the row sums,
            # the column sums and the final weighting (the original called
            # mutualInformation three times per pair).
            mi = dict()
            for i, j in pairs:
                mi[(i, j)] = mutualInformation(i, j, columns1, columns2,
                                               pD1, pD2)

            i_all = dict()
            for i in rows:
                i_all[i] = sum(mi[(i, j)] for j in cols)

            all_j = dict()
            for j in cols:
                all_j[j] = sum(mi[(i, j)] for i in rows)

            n = len(columns1[0])
            for i, j in pairs:
                info[(i, j)] = rowColumnWeighed(mi[(i, j)],
                                                i_all[i], all_j[j], n)

        elif coevolution == "cpvnmie":
            pD1 = probabilityDict(columns1)
            pD2 = probabilityDict(columns2)

            for i, j in pairs:
                res1 = str(alignment1[0][i])
                res2 = str(alignment2[0][j])
                mie = miEntropy(i, j, columns1, columns2, pD1, pD2)
                info[(i, j)] = contactPreferenceMI(mie, res1, res2)

        elif coevolution == "cpvn":
            info = matrixScores("CPVN")

        elif coevolution == "clm":
            info = matrixScores("CLM")

        elif coevolution == "vol":
            info = matrixScores("VOL")

        elif coevolution == "omes":
            omes = dict()
            for i, j in pairs:
                omes[(i, j)] = covarianceOMES(columns1[i], columns2[j])
            info = scaledByMax(omes)

        elif coevolution == "pearson":
            info = substitutionScores(pearsonsCorrelation)

        elif coevolution == "spearman":
            info = scaledByMax(substitutionScores(spearmansCorrelation))

        elif coevolution == "mcbasc":
            info = substitutionScores(mcbascCorrelation)

        elif coevolution == "quartets":
            quartets = dict()
            for i, j in pairs:
                quartets[(i, j)] = quartetsCorrelation(columns1[i],
                                                       columns2[j])
            info = scaledByMax(quartets)

        elif coevolution == "sca":
            info = scaledByMax(perturbationScores(perturbationSCA))

        elif coevolution == "elsc":
            info = scaledByMax(perturbationScores(perturbationELSC))

        output = "./Results/" + alignment + "_" + coevolution + ".txt"
        # "with" guarantees the file is closed even if a label lookup or
        # the rounding below raises part-way through.
        with open(output, "w") as results:
            for i, j in sorted(info):
                if protein1 != [] and protein2 != []:
                    print >> results, protein1[i], protein2[j], \
                    round((info[(i, j)]), 4)
                elif protein1 != [] and protein2 == []:
                    # Only the first lookup produced labels: both
                    # positions are labelled from protein1.
                    print >> results, protein1[i], protein1[j], \
                    round((info[(i, j)]), 4)
                else:
                    # No residue labels: use 1-based column numbers.
                    print >> results, str(i+1), str(j+1), \
                    round((info[(i, j)]), 4)