Example #1
0
 def OutputSHMsPerPosition(self, output_fname):
     """Draw a bar chart of squared SHM multiplicities per position.

     For every SHM yielded by self.shms.SHMIter() the square of its
     multiplicity is added to the bin at shm.pos. There is one bin per
     position of the root amino-acid sequence. The three intervals stored
     in self.shms.cdr1_bounds, cdr2_bounds and cdr3_bounds are shaded
     behind the bars.

     The figure is saved to output_fname + '.svg', and output_fname + '.pdf'
     is passed to utils.OutputPlotToPdf.
     """
     root_aa = self.aa_dict.GetAAByIndex(self.GetRootIndex())
     num_positions = len(root_aa)

     squared_mult = [0] * num_positions
     for shm in self.shms.SHMIter():
         multiplicity = self.shms.GetSHMMultiplicity(shm)
         squared_mult[shm.pos] += multiplicity * multiplicity

     plt.figure()
     axes = plt.gca()
     # Shade each region from the x axis up to the tallest bar.
     shaded_regions = (self.shms.cdr1_bounds,
                       self.shms.cdr2_bounds,
                       self.shms.cdr3_bounds)
     for bounds in shaded_regions:
         left, right = bounds[0], bounds[1]
         region = Rectangle((left, 0),
                            right - left,
                            max(squared_mult),
                            facecolor='#FFB4B6')
         axes.add_patch(region)

     plt.bar(range(num_positions), squared_mult)
     plt.xlabel('AA position')
     plt.ylabel('Sum of squared SHM multiplicities')
     plt.savefig(output_fname + '.svg')
     utils.OutputPlotToPdf(output_fname + '.pdf')
 def ComputeVariabilityPlot(self, output_fname):
     """Plot how many distinct amino acids occur at each position.

     Only keys of self.aa_dict whose length equals self.most_freq_len are
     considered. Two attributes are (re)computed as a side effect:

       self.variability_sets -- one set per position holding the residues
                                seen at that position;
       self.variability      -- the sizes of those sets.

     The plot title reports the sum of len(self.aa_dict[aa]) over the
     sequences that were used. The figure is passed to
     utils.OutputPlotToPdf under output_fname.
     """
     seq_len = self.most_freq_len
     self.variability_sets = [set() for _ in range(seq_len)]

     num_used_seq = 0
     for aa in self.aa_dict:
         if len(aa) == seq_len:
             for pos, residues in enumerate(self.variability_sets):
                 residues.add(aa[pos])
             num_used_seq += len(self.aa_dict[aa])

     self.variability = [len(residues) for residues in self.variability_sets]

     _, ax = plt.subplots(1)
     # Leave a little headroom above the tallest bar.
     y_top = max(self.variability) + 0.25
     self._AddCDRsOnAminoAcidPlot(ax, y_top)
     plt.bar(range(seq_len), self.variability)
     plt.ylim(1, y_top)
     plt.xlabel('Amino-acid position')
     plt.ylabel('# amino-acids')
     plt.title(str(num_used_seq) + ' aa seq were used')
     utils.OutputPlotToPdf(output_fname)
Example #3
0
 def _OutputSHMsForV(self, v_gene, output_fname):
     """Plot the number of substitution SHMs per nucleotide position of a V gene.

     The SHM collections are read from
     self.gene_usage[dataset.AnnotatedGene.V][v_gene], one collection per
     root. Nothing is plotted when fewer than 5 collections are available.

     Args:
         v_gene: name of the V gene; also used in the plot title.
         output_fname: file name handed to utils.OutputPlotToPdf.
     """
     root_shms = self.gene_usage[dataset.AnnotatedGene.V][v_gene]
     num_roots = len(root_shms)
     if num_roots < 5:
         return

     # The x axis covers at least 300 positions. It must also be long enough
     # for the largest observed position to be a valid index, hence "+ 1":
     # sizing the list to max(shm.pos) itself made pos_mult[shm.pos] raise
     # IndexError for an SHM located at that maximal position.
     max_length = 300
     for shms in root_shms:
         for shm in shms:
             max_length = max(max_length, shm.pos + 1)

     pos_mult = [0] * max_length
     for shms in root_shms:
         for shm in shms:
             # Only substitutions are counted; other SHM kinds are skipped.
             if shm.IsSubstitution():
                 pos_mult[shm.pos] += 1

     plt.bar(range(max_length), pos_mult)
     plt.title(v_gene + ', ' + str(num_roots) + ' roots')
     plt.xlabel('Position (nt)')
     plt.ylabel('# roots')
     utils.OutputPlotToPdf(output_fname)
Example #4
0
    def OutputGraphSHMsAsMatrix(self, vertex_orderer, output_base):
        """Render the amino-acid differences along every edge as a heatmap.

        The matrix has one row per edge from self.EdgeIter() and one column
        per position of the root amino-acid sequence. Cell values:

          0 -- nothing to report;
          1 -- self._PositionIsInCDRs(column) is true;
          2 -- the two sequences joined by the edge differ at this position
               (takes precedence over 1).

        The heatmap is saved to output_base + ".svg", and output_base + '.pdf'
        is passed to utils.OutputPlotToPdf. Nothing is drawn when the graph
        has fewer than two vertices.

        vertex_orderer is accepted but not used by the current implementation.
        """
        if self.NumVertices() < 2:
            return

        num_columns = len(self.aa_dict.GetAAByIndex(self.GetRootIndex()))

        # Background layer: one row per edge with the CDR columns marked.
        matrix = [[1 if self._PositionIsInCDRs(col) else 0
                   for col in range(num_columns)]
                  for _ in self.EdgeIter()]

        # Foreground layer: positions changed along each edge.
        for row_idx, edge in enumerate(self.EdgeIter()):
            from_aa = self.aa_dict.GetAAByIndex(edge[0])
            to_aa = self.aa_dict.GetAAByIndex(edge[1])
            for col in range(len(from_aa)):
                if from_aa[col] != to_aa[col]:
                    matrix[row_idx][col] = 2

        sns.heatmap(matrix,
                    cmap='coolwarm',
                    xticklabels=[],
                    yticklabels=[],
                    cbar=False)
        plt.savefig(output_base + ".svg")
        utils.OutputPlotToPdf(output_base + '.pdf')
 def OutputNumCodonsPerAAPosition(self, output_fname):
     """Plot, per amino-acid position, how many codons encode the top residue.

     Only vertices of the undirected clonal tree whose amino-acid sequence
     has length self.most_freq_len are used. For each position the most
     frequent residue among those vertices is determined (the first one
     encountered wins ties), and the number of distinct nucleotide triplets
     encoding that residue at that position is plotted.

     Args:
         output_fname: file name handed to utils.OutputPlotToPdf.

     Raises:
         ValueError: from max() when self.most_freq_len is 0 or no vertex
             has a sequence of that length.
     """
     lineage = self.full_length_lineage
     tree = lineage.UndirectedClonalTree()
     num_positions = self.most_freq_len

     # Look up every vertex's sequences once instead of re-fetching them for
     # each position; keep only the ones of the expected length.
     usable = []
     for v in tree.VertexIter():
         full_seq = lineage.GetFullLengthSequenceByIndex(v)
         v_aa_seq = self.aa_dict.GetAAById(full_seq.id)
         if len(v_aa_seq) == num_positions:
             usable.append((v_aa_seq, full_seq.seq))

     codons = [0] * num_positions
     for i in range(num_positions):
         residue_counts = {}
         for v_aa_seq, _ in usable:
             residue = v_aa_seq[i]
             residue_counts[residue] = residue_counts.get(residue, 0) + 1
         # Iterating the dict itself works on both Python 2 and Python 3,
         # unlike dict.iteritems(), which no longer exists in Python 3.
         most_frequent = max(residue_counts, key=residue_counts.get)
         # Amino-acid position i corresponds to nucleotides [3i, 3i + 3).
         codons[i] = len({v_nucl_seq[i * 3:i * 3 + 3]
                          for v_aa_seq, v_nucl_seq in usable
                          if v_aa_seq[i] == most_frequent})

     _, ax = plt.subplots(1)
     self._AddCDRsOnAminoAcidPlot(ax, max(codons) + 0.25)
     plt.bar(range(num_positions), codons)
     plt.ylim(1, max(codons) + 0.25)
     utils.OutputPlotToPdf(output_fname)