def OutputSHMsPerPosition(self, output_fname):
    """Plot the sum of squared SHM multiplicities per amino-acid position.

    The x axis spans the positions of the root amino-acid sequence
    (``self.aa_dict.GetAAByIndex(self.GetRootIndex())``).  For every SHM
    yielded by ``self.shms.SHMIter()`` the square of its multiplicity is
    added to the bar at ``shm.pos``.  The ranges stored in
    ``self.shms.cdr1_bounds``, ``cdr2_bounds`` and ``cdr3_bounds`` are
    marked with filled rectangles as tall as the highest bar.

    The figure is saved to ``output_fname + '.svg'`` and then passed to
    ``utils.OutputPlotToPdf`` with ``output_fname + '.pdf'``.

    Raises ValueError (from ``max``) if the root sequence is empty.
    """
    aa_len = len(self.aa_dict.GetAAByIndex(self.GetRootIndex()))
    num_shms = [0] * aa_len
    for shm in self.shms.SHMIter():
        # Look the multiplicity up once instead of twice per SHM.
        multiplicity = self.shms.GetSHMMultiplicity(shm)
        num_shms[shm.pos] += multiplicity * multiplicity
    plt.figure()
    ax = plt.gca()
    # All three CDR rectangles share the same height: the tallest bar.
    highlight_height = max(num_shms)
    cdr_bounds = (self.shms.cdr1_bounds, self.shms.cdr2_bounds,
                  self.shms.cdr3_bounds)
    for bounds in cdr_bounds:
        ax.add_patch(
            Rectangle((bounds[0], 0),
                      bounds[1] - bounds[0],
                      highlight_height,
                      facecolor='#FFB4B6'))
    plt.bar(range(aa_len), num_shms)
    plt.xlabel('AA position')
    plt.ylabel('Sum of squared SHM multiplicities')
    plt.savefig(output_fname + '.svg')
    utils.OutputPlotToPdf(output_fname + '.pdf')
def ComputeVariabilityPlot(self, output_fname):
    """Plot how many distinct amino acids occur at each position.

    Only keys of ``self.aa_dict`` whose length equals
    ``self.most_freq_len`` are considered.  Two attributes are (re)built:

    * ``self.variability_sets`` -- one set per position holding the
      amino acids seen there;
    * ``self.variability`` -- the size of each of those sets.

    The bar plot is decorated via ``self._AddCDRsOnAminoAcidPlot``, titled
    with the sum of ``len(self.aa_dict[aa])`` over the considered keys,
    and written out through ``utils.OutputPlotToPdf(output_fname)``.
    """
    # One empty set per amino-acid position.
    self.variability_sets = [set() for _ in range(self.most_freq_len)]
    num_used_seq = 0
    for aa_seq in self.aa_dict:
        if len(aa_seq) == self.most_freq_len:
            for pos, seen_at_pos in enumerate(self.variability_sets):
                seen_at_pos.add(aa_seq[pos])
            num_used_seq += len(self.aa_dict[aa_seq])
    self.variability = [len(seen) for seen in self.variability_sets]

    # Leave a small margin above the tallest bar.
    y_top = max(self.variability) + 0.25
    _fig, ax = plt.subplots(1)
    self._AddCDRsOnAminoAcidPlot(ax, y_top)
    plt.bar(range(self.most_freq_len), self.variability)
    plt.ylim(1, y_top)
    plt.xlabel('Amino-acid position')
    plt.ylabel('# amino-acids')
    plt.title(str(num_used_seq) + ' aa seq were used')
    utils.OutputPlotToPdf(output_fname)
def _OutputSHMsForV(self, v_gene, output_fname):
    """Plot the number of substitution SHMs per position for one V gene.

    The entries stored under
    ``self.gene_usage[dataset.AnnotatedGene.V][v_gene]`` (called "roots"
    in the plot title) are each iterated as a collection of SHM objects
    exposing ``pos`` and ``IsSubstitution()``.  For every substitution
    the counter at ``shm.pos`` is incremented, and the counters are drawn
    as a bar plot written via ``utils.OutputPlotToPdf(output_fname)``.

    Nothing is plotted when fewer than 5 roots are available.
    """
    roots = self.gene_usage[dataset.AnnotatedGene.V][v_gene]
    num_roots = len(roots)
    if num_roots < 5:
        return
    # The x axis covers at least 300 positions.  It is widened to
    # ``pos + 1`` so that the largest observed position is a valid index;
    # sizing it to ``pos`` alone made ``pos_mult[pos]`` go out of range.
    max_length = 300
    for shms in roots:
        for shm in shms:
            max_length = max(max_length, shm.pos + 1)
    pos_mult = [0] * max_length
    for shms in roots:
        for shm in shms:
            if shm.IsSubstitution():
                pos_mult[shm.pos] += 1
    plt.bar(range(max_length), pos_mult)
    plt.title(v_gene + ', ' + str(num_roots) + ' roots')
    plt.xlabel('Position (nt)')
    plt.ylabel('# roots')
    utils.OutputPlotToPdf(output_fname)
def OutputGraphSHMsAsMatrix(self, vertex_orderer, output_base):
    """Draw an edge-by-position heatmap of amino-acid changes.

    Each row corresponds to one edge yielded by ``self.EdgeIter()`` and
    each column to a position of the root amino-acid sequence.  Cell
    values are:

    * 0 -- position for which ``self._PositionIsInCDRs`` is false;
    * 1 -- position for which ``self._PositionIsInCDRs`` is true;
    * 2 -- the amino acids of the edge's source (``e[0]``) and
      destination (``e[1]``) vertices differ at this position.

    The heatmap is saved to ``output_base + ".svg"`` and then passed to
    ``utils.OutputPlotToPdf`` with ``output_base + '.pdf'``.  Nothing is
    produced when the graph has fewer than two vertices.

    ``vertex_orderer`` is not used by the current implementation; the
    parameter is kept so existing callers keep working.
    """
    if self.NumVertices() < 2:
        return
    aa_len = len(self.aa_dict.GetAAByIndex(self.GetRootIndex()))
    # CDR membership depends only on the column, so evaluate it once and
    # copy the resulting row for every edge.
    cdr_row = [1 if self._PositionIsInCDRs(pos) else 0
               for pos in range(aa_len)]
    matrix = []
    for edge in self.EdgeIter():
        src_aa = self.aa_dict.GetAAByIndex(edge[0])
        dst_aa = self.aa_dict.GetAAByIndex(edge[1])
        row = list(cdr_row)
        for pos in range(len(src_aa)):
            if src_aa[pos] != dst_aa[pos]:
                # A substitution overrides the CDR/non-CDR marking.
                row[pos] = 2
        matrix.append(row)
    sns.heatmap(matrix,
                cmap='coolwarm',
                xticklabels=[],
                yticklabels=[],
                cbar=False)
    plt.savefig(output_base + ".svg")
    utils.OutputPlotToPdf(output_base + '.pdf')
def OutputNumCodonsPerAAPosition(self, output_fname):
    """Plot, per amino-acid position, how many codons encode the top residue.

    For every vertex of ``self.full_length_lineage.UndirectedClonalTree()``
    the full-length record is fetched, and its amino-acid sequence is
    looked up with ``self.aa_dict.GetAAById(record.id)``.  Vertices whose
    amino-acid sequence length differs from ``self.most_freq_len`` are
    ignored.  At each position the most frequent amino acid is found,
    and the number of distinct nucleotide triplets
    (``seq[3 * i:3 * i + 3]``) among the sequences carrying that amino
    acid is plotted.  The plot is decorated via
    ``self._AddCDRsOnAminoAcidPlot`` and written through
    ``utils.OutputPlotToPdf(output_fname)``.

    Raises ValueError (from ``max``) if ``self.most_freq_len`` is zero, or
    if it is positive and no vertex has an amino-acid sequence of that
    length.
    """
    tree = self.full_length_lineage.UndirectedClonalTree()
    # Gather (nucleotide sequence, amino-acid sequence) once per vertex
    # rather than repeating the lookups for every position.
    sequences = []
    for v in tree.VertexIter():
        record = self.full_length_lineage.GetFullLengthSequenceByIndex(v)
        aa_seq = self.aa_dict.GetAAById(record.id)
        if len(aa_seq) != self.most_freq_len:
            continue
        sequences.append((record.seq, aa_seq))

    codons = [0] * self.most_freq_len
    for i in range(self.most_freq_len):
        aa_counts = dict()
        for _, aa_seq in sequences:
            aa_counts[aa_seq[i]] = aa_counts.get(aa_seq[i], 0) + 1
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        most_frequent_aa = max(aa_counts.items(),
                               key=operator.itemgetter(1))[0]
        cur_codons = set()
        for nucl_seq, aa_seq in sequences:
            if aa_seq[i] == most_frequent_aa:
                cur_codons.add(nucl_seq[i * 3:i * 3 + 3])
        codons[i] = len(cur_codons)

    # Leave a small margin above the tallest bar.
    y_top = max(codons) + 0.25
    _fig, ax = plt.subplots(1)
    self._AddCDRsOnAminoAcidPlot(ax, y_top)
    plt.bar(range(self.most_freq_len), codons)
    plt.ylim(1, y_top)
    utils.OutputPlotToPdf(output_fname)