# One bar per density value: uniform bar widths and the density as x label.
d_widths = [.5] * len(densities)
d_labels = [str(density) for density in densities]

#################################################################################
#graph subplot for u = 2
A2 = makeA(1)
mycolorlistu2 = [
    (0.9769448735268946, 0.6468696110452877, 0.2151452804329661),
    (0.37645505989354233, 0.6875228836084111, 0.30496111115768654),
    (0.39140782566655674, 0.7613012099948101, 0.7475874114794775),
]

# Top strip of a 20x7 grid: 5 rows high, spanning all 7 columns.
ax1 = plt.subplot2grid((20, 7), (0, 0), rowspan=5, colspan=7)
SBG = StackedBarGrapher()
SBG.stackedBarPlot(
    ax1,
    A2,
    mycolorlistu2,
    xLabels=d_labels,
    yTicks=3,
    widths=d_widths,
    gap=0.005,
    scale=False,
)

# For every bar, walk its non-zero segments; from the second segment on, yy is
# half the segment's height plus the summed height of the segments below it.
for i, row in enumerate(A2):
    Ai = [x for x in row if x > 0]
    y = [x / 2.0 for x in Ai]
    for j in range(len(Ai)):
        if j > 0:
            yy = y[j] + np.sum(Ai[:j])
def _gene_usage_y_ticks(highest_percent):
    """Return ``[tick_positions, tick_labels]`` for the percentage axis.

    Ticks start at 0 and advance in steps of 2 until they pass the maximum
    (rounded up to an even number).  While more than 14 ticks remain, every
    other tick is dropped; 0 is always kept as the first tick.  Positions and
    labels hold the same values but are separate list objects.
    """
    upper = math.ceil(highest_percent)
    if upper % 2 != 0:
        upper += 1

    ticks, tick = [0], 0
    while tick <= upper:
        tick += 2
        ticks.append(tick)

    while len(ticks) > 14:
        if len(ticks) % 2 == 0:
            # Even count: thin the ticks after 0, then put 0 back in front.
            ticks = [0] + ticks[1:][::2]
        else:
            ticks = ticks[::2]
    return [ticks, list(ticks)]


def _save_gene_usage_figure(SBG, data, bar_colors, d_labels, y_ticks, title,
                            out_path, highlighted=()):
    """Draw one stacked gene-usage bar chart and save it to ``out_path``.

    X tick labels whose text is contained in ``highlighted`` are drawn blue.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    SBG.stackedBarPlot(ax,
                       data,
                       bar_colors,
                       xLabels=d_labels,
                       edgeCols=['#000000'] * len(bar_colors),
                       yTicks=y_ticks,
                       ylabel='Percent of reads (%)',
                       gap=.2,
                       endGaps=True)
    plt.title(title)
    fig.subplots_adjust(bottom=0.4)
    plt.tight_layout()
    for t in ax.xaxis.get_ticklabels():
        t.set_horizontalalignment('center')
        # get_text() reads the label directly instead of parsing repr(Text).
        if t.get_text() in highlighted:
            t.set_color('blue')
    plt.savefig(out_path, dpi=300)
    plt.close(fig)


def plot_gene_usage(geneusage_data, columns, pic_type, germline_gene_list,
                    germline_type):
    """Plot per-gene usage percentages as stacked bars, in colour and in black.

    Each row of ``geneusage_data`` is summed and expressed as a percentage of
    the grand total; rows are grouped by the part of the matching ``columns``
    entry before ``'*'``.  Every name returned by
    ``get_all_gene_name(germline_gene_list)`` gets one bar whose segments are
    that group's percentages in descending order.

    Two PNG files are written below ``prj_tree.figure`` (``..._identity...``
    with coloured segments and blue labels for the groups present in
    ``columns``, ``..._percentage...`` in black), and the grouped percentages
    are passed to ``record_gene_usage_percent``.  Relies on the module-level
    names ``prj_name``, ``chain_type`` and ``prj_tree``.

    Args:
        geneusage_data: 2-D array-like of counts, one row per ``columns`` entry.
        columns: names of the rows; the text before ``'*'`` is the group key.
        pic_type: suffix inserted into the output file names.
        germline_gene_list: forwarded to ``get_all_gene_name``.
        germline_type: label used in the title, file names and messages.

    Returns:
        None.  When the counts sum to zero a message is printed and nothing
        is plotted or recorded.
    """
    d = np.array(geneusage_data)
    total_number = np.sum(d)
    if total_number == 0:
        print("There is no %s" % germline_type)
        return

    detected_genes = set(x.split('*')[0] for x in columns)

    # Percentage of all reads contributed by each row, grouped by gene name.
    gene_number_dict = {}
    for column, line_number in zip(columns, np.sum(d, axis=1)):
        percent = float(line_number) / float(total_number) * 100
        gene_number_dict.setdefault(column.split('*')[0], []).append(percent)

    max_len_value = max([len(value) for value in gene_number_dict.values()]
                        or [0])

    # One row per known gene; segments sorted from largest to smallest and
    # zero-padded to the longest group.
    gene_names = get_all_gene_name(germline_gene_list)
    data = np.zeros((len(gene_names), max_len_value))
    for index, gene in enumerate(gene_names):
        percents = gene_number_dict.get(gene)
        if percents is not None:
            data[index, :len(percents)] = sorted(percents, reverse=True)

    d_colors = [
        '#2166ac', '#fee090', '#fdbb84', '#fc8d59', '#e34a33', '#b30000',
        '#777777'
    ] * 3
    y_ticks = _gene_usage_y_ticks(max(np.sum(data, axis=1)))
    title = "%s %s %s gene usage" % (prj_name, chain_type, germline_type)

    SBG = StackedBarGrapher()
    _save_gene_usage_figure(
        SBG, data, d_colors, gene_names, y_ticks, title,
        '%s/%s_%s_%s_gene_usage_identity%s.png' %
        (prj_tree.figure, prj_name, chain_type, germline_type, pic_type),
        highlighted=detected_genes)

    #Plot black percentage gene usage
    _save_gene_usage_figure(
        SBG, data, ['#000000'] * len(d_colors), gene_names, y_ticks, title,
        '%s/%s_%s_%s_gene_usage_percentage%s.png' %
        (prj_tree.figure, prj_name, chain_type, germline_type, pic_type))

    #Write down gene usage percentage info
    record_gene_usage_percent(gene_number_dict, germline_type, pic_type)
post_A_2 = [] for i in pre_A_2: total = sum(i) post_A_partial = [] for j in i: post_A_partial.append((j/float(total))*100) post_A_2.append(post_A_partial) my_color_list_2 = [ (0.9769448735268946, 0.6468696110452877, 0.2151452804329661), (0.7645505989354233, 0.4875228836084111, 0.30496111115768654), (0.1562085876188265, 0.44786703170340336, 0.9887241674046707), ] ax1 = plt.subplot2grid((20,7), (0,0), rowspan = 4, colspan=7) SBG = StackedBarGrapher() SBG.stackedBarPlot(ax1, post_A_2, my_color_list_2, xLabels=d_labels, yTicks=3, widths=d_widths, gap = 0.005, scale=False ) for i in range(len(post_A_2)): Ai = [x for x in post_A_2[i] if x>0] y = [x/2.0 for x in Ai] for j in range(len(Ai)): if j>0:
import seaborn as sb
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np

SBDIR = '~/soft/src/dev/tools/stackedBarGraph/'
GFDIR = '/na/home/splis/soft/src/dev/craft/gunfolds/tools/'
import sys
import os
# The project modules imported below are looked up in these directories.
sys.path.append(os.path.expanduser(SBDIR))
sys.path.append(os.path.expanduser(GFDIR))
import zickle as zkl
from stackedBarGraph import StackedBarGrapher

SBG = StackedBarGrapher()


def gettimes(d):
    """Return the ``'ms'`` field of every record in ``d`` converted to minutes.

    Args:
        d: iterable of mappings that each carry an ``'ms'`` entry.

    Returns:
        A list (never a lazy iterator) with one value per record, each equal
        to ``ms / 1000 / 60``.
    """
    return [x['ms'] / 1000. / 60. for x in d]


# (value, result file) pairs; the first element steps from 0.15 to 0.35 while
# the file names step the node count from 15 to 35.
l = [(0.15, 'leibnitz_nodes_15_density_0.1_newp_.zkl'),
     (0.20, 'leibnitz_nodes_20_density_0.1_newp_.zkl'),
     (0.25, 'leibnitz_nodes_25_density_0.1_newp_.zkl'),
     (0.30, 'leibnitz_nodes_30_density_0.1_newp_.zkl'),
     (0.35, 'leibnitz_nodes_35_density_0.1_newp_.zkl')]

# ``pl`` (pylab) is not imported in this script; pyplot provides the same
# ``figure`` function under the name that is imported.
fig = plt.figure(figsize=[10,3])
#Read in data & create total column
(0.39140782566655674, 0.761012099948101, 0.7475874114794775), (0.0965359591761811, 0.43566457484639054, 0.9375581594394308), (0.859944654091911, 0.208070821188862, 0.8893517695418856), (0.022700048163251885, 0.658455757390323, 0.45194508876647577), (0.5934259725250017, 0.6259544064286037, 0.8943937276483482), (0.1248759682295419, 0.1286185769691658, 0.6973677590395778), (0.1834548561930609, 0.8625908063396674, 0.2808367027257399), (0.7072265637451247, 0.795648339142106, 0.4662593453344923), (0.9522043509564118, 0.8383482335114356, 0.04624824811210648), (0.2509444122476855, 0.723665792376911, 0.1685356796751546)] d_widths = [.5]*len(['2','3','4']) ax1 = plt.subplot2grid((7,7), (0,0), rowspan = 6, colspan=7) SBG = StackedBarGrapher() SBG.stackedBarPlot(ax1, A, my_color_list, xLabels=['2','3','4'], yTicks=3, widths=d_widths, gap = 0.005, scale=False ) for i in range(len(A)): Ai = [x for x in A[i] if x>0] y = [x/2.0 for x in Ai] for j in range(len(Ai)): if j>0:
total = sum(i) post_A_partial = [] for j in i: post_A_partial.append((j / float(total)) * 100) post_A_2.append(post_A_partial) my_color_list_2 = [ (0.9769448735268946, 0.6468696110452877, 0.2151452804329661), (0.7645505989354233, 0.4875228836084111, 0.30496111115768654), (0.6151274326753975, 0.496189476149738, 0.75244053646953548), (0.1562085876188265, 0.44786703170340336, 0.9887241674046707), (0.4210506028639077, 0.2200011667972023, 0.37841949185273394) ] ax1 = plt.subplot2grid((20, 7), (0, 0), rowspan=4, colspan=7) SBG = StackedBarGrapher() SBG.stackedBarPlot(ax1, post_A_2, my_color_list_2, xLabels=d_labels, yTicks=3, widths=d_widths, gap=0.005, scale=False) for i in range(len(post_A_2)): Ai = [x for x in post_A_2[i] if x > 0] y = [x / 2.0 for x in Ai] for j in range(len(Ai)): if j > 0: yy = y[j] + np.sum(Ai[0:j])
def main():
    """Find candidate polymorphic positions, then plot mutation spectra.

    Step 1 -- for every ``<prj_name>_*_mutation_patterns.txt`` table under
    ``prj_tree.data``:
      * load it with ``np.loadtxt`` and keep at most the first 10 rows;
      * divide each row sum by its 1-based row number ("line numbers");
      * starting from row index 5 and going down to 1, look for a row whose
        line number lies more than 1.5 interquartile ranges above the third
        quartile; if found, the rows before it are dropped;
      * express each remaining row as a percentage of its line number;
      * fit an OLS model per column (percentage against row number) and
        record the 1-based column index when the first fitted parameter is
        at least 12.5 with a p-value of at most 0.05;
      * save the per-gene figure under ``prj_tree.figure``.
    The resulting dict (gene name -> ``[0, position, ...]``) is pickled under
    ``prj_tree.tmp``.

    Step 2 -- for the germline types ``'V'`` and ``'J'``: load the gene-usage
    pickle and the "right allele" table, then for every mutation-pattern
    pickle tally the observed query nucleotides per germline position, write
    the tally as a tab-separated file, plot it, and draw one additional
    stacked-bar figure per position recorded in step 1.

    Relies on the module-level names ``prj_tree``, ``prj_name``,
    ``chain_type`` and ``pic_type``.  Takes no arguments and returns None.

    NOTE(review): the nesting of this function was reconstructed from text
    whose indentation had been flattened; in particular the attachment of the
    blue-curve ``else`` in step 1 should be confirmed against the intended
    behaviour.
    """
    germline_fasta = SeqIO.index(
        "/zzh_gpfs02/yanmingchen/HJT-PGM/Naive/Naive_IgM/Igblast_database/20150429-human-gl-vdj.fasta",
        "fasta")

    # ------------------------------------------------------------------
    # Step 1: detect candidate positions per reference gene
    # ------------------------------------------------------------------
    mutation_patterns_files = glob.glob('%s/%s_*_mutation_patterns.txt' %
                                        (prj_tree.data, prj_name))
    ploy_gene_position_dict = {}
    for mutation_patterns_path in mutation_patterns_files:
        print(mutation_patterns_path)
        ref_seq_id_name = mutation_patterns_path.split("_")[-3]
        print(ref_seq_id_name)

        # Only non-empty files are parsed; the handle is always closed.
        mutation_patterns_reader = None
        with open(mutation_patterns_path, "rU") as mutation_patterns_file:
            if os.fstat(mutation_patterns_file.fileno()).st_size:
                mutation_patterns_reader = np.loadtxt(mutation_patterns_file)
        if mutation_patterns_reader is None:
            print("yes")
            ploy_gene_position_dict[ref_seq_id_name] = [0]
            continue

        print("%s %s" % (mutation_patterns_reader,
                         len(mutation_patterns_reader)))
        mutation_patterns_reader = copy.deepcopy(mutation_patterns_reader[:10])
        if mutation_patterns_reader.ndim == 1:
            # A single-row table comes back 1-D; make it a one-row matrix.
            mutation_patterns_reader = mutation_patterns_reader.reshape(
                1, len(mutation_patterns_reader))
        line_numbers = np.sum(mutation_patterns_reader, axis=1)
        line_numbers = [
            x / (index + 1) for (index, x) in enumerate(line_numbers)
        ]

        third_quartile = scist.scoreatpercentile(line_numbers, 75)
        interquartile_range = third_quartile - scist.scoreatpercentile(
            line_numbers, 25)
        print("%s %s %s" % (third_quartile, interquartile_range,
                            line_numbers))

        # Highest row index (5..1) whose line number is an upper outlier.
        start_bin = 0
        for bin_index in range(5, 0, -1):
            if bin_index >= len(line_numbers):
                continue
            bin_numbers = line_numbers[bin_index]
            report = "%s %s %s %s" % (
                bin_numbers, third_quartile, interquartile_range,
                (bin_numbers - third_quartile) - (1.5 * interquartile_range))
            print(report)
            if (bin_numbers - third_quartile) > (
                    1.5 * interquartile_range):  #0.4 Naive :0.3
                print("yes")
                print(report)
                mutation_patterns_reader = copy.deepcopy(
                    mutation_patterns_reader[bin_index:])
                start_bin = bin_index
                break

        # Convert counts to percentages of the row's own line number.  Rows
        # were shifted by start_bin above, so the zero check must look at the
        # same element that is used as the divisor.
        for line_index in range(len(mutation_patterns_reader)):
            line_number = line_numbers[line_index + start_bin]
            if line_number != 0:
                mutation_patterns_reader[line_index] = (
                    mutation_patterns_reader[line_index] / line_number * 100)

        # After transposing: one row per column of the table, one column per
        # retained table row.
        mutation_patterns_reader = mutation_patterns_reader.T
        ploy_gene_position_dict[ref_seq_id_name] = [0]
        if mutation_patterns_reader.shape[1] == 1:
            # A single retained row leaves nothing to regress on.
            continue

        top = .5 + .5  # upper edge for the annotations, in axes coordinates
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111)
        line_flag = 1
        for index, line in enumerate(mutation_patterns_reader):
            Yi = line
            Xi = np.arange(start_bin + 1, start_bin + len(line) + 1)
            X = sm.add_constant(Xi)
            est = sm.OLS(Yi, X).fit()
            # params[0] / pvalues[0]: presumably the constant term added by
            # sm.add_constant -- confirm that ``sm`` is statsmodels.api.
            if est.params[0] >= 12.5:
                if est.pvalues[0] <= 0.05:
                    ploy_gene_position_dict[ref_seq_id_name].append(index + 1)
                    print(est.params[0])
                    print(est.pvalues[0])
                    X_prime = np.linspace(X.min(), X.max(),
                                          100)[:, np.newaxis]
                    X_prime = sm.add_constant(X_prime)
                    y_hat = est.predict(X_prime)
                    plt.plot(Xi, Yi, 'g', alpha=0.9, linewidth=2)
                    plt.scatter(Xi,
                                Yi,
                                c='g',
                                marker="o",
                                alpha=0.9,
                                linewidth=2)
                    plt.plot(X_prime[:, 1], y_hat, 'r')
                    ax.text(.2,
                            top - (0.05 * line_flag),
                            "Nt postition: " + str(index + 1),
                            horizontalalignment='right',
                            verticalalignment='top',
                            transform=ax.transAxes)
                    ax.text(.2,
                            top - (0.05 * line_flag) - 0.05,
                            "P-value: " + str(round(est.pvalues[0], 3)),
                            horizontalalignment='right',
                            verticalalignment='top',
                            transform=ax.transAxes)
                    line_flag += 2
            else:
                plt.plot(Xi, Yi, 'b', alpha=0.3, linewidth=2)
                plt.scatter(Xi,
                            Yi,
                            c='b',
                            marker="o",
                            alpha=0.3,
                            linewidth=2)
        plt.xlabel("Mutation Count (Sequence)")
        plt.ylabel("Mutation Freq. (position)")
        plt.ylim(0, 100)
        plt.xlim(0, 10)
        plt.title(ref_seq_id_name)
        fig.savefig("%s/%s_%s_mutation_patterns0.4.png" %
                    (prj_tree.figure, prj_name, ref_seq_id_name),
                    dpi=300)
        plt.close(fig)

    #output
    print(ploy_gene_position_dict)
    position_dump_path = '%s/%s_ploy_gene_position_dict_dump' % (prj_tree.tmp,
                                                                 prj_name)
    with open(position_dump_path, 'wb') as pickle_file_handle:
        pickle.dump(ploy_gene_position_dict, pickle_file_handle)

    # ------------------------------------------------------------------
    #Step 2: get PLOY position nucl and plot stack bar
    # ------------------------------------------------------------------
    # Column of the tally arrays that each observed query nucleotide feeds.
    spectrum_columns = {"A": 0, "C": 1, "-": 2, "T": 3, "G": 4}
    position_columns = {"A": 0, "T": 1, "C": 2, "G": 3}

    for germline_type in ('V', 'J'):
        # NOTE(review): pickle files are trusted here; they are presumably
        # written by an earlier step of the same pipeline -- confirm.
        usage_dump_path = '%s/%s_gene_usage_info_dump_%s_%s%s' % (
            prj_tree.tmp, prj_name, germline_type, chain_type, pic_type)
        with open(usage_dump_path, 'rb') as f_IgH:
            pickle_tuple_IgH = pickle.load(f_IgH)
        columns_IgH = pickle_tuple_IgH[0]
        geneusage_data_IgH = pickle_tuple_IgH[1]

        # Total usage per gene: rows are grouped by the name before '*'.
        geneusage_dict = {}
        for i, column in enumerate(columns_IgH):
            gene = column.split("*")[0]
            geneusage_dict[gene] = geneusage_dict.get(gene, 0) + sum(
                geneusage_data_IgH[i])

        # gene -> "gene*allele", built from the first two table columns.
        max_freq_allele_dict = {}
        allele_table_path = '%s/%s_%s_%s_right_allele_usage%s.txt' % (
            prj_tree.data, prj_name, chain_type, germline_type, pic_type)
        with open(allele_table_path, "rU") as result_file_name:
            for line in csv.reader(result_file_name, delimiter="\t"):
                max_freq_allele_dict[line[0]] = line[0] + "*" + line[1]
        print(max_freq_allele_dict)

        pattern_dump_paths = glob.glob(
            '%s/%s_mutation_pattrens_dump_%s_%s_*' %
            (prj_tree.tmp, prj_name, germline_type, chain_type))
        for pattern_dump_path in pattern_dump_paths:
            print(pattern_dump_path)
            ref_seq_id_name = pattern_dump_path.split('_')[-1]
            spectrum_path = '%s/%s_mutation_spectrum_%s_%s_%s_all' % (
                prj_tree.data, prj_name, germline_type, chain_type,
                ref_seq_id_name)
            # The output file is created before the allele lookup, as before,
            # but is now closed on every path, including the early skip.
            with open(spectrum_path, "w") as outfile:
                writer = csv.writer(outfile, delimiter="\t")
                with open(pattern_dump_path, 'rb') as pickle_file_handle:
                    mutation_patterns_group = pickle.load(pickle_file_handle)
                try:
                    max_freq_allele = max_freq_allele_dict[ref_seq_id_name]
                except KeyError:
                    continue
                germline_fasta_seq = germline_fasta[max_freq_allele].seq.upper()

                # Rows: germline positions (records are 1-based);
                # columns: A, C, '-', T, G.
                mutation_spectrum_array = np.zeros(
                    (len(germline_fasta_seq), 5))
                for value in mutation_patterns_group.values():
                    for record in value:
                        for position_record_item in record[1]:
                            position = position_record_item[0]
                            column = spectrum_columns.get(
                                position_record_item[2])
                            if column is not None:
                                mutation_spectrum_array[position -
                                                        1][column] += 1

                # Report, per germline position, the count found in the
                # column of the germline nucleotide itself.
                for index, nucl in enumerate(germline_fasta_seq):
                    if nucl in ("A", "C", "T", "G"):
                        column = spectrum_columns[nucl]
                        print("mutation_spectrum_array[index][%d] %s" %
                              (column, mutation_spectrum_array[index][column]))

                for index, line in enumerate(mutation_spectrum_array):
                    writer.writerow([germline_fasta_seq[index]] + list(line))

            #Plot mutation spectrum
            d_colors = [
                '#2166ac', '#fee090', '#fdbb84', '#fc8d59', '#e34a33',
                '#b30000', '#777777'
            ]
            SBG = StackedBarGrapher()
            fig = plt.figure()
            ax5 = fig.add_subplot(111)
            SBG.stackedBarPlot(ax5,
                               mutation_spectrum_array,
                               d_colors,
                               edgeCols=d_colors,
                               ylabel='Number of reads')
            plt.title("%s %s" %
                      (ref_seq_id_name, geneusage_dict[ref_seq_id_name]))
            plt.savefig('%s/%s_%s_%s_%s_nutation_spectrum_all.png' %
                        (prj_tree.figure, prj_name, chain_type, germline_type,
                         ref_seq_id_name),
                        dpi=300)
            plt.close(fig)

            # ploy_nucl_position_draw
            # The first list element is the placeholder 0, so real positions
            # start at index 1.
            poly_nucl_positions = ploy_gene_position_dict[ref_seq_id_name]
            for poly_nucl_position in poly_nucl_positions[1:]:
                # Rows: group key 1..10; columns: A, T, C, G.
                poly_nucl_position_array = np.zeros((10, 4))
                # Reset per position so the title never shows a nucleotide
                # left over from another position or from the loop above.
                ref_nucl = "NA"
                for (key, value) in mutation_patterns_group.items():
                    if key > 10:
                        continue
                    for record in value:
                        for position_record_item in record[1]:
                            if position_record_item[0] != poly_nucl_position:
                                continue
                            ref_nucl = position_record_item[1]
                            column = position_columns.get(
                                position_record_item[2])
                            if column is not None:
                                poly_nucl_position_array[key - 1][column] += 1

                d_colors = ['red', 'yellow', 'blue', 'green']
                gap = 0.2
                SBG = StackedBarGrapher()
                fig = plt.figure()
                ax5 = fig.add_subplot(111)
                SBG.stackedBarPlot(ax5,
                                   poly_nucl_position_array,
                                   d_colors,
                                   edgeCols=['#000000'] * 7,
                                   ylabel='Number of reads',
                                   gap=gap)
                plt.title("%s %s %s" %
                          (ref_seq_id_name, poly_nucl_position, ref_nucl))
                plt.xticks(range(0, 10, 1), ('1', '2', '3', '4', '5', '6',
                                             '7', '8', '9', '10'))
                plt.savefig('%s/%s_%s_%s_%s_position%s_all.png' %
                            (prj_tree.figure, prj_name, chain_type,
                             germline_type, ref_seq_id_name,
                             poly_nucl_position),
                            dpi=300)
                plt.close(fig)
def draw_graph(Holes_Objects, Processes_Objects, Block_List):
    """Show the memory layout as a single stacked bar.

    Segments are collected as ``[start address, size, kind]`` triples, sorted
    by start address, and drawn in that order.  ``kind`` selects the colour:
    0 grey (entries of ``Block_List``), 1 blue (holes), 2 red (processes
    whose ``allocated_to`` is not -1).

    When ``Block_List`` is an empty list it is filled in place with one entry
    per hole, covering the span between the end of that hole and the start of
    the next one (size 0 after the last hole).

    Both the unsorted and the sorted segment list, and the array handed to
    the plot, are printed.  The figure is shown with ``plt.show()`` and then
    closed.
    """
    # Holes: kind 1.
    segments = [[float(hole.address), float(hole.size), 1]
                for hole in Holes_Objects]

    # Derive the blocks between consecutive holes when none were supplied.
    if Block_List == []:
        last_position = len(Holes_Objects) - 1
        for position, hole in enumerate(Holes_Objects):
            if position < last_position:
                following = Holes_Objects[position + 1]
                span = float(following.address - hole.address - hole.size)
            else:
                span = 0
            Block_List.append([float(hole.address + hole.size), span, 0])
    segments.extend(Block_List)

    # Allocated processes: kind 2, placed at the address they were given.
    for process in Processes_Objects:
        if process.allocated_to != -1:
            segments.append([
                float(process.allocated_to.address),
                float(process.size), 2
            ])

    print(segments)
    segments = sorted(segments, key=lambda segment: segment[0])
    print(segments)

    sizes = [segment[1] for segment in segments]
    d_colors = []
    for segment in segments:
        kind = segment[2]
        if kind == 0:
            d_colors.append('#aaaaaa')
        elif kind == 1:
            d_colors.append('#0000bb')
        elif kind == 2:
            d_colors.append('#bb0000')

    # One bar (row) whose stacked pieces are the segment sizes.
    d = np.array([sizes])
    print(d)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    StackedBarGrapher().stackedBarPlot(
        ax,
        d,
        d_colors,
        xLabels=["Memory"],
        yTicks=7,
        widths=[1],
    )
    plt.title("Memory Allocation")
    fig.subplots_adjust(bottom=0.4)
    plt.tight_layout()
    plt.show()
    plt.close(fig)
    del fig
def _gene_usage_y_ticks(highest_percent):
    """Return ``[tick_positions, tick_labels]`` for the percentage axis.

    Ticks start at 0 and advance in steps of 2 until they pass the maximum
    (rounded up to an even number).  While more than 14 ticks remain, every
    other tick is dropped; 0 is always kept as the first tick.  Positions and
    labels hold the same values but are separate list objects.
    """
    upper = math.ceil(highest_percent)
    if upper % 2 != 0:
        upper += 1

    ticks, tick = [0], 0
    while tick <= upper:
        tick += 2
        ticks.append(tick)

    while len(ticks) > 14:
        if len(ticks) % 2 == 0:
            # Even count: thin the ticks after 0, then put 0 back in front.
            ticks = [0] + ticks[1:][::2]
        else:
            ticks = ticks[::2]
    return [ticks, list(ticks)]


def plot_gene_usage(result_dict, pic_type, germline_gene_list, germline_type):
    """Plot per-gene usage percentages as one stacked-bar PNG.

    Every value of ``result_dict`` is expressed as a percentage of the sum of
    all values and grouped by the part of its key before ``'*'``.  Each name
    returned by ``get_all_gene_name(germline_gene_list)`` gets one bar whose
    segments are that group's percentages in descending order; x labels of
    groups present in ``result_dict`` are coloured blue.  Relies on the
    module-level names ``prj_name`` and ``chain_type``.

    Args:
        result_dict: mapping of names (group key before ``'*'``) to counts.
        pic_type: suffix inserted into the output file name.
        germline_gene_list: forwarded to ``get_all_gene_name``.
        germline_type: label used in the title, file name and messages.

    Returns:
        None.  When ``result_dict`` has entries but they sum to zero, a
        message is printed and nothing is plotted (previously this raised
        ZeroDivisionError).
    """
    total_number = sum(result_dict.values())
    if result_dict and total_number == 0:
        print("There is no %s" % germline_type)
        return

    gene_number_dict = {}
    for (key, value) in result_dict.items():
        gene_number_dict.setdefault(key.split('*')[0], []).append(
            float(value) / total_number * 100)

    max_len_value = max([len(value) for value in gene_number_dict.values()]
                        or [0])

    # One row per known gene; segments sorted from largest to smallest and
    # zero-padded to the longest group.
    gene_names = get_all_gene_name(germline_gene_list)
    data = np.zeros((len(gene_names), max_len_value))
    for index, gene in enumerate(gene_names):
        gene_number = gene_number_dict.get(gene)
        if gene_number is None:
            continue
        print(gene_number)
        data[index, :len(gene_number)] = sorted(gene_number, reverse=True)

    d_colors = [
        '#2166ac', '#fee090', '#fdbb84', '#fc8d59', '#e34a33', '#b30000',
        '#777777'
    ] * 3
    y_ticks = _gene_usage_y_ticks(max(np.sum(data, axis=1)))

    SBG = StackedBarGrapher()
    fig = plt.figure()
    ax = fig.add_subplot(111)
    SBG.stackedBarPlot(ax,
                       data,
                       d_colors,
                       xLabels=gene_names,
                       edgeCols=['#000000'] * len(d_colors),
                       yTicks=y_ticks,
                       ylabel='Percent of reads (%)',
                       gap=.2,
                       endGaps=True)
    plt.title("%s %s %s gene usage" % (prj_name, chain_type, germline_type))
    fig.subplots_adjust(bottom=0.4)
    plt.tight_layout()
    for t in ax.xaxis.get_ticklabels():
        t.set_horizontalalignment('center')
        # get_text() reads the label directly instead of parsing repr(Text).
        if t.get_text() in gene_number_dict:
            t.set_color('blue')
    plt.savefig(
        '/zzh_gpfs02/yanmingchen/HJT-PGM/Naive/%s/%s_%s_%s_gene_usage%s.png' %
        (prj_name, prj_name, chain_type, germline_type, pic_type),
        dpi=300)
    plt.close(fig)
(0.6151274326753975, 0.496189476149738, 0.75244053646953548), (0.1562085876188265, 0.44786703170340336, 0.9887241674046707), (0.4210506028639077, 0.2200011667972023, 0.37841949185273394), (0.7728656344058752, 0.17367399916287833, 0.026245548153039366), (0.904005064928743, 0.3038725882767085, 0.9399279068775889), (0.39140782566655674, 0.761012099948101, 0.7475874114794775), (0.0965359591761811, 0.43566457484639054, 0.9375581594394308), (0.859944654091911, 0.208070821188862, 0.8893517695418856), (0.5934259725250017, 0.6259544064286037, 0.8943937276483482), (0.1834548561930609, 0.8625908063396674, 0.2808367027257399), (0.9522043509564118, 0.8383482335114356, 0.04624824811210648), (0.2509444122476855, 0.723665792376911, 0.1685356796751546)] ax1 = plt.subplot2grid((7,7), (0,0), rowspan = 6, colspan=7) SBG = StackedBarGrapher() SBG.stackedBarPlot(ax1, post_A, my_color_list, xLabels=d_labels, yTicks=3, widths=d_widths, gap = 0.005, scale=False ) for i in range(len(post_A)): Ai = [x for x in post_A[i] if x>0] y = [x/2.0 for x in Ai] for j in range(len(Ai)): if j>0:
import numpy as np
from matplotlib import pyplot as plt
from stackedBarGraph import StackedBarGrapher

SBG = StackedBarGrapher()

# Six rows (one per x label below) of seven values (one per colour below).
d = np.array(
    [
        [101, 0, 0, 0, 0, 0, 0],
        [92, 3, 0, 4, 5, 6, 0],
        [56, 7, 8, 9, 23, 4, 5],
        [81, 2, 4, 5, 32, 33, 4],
        [0, 45, 2, 3, 45, 67, 8],
        [99, 5, 0, 0, 0, 43, 56],
    ],
    dtype=float,
)

d_labels = ["fred", "julie", "sam", "peter", "rob", "baz"]
d_widths = [.5] * 6
d_colors = [
    '#2166ac',
    '#fee090',
    '#fdbb84',
    '#fc8d59',
    '#e34a33',
    '#b30000',
    '#777777',
]

fig = plt.figure()
ax = fig.add_subplot(111)
SBG.stackedBarPlot(
    ax,
    d,
    d_colors,
    xLabels=d_labels,
    yTicks=7,
    widths=d_widths,
    scale=True,
    gap=1,
)
plt.title("Scaled bars with set widths")

fig.subplots_adjust(bottom=0.4)
plt.tight_layout()
import pandas as pd
import pylab as pl
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np
import sys
# The project modules imported below are looked up in these directories.
# NOTE(review): one path uses '/na/home/' and the other '/na/homes/' --
# confirm that both spellings are intended.
sys.path.append('/na/home/splis/soft/src/dev/craft/gunfolds/tools/')
sys.path.append('/na/homes/splis/soft/src/dev/tools/stackedBarGraph/')
import zickle as zkl
from stackedBarGraph import StackedBarGrapher

SBG = StackedBarGrapher()
fig = pl.figure(figsize=[10,1.3])

#Read in data & create total column
d = zkl.load("hooke_nodes_6_g32g1_.zkl")#hooke_nodes_35_newp_.zkl")
# list(...) keeps this working when keys() returns a view instead of a list.
densities = np.sort(list(d.keys()))


def get_counts(d):
    """Count how many records in ``d`` have each ``'eq'`` length.

    Args:
        d: iterable of mappings that each carry a sized ``'eq'`` entry.

    Returns:
        dict mapping every distinct ``len(x['eq'])`` to the number of records
        with that length; empty when ``d`` is empty.
    """
    # An explicit array makes the element-wise comparison below independent
    # of how a plain list compares against a NumPy scalar.
    eqc = np.array([len(x['eq']) for x in d])
    c = {}
    for k in np.unique(eqc):
        c[k] = int(np.sum(eqc == k))
    return c


# unique size
usz = set()
dc = {}