# One bar per entry of `densities`: each bar is half a unit wide and is
# labelled with the string form of its density value.
d_widths = [.5] * len(densities)
d_labels = [str(density) for density in densities]

#################################################################################
# graph subplot for u = 2
A2 = makeA(1)
# RGB tuples handed to stackedBarPlot as its colour list.
mycolorlistu2 = [
    (0.9769448735268946, 0.6468696110452877, 0.2151452804329661),
    (0.37645505989354233, 0.6875228836084111, 0.30496111115768654),
    (0.39140782566655674, 0.7613012099948101, 0.7475874114794775),
]

ax1 = plt.subplot2grid((20, 7), (0, 0), rowspan=5, colspan=7)
SBG = StackedBarGrapher()
SBG.stackedBarPlot(
    ax1, A2, mycolorlistu2,
    xLabels=d_labels, yTicks=3, widths=d_widths, gap=0.005, scale=False)

# For every row keep only the positive entries, then for each entry after the
# first compute half of that entry plus the sum of the entries before it.
# The value is not used by the code visible here.
for i in range(len(A2)):
    Ai = [x for x in A2[i] if x > 0]
    y = [x / 2.0 for x in Ai]
    for j in range(1, len(Ai)):
        yy = y[j] + np.sum(Ai[0:j])
# Example #2
# 0
def plot_gene_usage(geneusage_data, columns, pic_type, germline_gene_list,
                    germline_type):
    """Plot per-gene usage percentages as two stacked-bar figures.

    Each row of *geneusage_data* is summed and expressed as a percentage of
    the grand total.  Rows are grouped by the part of their column name that
    precedes ``'*'``; within a group the percentages are stacked from largest
    to smallest.  Two PNG files are written below ``prj_tree.figure``: a
    coloured one (``..._gene_usage_identity<pic_type>.png``) whose x labels
    are blue for every gene present in *columns*, and an all-black one
    (``..._gene_usage_percentage<pic_type>.png``).  Finally the grouped
    percentages are passed to ``record_gene_usage_percent``.

    Relies on names defined outside this function: ``prj_name``,
    ``chain_type``, ``prj_tree``, ``get_all_gene_name``,
    ``record_gene_usage_percent``, ``StackedBarGrapher``, ``plt``, ``np`` and
    ``math``.

    Args:
        geneusage_data: 2-D array-like of counts, one row per entry of
            *columns*.
        columns: names of the form ``<gene>*<suffix>``, one per data row.
        pic_type: suffix inserted into the output file names.
        germline_gene_list: passed to ``get_all_gene_name`` to obtain the
            genes shown on the x axis.
        germline_type: label used in the plot title, the file names and the
            "nothing to plot" message.

    Returns:
        None.  When the data sum to zero a message is printed and nothing is
        plotted or recorded.
    """
    d = np.array(geneusage_data)
    total_number = np.sum(d)
    if total_number == 0:
        # Written as a call with one argument so that it parses and prints
        # the same text on Python 2 and Python 3.
        print("There is no %s" % germline_type)
        return

    # A set gives constant-time membership tests when colouring tick labels.
    detected_genes = set(x.split('*')[0] for x in columns)

    # Percentage of the grand total contributed by each row, grouped by gene.
    gene_number_dict = {}
    for column, line_number in zip(columns, np.sum(d, axis=1)):
        line_number_percent = float(line_number) / float(total_number) * 100
        gene_number_dict.setdefault(column.split('*')[0],
                                    []).append(line_number_percent)
    max_len_value = max([len(value) for value in gene_number_dict.values()]
                        or [0])

    # One row per gene to display; genes without data keep an all-zero row.
    gene_names = get_all_gene_name(germline_gene_list)
    data = np.zeros((len(gene_names), max_len_value))
    for index, gene in enumerate(gene_names):
        gene_number = gene_number_dict.get(gene)
        if gene_number is None:
            continue
        for n_index, number in enumerate(sorted(gene_number, reverse=True)):
            data[index][n_index] = number

    d_labels = gene_names
    d_colors = [
        '#2166ac', '#fee090', '#fdbb84', '#fc8d59', '#e34a33', '#b30000',
        '#777777'
    ] * 3
    edge_colors = ['#000000'] * len(d_colors)
    y_ticks = _even_y_ticks(math.ceil(max(np.sum(data, axis=1))))

    SBG = StackedBarGrapher()

    # Coloured figure; labels of genes present in `columns` are drawn in blue.
    _save_gene_usage_plot(
        SBG, data, d_colors, edge_colors, d_labels, y_ticks, germline_type,
        '%s/%s_%s_%s_gene_usage_identity%s.png' %
        (prj_tree.figure, prj_name, chain_type, germline_type, pic_type),
        highlighted_labels=detected_genes)

    # Plot black percentage gene usage
    _save_gene_usage_plot(
        SBG, data, ['#000000'] * len(d_colors), edge_colors, d_labels,
        y_ticks, germline_type,
        '%s/%s_%s_%s_gene_usage_percentage%s.png' %
        (prj_tree.figure, prj_name, chain_type, germline_type, pic_type))

    # Write down gene usage percentage info
    record_gene_usage_percent(gene_number_dict, germline_type, pic_type)


def _even_y_ticks(max_value):
    """Return ``[tick_positions, tick_labels]`` made of even numbers.

    Ticks start at 0 and advance in steps of two until one tick lies beyond
    *max_value* (rounded up to an even number).  While more than 14 ticks
    remain, every other tick is dropped, always keeping 0 as the first tick.
    The two returned lists hold the same values but are distinct objects.
    """
    upper = max_value
    while upper % 2 != 0:
        upper += 1

    ticks = [0]
    tick = 0
    while tick <= upper:
        tick += 2
        ticks.append(tick)

    while len(ticks) > 14:
        if len(ticks) % 2 == 0:
            ticks = [0] + ticks[1:][::2]
        else:
            ticks = ticks[::2]
    return [ticks, list(ticks)]


def _save_gene_usage_plot(SBG, data, bar_colors, edge_colors, d_labels,
                          y_ticks, germline_type, out_path,
                          highlighted_labels=None):
    """Draw one gene-usage stacked-bar figure and save it to *out_path*.

    x tick labels are centred; labels whose text is in *highlighted_labels*
    (if given) are coloured blue.  The figure is closed after saving.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    SBG.stackedBarPlot(ax,
                       data,
                       bar_colors,
                       xLabels=d_labels,
                       edgeCols=edge_colors,
                       yTicks=y_ticks,
                       ylabel='Percent of reads (%)',
                       gap=.2,
                       endGaps=True)

    plt.title("%s %s %s gene usage" % (prj_name, chain_type, germline_type))

    fig.subplots_adjust(bottom=0.4)
    plt.tight_layout()
    for t in ax.xaxis.get_ticklabels():
        t.set_horizontalalignment('center')
        # Read the label through the Text API rather than parsing str(t),
        # whose format is a repr and breaks on labels containing a quote.
        if highlighted_labels and t.get_text() in highlighted_labels:
            t.set_color('blue')

    plt.savefig(out_path, dpi=300)
    plt.close(fig)
# Express every value of each row of pre_A_2 as a percentage of that row's sum.
post_A_2 = []
for i in pre_A_2:
    total = sum(i)
    post_A_partial = [(j / float(total)) * 100 for j in i]
    post_A_2.append(post_A_partial)

# RGB tuples handed to stackedBarPlot as its colour list.
my_color_list_2 = [
    (0.9769448735268946, 0.6468696110452877, 0.2151452804329661),
    (0.7645505989354233, 0.4875228836084111, 0.30496111115768654),
    (0.1562085876188265, 0.44786703170340336, 0.9887241674046707),
]

ax1 = plt.subplot2grid((20, 7), (0, 0), rowspan=4, colspan=7)
SBG = StackedBarGrapher()
SBG.stackedBarPlot(
    ax1, post_A_2, my_color_list_2,
    xLabels=d_labels, yTicks=3, widths=d_widths, gap=0.005, scale=False)

for i in range(len(post_A_2)):
    Ai = [x for x in post_A_2[i] if x>0]
    y = [x/2.0 for x in Ai]
    for j in range(len(Ai)):
        if j>0:
# Example #4
# 0
import seaborn as sb
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np

# Directories appended to sys.path below so that the imports that follow
# (presumably `zickle` and `stackedBarGraph` -- confirm which lives where)
# can be resolved.  Both paths are machine-specific.
SBDIR = '~/soft/src/dev/tools/stackedBarGraph/'
GFDIR = '/na/home/splis/soft/src/dev/craft/gunfolds/tools/'
import sys, os

# expanduser() resolves the leading '~' in SBDIR; GFDIR is already absolute.
sys.path.append(os.path.expanduser(SBDIR))
sys.path.append(os.path.expanduser(GFDIR))

import zickle as zkl
from stackedBarGraph import StackedBarGrapher
SBG = StackedBarGrapher()

def gettimes(d):
    """Return the ``'ms'`` value of every record in *d*, converted to minutes.

    Args:
        d: iterable of mappings that each have an ``'ms'`` key holding a
            duration in milliseconds.

    Returns:
        A list of floats (milliseconds / 1000 / 60), in input order.  A list
        is built explicitly so the result is the same on Python 2 and
        Python 3, where ``map`` would return a one-shot iterator.
    """
    return [x['ms'] / 1000. / 60. for x in d]

# (number, zickle result file name) pairs.  NOTE(review): the number matches
# the node count in the file name divided by 100; its meaning is not visible
# in this snippet -- confirm before relying on it.
l = [(0.15, 'leibnitz_nodes_15_density_0.1_newp_.zkl'),
     (0.20, 'leibnitz_nodes_20_density_0.1_newp_.zkl'),
     (0.25, 'leibnitz_nodes_25_density_0.1_newp_.zkl'),
     (0.30, 'leibnitz_nodes_30_density_0.1_newp_.zkl'),
     (0.35, 'leibnitz_nodes_35_density_0.1_newp_.zkl')]

# pyplot is imported as `plt` in this snippet; the previous `pl` alias is
# never defined here and would raise NameError.
fig = plt.figure(figsize=[10,3])
#Read in data & create total column
(0.39140782566655674, 0.761012099948101, 0.7475874114794775), 
(0.0965359591761811, 0.43566457484639054, 0.9375581594394308), 
(0.859944654091911, 0.208070821188862, 0.8893517695418856), 
(0.022700048163251885, 0.658455757390323, 0.45194508876647577), 
(0.5934259725250017, 0.6259544064286037, 0.8943937276483482), 
(0.1248759682295419, 0.1286185769691658, 0.6973677590395778), 
(0.1834548561930609, 0.8625908063396674, 0.2808367027257399), 
(0.7072265637451247, 0.795648339142106, 0.4662593453344923), 
(0.9522043509564118, 0.8383482335114356, 0.04624824811210648), 
(0.2509444122476855, 0.723665792376911, 0.1685356796751546)] 

# Three bars (x labels '2', '3' and '4'), each half a unit wide.
d_widths = [.5, .5, .5]

ax1 = plt.subplot2grid((7, 7), (0, 0), rowspan=6, colspan=7)
SBG = StackedBarGrapher()
SBG.stackedBarPlot(
    ax1, A, my_color_list,
    xLabels=['2', '3', '4'],
    yTicks=3, widths=d_widths, gap=0.005, scale=False)

for i in range(len(A)):
    Ai = [x for x in A[i] if x>0]
    y = [x/2.0 for x in Ai]
    for j in range(len(Ai)):
        if j>0:
    total = sum(i)
    post_A_partial = []
    for j in i:
        post_A_partial.append((j / float(total)) * 100)
    post_A_2.append(post_A_partial)

# RGB tuples handed to stackedBarPlot as its colour list.
my_color_list_2 = [
    (0.9769448735268946, 0.6468696110452877, 0.2151452804329661),
    (0.7645505989354233, 0.4875228836084111, 0.30496111115768654),
    (0.6151274326753975, 0.496189476149738, 0.75244053646953548),
    (0.1562085876188265, 0.44786703170340336, 0.9887241674046707),
    (0.4210506028639077, 0.2200011667972023, 0.37841949185273394),
]

ax1 = plt.subplot2grid((20, 7), (0, 0), rowspan=4, colspan=7)
SBG = StackedBarGrapher()
SBG.stackedBarPlot(
    ax1, post_A_2, my_color_list_2,
    xLabels=d_labels, yTicks=3, widths=d_widths, gap=0.005, scale=False)

# For every row keep only the positive entries, then for each entry after the
# first compute half of that entry plus the sum of the entries before it.
# The value is not used by the code visible here.
for i in range(len(post_A_2)):
    Ai = [x for x in post_A_2[i] if x > 0]
    y = [x / 2.0 for x in Ai]
    for j in range(1, len(Ai)):
        yy = y[j] + np.sum(Ai[0:j])
def main():
    """Flag candidate positions per gene and plot mutation spectra.

    Step 1 reads every ``<prj_name>_*_mutation_patterns.txt`` file below
    ``prj_tree.data``.  For each file it keeps at most the first ten rows,
    optionally drops leading rows when one of rows 1..5 is an upper outlier
    (more than 1.5 IQR above the third quartile of the scaled row sums),
    converts each remaining row to percentages of its scaled row sum, and
    fits an ordinary-least-squares line per column.  Columns whose intercept
    is >= 12.5 with p-value <= 0.05 are recorded (1-based) in
    ``ploy_gene_position_dict`` and drawn in green with the fitted line in
    red; columns with an intercept below 12.5 are drawn in blue.  The
    dictionary is pickled to ``prj_tree.tmp``.

    Step 2 runs for germline types ``'V'`` and ``'J'``: it loads the gene
    usage dump and the allele-usage table, then for every mutation-pattern
    dump writes a tab-separated per-position nucleotide count table, plots
    it as a stacked bar chart, and plots one extra chart for each position
    recorded in step 1.

    Relies on names defined outside this function: ``prj_tree``,
    ``prj_name``, ``chain_type``, ``pic_type``, ``SeqIO``, ``sm``,
    ``scist``, ``StackedBarGrapher``, ``plt``, ``np`` and the standard
    library modules ``glob``, ``os``, ``copy``, ``pickle`` and ``csv``.

    Returns:
        None.
    """
    germline_fasta = SeqIO.index(
        "/zzh_gpfs02/yanmingchen/HJT-PGM/Naive/Naive_IgM/Igblast_database/20150429-human-gl-vdj.fasta",
        "fasta")

    # Step 1: find candidate positions from the mutation-pattern tables.
    mutation_patterns_files = glob.glob('%s/%s_*_mutation_patterns.txt' %
                                        (prj_tree.data, prj_name))
    ploy_gene_position_dict = {}
    for mutation_patterns_file in mutation_patterns_files:
        print(mutation_patterns_file)
        # File names end in "<gene>_mutation_patterns.txt".
        ref_seq_id_name = mutation_patterns_file.split("_")[-3]
        print(ref_seq_id_name)

        # Read the table inside a `with` block so the handle is always
        # closed (it used to stay open for the rest of the run).
        with open(mutation_patterns_file, "rU") as patterns_handle:
            has_data = bool(os.fstat(patterns_handle.fileno()).st_size)
            if has_data:
                mutation_patterns_reader = np.loadtxt(patterns_handle)
        if not has_data:
            print("yes")
            ploy_gene_position_dict[ref_seq_id_name] = [0]
            continue

        print("%s %s" %
              (mutation_patterns_reader, len(mutation_patterns_reader)))
        mutation_patterns_reader = copy.deepcopy(
            mutation_patterns_reader[:10])
        if mutation_patterns_reader.ndim == 1:
            # np.loadtxt returns a 1-D array for a single-line file; treat
            # it as a table with one row.
            mutation_patterns_reader = mutation_patterns_reader.reshape(
                1, len(mutation_patterns_reader))

        # Row sum divided by the 1-based row number.  NOTE(review):
        # presumably row k holds sequences carrying k+1 mutations, making
        # this a per-row sequence count -- confirm against the producer.
        line_numbers = [
            x / (index + 1) for (index, x) in enumerate(
                np.sum(mutation_patterns_reader, axis=1))
        ]
        third_quartile = scist.scoreatpercentile(line_numbers, 75)
        interquartile_range = third_quartile - scist.scoreatpercentile(
            line_numbers, 25)
        print("%s %s %s" %
              (third_quartile, interquartile_range, line_numbers))

        # Scan rows 5..1; the first upper outlier found becomes the new
        # first row and everything before it is discarded.
        start_bin = 0
        for bin_index in range(5, 0, -1):
            if bin_index >= len(line_numbers):
                continue
            bin_numbers = line_numbers[bin_index]
            excess = (bin_numbers - third_quartile) - (1.5 *
                                                       interquartile_range)
            print("%s %s %s %s" % (bin_numbers, third_quartile,
                                   interquartile_range, excess))
            if (bin_numbers - third_quartile) > (
                    1.5 * interquartile_range):  #0.4 Naive :0.3
                print("yes")
                print("%s %s %s %s" % (bin_numbers, third_quartile,
                                       interquartile_range, excess))
                mutation_patterns_reader = copy.deepcopy(
                    mutation_patterns_reader[bin_index:])
                start_bin = bin_index
                break

        # Convert each remaining row to percentages of its own scaled sum.
        # The guard tests the value actually used as divisor: rows were
        # shifted by `start_bin`, so row i pairs with entry i + start_bin
        # (the previous guard tested entry i instead).
        for line_index in range(len(mutation_patterns_reader)):
            divisor = line_numbers[line_index + start_bin]
            if divisor != 0:
                mutation_patterns_reader[line_index] = (
                    mutation_patterns_reader[line_index] / divisor * 100)

        # After transposing, each row is one column of the input table.
        mutation_patterns_reader = mutation_patterns_reader.T
        ploy_gene_position_dict[ref_seq_id_name] = [0]
        if mutation_patterns_reader.shape[1] == 1:
            # A single remaining row gives one point per column: no fit.
            continue

        top = 1.0  # upper edge of the axes in axes coordinates
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111)
        line_flag = 1
        for index, line in enumerate(mutation_patterns_reader):
            Yi = line
            Xi = np.arange(start_bin + 1, start_bin + len(line) + 1)
            X = sm.add_constant(Xi)
            est = sm.OLS(Yi, X).fit()
            # params[0] / pvalues[0] belong to the constant column that
            # add_constant puts first, i.e. the intercept.
            if est.params[0] >= 12.5:
                # NOTE(review): a column with intercept >= 12.5 but
                # p-value > 0.05 is not drawn at all -- confirm intended.
                if est.pvalues[0] <= 0.05:
                    ploy_gene_position_dict[ref_seq_id_name].append(index +
                                                                    1)
                    print(est.params[0])
                    print(est.pvalues[0])
                    X_prime = np.linspace(X.min(), X.max(),
                                          100)[:, np.newaxis]
                    X_prime = sm.add_constant(X_prime)
                    y_hat = est.predict(X_prime)
                    plt.plot(Xi, Yi, 'g', alpha=0.9, linewidth=2)
                    plt.scatter(Xi,
                                Yi,
                                c='g',
                                marker="o",
                                alpha=0.9,
                                linewidth=2)

                    plt.plot(X_prime[:, 1], y_hat, 'r')
                    ax.text(.2,
                            top - (0.05 * line_flag),
                            "Nt postition: " + str(index + 1),
                            horizontalalignment='right',
                            verticalalignment='top',
                            transform=ax.transAxes)
                    ax.text(.2,
                            top - (0.05 * line_flag) - 0.05,
                            "P-value: " + str(round(est.pvalues[0], 3)),
                            horizontalalignment='right',
                            verticalalignment='top',
                            transform=ax.transAxes)
                    line_flag += 2
            else:
                plt.plot(Xi, Yi, 'b', alpha=0.3, linewidth=2)
                plt.scatter(Xi,
                            Yi,
                            c='b',
                            marker="o",
                            alpha=0.3,
                            linewidth=2)

        plt.xlabel("Mutation Count (Sequence)")
        plt.ylabel("Mutation Freq. (position)")
        plt.ylim(0, 100)
        plt.xlim(0, 10)
        plt.title(ref_seq_id_name)
        fig.savefig("%s/%s_%s_mutation_patterns0.4.png" %
                    (prj_tree.figure, prj_name, ref_seq_id_name),
                    dpi=300)
        plt.close(fig)

    #output
    print(ploy_gene_position_dict)
    pickle_file = '%s/%s_ploy_gene_position_dict_dump' % (prj_tree.tmp,
                                                          prj_name)
    with open(pickle_file, 'wb') as pickle_file_handle:
        pickle.dump(ploy_gene_position_dict, pickle_file_handle)

    #Step 2: get PLOY position nucl and plot stack bar
    # NOTE(review): pickle.load runs code embedded in the file; the dumps
    # read below are presumably written by earlier stages of this pipeline.
    # Never point these paths at untrusted data.
    spectrum_column = {"A": 0, "C": 1, "-": 2, "T": 3, "G": 4}
    poly_column = {"A": 0, "T": 1, "C": 2, "G": 3}
    for germline_type in ('V', 'J'):
        pickle_file = '%s/%s_gene_usage_info_dump_%s_%s%s' % (
            prj_tree.tmp, prj_name, germline_type, chain_type, pic_type)
        with open(pickle_file, 'rb') as f_IgH:
            pickle_tuple_IgH = pickle.load(f_IgH)
        columns_IgH, geneusage_data_IgH = pickle_tuple_IgH[
            0], pickle_tuple_IgH[1]

        # Total count per gene (column name up to '*'), over all its rows.
        geneusage_dict = {}
        for i, column in enumerate(columns_IgH):
            gene = column.split("*")[0]
            geneusage_dict[gene] = geneusage_dict.get(gene, 0) + sum(
                geneusage_data_IgH[i])

        # Map each gene to "<gene>*<second column>" from the allele table.
        max_freq_allele_dict = {}
        with open(
                '%s/%s_%s_%s_right_allele_usage%s.txt' %
            (prj_tree.data, prj_name, chain_type, germline_type, pic_type),
                "rU") as result_file_name:
            for line in csv.reader(result_file_name, delimiter="\t"):
                max_freq_allele_dict[line[0]] = line[0] + "*" + line[1]
        print(max_freq_allele_dict)

        pickle_files = glob.glob(
            '%s/%s_mutation_pattrens_dump_%s_%s_*' %
            (prj_tree.tmp, prj_name, germline_type, chain_type))
        for pickle_file in pickle_files:

            print(pickle_file)
            ref_seq_id_name = pickle_file.split('_')[-1]
            outfile = open(
                '%s/%s_mutation_spectrum_%s_%s_%s_all' %
                (prj_tree.data, prj_name, germline_type, chain_type,
                 ref_seq_id_name), "w")
            writer = csv.writer(outfile, delimiter="\t")
            with open(pickle_file, 'rb') as pickle_file_handle:
                mutation_patterns_group = pickle.load(pickle_file_handle)
            try:
                max_freq_allele = max_freq_allele_dict[ref_seq_id_name]
            except KeyError:
                # No allele known for this gene: skip it, but close the
                # (empty) output file instead of leaking the handle.
                outfile.close()
                continue
            germline_fasta_seq = germline_fasta[max_freq_allele].seq.upper()

            # One row per reference position; columns count A, C, '-', T, G
            # observed in the reads at that (1-based) position.
            mutation_spectrum_array = np.zeros((len(germline_fasta_seq), 5))
            for value in mutation_patterns_group.values():
                for record in value:
                    for position_record_item in record[1]:
                        position = position_record_item[0]
                        ref_nucl = position_record_item[1]
                        query_nucl = position_record_item[2]
                        column = spectrum_column.get(query_nucl)
                        if column is not None:
                            mutation_spectrum_array[position -
                                                    1][column] += 1

            # Report, per reference position, the count stored in the column
            # of the reference nucleotide itself.
            for index, nucl in enumerate(germline_fasta_seq):
                if nucl in ("A", "C", "T", "G"):
                    column = spectrum_column[nucl]
                    print("mutation_spectrum_array[index][%d] %s" %
                          (column, mutation_spectrum_array[index][column]))

            for index, line in enumerate(mutation_spectrum_array):
                writer.writerow([germline_fasta_seq[index]] + list(line))
            outfile.close()

            #Plot mutation spectrum
            d_colors = [
                '#2166ac', '#fee090', '#fdbb84', '#fc8d59', '#e34a33',
                '#b30000', '#777777'
            ]
            SBG = StackedBarGrapher()
            fig = plt.figure()
            ax5 = fig.add_subplot(111)
            SBG.stackedBarPlot(ax5,
                               mutation_spectrum_array,
                               d_colors,
                               edgeCols=d_colors,
                               ylabel='Number of reads')
            # NOTE(review): raises KeyError when the gene is missing from
            # the gene-usage dump -- confirm that cannot happen.
            plt.title("%s %s" %
                      (ref_seq_id_name, geneusage_dict[ref_seq_id_name]))

            plt.savefig('%s/%s_%s_%s_%s_nutation_spectrum_all.png' %
                        (prj_tree.figure, prj_name, chain_type, germline_type,
                         ref_seq_id_name),
                        dpi=300)
            plt.close(fig)

            # ploy_nucl_position_draw
            # The first entry is always the placeholder 0 written in step 1.
            # NOTE(review): raises KeyError when step 1 saw no
            # mutation-pattern file for this gene.
            poly_nucl_positions = ploy_gene_position_dict[ref_seq_id_name]
            for poly_nucl_position in poly_nucl_positions[1:]:
                # Rows are indexed by key - 1 for keys up to 10; columns
                # count A, T, C, G observed at this position.
                # NOTE(review): a key of 0 would index the last row.
                poly_nucl_position_array = np.zeros((10, 4))

                for (key, value) in mutation_patterns_group.items():
                    if key <= 10:
                        for record in value:
                            for position_record_item in record[1]:
                                position = position_record_item[0]
                                if position == poly_nucl_position:
                                    ref_nucl = position_record_item[1]
                                    query_nucl = position_record_item[2]
                                    column = poly_column.get(query_nucl)
                                    if column is not None:
                                        poly_nucl_position_array[
                                            key - 1][column] += 1

                poly_colors = ['red', 'yellow', 'blue', 'green']
                gap = 0.2
                SBG = StackedBarGrapher()
                fig = plt.figure()
                ax5 = fig.add_subplot(111)
                SBG.stackedBarPlot(ax5,
                                   poly_nucl_position_array,
                                   poly_colors,
                                   edgeCols=['#000000'] * 7,
                                   ylabel='Number of reads',
                                   gap=gap)
                # NOTE(review): `ref_nucl` is whatever the most recent
                # matching record assigned; if no record matched this
                # position the value is left over from earlier iterations.
                plt.title("%s %s %s" %
                          (ref_seq_id_name, poly_nucl_position, ref_nucl))
                plt.xticks(
                    range(0, 10, 1),
                    ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'))
                plt.savefig(
                    '%s/%s_%s_%s_%s_position%s_all.png' %
                    (prj_tree.figure, prj_name, chain_type, germline_type,
                     ref_seq_id_name, poly_nucl_position),
                    dpi=300)
                plt.close(fig)
# Example #8
# 0
def draw_graph(Holes_Objects, Processes_Objects, Block_List):
    """Show the memory layout as a single stacked bar titled "Memory Allocation".

    Every segment is a ``[start, size, tag]`` list.  Holes get tag 1 (blue),
    entries of *Block_List* are used as given (tag 0 is drawn grey) and
    processes whose ``allocated_to`` is not -1 get tag 2 (red).  Segments are
    sorted by start address and their sizes are stacked in that order.

    Args:
        Holes_Objects: sequence of objects with ``address`` and ``size``.
        Processes_Objects: iterable of objects with ``size`` and
            ``allocated_to`` (either -1 or an object with ``address``).
        Block_List: list of ``[start, size, tag]`` segments.  When it is
            empty it is filled IN PLACE with one tag-0 segment per hole: it
            starts where the hole ends and reaches the next hole (size 0
            after the last hole).

    Returns:
        None.  The figure is shown with ``plt.show()`` and then closed.
    """
    SBG = StackedBarGrapher()

    Memory_List = [[float(hole.address), float(hole.size), 1]
                   for hole in Holes_Objects]

    if (Block_List == []):
        last_index = len(Holes_Objects) - 1
        for x, hole in enumerate(Holes_Objects):
            block_start = float(hole.address + hole.size)
            if x < last_index:
                block_size = float(Holes_Objects[x + 1].address -
                                   hole.address - hole.size)
            else:
                block_size = 0
            Block_List.append([block_start, block_size, 0])

    Memory_List.extend(Block_List)

    Memory_List.extend(
        [float(process.allocated_to.address),
         float(process.size), 2] for process in Processes_Objects
        if process.allocated_to != -1)

    print(Memory_List)

    Memory_List.sort(key=lambda segment: segment[0])

    print(Memory_List)

    # Segment sizes in address order, plus one colour per recognised tag.
    np_array = [segment[1] for segment in Memory_List]
    tag_colors = ((0, '#aaaaaa'), (1, '#0000bb'), (2, '#bb0000'))
    d_colors = []
    for segment in Memory_List:
        for tag, color in tag_colors:
            if segment[2] == tag:
                d_colors.append(color)
                break

    d = np.array([np_array])

    print(d)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    SBG.stackedBarPlot(
        ax,
        d,
        d_colors,
        xLabels=["Memory"],
        yTicks=7,
        widths=[1],
    )
    plt.title("Memory Allocation")

    fig.subplots_adjust(bottom=0.4)
    plt.tight_layout()
    plt.show()
    plt.close(fig)
def _gene_usage_y_ticks(max_value):
    """Return even y tick values covering *max_value*, thinned to <= 14."""
    upper = math.ceil(max_value)
    while upper % 2 != 0:
        upper += 1

    ticks = [0]
    tick = 0
    while tick <= upper:
        tick += 2
        ticks.append(tick)

    # Halve the number of ticks until they fit, always keeping 0 first.
    while len(ticks) > 14:
        if len(ticks) % 2 == 0:
            ticks = [0] + ticks[1:][::2]
        else:
            ticks = ticks[::2]
    return ticks


def plot_gene_usage(result_dict, pic_type, germline_gene_list, germline_type):
    """Save a stacked bar chart of per-gene usage percentages.

    Keys of ``result_dict`` are grouped by the part before the first ``'*'``.
    Each value is converted to a percentage of the total, and each group's
    percentages are stacked (largest first) in the bar of that gene.  Bars are
    drawn for every name returned by ``get_all_gene_name(germline_gene_list)``;
    x tick labels of genes present in ``result_dict`` are coloured blue.

    The title and the output file name also use the module-level names
    ``prj_name`` and ``chain_type``.

    Args:
        result_dict: mapping of name (``gene*suffix``) to a numeric count.
        pic_type: suffix inserted in the output file name before ``.png``.
        germline_gene_list: passed unchanged to ``get_all_gene_name``.
        germline_type: used in the title, the file name and the message
            printed when there is nothing to plot.

    Returns:
        None.  When the counts sum to zero, a message is printed and no
        figure is produced (previously this divided by zero).
    """
    total_number = sum(result_dict.values())
    if total_number == 0:
        print("There is no %s" % germline_type)
        return

    SBG = StackedBarGrapher()

    gene_number_dict = {}
    for key, value in result_dict.items():
        gene_number_dict.setdefault(key.split('*')[0], []).append(
            float(value) / total_number * 100)
    # Widest group decides how many stacked segments a bar can have.
    max_len_value = max(len(value) for value in gene_number_dict.values())

    gene_names = get_all_gene_name(germline_gene_list)
    data = np.zeros((len(gene_names), max_len_value))
    for index, gene in enumerate(gene_names):
        gene_number = gene_number_dict.get(gene)
        if gene_number is None:
            # Gene not observed: its row stays all zeros.
            continue
        print(gene_number)
        for n_index, number in enumerate(sorted(gene_number, reverse=True)):
            data[index][n_index] = number

    d_labels = gene_names
    # Seven-colour palette repeated three times (21 segment colours).
    d_colors = [
        '#2166ac', '#fee090', '#fdbb84', '#fc8d59', '#e34a33', '#b30000',
        '#777777'
    ] * 3

    fig = plt.figure()
    tick_values = _gene_usage_y_ticks(max(np.sum(data, axis=1)))
    # Positions and labels are the same numbers, passed as two lists.
    y_ticks = [tick_values, list(tick_values)]

    ax = fig.add_subplot(111)
    SBG.stackedBarPlot(ax,
                       data,
                       d_colors,
                       xLabels=d_labels,
                       edgeCols=['#000000'] * len(d_colors),
                       yTicks=y_ticks,
                       ylabel='Percent of reads (%)',
                       gap=.2,
                       endGaps=True)

    plt.title("%s %s %s gene usage" % (prj_name, chain_type, germline_type))

    fig.subplots_adjust(bottom=0.4)
    plt.tight_layout()
    for t in ax.xaxis.get_ticklabels():
        t.set_horizontalalignment('center')
        # Read the label through the Text API instead of parsing str(t).
        if t.get_text() in gene_number_dict:
            t.set_color('blue')

    plt.savefig(
        '/zzh_gpfs02/yanmingchen/HJT-PGM/Naive/%s/%s_%s_%s_gene_usage%s.png' %
        (prj_name, prj_name, chain_type, germline_type, pic_type),
        dpi=300)
    plt.close()
(0.6151274326753975, 0.496189476149738, 0.75244053646953548), 
(0.1562085876188265, 0.44786703170340336, 0.9887241674046707), 
(0.4210506028639077, 0.2200011667972023, 0.37841949185273394), 
(0.7728656344058752, 0.17367399916287833, 0.026245548153039366), 
(0.904005064928743, 0.3038725882767085, 0.9399279068775889), 
(0.39140782566655674, 0.761012099948101, 0.7475874114794775), 
(0.0965359591761811, 0.43566457484639054, 0.9375581594394308), 
(0.859944654091911, 0.208070821188862, 0.8893517695418856), 
(0.5934259725250017, 0.6259544064286037, 0.8943937276483482), 
(0.1834548561930609, 0.8625908063396674, 0.2808367027257399), 
(0.9522043509564118, 0.8383482335114356, 0.04624824811210648), 
(0.2509444122476855, 0.723665792376911, 0.1685356796751546)] 


# Axes anchored at cell (0, 0) of a 7x7 grid, spanning 6 rows and all 7 columns.
ax1 = plt.subplot2grid((7,7), (0,0), rowspan = 6, colspan=7)
SBG = StackedBarGrapher()
# Draw post_A as stacked bars on ax1 using my_color_list, with d_labels as
# x labels and d_widths as bar widths (both defined earlier in the script).
# NOTE(review): scale=False presumably keeps raw values - confirm in stackedBarGraph.
SBG.stackedBarPlot(ax1,
                   post_A,
                   my_color_list,
                   xLabels=d_labels,
                   yTicks=3,
                   widths=d_widths,
                   gap = 0.005,
                   scale=False
)

for i in range(len(post_A)):
    Ai = [x for x in post_A[i] if x>0]
    y = [x/2.0 for x in Ai]
    for j in range(len(Ai)):
        if j>0:
(0.39140782566655674, 0.761012099948101, 0.7475874114794775), 
(0.0965359591761811, 0.43566457484639054, 0.9375581594394308), 
(0.859944654091911, 0.208070821188862, 0.8893517695418856), 
(0.022700048163251885, 0.658455757390323, 0.45194508876647577), 
(0.5934259725250017, 0.6259544064286037, 0.8943937276483482), 
(0.1248759682295419, 0.1286185769691658, 0.6973677590395778), 
(0.1834548561930609, 0.8625908063396674, 0.2808367027257399), 
(0.7072265637451247, 0.795648339142106, 0.4662593453344923), 
(0.9522043509564118, 0.8383482335114356, 0.04624824811210648), 
(0.2509444122476855, 0.723665792376911, 0.1685356796751546)] 

# One bar width of 0.5 for each of the three x labels '2', '3', '4'.
d_widths = [.5]*len(['2','3','4'])


# Axes anchored at cell (0, 0) of a 7x7 grid, spanning 6 rows and all 7 columns.
ax1 = plt.subplot2grid((7,7), (0,0), rowspan = 6, colspan=7)
SBG = StackedBarGrapher()
# Draw A as stacked bars on ax1 using my_color_list; the x labels must stay
# in step with the list used for d_widths above.
# NOTE(review): scale=False presumably keeps raw values - confirm in stackedBarGraph.
SBG.stackedBarPlot(ax1,
                   A,
                   my_color_list,
                   xLabels=['2','3','4'],
                   yTicks=3,
                   widths=d_widths,
                   gap = 0.005,
                   scale=False
)

for i in range(len(A)):
    Ai = [x for x in A[i] if x>0]
    y = [x/2.0 for x in Ai]
    for j in range(len(Ai)):
        if j>0:
Exemple #12
0
import numpy as np
from matplotlib import pyplot as plt
from stackedBarGraph import StackedBarGrapher

# Demo: six people (rows) with seven stacked values each (columns), drawn
# with scale=True and a fixed width of 0.5 per bar.
SBG = StackedBarGrapher()

d = np.array([
    [101., 0., 0., 0., 0., 0., 0.],
    [92., 3., 0., 4., 5., 6., 0.],
    [56., 7., 8., 9., 23., 4., 5.],
    [81., 2., 4., 5., 32., 33., 4.],
    [0., 45., 2., 3., 45., 67., 8.],
    [99., 5., 0., 0., 0., 43., 56.],
])

# One label and one width per row of d.
d_labels = ["fred", "julie", "sam", "peter", "rob", "baz"]
d_widths = [.5] * len(d_labels)

# One colour per column of d.
d_colors = [
    '#2166ac',
    '#fee090',
    '#fdbb84',
    '#fc8d59',
    '#e34a33',
    '#b30000',
    '#777777',
]

fig = plt.figure()
ax = fig.add_subplot(111)

SBG.stackedBarPlot(ax, d, d_colors,
                   xLabels=d_labels, yTicks=7, widths=d_widths,
                   scale=True, gap=1)
plt.title("Scaled bars with set widths")

# Leave room under the axes for the x labels, then tighten the layout.
fig.subplots_adjust(bottom=0.4)
plt.tight_layout()
Exemple #13
0
import pandas as pd
import pylab as pl
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np

import sys
sys.path.append('/na/home/splis/soft/src/dev/craft/gunfolds/tools/')
sys.path.append('/na/homes/splis/soft/src/dev/tools/stackedBarGraph/')
import zickle as zkl
from stackedBarGraph import StackedBarGrapher
SBG = StackedBarGrapher()

fig = pl.figure(figsize=[10,1.3])

# Load the results file and collect its keys in ascending order.
# (Alternative input file left by the author: hooke_nodes_35_newp_.zkl)
d = zkl.load("hooke_nodes_6_g32g1_.zkl")
# Materialise the keys first: under Python 3 ``d.keys()`` is a view object,
# which np.sort cannot sort directly; under Python 2 this is a no-op copy.
densities = np.sort(list(d.keys()))

def get_counts(d):
    """Count how many items of *d* share each ``len(item['eq'])``.

    Args:
        d: iterable of mappings, each with an ``'eq'`` entry that supports
            ``len()``.

    Returns:
        dict mapping each distinct length (a numpy scalar, in ascending
        insertion order) to the number of items with that length (a plain
        ``int``).  An empty input gives an empty dict.
    """
    eqc = [len(x['eq']) for x in d]
    # np.unique already returns the distinct values sorted, together with
    # their multiplicities, so no extra sort or per-key rescan is needed.
    sizes, counts = np.unique(eqc, return_counts=True)
    return {size: int(count) for size, count in zip(sizes, counts)}

# unique size
usz = set()
dc = {}