def draw_swarm(self, columns, path=None):
    """Draw a square-method beeswarm plot for the selected columns.

    One swarm is drawn per item obtained by iterating ``self[columns]``;
    each item is used as a key into ``self`` and the ``.values`` of the
    result supply that swarm's points.

    Args:
        columns: Selector passed to ``self[...]``; also forwarded to
            ``beeswarm`` as the ``labels`` argument.
        path: Destination handed to ``fig.savefig``. When ``None`` nothing
            is written.

    Returns:
        The newly created figure when ``path`` is ``None``; otherwise
        ``None`` (the figure is saved instead of returned).
    """
    # presumably self is DataFrame-like, so iterating the selection yields
    # column names -- confirm against the enclosing class.
    swarm_values = [self[name].values for name in self[columns]]

    figure = pyplot.figure()
    axes = figure.add_subplot(111)
    beeswarm(swarm_values, method="square", labels=columns, ax=axes)

    if path is None:
        return figure
    figure.savefig(path)
def Beeswarm(self):
    """Draw a beeswarm plot of every data series and annotate two comparisons.

    Reads ``self.data.num`` (number of series), ``self.data.series`` and
    ``self.data.names`` (both looked up with the keys ``'y1'`` .. ``'yN'``)
    and ``self.Style`` (passed to ``beeswarm`` as ``method``).

    Side effects:
        Sets ``self.bs`` (first value returned by ``beeswarm``),
        ``self.fig`` (current pyplot figure) and ``self.axs`` (the axes
        returned by ``beeswarm``), prints the collected series, and calls
        ``self.label_diff`` twice.
    """
    palette = ['red', 'cyan', 'green', 'magenta', 'blue', 'black']
    # Broadcast the colour cycle to one colour per series. Floor division
    # keeps the repeat count an int on Python 3 as well; true division would
    # produce a float and ``float * list`` raises TypeError.
    whole_cycles = self.data.num // len(palette)
    leftover = self.data.num % len(palette)
    colors = whole_cycles * palette + palette[0:leftover]

    keys = ['y' + str(n + 1) for n in range(self.data.num)]

    # Data
    datavect = [self.data.series[key] for key in keys]
    print(datavect)

    # Names
    datanames = [self.data.names[key] for key in keys]

    self.bs, ax = beeswarm(datavect, method=self.Style,
                           labels=datanames, col=colors)

    # Format style: tick marks point outwards, top/right spines are hidden.
    ax.tick_params(axis='both', direction='out')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # Save current figure handle
    self.fig = plt.gcf()
    self.axs = ax

    # Do annotation: compare significance.
    # NOTE(review): the compared pairs and the p-value strings are
    # hard-coded literals, not computed from the data.
    X = ax.get_xticks()
    Y = [max(series) for series in datavect]
    self.label_diff(0, 1, 'p=0.0370', X, Y)
    self.label_diff(1, 2, 'p<0.0001', X, Y)
def plot(self):
    """Draw this object's data as a centred beeswarm on a new subplot.

    Reads ``self.data``, ``self.figure``, ``self.plot_tuple`` (unpacked into
    ``add_subplot``), ``self.x_labels``, ``self.graph_label`` and
    ``self.cap`` (upper y-limit). Returns nothing.
    """
    subplot_axes = self.figure.add_subplot(*self.plot_tuple)
    _swarm, ax = beeswarm(
        self.data,
        method="center",
        s=.5,
        __s=10,
        labelrotation="horizontal",
        ax=subplot_axes,
        labels=self.x_labels,
        col="white",
        edgecolors="black",
    )

    # NOTE(review): the source's indentation was lost; this assumes only the
    # x-label depends on the subplot index -- confirm against the original.
    if self.plot_tuple[2] != 1:
        ax.set_xlabel("Day & frequency")

    ax.set_ylabel("Colony Size (cells)")
    ax.set_title(self.graph_label)
    ax.set_ylim(bottom=0, top=self.cap)
    ax.set_yticks((0, 10, 20, 30, 40))
    # Horizontal grid lines only.
    ax.grid(color='black', linestyle='-', linewidth=.5, axis="y")
def analyze_transcript_position(feature_counts, feature_type, cutoff=0):
    """Plot log10 read counts per feature position (1-9) as a beeswarm.

    For each position number from 1 to 9, every row whose value at
    position 1 equals that number contributes one read count: the value at
    position 2 when ``feature_type`` is ``"Exon"``, the value at position 3
    when it is ``"Intron"``. Counts not greater than ``cutoff`` are dropped
    and the remainder are log10-transformed before plotting.

    Args:
        feature_counts: Table providing ``set_index``, ``iterrows`` and a
            ``("Transcript0", "Transcript0")`` column (presumably a pandas
            DataFrame -- confirm against the caller).
        feature_type: ``"Exon"`` or ``"Intron"``. Any other value prints
            "Unknown feature type" for each matching row and collects
            nothing.
        cutoff: Only counts strictly greater than this value are kept.

    Returns:
        None. Progress is printed and the plot is drawn by ``beeswarm``.
    """
    feature_counts_indexed = feature_counts.set_index(
        feature_counts[("Transcript0", "Transcript0")])

    # Keys are the position number, values are lists of log10 read counts.
    read_dict = {}

    # Pick every row with each position number in turn.
    for intron_count in range(1, 10):
        read_list = []
        for _, row in feature_counts_indexed.iterrows():
            if row[1] == intron_count:
                if feature_type == "Exon":
                    read_list.append(row[2])
                elif feature_type == "Intron":
                    read_list.append(row[3])
                else:
                    print("Unknown feature type")

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print(feature_type + " position " + str(intron_count))
        print("Before cutoff:" + str(len(read_list)) + " "
              + feature_type + "s analyzed")
        read_list = [math.log(x, 10) for x in read_list if x > cutoff]
        read_dict[intron_count] = read_list
        print("After cutoff:" + str(len(read_list)) + " "
              + feature_type + "s analyzed \n")

    # Extract keys and lists for use in beeswarm. Iterate in sorted position
    # order so that labels, values and colours line up regardless of the
    # dict's iteration order.
    name_list = sorted(read_dict)
    values_list = [read_dict[position] for position in name_list]

    color_list = ["red", "orange", "yellow", "green", "teal",
                  "blue", "purple", "indigo", "grey"]

    # Make beeswarm - x-axis is position number and y-axis is counts.
    bs, ax = beeswarm(values_list, method="swarm",
                      labels=name_list, col=color_list)
    # TODO: Do I need to normalize to total RNA?
def beeswarm_plot(DataFrame_list, SampleTuple_list, DataFrame2=None, base=10,
                  color_list="blue", select_random=False):
    """Plot log ratios for every (data frame, sample pair) combination.

    For each data frame in ``DataFrame_list`` and each tuple in
    ``SampleTuple_list``, ``get_ratios`` is called with the tuple's first two
    items, ``log=True`` and ``base``. Each result becomes one swarm labelled
    with the tuple's first item.

    Args:
        DataFrame_list: Iterable of data frames passed to ``get_ratios``.
        SampleTuple_list: Sequence of tuples; items 0 and 1 are passed to
            ``get_ratios`` and item 0 is used as the swarm label.
        DataFrame2: Unused; kept for interface compatibility.
        base: Logarithm base forwarded to ``get_ratios``.
        color_list: Forwarded to ``beeswarm`` as ``col``.
        select_random: When not equal to ``False``, the number of values
            drawn with ``random.sample`` from each ratio set before plotting.

    Returns:
        None. The sample tuples and the medians are printed; medians are
        computed on the full ratio sets, before any random sub-sampling.
    """
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(SampleTuple_list)
    print(len(SampleTuple_list))

    values_list = []
    name_list = []
    median_list = []

    for data_frame in DataFrame_list:
        for sample_tuple in SampleTuple_list:
            print(sample_tuple)
            values = get_ratios(data_frame, sample_tuple[0], sample_tuple[1],
                                log=True, base=base)
            median_list.append(np.median(np.array(values)))
            # Explicit comparison kept on purpose: it preserves how the
            # original treated values such as 0 and None.
            if select_random != False:  # noqa: E712
                values = random.sample(values, select_random)
            values_list.append(values)
            name_list.append(sample_tuple[0])

    print(median_list)
    bs, ax = beeswarm(values_list, method="swarm",
                      labels=name_list, col=color_list)
def Beeswarm(self):
    """Draw a beeswarm plot of every data series and annotate two comparisons.

    Reads ``self.data.num`` (number of series), ``self.data.series`` and
    ``self.data.names`` (both looked up with the keys ``'y1'`` .. ``'yN'``)
    and ``self.Style`` (passed to ``beeswarm`` as ``method``).

    Side effects:
        Sets ``self.bs`` (first value returned by ``beeswarm``),
        ``self.fig`` (current pyplot figure) and ``self.axs`` (the axes
        returned by ``beeswarm``), prints the collected series, and calls
        ``self.label_diff`` twice.
    """
    base_colors = ['red', 'cyan', 'green', 'magenta', 'blue', 'black']
    # Broadcast the colour cycle to one colour per series. ``//`` keeps the
    # repeat count an int on Python 3 too; with ``/`` the count is a float
    # there and ``float * list`` raises TypeError.
    colors = (self.data.num // len(base_colors) * base_colors
              + base_colors[0:self.data.num % len(base_colors)])

    series_keys = ['y' + str(n + 1) for n in range(self.data.num)]

    # Data
    datavect = [self.data.series[key] for key in series_keys]
    print(datavect)

    # Names
    datanames = [self.data.names[key] for key in series_keys]

    self.bs, ax = beeswarm(datavect,
                           method=self.Style,
                           labels=datanames,
                           col=colors)

    # Format style: tick marks point outwards, top/right spines are hidden.
    ax.tick_params(axis='both', direction='out')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.xaxis.set_ticks_position('bottom')
    ax.yaxis.set_ticks_position('left')

    # Save current figure handle
    self.fig = plt.gcf()
    self.axs = ax

    # Do annotation: compare significance.
    # NOTE(review): the compared pairs and the p-value strings are
    # hard-coded literals, not computed from the data.
    X = ax.get_xticks()
    Y = [max(series) for series in datavect]
    self.label_diff(0, 1, 'p=0.0370', X, Y)
    self.label_diff(1, 2, 'p<0.0001', X, Y)
def analyze_transcript_position(feature_counts, feature_type, cutoff=0):
    """Plot log10 read counts per feature position (1-9) as a beeswarm.

    For each position number from 1 to 9, every row whose value at
    position 1 equals that number contributes one read count: the value at
    position 2 when ``feature_type`` is ``"Exon"``, the value at position 3
    when it is ``"Intron"``. Counts not greater than ``cutoff`` are dropped
    and the remainder are log10-transformed before plotting.

    Args:
        feature_counts: Table providing ``set_index``, ``iterrows`` and a
            ``("Transcript0", "Transcript0")`` column (presumably a pandas
            DataFrame -- confirm against the caller).
        feature_type: ``"Exon"`` or ``"Intron"``. Any other value prints
            "Unknown feature type" for each matching row and collects
            nothing.
        cutoff: Only counts strictly greater than this value are kept.

    Returns:
        None. Progress is printed and the plot is drawn by ``beeswarm``.
    """
    feature_counts_indexed = feature_counts.set_index(
        feature_counts[("Transcript0", "Transcript0")])

    # Keys are the position number, values are lists of log10 read counts.
    read_dict = {}

    # Pick every row with each position number in turn.
    for intron_count in range(1, 10):
        read_list = []
        for _, row in feature_counts_indexed.iterrows():
            if row[1] == intron_count:
                if feature_type == "Exon":
                    read_list.append(row[2])
                elif feature_type == "Intron":
                    read_list.append(row[3])
                else:
                    print("Unknown feature type")

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print(feature_type + " position " + str(intron_count))
        print("Before cutoff:" + str(len(read_list)) + " "
              + feature_type + "s analyzed")
        read_list = [math.log(x, 10) for x in read_list if x > cutoff]
        read_dict[intron_count] = read_list
        print("After cutoff:" + str(len(read_list)) + " "
              + feature_type + "s analyzed \n")

    # Extract keys and lists for use in beeswarm. Iterate in sorted position
    # order so that labels, values and colours line up regardless of the
    # dict's iteration order.
    name_list = sorted(read_dict)
    values_list = [read_dict[position] for position in name_list]

    color_list = ["red", "orange", "yellow", "green", "teal",
                  "blue", "purple", "indigo", "grey"]

    # Make beeswarm - x-axis is position number and y-axis is counts.
    bs, ax = beeswarm(values_list, method="swarm",
                      labels=name_list, col=color_list)
#!/usr/bin/python
# Demo script: draw two random samples and show them as a two-group beeswarm.
import numpy as np
import matplotlib.pyplot as plt
from beeswarm import *
import sys
import getopt
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from collections import Counter

# 100 draws each; the uniform sample is drawn first, then the normal sample.
d1 = np.random.uniform(low=-3, high=3, size=100)
d2 = np.random.normal(size=100)

bs, ax = beeswarm(
    [d1, d2],
    method="swarm",
    labels=["sample 1", "sample 2"],
    col=["blue", "red"],
)
# Rows 2-4 of a 4x1 subplot grid (ax1 is created before this fragment).
ax2 = plt.subplot(412)
ax2.set_title('Medium GC')
ax3 = plt.subplot(413)
ax3.set_title('High GC')
ax4 = plt.subplot(414)
# NOTE(review): same title as ax3 although this panel plots the Fake* samples
# -- confirm the intended title.
ax4.set_title('High GC')
axes = [ax1, ax2, ax3, ax4]

# One swarm panel per sample group; every panel uses the same four labels
# and colours.
_panel_samples = (
    (ax1, [Low100pg, Low10pg, Low1pg, Low100fg]),
    (ax2, [Medium100pg, Medium10pg, Medium1pg, Medium100fg]),
    (ax3, [High100pg, High10pg, High1pg, High100fg]),
    (ax4, [Fake100pg, Fake10pg, Fake1pg, Fake100fg]),
)
for _panel_axis, _samples in _panel_samples:
    # Fresh label/colour lists per call, as in the original four calls.
    beeswarm(_samples, method="swarm",
             labels=["100pg", "10pg", "1pg", "100fg"],
             col=["black", "red", "green", "blue"],
             ax=_panel_axis)

# set y-axes
#ax2.set_xlim([0, 100])
for _panel_axis in (ax1, ax2, ax3):
    _panel_axis.set_ylim([-10, 6])
#ax4.set_ylim([-1.4, 0.4])
# Heatmap of the correlation matrix computed before this fragment.
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmap, square=True)

# --- audio only, PHQ8_Binary ---
pd_audio = pd.read_csv('/Users/mac/Downloads/avec2017/audio_fea_train_binary.csv',
                       index_col=0)
pd_audio.shape

no_important = [867, 852, 795, 307, 196, 195, 206, 362, 419, 1009, 1192]
# ``DataFrame.ix`` was removed in pandas 1.0; ``.iloc`` selects the same
# columns by position. Assumes the column labels are not integers (a column
# named ``y`` is read below), in which case ``.ix`` also resolved these
# integers positionally -- confirm against the CSV header.
pd_important = pd_audio.iloc[:, no_important]

# Compare the distribution of one selected feature between the two classes.
no = 9
bs, ax = beeswarm(
    [pd_important[pd_important.y == 0].iloc[:, no].values,
     pd_important[pd_important.y == 1].iloc[:, no].values],
    method='swarm', labels=['0', '1'], col=['blue', 'red'])

# --- text only, PHQ8_Score ---
pd_text = pd.read_csv('/Users/mac/Downloads/avec2017/text_fea_train.csv',
                      index_col=0)
pd_text.shape

no_important = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10]
# Positional selection, see the note on ``.ix`` above in this block.
pd_important = pd_text.iloc[:, no_important]

corrmap = pd_important.corr()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmap, square=True)