Beispiel #1
0
	def draw_swarm(self, columns, path=None):
		"""Draw a "square"-method beeswarm plot of the selected columns.

		Args:
			columns: Selector applied as ``self[columns]``; it is also
				passed to ``beeswarm`` as the group labels.
			path: Optional destination handed to ``fig.savefig``.

		Returns:
			The newly created figure when ``path`` is None. When a path
			is given the figure is saved there and None is returned.
		"""
		# Iterating self[columns] yields the keys that are then looked up
		# one at a time on self (presumably column names -- confirm).
		samples = [self[name].values for name in self[columns]]
		figure = pyplot.figure()
		axes = figure.add_subplot(111)
		beeswarm(samples, method="square", labels=columns, ax=axes)
		if path is None:
			return figure
		figure.savefig(path)
 def Beeswarm(self):
     """Draw a beeswarm plot of all data series and annotate significance.

     Reads ``self.data.num`` series stored under the keys ``'y1'``,
     ``'y2'``, ... of ``self.data.series`` (group labels come from the
     same keys of ``self.data.names``) and plots them with ``beeswarm``
     using ``self.Style`` as the layout method.

     Side effects: prints the collected series, stores the ``beeswarm``
     result in ``self.bs``, the current figure in ``self.fig`` and the
     axes in ``self.axs``, then calls ``self.label_diff`` twice.
     """
     palette = ['red', 'cyan', 'green', 'magenta', 'blue', 'black']
     count = self.data.num
     # Broadcast the colour cycle to exactly ``count`` entries.  Floor
     # division is required here: with true division (Python 3) the
     # quotient is a float and ``float * list`` raises TypeError.
     colors = (count // len(palette)) * palette + palette[:count % len(palette)]
     keys = ['y' + str(n + 1) for n in range(count)]
     # Data
     datavect = [self.data.series[key] for key in keys]
     print(datavect)
     # Names
     datanames = [self.data.names[key] for key in keys]
     self.bs, ax = beeswarm(datavect, method=self.Style,
                            labels=datanames,
                            col=colors)
     # Format style
     # make sure axis tick marks point out
     ax.tick_params(axis='both', direction='out')
     ax.spines['right'].set_visible(False)
     ax.spines['top'].set_visible(False)
     ax.xaxis.set_ticks_position('bottom')
     ax.yaxis.set_ticks_position('left')
     # save current figure handle
     self.fig = plt.gcf()
     self.axs = ax
     # Do annotation: compare significance
     X = ax.get_xticks()
     Y = [max(x) for x in datavect]
     # NOTE(review): the compared pairs and the p-value strings are
     # hard-coded; presumably at least three series are required --
     # confirm against label_diff.
     self.label_diff(0, 1, 'p=0.0370', X, Y)
     self.label_diff(1, 2, 'p<0.0001', X, Y)
Beispiel #3
0
	def plot(self):
		"""Draw this object's data as a "center"-method beeswarm subplot.

		A new subplot is added to ``self.figure`` using the entries of
		``self.plot_tuple`` as ``add_subplot`` arguments, the data in
		``self.data`` is plotted with ``self.x_labels`` as group labels,
		and the axes are then labelled, titled with ``self.graph_label``,
		limited to ``[0, self.cap]`` on y and given a horizontal grid.
		"""
		# Read the inputs in the same order the call below consumes them.
		samples = self.data
		panel = self.figure.add_subplot(*self.plot_tuple)
		tick_labels = self.x_labels
		# NOTE(review): ``__s`` must stay a literal keyword argument.  Inside
		# a class body Python applies private-name mangling to identifiers
		# with two leading underscores, so moving it into a dict of kwargs
		# would change the name that beeswarm receives -- confirm what
		# beeswarm expects here.
		_, ax = beeswarm(
			samples,
			method="center",
			s=.5,
			__s=10,
			labelrotation="horizontal",
			ax=panel,
			labels=tick_labels,
			col="white",
			edgecolors="black",
		)
		# The x-axis label is skipped when the third plot_tuple entry is 1.
		wants_xlabel = self.plot_tuple[2] != 1
		if wants_xlabel:
			ax.set_xlabel("Day & frequency")
		ax.set_ylabel("Colony Size (cells)")
		ax.set_title(self.graph_label)
		ax.set_ylim(bottom=0, top=self.cap)
		ax.set_yticks((0, 10, 20, 30, 40))
		ax.grid(color='black', linestyle='-', linewidth=.5, axis="y")
Beispiel #4
0
def analyze_transcript_position(feature_counts, feature_type, cutoff=0):
    """Plot log10 read counts grouped by intron/exon position 1 to 9.

    Args:
        feature_counts: Table providing ``set_index`` and ``iterrows`` and
            a ``("Transcript0", "Transcript0")`` column (presumably a
            pandas DataFrame -- confirm).  Within each row, item 1 is
            compared with the position number, item 2 is read as the
            "Exon" count and item 3 as the "Intron" count.
        feature_type: ``"Exon"`` or ``"Intron"``.  Any other value prints
            "Unknown feature type" for every matching row and collects
            nothing.
        cutoff: Only counts strictly greater than this value are kept and
            log10-transformed.

    Returns:
        None.  Progress is printed and the plot is drawn via ``beeswarm``.
    """
    # Item offset of the count inside a row; None marks an unknown type.
    count_offset = {"Exon": 2, "Intron": 3}.get(feature_type)
    feature_counts_indexed = feature_counts.set_index(
        feature_counts[("Transcript0", "Transcript0")])

    # Labels and values are collected in position order so that they line
    # up with color_list without relying on dict iteration order.
    name_list = []
    values_list = []
    for position in range(1, 10):
        # Pick every row with this intron (or exon) number.
        read_list = []
        for _, row in feature_counts_indexed.iterrows():
            if row[1] != position:
                continue
            if count_offset is None:
                print("Unknown feature type")
            else:
                read_list.append(row[count_offset])

        print(feature_type + " position " + str(position))
        print("Before cutoff:" + str(len(read_list)) + " " + feature_type
              + "s analyzed")
        read_list = [math.log(x, 10) for x in read_list if x > cutoff]
        print("After cutoff:" + str(len(read_list)) + " " + feature_type
              + "s analyzed \n")

        name_list.append(position)
        values_list.append(read_list)

    color_list = ["red", "orange", "yellow", "green", "teal", "blue",
                  "purple", "indigo", "grey"]

    # Make beeswarm - x-axis is intron number and y-axis is counts
    beeswarm(values_list, method="swarm", labels=name_list, col=color_list)

    # TODO: Do I need to normalize to total RNA?

     
                
Beispiel #5
0
def beeswarm_plot(DataFrame_list, SampleTuple_list, DataFrame2=None, base=10, color_list="blue", select_random=False):
    """Draw a "swarm" beeswarm plot of log ratios for every sample pair.

    For each entry of ``DataFrame_list`` and each pair in
    ``SampleTuple_list``, ``get_ratios`` is called with the two pair
    members, ``log=True`` and ``base``; each result becomes one group in
    the plot, labelled with the first member of the pair.

    Args:
        DataFrame_list: Sequence of tables passed one by one to
            ``get_ratios``.
        SampleTuple_list: Sequence of indexable pairs; items 0 and 1 are
            forwarded to ``get_ratios``.
        DataFrame2: Unused; kept for interface compatibility.
        base: Logarithm base forwarded to ``get_ratios``.
        color_list: Forwarded to ``beeswarm`` as ``col``.
        select_random: When truthy, the number of values randomly sampled
            from each group before plotting.  Medians are always computed
            on the full group.

    Returns:
        None.  The inputs and the list of medians are printed.
    """
    print(SampleTuple_list)
    print(len(SampleTuple_list))
    values_list = []
    name_list = []
    median_list = []
    for data_frame in DataFrame_list:
        for sample_tuple in SampleTuple_list:
            print(sample_tuple)
            values = get_ratios(data_frame, sample_tuple[0], sample_tuple[1], log=True, base=base)
            # The median is taken before any down-sampling.
            median_list.append(np.median(np.array(values)))
            if select_random:
                values = random.sample(values, select_random)
            values_list.append(values)
            name_list.append(sample_tuple[0])
    print(median_list)
    beeswarm(values_list, method="swarm", labels=name_list, col=color_list)
 def Beeswarm(self):
     """Plot every stored series as a beeswarm and annotate significance.

     The series are read from ``self.data.series`` under the keys
     ``'y1'`` .. ``'y<num>'`` (``num`` being ``self.data.num``), their
     labels from the same keys of ``self.data.names``.  ``self.Style``
     selects the ``beeswarm`` layout method.

     Side effects: prints the collected series, sets ``self.bs``,
     ``self.fig`` and ``self.axs``, and calls ``self.label_diff`` twice.
     """
     base_colors = ['red', 'cyan', 'green', 'magenta', 'blue', 'black']
     total = self.data.num
     # Broadcast the colour cycle: whole repetitions plus the remainder.
     # ``//`` keeps the repeat count an int; true division would produce a
     # float on Python 3 and ``float * list`` raises TypeError.
     whole_cycles = total // len(base_colors)
     colors = whole_cycles * base_colors + base_colors[0:total % len(base_colors)]
     series_keys = ['y' + str(n + 1) for n in range(total)]
     # Data
     datavect = [self.data.series[key] for key in series_keys]
     print(datavect)
     # Names
     datanames = [self.data.names[key] for key in series_keys]
     self.bs, ax = beeswarm(datavect,
                            method=self.Style,
                            labels=datanames,
                            col=colors)
     # Format style
     # make sure axis tick marks point out
     ax.tick_params(axis='both', direction='out')
     ax.spines['right'].set_visible(False)
     ax.spines['top'].set_visible(False)
     ax.xaxis.set_ticks_position('bottom')
     ax.yaxis.set_ticks_position('left')
     # save current figure handle
     self.fig = plt.gcf()
     self.axs = ax
     # Do annotation: compare significance
     X = ax.get_xticks()
     Y = [max(x) for x in datavect]
     # NOTE(review): compared pairs and p-value strings are hard-coded;
     # presumably this needs at least three series -- confirm against
     # label_diff.
     self.label_diff(0, 1, 'p=0.0370', X, Y)
     self.label_diff(1, 2, 'p<0.0001', X, Y)
Beispiel #7
0
def analyze_transcript_position(feature_counts, feature_type, cutoff=0):
    """Draw a beeswarm of log10 read counts for positions 1 through 9.

    Args:
        feature_counts: Table providing ``set_index`` and ``iterrows`` and
            a ``("Transcript0", "Transcript0")`` column (presumably a
            pandas DataFrame -- confirm).  In each row, item 1 is matched
            against the position number; item 2 supplies the "Exon" count
            and item 3 the "Intron" count.
        feature_type: ``"Exon"`` or ``"Intron"``.  For any other value
            "Unknown feature type" is printed once per matching row and
            no counts are collected.
        cutoff: Counts must be strictly greater than this to be kept;
            kept counts are log10-transformed.

    Returns:
        None.  Per-position statistics are printed and the plot is drawn
        through ``beeswarm``.
    """
    # Resolve the row item holding the count once, not once per row.
    if feature_type == "Exon":
        count_item = 2
    elif feature_type == "Intron":
        count_item = 3
    else:
        count_item = None

    feature_counts_indexed = feature_counts.set_index(
        feature_counts[("Transcript0", "Transcript0")])

    # Build labels and values in ascending position order so each group
    # is paired with the intended entry of color_list.
    name_list = []
    values_list = []
    for intron_count in range(1, 10):
        read_list = []
        for _, row in feature_counts_indexed.iterrows():
            if row[1] != intron_count:
                continue
            if count_item is None:
                print("Unknown feature type")
            else:
                read_list.append(row[count_item])

        print(feature_type + " position " + str(intron_count))
        print("Before cutoff:" + str(len(read_list)) + " " + feature_type
              + "s analyzed")
        read_list = [math.log(x, 10) for x in read_list if x > cutoff]
        print("After cutoff:" + str(len(read_list)) + " " + feature_type
              + "s analyzed \n")

        name_list.append(intron_count)
        values_list.append(read_list)

    color_list = ["red", "orange", "yellow", "green", "teal", "blue",
                  "purple", "indigo", "grey"]

    # Make beeswarm - x-axis is intron number and y-axis is counts
    beeswarm(values_list, method="swarm", labels=name_list, col=color_list)
#!/usr/bin/python
import numpy as np
import matplotlib.pyplot as plt
from beeswarm import *
import sys, getopt
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from collections import Counter

# Demo data: 100 uniform draws between -3 and 3 and 100 standard-normal
# draws, plotted as two labelled groups in one beeswarm plot.
d1 = np.random.uniform(low=-3, high=3, size=100)
d2 = np.random.normal(size=100)
    
bs, ax = beeswarm([d1,d2], method="swarm", labels=["sample 1", "sample 2"], col=["blue","red"]) 

  

# Panels 2-4 of a 4-row, 1-column subplot grid.
# NOTE(review): ax1 is used below but is not created anywhere in this part
# of the file -- presumably a plt.subplot(411) panel defined elsewhere;
# confirm.
ax2 = plt.subplot(412)
ax2.set_title('Medium GC')


ax3 = plt.subplot(413)
ax3.set_title('High GC')

# NOTE(review): this panel carries the same 'High GC' title as ax3 although
# it receives the Fake* data below -- verify the intended title.
ax4 = plt.subplot(414)
ax4.set_title('High GC')


axes = [ax1, ax2, ax3, ax4]


# One beeswarm per panel; every panel uses the same four group labels and
# colours.  The Low*/Medium*/High*/Fake* inputs are not defined in this part
# of the file.
beeswarm([Low100pg,Low10pg,Low1pg,Low100fg], method="swarm", labels=["100pg","10pg","1pg","100fg"], col=["black","red","green","blue"], ax=ax1)
beeswarm([Medium100pg,Medium10pg,Medium1pg,Medium100fg], method="swarm", labels=["100pg","10pg","1pg","100fg"], col=["black","red","green","blue"], ax=ax2)
beeswarm([High100pg,High10pg,High1pg,High100fg], method="swarm", labels=["100pg","10pg","1pg","100fg"], col=["black","red","green","blue"], ax=ax3)
beeswarm([Fake100pg,Fake10pg,Fake1pg,Fake100fg], method="swarm", labels=["100pg","10pg","1pg","100fg"], col=["black","red","green","blue"], ax=ax4)




# set y-axes
# The first three panels share the y-range [-10, 6]; ax4 keeps whatever
# limits the plotting call left in place (its override is disabled).
#ax2.set_xlim([0, 100])
ax1.set_ylim([-10, 6])
ax2.set_ylim([-10, 6])
ax3.set_ylim([-10, 6])
#ax4.set_ylim([-1.4, 0.4])

Beispiel #10
0
# Heatmap of a correlation matrix; ``corrmap`` must already exist at this
# point (it is not computed in this part of the file).
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmap, square=True)

# ---------------------------------------------------------------------------
# audio only, PHQ8_Binary
# ---------------------------------------------------------------------------
pd_audio = pd.read_csv('/Users/mac/Downloads/avec2017/audio_fea_train_binary.csv',
                       index_col=0)
pd_audio.shape  # only shows a result in an interactive session

no_important = [867, 852, 795, 307, 196, 195, 206, 362, 419, 1009, 1192]
# ``DataFrame.ix`` was removed in pandas 1.0.  With non-integer column
# labels (a column named ``y`` is accessed below) ``.ix`` resolved integer
# keys by position, so ``.iloc`` is the direct replacement.
pd_important = pd_audio.iloc[:, no_important]

# Compare one selected feature (by position within the selected columns)
# between the rows with y == 0 and the rows with y == 1.
no = 9
bs, ax = beeswarm([pd_important[pd_important.y == 0].iloc[:, no].values,
                   pd_important[pd_important.y == 1].iloc[:, no].values],
                  method='swarm', labels=['0', '1'], col=['blue', 'red'])

# ---------------------------------------------------------------------------
# text only, PHQ8_Score
# ---------------------------------------------------------------------------
pd_text = pd.read_csv('/Users/mac/Downloads/avec2017/text_fea_train.csv',
                      index_col=0)
pd_text.shape  # only shows a result in an interactive session

no_important = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10]
# Positional selection, replacing the removed ``.ix`` indexer.
# NOTE(review): assumes the column labels come from the CSV header (i.e. are
# not integers), as for the audio table -- confirm.
pd_important = pd_text.iloc[:, no_important]

corrmap = pd_important.corr()

f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmap, square=True)