Esempio n. 1
0
def load_genusData(gfilename):
    """Load a genus table and convert its values to per-column percentages.

    The file is read through ``csvReader.csv_reader``.  Row 0 is used as the
    header, column 0 of every following row as the genus name, column 1 as
    the genus type, and the remaining columns as numeric values.

    Args:
        gfilename: Path of the genus CSV file, handed to the CSV reader.

    Returns:
        A tuple ``(nrows, x_data, genus_dic, bottom_data)``:

        * ``nrows`` -- row count as reported by the reader.
        * ``x_data`` -- header cells from the third column onwards.
        * ``genus_dic`` -- maps each data row number (starting at 1) to a
          dict with the keys ``'type'``, ``'name'`` and ``'rate'``, where
          ``'rate'`` is the row's share of each column total in percent.
        * ``bottom_data`` -- maps each row number to the cumulative
          percentage up to and including that row; entry ``0`` is all zeros.
    """
    csv_list, nrows, _ = csvReader.csv_reader(gfilename)

    x_data = csv_list[0][2:]
    width = len(x_data)

    genus_dic = {}
    column_totals = [0] * width
    bottom_data = {0: [0] * width}

    for row_num in range(1, nrows):
        row = csv_list[row_num]
        values = row[2:]
        genus_dic[row_num] = {'type': row[1], 'rate': values, 'name': row[0]}

        # A fresh list is built on every pass, so the cumulative sums stored
        # for earlier rows are never mutated afterwards.
        column_totals = [acc + value
                         for acc, value in zip(column_totals, values)]
        bottom_data[row_num] = column_totals

    def to_percent(values):
        # float() forces true division even on Python 2 with integer cells;
        # a column whose total is 0 yields 0.0 instead of ZeroDivisionError.
        return [
            float(value) / total * 100 if total else 0.0
            for value, total in zip(values, column_totals)
        ]

    # Convert absolute values to percentages of the final column totals.
    for row_num in range(1, nrows):
        genus_dic[row_num]['rate'] = to_percent(genus_dic[row_num]['rate'])
        bottom_data[row_num] = to_percent(bottom_data[row_num])

    return nrows, x_data, genus_dic, bottom_data
Esempio n. 2
0
def load_bacterium(genus_filename, group_filename):
    """Build the per-sample data matrix and group labels for the genus table.

    Every column from index 2 onwards of the genus CSV is treated as one
    sample: its header cell is the sample name and the cells of rows
    ``1 .. nrows - 1`` are its values.  The group of each sample is looked up
    in the mapping returned by ``load_groupData``.

    Args:
        genus_filename: Path of the genus CSV file.
        group_filename: Path of the group CSV file.

    Returns:
        A dict of NumPy arrays with the keys ``'data'`` (one row per sample,
        one column per data row of the CSV), ``'sample'`` (sample names),
        ``'group_id'`` (group index per sample) and ``'group_names'``.
        All four keys are present even when the table has no sample columns.

    Raises:
        KeyError: If a sample named in the genus header has no entry in the
            group mapping.
    """
    csv_list, nrows, ncols = csvReader.csv_reader(genus_filename)
    group, group_names = load_groupData(group_filename)

    sample_names = []
    group_id = []
    sample_data = []

    for col_num in range(2, ncols):
        sample_name = csv_list[0][col_num]
        group_id.append(group[sample_name])
        sample_names.append(sample_name)
        sample_data.append(
            [csv_list[row_num][col_num] for row_num in range(1, nrows)])

    # Convert once after the loop instead of rebuilding every array for each
    # column; this also guarantees the keys exist for an empty table.
    return {
        'data': np.asarray(sample_data),
        'sample': np.asarray(sample_names),
        'group_id': np.asarray(group_id),
        'group_names': np.asarray(group_names),
    }
Esempio n. 3
0
def get_genusSize(gfilename):
    """Return a dict mapping each genus name to its row of values.

    The genus CSV is read with ``csvReader.csv_reader``.  Row 0 is skipped;
    for every later row the first cell is the key and the cells from the
    third column onwards are the value.  When a name occurs more than once,
    the last row wins.
    """
    table, row_count, _ = csvReader.csv_reader(gfilename)
    data_rows = (table[idx] for idx in range(1, row_count))
    return {record[0]: record[2:] for record in data_rows}
Esempio n. 4
0
def load_groupData(group_filename):
    """Read the group file and index its group labels.

    Every row reported by ``csvReader.csv_reader`` is used, row 0 included.
    The first cell of a row is the key and the second cell, lower-cased, is
    its group label.

    Returns:
        A tuple ``(group, group_names)`` where ``group_names`` lists the
        distinct lower-cased labels in order of first appearance and
        ``group`` maps each key to the position of its label in that list.
    """
    rows, row_count, _ = csvReader.csv_reader(group_filename)

    sample_to_group = {}
    labels = []

    for idx in range(row_count):
        record = rows[idx]
        label = record[1].lower()
        if label not in labels:
            labels.append(label)
        sample_to_group[record[0]] = labels.index(label)

    return sample_to_group, labels
Esempio n. 5
0
def kinship(dir):
    """Collect index-aligned father and son image lists.

    For every ``s`` in ``range(dir)`` the CSV returned by
    ``getCSV(rootdir, s)`` is read with ``csv_reader``.  Each of its rows
    supplies a father identifier at index 0 and a son identifier at index 1,
    which are resolved with ``folderFind``.  Pairs where either lookup comes
    back empty are skipped.  When the two results differ in length, the
    shorter one is padded in place with copies of its first element so both
    sides line up.

    Args:
        dir: Number of folder indices to process.  The name shadows the
            ``dir`` builtin but is kept because it is part of the signature.

    Returns:
        ``[fathers, sons]``: two flat lists of equal length where
        ``fathers[i]`` is paired with ``sons[i]``.

    Side effects:
        Prints the number of collected pairs.  Reads the free name
        ``rootdir``, which must be defined outside this function.
    """
    fathers = []
    sons = []

    for s in range(dir):
        csvFile = getCSV(rootdir, s)
        famList = csv_reader(csvFile)

        for row in famList:
            imFather = folderFind(rootdir, s, row[0])
            imSon = folderFind(rootdir, s, row[1])

            if len(imFather) == 0 or len(imSon) == 0:
                continue

            # Pad the shorter side by repeating its first element.
            shortfall = len(imFather) - len(imSon)
            if shortfall > 0:
                imSon.extend([imSon[0]] * shortfall)
            elif shortfall < 0:
                imFather.extend([imFather[0]] * -shortfall)

            fathers.extend(imFather)
            sons.extend(imSon)

    # Parenthesised so the line is valid on Python 2 and Python 3 alike.
    print(len(fathers))

    return [fathers, sons]
Esempio n. 6
0
def load_speciesData(sfilename, gfilename):
    """Load the species table and scale each row by its genus values.

    The species CSV is read with ``csvReader.csv_reader``: row 0 is the
    header, column 0 the genus name, column 1 the species name and the
    remaining columns numeric values.  Genus values come from
    ``get_genusSize(gfilename)``.

    Args:
        sfilename: Path of the species CSV file.
        gfilename: Path of the genus CSV file.

    Returns:
        A tuple ``(nrows, x_data, species_dic, bottom_data)``:

        * ``nrows`` -- row count as reported by the reader.
        * ``x_data`` -- header cells from the third column onwards.
        * ``species_dic`` -- ``species_dic[genus][n]`` is the dict of the
          n-th species (1-based) seen for that genus.  It holds ``'rate'``
          (species values multiplied element-wise by the genus values) and
          ``'name'``; it stays empty when the genus is missing from the
          genus table.
        * ``bottom_data`` -- ``bottom_data[genus][n]`` is the running sum of
          the rates of species ``1 .. n`` of that genus; entry ``0`` is all
          zeros.
    """
    csv_list, nrows, _ = csvReader.csv_reader(sfilename)
    genus_size = get_genusSize(gfilename)

    x_data = csv_list[0][2:]

    species_dic = {}
    bottom_data = {}

    for row_num in range(1, nrows):
        row = csv_list[row_num]
        current_genus = row[0]
        current_species = row[1]

        if current_genus not in species_dic:
            species_dic[current_genus] = {}
            bottom_data[current_genus] = {0: [0] * len(x_data)}

        entries = species_dic[current_genus]

        # Number the species per genus.  A single counter shared by all
        # genera breaks when the rows of one genus are not contiguous: it
        # either overwrites an earlier entry or asks for a missing
        # bottom_data[genus][n - 1].
        n = len(entries) + 1
        entries[n] = {}

        sizes = genus_size.get(current_genus)
        if sizes is not None:
            # The species bar size is an absolute value, not a relative one.
            rate = [i * j for i, j in zip(row[2:], sizes)]
            entries[n]['rate'] = rate
            entries[n]['name'] = current_species

            bottom_data[current_genus][n] = [
                i + j
                for i, j in zip(bottom_data[current_genus][n - 1], rate)
            ]

    return nrows, x_data, species_dic, bottom_data
Esempio n. 7
0
def run(filename, group_filename, checked):
    """Plot the rows that differ significantly between two chosen groups.

    The genus table is read with ``csvReader.csv_reader`` and the sample to
    group mapping with ``load_groupData``.  Samples belonging to the two
    groups named in ``checked`` are selected, every data row is compared
    between the groups with ``sci.ttest_ind`` (``equal_var=True``), and rows
    whose p-value is below 0.05 are kept.  The kept rows are drawn through
    ``bd.draw_graph`` and, when there is at least one, passed on to
    ``biclustering``.

    Args:
        filename: Path of the genus CSV file.
        group_filename: Path of the group CSV file.
        checked: Sequence whose first two items are the names of the groups
            to compare, as they appear in the list from ``load_groupData``.

    Side effects:
        Rebinds the module globals ``group1`` and ``group2`` to the sample
        names of each group, prints ``no data`` when nothing passes the
        filter, and blocks in ``plt.show()``.

    Raises:
        ValueError: If a group name in ``checked`` is unknown, or a sample of
            a selected group is missing from the table header.
    """
    global group1
    global group2

    # read csv file
    csv_list, nrows, ncols = csvReader.csv_reader(filename)

    x_data = csv_list[0][2:]

    # load group file
    group, group_names = load_groupData(group_filename)

    group1 = []
    group2 = []

    # positions of each group's samples within x_data
    group1_ind = []
    group2_ind = []

    # Separate group1 and group2.  The id lookups are loop-invariant, so they
    # are done once; they are skipped for an empty mapping, as before.
    if group:
        group1_id = group_names.index(checked[0])
        group2_id = group_names.index(checked[1])

        # items() avoids indexing keys()/values(), which rebuilt both lists
        # on every iteration on Python 2 and is unsupported on Python 3.
        for sample, group_id in group.items():
            if group_id == group1_id:
                group1.append(sample)
                group1_ind.append(x_data.index(sample))
            elif group_id == group2_id:
                group2.append(sample)
                group2_ind.append(x_data.index(sample))

    filtered = {'data': [], 'genus': [], 'pvalue': []}

    # extract the rows with a low p-value
    for row_num in range(1, nrows):
        row = csv_list[row_num]
        current_genus = row[0]
        current_type = row[1]

        # x_data starts at column 2, hence the offset.
        test_group1 = [row[2 + i] for i in group1_ind]
        test_group2 = [row[2 + i] for i in group2_ind]

        # Index 1 of the result is used as the p-value.
        pvalue = sci.ttest_ind(test_group1, test_group2, equal_var=True)[1]

        if pvalue < 0.05:
            filtered['data'].append(test_group1 + test_group2)
            filtered['genus'].append(
                current_genus + "(" + str(current_type)[0] + ")")
            filtered['pvalue'].append(round(pvalue, 4))

    n_samples = len(group1) + len(group2)

    d3 = bd.draw_graph(group1, group2, checked,
                       x_label=list(range(n_samples)),
                       y_label=list(range(len(filtered['data']))),
                       pvalue_label=filtered['pvalue'],
                       fit_data=filtered['data'],
                       genus_data=filtered['genus'],
                       title="Original dataset",
                       x=list(range(n_samples)))

    d3.draw()

    if len(filtered['data']) > 0:
        biclustering(filtered, checked)
    else:
        print("no data")

    plt.show()
Esempio n. 8
0
def run(genus_filename, group_filename):
    """Show an interactive two-component PCA scatter plot of the samples.

    The sample matrix and group labels come from ``load_bacterium``.  The
    samples are projected onto two components with ``PCA`` and drawn as one
    scatter series per group.  A slider and two buttons are added below the
    plot:

    * the ``loading`` slider (0.05 to 0.5, initially 0.3) redraws arrows and
      labels for the features whose component value exceeds the threshold;
    * ``show`` annotates every point with its sample name;
    * ``hide`` removes the loading arrows, their labels and the annotations.

    Args:
        genus_filename: Path of the genus CSV file.
        group_filename: Path of the group CSV file.

    Side effects:
        Rebinds the module globals ``ann``, ``txt`` and ``arr`` to the lists
        of artists currently drawn, and blocks in ``plt.show()``.
    """
    # Load the genus file; its first column supplies the loading labels.
    csv_list, nrows, ncols = csvReader.csv_reader(genus_filename)

    # Load the bacterium data: one row of X per sample, y holds group ids.
    bacterium = load_bacterium(genus_filename, group_filename)
    X = bacterium['data']
    y = bacterium['group_id']
    group_names = bacterium['group_names']

    n_components = 2

    # NOTE(review): PCA is presumably sklearn.decomposition.PCA; confirm
    # against the file's imports.
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X)

    colors = ['navy', 'turquoise', 'darkorange', 'green', 'yellow']

    f, ax1 = plt.subplots(1, figsize=(6, 6))
    # Leave room at the bottom of the figure for the slider and buttons.
    plt.subplots_adjust(left=None,
                        bottom=0.2,
                        right=None,
                        top=None,
                        wspace=None,
                        hspace=None)

    # Draw one scatter series per group.  zip() stops at the shortest input,
    # so groups beyond the five listed colors are not plotted.
    for X_transformed, title in [(X_pca, "PCA")]:
        for color, i, group_name in zip(colors, range(len(group_names)),
                                        group_names):
            plt.scatter(X_transformed[y == i, 0],
                        X_transformed[y == i, 1],
                        color=color,
                        lw=2,
                        label=group_name)
            plt.title(title + " of bacterium dataset")
        plt.legend(loc="best", shadow=False, scatterpoints=1)
        # Fixed axis limits, independent of the data range.
        plt.axis([-1, 1, -0.5, 0.5])

    # Annotation artists currently on the plot.
    global ann
    ann = []

    # Set the sample-name annotations.
    def annotation():
        """Label every projected point with its sample name."""
        # x and y here are locals of this helper, not the outer group ids.
        for label, x, y in zip(bacterium['sample'], X_pca[:, 0], X_pca[:, 1]):
            ann.append(
                ax1.annotate(label,
                             xy=(x, y),
                             xytext=(-2, 2),
                             textcoords='offset points',
                             ha='right',
                             va='bottom',
                             fontsize=6,
                             color='gray'))

    # Loading labels (txt) and arrows (arr) currently on the plot.
    global txt, arr
    txt = []
    arr = []

    # Loadings.
    def loading(load):
        """Draw an arrow and label for each feature exceeding ``load``."""
        for i in range(len(pca.components_[0])):
            x, y = pca.components_[0][i], pca.components_[1][i]
            # Signed comparison: a strongly negative value on both
            # components never passes this test.
            if x > load or y > load:
                # The arrow spans half of the component vector.
                arr.append(
                    ax1.arrow(0,
                              0,
                              x * 0.5,
                              y * 0.5,
                              color='coral',
                              width=0.001,
                              head_width=0.01))
                # Feature i is labelled with the first cell of CSV row i + 1
                # (row 0 being the header), placed at the arrow's midpoint.
                txt.append(
                    ax1.text(x * 0.25,
                             y * 0.25,
                             csv_list[i + 1][0],
                             color='coral',
                             ha='center',
                             va='center',
                             fontsize=7))
        f.canvas.draw()

    # NOTE(review): `axisbg` was deprecated in Matplotlib 2.0 in favour of
    # `facecolor` and later removed; confirm the Matplotlib version this
    # code targets.
    axloading = plt.axes([0.20, 0.10, 0.62, 0.03],
                         axisbg='lightgoldenrodyellow')
    sloading = Slider(axloading,
                      'loading',
                      0.05,
                      0.5,
                      valinit=0.3,
                      color='coral')

    def update(val):
        """Slider callback: redraw the loadings for the new threshold."""
        # txt and arr are appended to in pairs, so one index serves both.
        for i in range(len(txt)):
            txt[i].remove()
            arr[i].remove()
        # Empty the lists in place so the globals keep the same objects.
        arr[:] = []
        txt[:] = []

        load = sloading.val
        loading(load)
        f.canvas.draw_idle()

    # loading() is only reached through this callback, so no arrows are
    # drawn until the slider is moved.
    sloading.on_changed(update)
    showax = plt.axes([0.7, 0.025, 0.1, 0.04])
    hideax = plt.axes([0.8, 0.025, 0.1, 0.04])

    bshow = Button(showax,
                   'show',
                   color='lightgoldenrodyellow',
                   hovercolor='0.975')
    bhide = Button(hideax,
                   'hide',
                   color='lightgoldenrodyellow',
                   hovercolor='0.975')

    def show(event):
        """Button callback: add the sample-name annotations."""
        # Each click appends another full set of annotations; nothing is
        # cleared first and no redraw is requested here.
        annotation()

    def hide(event):
        """Button callback: remove loadings and annotations from the plot."""
        for i in range(len(txt)):
            txt[i].remove()
            arr[i].remove()
        arr[:] = []
        txt[:] = []

        for i in range(len(ann)):
            ann[i].remove()
        ann[:] = []

        f.canvas.draw()

    bshow.on_clicked(show)
    bhide.on_clicked(hide)

    plt.show()