def test_getEcotypeInfo(self):
    """
    Manual check of common.getEcotypeInfo() against the stock_250k database.

    Opens a Stock_250kDB connection, then stops in pdb immediately before
    calling getEcotypeInfo(db) so that the call can be stepped through
    interactively. Nothing is asserted and nothing is returned.
    """
    from common import getEcotypeInfo
    # StockDB has to be imported (set up); otherwise StockDB.Ecotype.table
    # is None inside getEcotypeInfo().
    import StockDB, Stock_250kDB

    # It doesn't matter which database class makes the connection as long as
    # StockDB is imported; StockDB.StockDB(...) with the same keyword
    # arguments would work as well.
    connection_settings = dict(
        drivername='mysql',
        username='******',
        password='',
        hostname='papaya.usc.edu',
        database='stock_250k',
        schema=None,
    )
    db = Stock_250kDB.Stock_250kDB(**connection_settings)
    db.setup(create_tables=False)

    # Deliberate breakpoint: this test is meant to be run by hand.
    import pdb
    pdb.set_trace()
    getEcotypeInfo(db)
def test_getEcotypeInfo(self):
    """
    Manual check of common.getEcotypeInfo() against the stock_250k database.

    Opens a Stock_250kDB connection, then stops in pdb immediately before
    calling getEcotypeInfo(db) so that the call can be stepped through
    interactively. Nothing is asserted and nothing is returned.
    """
    from common import getEcotypeInfo
    # StockDB has to be imported (set up); otherwise StockDB.Ecotype.table
    # is None inside getEcotypeInfo().
    import StockDB, Stock_250kDB

    # It doesn't matter which database class makes the connection as long as
    # StockDB is imported; StockDB.StockDB(...) with the same keyword
    # arguments would work as well.
    connection_settings = dict(
        drivername='mysql',
        username='******',
        password='',
        hostname='papaya.usc.edu',
        database='stock_250k',
        schema=None,
    )
    db = Stock_250kDB.Stock_250kDB(**connection_settings)
    db.setup(create_tables=False)

    # Deliberate breakpoint: this test is meant to be run by hand.
    import pdb
    pdb.set_trace()
    getEcotypeInfo(db)
def run(self):
    """
    Draw strains along a PCA axis underneath a map axes, colored by phenotype.

    Order of work:
      1. (optional) stop in pdb when self.debug is set;
      2. connect to Stock_250kDB and read the SNP matrix from self.input_fname;
      3. if both self.eigen_vector_fname and self.eigen_value_fname are given,
         load eigen values/vectors from them; otherwise keep at most 10000
         randomly chosen SNP columns and convert alleles to indices;
      4. read the phenotype file and reorder its rows to follow the SNP matrix;
      5. build the figure axes, call self.drawStrainPCA(), and save
         "<output_fname_prefix>.png" (dpi=400) and "<output_fname_prefix>.svg";
      6. call self.plotLatLonPhenVsPC().
    """
    if self.debug:
        import pdb
        pdb.set_trace()

    db = Stock_250kDB.Stock_250kDB(
        drivername=self.drivername, username=self.db_user, password=self.db_passwd,
        hostname=self.hostname, database=self.dbname, schema=self.schema)
    db.setup(create_tables=False)
    session = db.session  # not used below; kept so the attribute is still touched

    snpData = SNPData(input_fname=self.input_fname, turn_into_integer=1,
                      turn_into_array=1, ignore_2nd_column=1)

    pca_files_given = self.eigen_vector_fname and self.eigen_value_fname
    if not pca_files_given:
        # No pre-computed PCA: work from the SNP matrix itself.
        max_no_of_snps = 10000
        no_of_snps = len(snpData.col_id_ls)
        if no_of_snps > max_no_of_snps:
            # 2008-12-01 randomly pick max_no_of_snps SNPs
            picked_col_index_ls = random.sample(range(no_of_snps), max_no_of_snps)
            subsampledSnpData = SNPData(
                row_id_ls=snpData.row_id_ls,
                col_id_ls=[snpData.col_id_ls[i] for i in picked_col_index_ls],
                strain_acc_list=snpData.strain_acc_list,
                category_list=snpData.category_list)
            subsampledSnpData.data_matrix = snpData.data_matrix[:, picked_col_index_ls]
            snpData = subsampledSnpData
        # NOTE(review): the conversion is assumed to run for this whole branch
        # (with or without sub-sampling) -- confirm against the original layout.
        snpData, allele_index2allele_ls = snpData.convertSNPAllele2Index()
        explained_var = None
        PC_matrix = None
    else:
        eigen_value_ls = numpy.array(self.getEigenValueFromFile(self.eigen_value_fname))
        explained_var = eigen_value_ls/numpy.sum(eigen_value_ls)
        PC_data = self.getPCFromFile(self.eigen_vector_fname)
        PC_matrix = PC_data.PC_matrix

    header_phen, strain_acc_list_phen, category_list_phen, data_matrix_phen = read_data(
        self.phenotype_fname, turn_into_integer=0)
    # Row labels are those of the SNP matrix, because the phenotype matrix is
    # re-ordered to follow it right below.
    phenData = SNPData(header=header_phen, strain_acc_list=snpData.strain_acc_list,
                       data_matrix=data_matrix_phen)
    # Tricky: the re-ordering uses strain_acc_list_phen (the phenotype file's own order).
    phenData.data_matrix = Kruskal_Wallis.get_phenotype_matrix_in_data_matrix_order(
        snpData.row_id_ls, strain_acc_list_phen, phenData.data_matrix)

    wanted_method_id_set = Set([self.phenotype_method_id])
    phenotype_col_index = self.findOutWhichPhenotypeColumn(phenData, wanted_method_id_set)[0]

    ecotype_info = getEcotypeInfo(db, self.country_order_type)

    # The offset decides where the label of strains/snps should start in
    # axe_snp_matrix. 2008-11-14 only relevant for PlotGroupOfSNPs.py; it could
    # be 1 here because axe_snp_matrix is not drawn.
    snp_id_label_y_offset = 0.95
    StrainID2PCAPosInfo = self.getStrainID2PCAPosInfo(
        snpData, pca_range=[0, 1], snp_id_label_y_offset=snp_id_label_y_offset,
        explained_var=explained_var, T=PC_matrix)

    # Figure layout, all in figure-fraction coordinates.
    pca_bottom = 0.03
    pca_height = 0.45       # height of axe_strain_pca
    map_bottom = pca_bottom+pca_height
    map_height = 0.5        # height of axe_strain_map
    above_map = map_bottom+map_height   # not used below
    main_left = 0.05
    main_width = 0.8        # width of axe_strain_pca and axe_strain_map
    legend_left = main_left + 0.02 + main_width
    legend_width = 0.05     # width of the phenotype color bar
    right_of_legend = legend_left + legend_width    # not used below
    spare_width = 0.02      # not used below

    phenotype_method = Stock_250kDB.PhenotypeMethod.get(self.phenotype_method_id)

    # Color scale: pad the observed phenotype range by a tenth on both sides.
    phenotype_cmap = mpl.cm.jet
    max_phenotype = numpy.nanmax(phenData.data_matrix[:, phenotype_col_index])  # nanmax ignores nan
    min_phenotype = numpy.nanmin(phenData.data_matrix[:, phenotype_col_index])  # nanmin ignores nan
    phenotype_gap = max_phenotype - min_phenotype
    phenotype_jitter = phenotype_gap/10.
    phenotype_norm = mpl.colors.Normalize(vmin=min_phenotype-phenotype_jitter,
                                          vmax=max_phenotype+phenotype_jitter)

    axe_map_phenotype_legend = pylab.axes([legend_left, pca_bottom, legend_width, 0.3],
                                          frameon=False)
    cb = mpl.colorbar.ColorbarBase(axe_map_phenotype_legend, cmap=phenotype_cmap,
                                   norm=phenotype_norm, orientation='vertical')
    cb.set_label('Legend Of Phenotype %s %s'%(phenotype_method.id, phenotype_method.short_name))

    axe_strain_map = pylab.axes([main_left, map_bottom, main_width, map_height], frameon=False)
    axe_strain_pca = pylab.axes([main_left, pca_bottom, main_width, pca_height], frameon=False)
    # The cover spans both axe_strain_map and axe_strain_pca.
    axe_strain_map_pca_cover = pylab.axes(
        [main_left, pca_bottom, main_width, pca_height+map_height],
        frameon=False, sharex=axe_strain_pca)
    axe_strain_map_pca_cover.set_yticks([])

    axe_strain_pca_xlim = [-0.05, 1.05]
    axe_strain_pca_ylim = [0, 1.05]
    # Scaled so that one y unit covers the same height in the cover as in axe_strain_pca.
    axe_strain_map_pca_cover_ylim = [0, (pca_height+map_height)/pca_height]

    def apply_axis_limits():
        # Same three calls are needed before and after drawStrainPCA().
        axe_strain_pca.set_xlim(axe_strain_pca_xlim)
        axe_strain_pca.set_ylim(axe_strain_pca_ylim)
        axe_strain_map_pca_cover.set_ylim(axe_strain_map_pca_cover_ylim)

    apply_axis_limits()
    axe_strain_pca.grid(True, alpha=0.3)
    axe_strain_pca.set_xticks([])
    axe_strain_pca.set_yticks([])
    axe_strain_pca_legend = None    # no pca legend

    self.drawStrainPCA(
        axe_strain_pca, axe_strain_map, axe_strain_map_pca_cover, axe_strain_pca_legend,
        StrainID2PCAPosInfo, ecotype_info, phenData,
        phenotype_col_index, phenotype_cmap, phenotype_norm,
        rightmost_x_value=axe_strain_pca_xlim[1],
        strain_color_type=2, pca2map_line_color=None, ecotype_width_on_map=10,
        draw_lines_to_axe_snp_matrix=False, strain_size_on_axe_strain_pca=14,
        pic_area=self.pic_area,
        map_pca_line_alpha=0.2, map_pca_linewidth=0.2)

    # Customize a couple of things: put the limits back after drawing.
    apply_axis_limits()

    png_output_fname = '%s.png'%self.output_fname_prefix
    pylab.savefig(png_output_fname, dpi=400)
    pylab.savefig('%s.svg'%self.output_fname_prefix)

    self.plotLatLonPhenVsPC(ecotype_info, StrainID2PCAPosInfo, phenData, phenotype_col_index,
                            phenotype_cmap, phenotype_norm, self.output_fname_prefix,
                            commit=self.commit)
def test_drawMap(self):
    """
    Manual test: scatter ecotypes on a Basemap and link each to a second axes.

    Connects to the stock_250k database, fetches ecotype information through
    common.getEcotypeInfo(), draws a projection='mill' Basemap in axe_map and,
    for at most 200 ecotypes that have both latitude and longitude, draws

      - a red dot on the map, and
      - a dashed line in axe_chromosome from (0, y_pos) to the point that
        coincides on the canvas with the ecotype's map position.

    The figure is saved to /tmp/map.png (dpi=600) and /tmp/map.svg. Progress
    goes to stderr, axis limits to stdout. Nothing is returned.

    The loop walks the ecotype ids actually fetched, so a database with fewer
    than 200 ecotypes no longer raises IndexError.
    """
    sys.stderr.write("Drawing graph on a map ...\n")

    # StockDB has to be imported (set up); otherwise StockDB.Ecotype.table is
    # None inside getEcotypeInfo().
    import StockDB, Stock_250kDB
    hostname = 'papaya.usc.edu'
    dbname = 'stock_250k'
    db_user = '******'
    db_passwd = ''
    drivername = 'mysql'
    schema = None
    # It doesn't matter which database class connects as long as StockDB is imported.
    db = Stock_250kDB.Stock_250kDB(drivername=drivername, username=db_user,
                                   password=db_passwd, hostname=hostname,
                                   database=dbname, schema=schema)
    db.setup(create_tables=False)

    from common import getEcotypeInfo
    ecotype_info = getEcotypeInfo(db)

    # Newer installs expose this as mpl_toolkits.basemap.
    from matplotlib.toolkits.basemap import Basemap
    import pylab
    from matplotlib import rcParams
    rcParams['font.size'] = 6
    rcParams['legend.fontsize'] = 6
    # rcParams['text.fontsize'] is deprecated; font.size is used instead.
    rcParams['axes.labelsize'] = 4
    rcParams['axes.titlesize'] = 8
    rcParams['xtick.labelsize'] = 4
    rcParams['ytick.labelsize'] = 4

    pylab.clf()
    # axes rectangle is [left, bottom, width, height]
    axe_map = pylab.axes([0.5, 0.02, 0.4, 0.8], frameon=False)
    axe_map.set_title("Global Arabidopsis Ecotype Distribution")
    axe_map.set_xlabel('ecotype is drawn as circles.')
    axe_map.set_ylabel('strains')

    # [lower-left lon, lower-left lat, upper-right lon, upper-right lat]
    pic_area = [-140, -40, 140, 70]
    m = Basemap(llcrnrlon=pic_area[0], llcrnrlat=pic_area[1],
                urcrnrlon=pic_area[2], urcrnrlat=pic_area[3],
                resolution='l', projection='mill', ax=axe_map)
    m.drawparallels(pylab.arange(-90, 90, 30), labels=[1, 1, 0, 1], size=4, linewidth=0.1)
    m.drawmeridians(pylab.arange(-180, 180, 30), labels=[1, 1, 0, 1], size=4, linewidth=0.1)
    m.fillcontinents()
    m.drawcountries(linewidth=0.1)

    # Remember the limits set by the map so they can be restored after plotting.
    sys.stdout.write("xlim: %s\n" % (axe_map.get_xlim(),))
    sys.stdout.write("ylim: %s\n" % (axe_map.get_ylim(),))
    xlim = axe_map.get_xlim()
    ylim = axe_map.get_ylim()

    output_fname_prefix = '/tmp/map'
    sys.stdout.write("ylim: %s\n" % (axe_map.get_ylim(),))
    axe_map.set_xlim(xlim)
    axe_map.set_ylim(ylim)

    axe_chromosome = pylab.axes([0.05, 0.02, 0.8, 0.8], frameon=False)
    axe_chromosome.set_title("chromosome")

    # Fix the transformations before doing cross-axes drawing.
    # NOTE(review): freeze()/thaw()/xy_tup()/inverse_xy_tup() appear to belong
    # to an old matplotlib transforms API -- confirm the installed version
    # before running.
    axe_map.transData.freeze()  # eval the lazy objects
    axe_map.transAxes.freeze()
    axe_chromosome.transData.freeze()  # eval the lazy objects
    axe_chromosome.transAxes.freeze()

    no_of_ecotypes = 200
    ecotype_id_ls = list(ecotype_info.ecotype_id2ecotype_obj.keys())[:no_of_ecotypes]
    no_of_ecotypes_drawn = 0
    # Enumerate what was fetched: the slice above may hold fewer than
    # no_of_ecotypes ids, and indexing range(no_of_ecotypes) would overrun it.
    for i, ecotype_id in enumerate(ecotype_id_ls):
        y_pos = i/float(no_of_ecotypes)
        ecotype_obj = ecotype_info.ecotype_id2ecotype_obj.get(ecotype_id)
        if not ecotype_obj:
            sys.stderr.write("Warning: Ecotype %s not in ecotype_info (fetched from stock db).\n" % ecotype_id)
            continue
        lat, lon = ecotype_obj.latitude, ecotype_obj.longitude
        if lat and lon:
            x, y = m(lon, lat)
            # Restore the limits before every draw so that the data->canvas
            # mapping used below stays the one the map was laid out with.
            axe_map.set_xlim(xlim)
            axe_map.set_ylim(ylim)
            axe_map.scatter([x], [y], s=5, linewidth=0, facecolor='r', alpha=0.2, zorder=10)
            # map data coords -> canvas coords -> axe_chromosome data coords
            canvas_x, canvas_y = axe_map.transData.xy_tup((x, y))
            axe_chromosome_xy = axe_chromosome.transData.inverse_xy_tup((canvas_x, canvas_y))
            axe_chromosome.plot([0, axe_chromosome_xy[0]], [y_pos, axe_chromosome_xy[1]],
                                linestyle='--', alpha=0.2, linewidth=0.2)
            no_of_ecotypes_drawn += 1

    # Release the transformations.
    axe_map.transData.thaw()  # eval the lazy objects
    axe_map.transAxes.thaw()
    axe_chromosome.transData.thaw()  # eval the lazy objects
    axe_chromosome.transAxes.thaw()

    # Set to the same x/y limits as before the cross-axes drawing.
    axe_map.set_xlim(xlim)
    axe_map.set_ylim(ylim)
    axe_chromosome.set_xlim([0, 1])
    axe_chromosome.set_ylim([0, 1])

    if output_fname_prefix:
        pylab.savefig('%s.png' % output_fname_prefix, dpi=600)
        pylab.savefig('%s.svg' % output_fname_prefix)
    sys.stderr.write("%s ecotypes drawn. Done.\n" % (no_of_ecotypes_drawn))
def test_drawMap(self):
    """
    Manual test: scatter ecotypes on a Basemap and link each to a second axes.

    Connects to the stock_250k database, fetches ecotype information through
    common.getEcotypeInfo(), draws a projection='mill' Basemap in axe_map and,
    for at most 200 ecotypes that have both latitude and longitude, draws

      - a red dot on the map, and
      - a dashed line in axe_chromosome from (0, y_pos) to the point that
        coincides on the canvas with the ecotype's map position.

    The figure is saved to /tmp/map.png (dpi=600) and /tmp/map.svg. Progress
    goes to stderr, axis limits to stdout. Nothing is returned.

    The loop walks the ecotype ids actually fetched, so a database with fewer
    than 200 ecotypes no longer raises IndexError.
    """
    sys.stderr.write("Drawing graph on a map ...\n")

    # StockDB has to be imported (set up); otherwise StockDB.Ecotype.table is
    # None inside getEcotypeInfo().
    import StockDB, Stock_250kDB
    hostname = 'papaya.usc.edu'
    dbname = 'stock_250k'
    db_user = '******'
    db_passwd = ''
    drivername = 'mysql'
    schema = None
    # It doesn't matter which database class connects as long as StockDB is imported.
    db = Stock_250kDB.Stock_250kDB(drivername=drivername, username=db_user,
                                   password=db_passwd, hostname=hostname,
                                   database=dbname, schema=schema)
    db.setup(create_tables=False)

    from common import getEcotypeInfo
    ecotype_info = getEcotypeInfo(db)

    # Newer installs expose this as mpl_toolkits.basemap.
    from matplotlib.toolkits.basemap import Basemap
    import pylab
    from matplotlib import rcParams
    rcParams['font.size'] = 6
    rcParams['legend.fontsize'] = 6
    # rcParams['text.fontsize'] is deprecated; font.size is used instead.
    rcParams['axes.labelsize'] = 4
    rcParams['axes.titlesize'] = 8
    rcParams['xtick.labelsize'] = 4
    rcParams['ytick.labelsize'] = 4

    pylab.clf()
    # axes rectangle is [left, bottom, width, height]
    axe_map = pylab.axes([0.5, 0.02, 0.4, 0.8], frameon=False)
    axe_map.set_title("Global Arabidopsis Ecotype Distribution")
    axe_map.set_xlabel('ecotype is drawn as circles.')
    axe_map.set_ylabel('strains')

    # [lower-left lon, lower-left lat, upper-right lon, upper-right lat]
    pic_area = [-140, -40, 140, 70]
    m = Basemap(llcrnrlon=pic_area[0], llcrnrlat=pic_area[1],
                urcrnrlon=pic_area[2], urcrnrlat=pic_area[3],
                resolution='l', projection='mill', ax=axe_map)
    m.drawparallels(pylab.arange(-90, 90, 30), labels=[1, 1, 0, 1], size=4, linewidth=0.1)
    m.drawmeridians(pylab.arange(-180, 180, 30), labels=[1, 1, 0, 1], size=4, linewidth=0.1)
    m.fillcontinents()
    m.drawcountries(linewidth=0.1)

    # Remember the limits set by the map so they can be restored after plotting.
    sys.stdout.write("xlim: %s\n" % (axe_map.get_xlim(),))
    sys.stdout.write("ylim: %s\n" % (axe_map.get_ylim(),))
    xlim = axe_map.get_xlim()
    ylim = axe_map.get_ylim()

    output_fname_prefix = '/tmp/map'
    sys.stdout.write("ylim: %s\n" % (axe_map.get_ylim(),))
    axe_map.set_xlim(xlim)
    axe_map.set_ylim(ylim)

    axe_chromosome = pylab.axes([0.05, 0.02, 0.8, 0.8], frameon=False)
    axe_chromosome.set_title("chromosome")

    # Fix the transformations before doing cross-axes drawing.
    # NOTE(review): freeze()/thaw()/xy_tup()/inverse_xy_tup() appear to belong
    # to an old matplotlib transforms API -- confirm the installed version
    # before running.
    axe_map.transData.freeze()  # eval the lazy objects
    axe_map.transAxes.freeze()
    axe_chromosome.transData.freeze()  # eval the lazy objects
    axe_chromosome.transAxes.freeze()

    no_of_ecotypes = 200
    ecotype_id_ls = list(ecotype_info.ecotype_id2ecotype_obj.keys())[:no_of_ecotypes]
    no_of_ecotypes_drawn = 0
    # Enumerate what was fetched: the slice above may hold fewer than
    # no_of_ecotypes ids, and indexing range(no_of_ecotypes) would overrun it.
    for i, ecotype_id in enumerate(ecotype_id_ls):
        y_pos = i/float(no_of_ecotypes)
        ecotype_obj = ecotype_info.ecotype_id2ecotype_obj.get(ecotype_id)
        if not ecotype_obj:
            sys.stderr.write("Warning: Ecotype %s not in ecotype_info (fetched from stock db).\n" % ecotype_id)
            continue
        lat, lon = ecotype_obj.latitude, ecotype_obj.longitude
        if lat and lon:
            x, y = m(lon, lat)
            # Restore the limits before every draw so that the data->canvas
            # mapping used below stays the one the map was laid out with.
            axe_map.set_xlim(xlim)
            axe_map.set_ylim(ylim)
            axe_map.scatter([x], [y], s=5, linewidth=0, facecolor='r', alpha=0.2, zorder=10)
            # map data coords -> canvas coords -> axe_chromosome data coords
            canvas_x, canvas_y = axe_map.transData.xy_tup((x, y))
            axe_chromosome_xy = axe_chromosome.transData.inverse_xy_tup((canvas_x, canvas_y))
            axe_chromosome.plot([0, axe_chromosome_xy[0]], [y_pos, axe_chromosome_xy[1]],
                                linestyle='--', alpha=0.2, linewidth=0.2)
            no_of_ecotypes_drawn += 1

    # Release the transformations.
    axe_map.transData.thaw()  # eval the lazy objects
    axe_map.transAxes.thaw()
    axe_chromosome.transData.thaw()  # eval the lazy objects
    axe_chromosome.transAxes.thaw()

    # Set to the same x/y limits as before the cross-axes drawing.
    axe_map.set_xlim(xlim)
    axe_map.set_ylim(ylim)
    axe_chromosome.set_xlim([0, 1])
    axe_chromosome.set_ylim([0, 1])

    if output_fname_prefix:
        pylab.savefig('%s.png' % output_fname_prefix, dpi=600)
        pylab.savefig('%s.svg' % output_fname_prefix)
    sys.stderr.write("%s ecotypes drawn. Done.\n" % (no_of_ecotypes_drawn))