def run(self):
    """Select significant blocks and search them for collinear regions.

    Loads the two chromosome-length tables, the gene table and the block
    table, keeps only blocks that are longer than ``self.block_length``,
    have a p-value below ``self.pvalue`` and lie on chromosomes present in
    both length tables, builds one whole-chromosome search window per
    (sub-genome, chr1, chr2) combination, and passes everything to
    ``self.colinearity_region``.
    """
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)
    gff = base.newgff(self.gff)

    bkinfo = pd.read_csv(self.blockinfo)
    # Chromosome names are compared as strings against the length indexes.
    bkinfo['chr1'] = bkinfo['chr1'].astype(str)
    bkinfo['chr2'] = bkinfo['chr2'].astype(str)
    keep = (
        (bkinfo['length'] > int(self.block_length))
        & bkinfo['chr1'].isin(lens1.index)
        & bkinfo['chr2'].isin(lens2.index)
        & (bkinfo['pvalue'] < float(self.pvalue))
    )
    bkinfo = bkinfo[keep]

    # NOTE(review): the source read ``self.h**o[i]``, which parses as
    # ``self.h ** o[i]`` with ``o`` undefined and fails with NameError.
    # It is restored here as ``self.homo[i]`` because the values fill the
    # 'homo1'/'homo2' columns -- confirm the attribute name.
    homo1, homo2 = float(self.homo[0]), float(self.homo[1])
    cor = pd.DataFrame(
        [
            [sub, chr1, 0, lens1[chr1], chr2, 0, lens2[chr2], homo1, homo2]
            for sub in range(1, int(self.multiple) + 1)
            for chr1 in lens1.index
            for chr2 in lens2.index
        ],
        columns=[
            'sub', 'chr1', 'start1', 'end1', 'chr2', 'start2', 'end2',
            'homo1', 'homo2'
        ],
    )
    cor['chr1'] = cor['chr1'].astype(str)
    cor['chr2'] = cor['chr2'].astype(str)

    if self.tandem in (True, 'true', 1):
        bkinfo = self.remove_tandem(bkinfo)

    # One empty string column per sub-genome index on the gene table.
    for sub in cor['sub'].drop_duplicates().values:
        gff['sub' + str(sub)] = ''

    # The return value is not used by this method as written.
    self.colinearity_region(gff, cor, bkinfo)
def run(self):
    """Draw the alignment dot plot and write the alignment table.

    Builds the dot-plot frame from the two chromosome-length tables,
    positions the genes of both genomes, computes the alignment with
    ``self.alignment``, writes the selected alignment columns to
    ``self.savefile`` (tab separated, no header), scatters the paired
    positions and saves the figure to ``self.savefig``.  The process then
    exits with status 0.
    """
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)
    total1, total2 = float(lens1.sum()), float(lens2.sum())

    # A figsize string containing a digit is parsed as comma-separated
    # numbers; otherwise the size follows the ratio of the two totals.
    # (Raw string: '\d' in a plain literal is an invalid escape sequence.)
    if re.search(r'\d', self.figsize):
        self.figsize = [float(k) for k in self.figsize.split(',')]
    else:
        self.figsize = np.array([1, total1 / total2]) * 10

    plt.rcParams['ytick.major.pad'] = 0
    fig, ax = plt.subplots(figsize=self.figsize)
    ax.xaxis.set_ticks_position('top')
    step1 = 1 / total1
    step2 = 1 / total2
    base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                       self.genome1_name, self.genome2_name)

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = base.gene_location(gff1, lens1, step1, self.position)
    gff2 = base.gene_location(gff2, lens2, step2, self.position)

    block_list = pd.read_csv(self.block_list, header=None)
    bkinfo = pd.read_csv(self.blockinfo, index_col=0)
    bkinfo['chr1'] = bkinfo['chr1'].astype(str)
    bkinfo['chr2'] = bkinfo['chr2'].astype(str)

    align = self.alignment(gff1, gff2, block_list, bkinfo)
    # Select as many trailing gff1 columns as there are distinct entries in
    # the first block_list column (presumably the columns that
    # self.alignment appended to gff1 -- verify against that method).
    n_blocks = int(len(block_list[0].drop_duplicates()))
    alignment = align[gff1.columns[-n_blocks:]]
    alignment.to_csv(self.savefile, sep='\t', header=None)

    df = self.pair_positon(alignment, gff1['loc'], gff2['loc'], self.colors)
    plt.scatter(df['loc2'], df['loc1'], s=float(self.markersize),
                c=df['color'], alpha=0.5, edgecolors=None, linewidths=0,
                marker='o')
    plt.subplots_adjust(left=0.07, right=0.97, top=0.93, bottom=0.03)
    plt.savefig(self.savefig, dpi=500)
    sys.exit(0)
def run(self):
    """Run ``blockscan`` per chromosome pair and merge the block files.

    Filters both gene tables to chromosomes whose 'order' length value is
    greater than 4, processes the BLAST hits with ``self.deal_blast``,
    writes one pair file per (chr1, chr2) group, runs the external
    ``blockscan`` command on it, concatenates all ``.blk`` outputs with
    ``cat`` and finally calls ``self.rewriteblock`` on the merged file.
    """
    lens1 = base.newlens(self.lens1, 'order')
    lens2 = base.newlens(self.lens2, 'order')
    lens1 = lens1[lens1 > 4]
    lens2 = lens2[lens2 > 4]

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = gff1[gff1['chr'].isin(lens1.index)]
    gff2 = gff2[gff2['chr'].isin(lens2.index)]

    blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2)
    hits = self.deal_blast(blast, gff1, gff2, int(self.repnum))

    pair_columns = [0, 'stand1', 'loc1', 1, 'stand2', 'loc2']
    for (chr1, chr2), pairs in hits.groupby(['chr1', 'chr2']):
        pairs = pairs.sort_values(by=['loc1', 'loc2'])
        stem = str(chr1) + '.vs.' + str(chr2)
        pair_file = './' + self.dir + '/pair/' + stem + '.pair'
        block_file = './' + self.dir + '/block/' + stem + '.blk'
        pairs[pair_columns].to_csv(pair_file, sep=' ', index=None,
                                   header=None)
        # The shell redirection is part of the command string, so the
        # command has to go through os.system.
        scan = [
            'blockscan', '-chr1len', lens1[str(chr1)], '-chr2len',
            lens2[str(chr2)], '-mg1', self.mg[0], '-mg2', self.mg[1],
            pair_file, '>' + block_file
        ]
        os.system(' '.join(map(str, scan)))

    merged = self.dir + '.block.old.txt'
    merge = ['cat', self.dir + '/block/*.blk', '>', merged]
    os.system(' '.join(map(str, merge)))
    self.rewriteblock(blast, merged, self.dir + '.block.txt')
def run(self):
    """Draw a BLAST-hit dot plot between two genomes and save it.

    Builds the dot-plot frame from the chromosome-length tables, positions
    the genes of both genomes, loads the BLAST hits, turns them into
    plot coordinates and colours with ``self.pair_positon``, scatters them
    and saves the figure to ``self.savefile``.  The process then exits
    with status 0.
    """
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)
    total1, total2 = float(lens1.sum()), float(lens2.sum())

    # A figsize string containing a digit is parsed as comma-separated
    # numbers; otherwise the size follows the ratio of the two totals.
    # (Raw string: '\d' in a plain literal is an invalid escape sequence.)
    if re.search(r'\d', self.figsize):
        self.figsize = [float(k) for k in self.figsize.split(',')]
    else:
        self.figsize = np.array([1, total1 / total2]) * 10

    plt.rcParams['ytick.major.pad'] = 0
    fig, ax = plt.subplots(figsize=self.figsize)
    ax.xaxis.set_ticks_position('top')
    step1 = 1 / total1
    step2 = 1 / total2
    base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                       self.genome1_name, self.genome2_name)

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = base.gene_location(gff1, lens1, step1, self.position)
    gff2 = base.gene_location(gff2, lens2, step2, self.position)

    blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2)
    df = self.pair_positon(blast, gff1, gff2, int(self.multiple),
                           int(self.repnum))
    plt.scatter(df['loc2'], df['loc1'], s=float(self.markersize),
                c=df['color'], alpha=0.5, edgecolors=None, linewidths=0,
                marker='o')
    plt.subplots_adjust(left=0.07, right=0.97, top=0.93, bottom=0.03)
    plt.savefig(self.savefile, dpi=500)
    sys.exit(0)
def run(self):
    """Write the blocks that fall inside the collinearity regions.

    Loads the block table, keeps blocks with length >= ``self.block_length``
    and p-value <= ``self.pvalue`` on chromosomes present in both length
    tables, builds one whole-chromosome window per (sub-genome, chr1, chr2)
    combination, optionally removes tandem blocks, and writes the rows
    whose index is returned by ``self.collinearity_region`` to
    ``self.savefile``.
    """
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)

    bkinfo = pd.read_csv(self.blockinfo)
    # Chromosome names are compared as strings against the length indexes.
    bkinfo['chr1'] = bkinfo['chr1'].astype(str)
    bkinfo['chr2'] = bkinfo['chr2'].astype(str)
    keep = (
        (bkinfo['length'] >= int(self.block_length))
        & bkinfo['chr1'].isin(lens1.index)
        & bkinfo['chr2'].isin(lens2.index)
        & (bkinfo['pvalue'] <= float(self.pvalue))
    )
    bkinfo = bkinfo[keep]

    # NOTE(review): the source read ``self.h**o[i]``, which parses as
    # ``self.h ** o[i]`` with ``o`` undefined and fails with NameError.
    # It is restored here as ``self.homo[i]`` because the values fill the
    # 'homo1'/'homo2' columns -- confirm the attribute name.
    homo1, homo2 = float(self.homo[0]), float(self.homo[1])
    cor = pd.DataFrame(
        [
            [sub, chr1, 0, lens1[chr1], chr2, 0, lens2[chr2], homo1, homo2]
            for sub in range(1, int(self.multiple) + 1)
            for chr1 in lens1.index
            for chr2 in lens2.index
        ],
        columns=[
            'sub', 'chr1', 'start1', 'end1', 'chr2', 'start2', 'end2',
            'homo1', 'homo2'
        ],
    )
    cor['chr1'] = cor['chr1'].astype(str)
    cor['chr2'] = cor['chr2'].astype(str)

    # str() makes the text comparison safe when tandem is a real bool:
    # calling .upper() directly on True raised AttributeError.  The ``==``
    # comparison is kept so that 0 still counts as False.
    if self.tandem == False or str(self.tandem).upper() == 'FALSE':
        bkinfo = self.remove_tandem(bkinfo)

    # NOTE(review): placed outside the tandem branch; the collapsed source
    # does not show which level this call belongs to -- confirm.
    self.remove_ks_hit(bkinfo)
    arr = self.collinearity_region(cor, bkinfo, lens1)
    bkinfo.loc[bkinfo.index.isin(arr), :].to_csv(self.savefile, index=False)
def run(self):
    """Detect collinear blocks per chromosome pair and merge the results.

    Loads and filters the gene tables and BLAST hits, annotates each hit
    with the position and chromosome of both genes, fills a score matrix
    per (chr1, chr2) pair from the 'grading' column, runs
    ``improvedcollinearity.collinearity`` on it and writes one ``.blk``
    file per pair.  The ``.blk`` files are concatenated into
    ``self.savefile`` with ``cat``, the working directory ``self.dir`` is
    removed and the process exits with status 0.
    """
    lens1 = base.newlens(self.lens1, 'order')
    lens2 = base.newlens(self.lens2, 'order')
    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = gff1[gff1['chr'].isin(lens1.index)]
    gff2 = gff2[gff2['chr'].isin(lens2.index)]

    blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2, self.blast_reverse)
    blast = self.deal_blast(blast, gff1, gff2, int(self.multiple),
                            int(self.repeat_number))
    blast['loc1'] = blast[0].map(gff1.loc[:, self.position])
    blast['loc2'] = blast[1].map(gff2.loc[:, self.position])
    blast['chr1'] = blast[0].map(gff1.loc[:, 'chr'])
    blast['chr2'] = blast[1].map(gff2.loc[:, 'chr'])

    for (chr1, chr2), group in blast.groupby(['chr1', 'chr2']):
        df = pd.DataFrame(np.zeros((lens1[chr1], lens2[chr2])))
        for _, row in group.iterrows():
            df.loc[row['loc1'], row['loc2']] = row['grading']
        # Drop columns, then rows, whose sum is zero.
        df = df.loc[:, df.sum(axis=0) != 0]
        df = df.loc[df.sum(axis=1) != 0, :]
        collinearity = improvedcollinearity.collinearity(self.options, df)
        data = collinearity.run()
        fp = self.dir + '/' + str(chr1) + '.vs.' + str(chr2) + '.blk'
        # The per-chromosome subsets the original built here were never
        # used (and the second one filtered gff2 by chr1), so they are
        # gone; write_block receives the full gene tables as before.
        self.write_block(fp, data, chr1, chr2, gff1, gff2)

    args = ['cat', self.dir + '/*.blk', '>', self.savefile]
    command = ' '.join([str(k) for k in args])
    os.system(command)
    shutil.rmtree(self.dir)
    sys.exit(0)
def run(self):
    """Draw a dot plot of collinear gene pairs coloured by Ks.

    Builds the dot-plot frame, loads and filters the block table
    (optionally swapping the two genomes, optionally removing tandem
    blocks), converts the blocks to plot positions with
    ``self.block_position``, keeps the pairs whose Ks lies inside
    ``self.area``, scatters them with a colour bar, saves the figure to
    ``self.savefig``, shows it and exits with status 0.
    """
    axis = [0, 1, 1, 0]
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)
    total1, total2 = float(lens1.sum()), float(lens2.sum())

    # A figsize string containing a digit is parsed as comma-separated
    # numbers; otherwise the size follows the ratio of the two totals.
    # (Raw string: '\d' in a plain literal is an invalid escape sequence.)
    if re.search(r'\d', self.figsize):
        self.figsize = [float(k) for k in self.figsize.split(',')]
    else:
        self.figsize = np.array([1, total1 / total2]) * 10

    step1 = 1 / total1
    step2 = 1 / total2
    fig, ax = plt.subplots(figsize=self.figsize)
    plt.rcParams['ytick.major.pad'] = 0
    ax.xaxis.set_ticks_position('top')
    base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                       self.genome1_name, self.genome2_name, [1, 1])

    bkinfo = pd.read_csv(self.blockinfo)
    # str() keeps the text comparison safe when the flag is a real bool:
    # calling .upper() directly on False raised AttributeError.
    if (self.blockinfo_reverse == True
            or str(self.blockinfo_reverse).upper() == 'TRUE'):
        bkinfo[['chr1', 'chr2']] = bkinfo[['chr2', 'chr1']]
        bkinfo[['block1', 'block2']] = bkinfo[['block2', 'block1']]
    bkinfo['chr1'] = bkinfo['chr1'].astype(str)
    bkinfo['chr2'] = bkinfo['chr2'].astype(str)
    keep = (
        (bkinfo['length'] > int(self.block_length))
        & bkinfo['chr1'].isin(lens1.index)
        & bkinfo['chr2'].isin(lens2.index)
        & (bkinfo['pvalue'] < float(self.pvalue))
    )
    bkinfo = bkinfo[keep]
    if self.tandem in (True, 'true', 1):
        bkinfo = self.remove_tandem(bkinfo)

    _, pairs = self.block_position(bkinfo, lens1, lens2, step1, step2)
    # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib
    # releases no longer provide.
    cm = plt.get_cmap('gist_rainbow')
    df = pd.DataFrame(pairs, columns=['loc1', 'loc2', 'ks'])
    in_range = (df['ks'] >= self.area[0]) & (df['ks'] <= self.area[1])
    # Reassign instead of mutating the filtered slice in place.
    df = df[in_range].drop_duplicates()

    sc = plt.scatter(df['loc1'], df['loc2'], s=float(self.markersize),
                     c=df['ks'], alpha=0.9, edgecolors=None, linewidths=0,
                     marker='o', vmin=self.area[0], vmax=self.area[1],
                     cmap=cm)
    cbar = fig.colorbar(sc, shrink=0.5, pad=0.03, fraction=0.1)
    align = dict(family='Arial', style='normal',
                 horizontalalignment="center", verticalalignment="center")
    cbar.set_label('Ks', labelpad=12.5, fontsize=18, **align)
    ax.axis(axis)
    plt.subplots_adjust(left=0.09, right=0.96, top=0.93, bottom=0.03)
    plt.savefig(self.savefig, dpi=500)
    plt.show()
    sys.exit(0)
def run(self):
    """Collect the inputs for block positioning and run it.

    Restricts both gene tables to the chromosomes present in the length
    tables, loads and filters the BLAST hits, indexes them by
    "gene1,gene2", reads the colinearscan output and the Ks table, and
    passes everything to ``self.block_position``.
    """
    chromosomes1 = base.newlens(self.lens1, self.position).index
    chromosomes2 = base.newlens(self.lens2, self.position).index

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = gff1[gff1['chr'].isin(chromosomes1)]
    gff2 = gff2[gff2['chr'].isin(chromosomes2)]

    hits = base.newblast(self.blast, int(self.score), float(self.evalue),
                         gff1, gff2)
    hits = self.blast_homo(hits, gff1, gff2, int(self.repnum))
    # Index every hit by "<gene1>,<gene2>".
    hits.index = hits[0] + ',' + hits[1]

    blocks = base.read_colinearscan(self.colinearity)
    ks_table = base.read_ks(self.ks, self.ks_col)
    # The return value is not used by this method as written.
    self.block_position(blocks, hits, gff1, gff2, ks_table)
def run(self):
    """Draw the class-based alignment dot plot and write the alignment.

    Builds the dot-plot frame, positions the genes of both genomes, loads
    the block table (optionally swapping the two genomes), computes the
    alignment with ``self.alignment``, writes the selected columns to
    ``self.savefile``, scatters the paired positions, saves the figure to
    ``self.savefig``, shows it and exits with status 0.
    """
    axis = [0, 1, 1, 0]
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)
    total1, total2 = float(lens1.sum()), float(lens2.sum())

    # A figsize string containing a digit is parsed as comma-separated
    # numbers; otherwise the size follows the ratio of the two totals.
    # (Raw string: '\d' in a plain literal is an invalid escape sequence.)
    if re.search(r'\d', self.figsize):
        self.figsize = [float(k) for k in self.figsize.split(',')]
    else:
        self.figsize = np.array([1, total1 / total2]) * 10

    plt.rcParams['ytick.major.pad'] = 0
    fig, ax = plt.subplots(figsize=self.figsize)
    ax.xaxis.set_ticks_position('top')
    step1 = 1 / total1
    step2 = 1 / total2
    base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                       self.genome1_name, self.genome2_name, [0, 1])

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = base.gene_location(gff1, lens1, step1, self.position)
    gff2 = base.gene_location(gff2, lens2, step2, self.position)

    bkinfo = pd.read_csv(self.blockinfo, index_col='id')
    # str() keeps the text comparison safe when the flag is a real bool:
    # calling .upper() directly on False raised AttributeError.
    if (self.blockinfo_reverse == True
            or str(self.blockinfo_reverse).upper() == 'TRUE'):
        bkinfo[['chr1', 'chr2']] = bkinfo[['chr2', 'chr1']]
        bkinfo[['block1', 'block2']] = bkinfo[['block2', 'block1']]
    bkinfo['chr1'] = bkinfo['chr1'].astype(str)
    bkinfo['chr2'] = bkinfo['chr2'].astype(str)
    bkinfo[self.classid] = bkinfo[self.classid].astype(str)
    bkinfo = bkinfo[bkinfo['chr1'].isin(lens1.index)
                    & bkinfo['chr2'].isin(lens2.index)]

    align = self.alignment(gff1, gff2, bkinfo)
    # Select as many trailing gff1 columns as there are distinct class ids
    # (presumably the columns self.alignment appended -- verify).
    n_classes = int(len(bkinfo[self.classid].drop_duplicates()))
    alignment = align[gff1.columns[-n_classes:]]
    alignment.to_csv(self.savefile, header=None)

    df = self.pair_positon(alignment, gff1['loc'], gff2['loc'], self.colors)
    plt.scatter(df['loc2'], df['loc1'], s=float(self.markersize),
                c=df['color'], alpha=0.5, edgecolors=None, linewidths=0,
                marker='o')
    ax.axis(axis)
    plt.subplots_adjust(left=0.07, right=0.97, top=0.93, bottom=0.03)
    plt.savefig(self.savefig, dpi=500)
    plt.show()
    sys.exit(0)
def run(self):
    """Detect collinear blocks in parallel and write a renumbered report.

    Loads and filters the gene tables and BLAST hits, splits the hits by
    chromosome pair, distributes the pairs over a process pool running
    ``self.single_pool``, joins the text returned by the workers and
    writes it to ``self.savefile`` with every "# Alignment" header
    renumbered consecutively.  The process then exits with status 0.

    Raises:
        Any exception raised while scheduling or waiting for the workers
        is re-raised after the pool has been terminated.
    """
    lens1 = base.newlens(self.lens1, 'order')
    lens2 = base.newlens(self.lens2, 'order')
    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = gff1[gff1['chr'].isin(lens1.index)]
    gff2 = gff2[gff2['chr'].isin(lens2.index)]

    blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2, self.blast_reverse)
    blast = self.deal_blast(blast, gff1, gff2, int(self.multiple),
                            int(self.repeat_number))
    blast['loc1'] = blast[0].map(gff1.loc[:, self.position])
    blast['loc2'] = blast[1].map(gff2.loc[:, self.position])
    blast['chr1'] = blast[0].map(gff1.loc[:, 'chr'])
    blast['chr2'] = blast[1].map(gff2.loc[:, 'chr'])

    # A comprehension leaves no loop variable behind, so nothing needs to
    # be deleted afterwards (the old ``del group`` raised NameError when
    # there were no groups at all).
    total = [[chr1, chr2, group]
             for (chr1, chr2), group in blast.groupby(['chr1', 'chr2'])]
    del blast
    gc.collect()

    processes = int(self.process)
    # At least one item per chunk: range() rejects a step of 0, which is
    # what an empty ``total`` produced before.
    n = max(1, int(np.ceil(len(total) / float(processes))))

    data = []
    # Create the pool outside the try block so the handler never refers
    # to an unbound name.
    pool = Pool(processes)
    try:
        for i in range(0, len(total), n):
            data.append(
                pool.apply_async(self.single_pool,
                                 args=(total[i:i + n], gff1, gff2, lens1,
                                       lens2)))
        pool.close()
        pool.join()
    except BaseException:
        # Stop the workers, then surface the problem instead of carrying
        # on with results that may never arrive.
        pool.terminate()
        raise

    result = ''.join(k.get() for k in data)

    num = 1
    with open(self.savefile, 'w') as fout:
        for line in result.split('\n'):
            if line.startswith('# Alignment'):
                # Replace the worker-local alignment number by a global one.
                fout.write('# Alignment ' + str(num) + ':'
                           + line.split(':')[1] + '\n')
                num += 1
            elif line:
                fout.write(line + '\n')
    sys.exit(0)
def run(self):
    """Concatenate the tree files of the selected alignment groups.

    Reads the alignment table, treats '.' as missing and drops rows with
    fewer than ``self.minimum`` non-missing values.  When both ``gff`` and
    ``lens`` attributes exist, rows are additionally restricted to genes
    located on the chromosomes in the length table.  The items returned
    by ``self.grouping`` are appended to ``self.trees_file`` with ``cat``
    in batches of 100; the file is then re-read and its first column
    written back.
    """
    table = pd.read_csv(self.alignment, header=None)
    table.replace('.', np.nan, inplace=True)
    table.dropna(thresh=int(self.minimum), inplace=True)

    if hasattr(self, 'gff') and hasattr(self, 'lens'):
        gff = base.newgff(self.gff)
        lens = base.newlens(self.lens, self.position)
        table = pd.merge(table, gff[['chr', self.position]], left_on=0,
                         right_on=gff.index, how='left')
        table.dropna(subset=['chr', 'order'], inplace=True)
        table['order'] = table['order'].astype(int)
        table = table[table['chr'].isin(lens.index)]
        # Remove the last two columns again.
        table.drop(table.columns[-2:], axis=1, inplace=True)

    data = self.grouping(table)

    # Start from an empty output file; the batches below append to it.
    with open(self.trees_file, 'w'):
        pass

    batch = 100
    for start in range(0, len(data), batch):
        names = ' '.join(map(str, data[start:start + batch]))
        os.system(' '.join(map(str, ['cat', names, '>>', self.trees_file])))

    trees = pd.read_csv(self.trees_file, header=None, sep='\t')
    # Raises if the number of rows read does not match ``data``.
    trees[1] = data
    trees[0].to_csv(self.trees_file, index=None, sep='\t', header=False)
    print("done")
def run(self):
    """Draw the circular alignment figure and save it.

    Lays the chromosomes out on a circle, draws one coloured wedge per
    chromosome, reads the gene table and the alignment table, draws one
    ring per alignment column, labels the chromosomes and saves the
    figure to ``self.savefile``.  The process then exits with status 0.
    """
    plt.figure(figsize=tuple(self.figsize))
    root = plt.axes([0, 0, 1, 1])
    mpl.rcParams['agg.path.chunksize'] = 100000000

    lens = base.newlens(self.lens1, self.position)
    radius = float(self.radius)
    angle_gap = float(self.angle_gap)
    # Angle per position unit: the circle minus one gap per chromosome,
    # divided by the summed lengths.
    gaps = int(len(lens)) * angle_gap
    angle = (2 * np.pi - gaps) / int(lens.sum())
    loc_chr = self.chr_loction(lens, angle_gap, angle)

    # self.colors alternates key and colour, separated by ',' or ':'.
    tokens = [str(item).strip() for item in re.split(',|:', self.colors)]
    chr_color = dict(zip(tokens[::2], tokens[1::2]))

    for name in loc_chr:
        start, end = loc_chr[name]
        self.Wedge(root, (0.0, 0.0), radius + 0.03, start * 180 / np.pi,
                   end * 180 / np.pi, 0.03, chr_color[name], 0.9)

    gff = pd.read_csv(self.gff, sep='\t', header=None, index_col=1)
    column_names = {0: 'chr', 1: 'id', 2: 'start', 3: 'end', 5: 'order'}
    gff.rename(columns=column_names, inplace=True)

    alignment = pd.read_csv(self.alignment, sep='\t', header=None)
    newalignment = self.deal_alignment(alignment, gff, lens, loc_chr, angle)

    width = self.ring_width
    for k, column in enumerate(newalignment.columns[1:-2]):
        r = radius + width * (k + 1)
        self.plot_circle(loc_chr, r, lw=0.5, alpha=0.5, color='grey')
        self.plot_bar(newalignment[[column, 'rad']], r + width * 0.15,
                      width * 0.7, 0.15, chr_color, 0.9)

    labels = dict(zip(lens.index, self.chr_label + lens.index))
    self.plot_labels(labels, loc_chr, radius - 0.03, fontsize=9)

    root.set_xlim(-1, 1)
    root.set_ylim(-1.05, 0.95)
    root.set_axis_off()
    plt.savefig(self.savefile, dpi=500)
    sys.exit(0)
def run(self):
    """Build the block-information table and write it as CSV.

    Restricts both gene tables to the chromosomes present in the length
    tables, loads and filters the BLAST hits, indexes them by
    "gene1,gene2", obtains the collinearity data from ``self.auto_file``,
    reads the Ks table, computes the block table with
    ``self.block_position``, adds zero-filled 'class1' and 'class2'
    columns and writes the result to ``self.savefile``.
    """
    chromosomes1 = base.newlens(self.lens1, self.position).index
    chromosomes2 = base.newlens(self.lens2, self.position).index

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = gff1[gff1['chr'].isin(chromosomes1)]
    gff2 = gff2[gff2['chr'].isin(chromosomes2)]

    hits = base.newblast(self.blast, int(self.score), float(self.evalue),
                         gff1, gff2, self.blast_reverse)
    hits = self.blast_homo(hits, gff1, gff2, int(self.repeat_number))
    # Index every hit by "<gene1>,<gene2>".
    hits.index = hits[0] + ',' + hits[1]

    blocks = self.auto_file(gff1, gff2)
    ks_table = base.read_ks(self.ks, self.ks_col)
    table = self.block_position(blocks, hits, gff1, gff2, ks_table)
    for column in ('class1', 'class2'):
        table[column] = 0
    table.to_csv(self.savefile, index=None)
def run(self):
    """Plot the retained values along every chromosome and save them.

    Joins the alignment table with the chromosome and position of each
    gene, computes ``self.retain`` with ``self.align_chr``, writes all but
    its last two columns to ``self.savefile`` and draws one stacked panel
    per chromosome with one line per written column.  The figure is saved
    to ``self.savefig`` and shown, then the process exits with status 0.
    """
    gff = base.newgff(self.gff)
    lens = base.newlens(self.lens, self.position)
    gff = gff[gff['chr'].isin(lens.index)]

    alignment = pd.read_csv(self.alignment, header=None, index_col=0)
    alignment = alignment.join(gff[['chr', self.position]], how='left')
    self.retain = self.align_chr(alignment)
    value_columns = self.retain.columns[:-2]
    self.retain[value_columns].to_csv(self.savefile, sep='\t', header=None)

    # squeeze=False always returns a 2-D array of axes; the default hands
    # back a bare Axes for a single chromosome, which broke ``axs[i]``.
    fig, axs = plt.subplots(len(lens), 1, sharex=True, sharey=True,
                            figsize=tuple(self.figsize), squeeze=False)
    axs = axs[:, 0]

    # Invisible full-figure axes that only carry the shared y label.
    fig.add_subplot(111, frameon=False)
    align = dict(family='Arial', verticalalignment="center",
                 horizontalalignment="center")
    plt.ylabel(self.ylabel + '\n\n\n\n', fontsize=18, **align)
    for spine in plt.gca().spines.values():
        spine.set_visible(False)
    plt.tick_params(top=False, bottom=False, left=False, right=False,
                    labelleft=False, labelbottom=False)

    # Scalar key: grouping by a one-element list changes what get_group
    # expects in newer pandas releases.
    groups = self.retain.groupby('chr')
    for ax, chrom in zip(axs, lens.index):
        # A chromosome without any retained row keeps an empty panel
        # instead of raising KeyError.
        if chrom in groups.groups:
            group = groups.get_group(chrom)
            for j in value_columns:
                ax.plot(group['order'].values, group[j].values,
                        linestyle='-', color=self.colors[j - 1],
                        linewidth=1)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.set_ylim(self.ylim)
        ax.tick_params(labelsize=12)

    # Labels go in a second pass: the axes share limits, so positions are
    # read only after every panel has been drawn.
    align = dict(family='Arial', verticalalignment="center",
                 horizontalalignment="left")
    for ax, chrom in zip(axs, lens.index):
        x, y = ax.get_xlim()[1] * 0.90, ax.get_ylim()[1] * 0.5
        ax.text(x, y, self.refgenome + str(chrom), fontsize=14, **align)

    plt.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.05)
    plt.savefig(self.savefig, dpi=500)
    plt.show()
    sys.exit(0)
def run(self):
    """Compute and print the polyploidy index of an alignment table.

    Reads the alignment table, recodes its cells numerically (regex
    matches of ``\\w+`` become 1, literal '.' cells and missing values
    become 0), joins the chromosome and position of each gene, drops rows
    with any missing value, prints the value returned by
    ``self.cal_pindex`` and exits with status 0.
    """
    alignment = pd.read_csv(self.alignment, header=None, index_col=0)
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    # The regex replacement runs before the literal '.' replacement, as in
    # the original.
    alignment.replace(r'\w+', 1, regex=True, inplace=True)
    alignment.replace('.', 0, inplace=True)
    alignment.fillna(0, inplace=True)

    gff = base.newgff(self.gff)
    lens = base.newlens(self.lens, self.position)
    gff = gff[gff['chr'].isin(lens.index)]

    alignment = alignment.join(gff[['chr', self.position]], how='left')
    # Rows whose gene is not in the filtered gene table have no chr or
    # position after the left join and are dropped here.
    alignment.dropna(axis=0, how='any', inplace=True)

    p = self.cal_pindex(alignment)
    print('Polyploidy-index: ', p)
    sys.exit(0)
def run(self):
    """Map an ancestral karyotype onto the other genome and write it.

    Loads the block table (optionally swapping the two genomes), keeps
    blocks longer than ``self.block_length``, reduces the BLAST hits to
    the best ``self.repeat_number`` hits per query, collects the collinear
    gene pairs, projects the ancestor classification given by
    ``ancestor_top`` and/or ``ancestor_left`` and writes the table
    returned by ``self.new_ancestor`` to ``self.the_other_ancestor_file``
    (tab separated, no header, no index).
    """
    bkinfo = pd.read_csv(self.blockinfo, index_col='id')
    bkinfo['chr1'] = bkinfo['chr1'].astype(str)
    bkinfo['chr2'] = bkinfo['chr2'].astype(str)
    # str() keeps the text comparison safe when the flag is a real bool:
    # calling .upper() directly on False raised AttributeError.
    if (self.blockinfo_reverse == True
            or str(self.blockinfo_reverse).upper() == 'TRUE'):
        bkinfo[['chr1', 'chr2']] = bkinfo[['chr2', 'chr1']]
        bkinfo[['block1', 'block2']] = bkinfo[['block2', 'block1']]
    bkinfo = bkinfo[bkinfo['length'] > int(self.block_length)]
    for column in ('class1', 'col1', 'class2', 'col2'):
        bkinfo[column] = ''

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    lens = base.newlens(self.the_other_lens, self.position)
    blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2, self.blast_reverse)

    # Per query (column 0), keep the row labels of the hits with the
    # largest values in column 11.  Flattening with a plain comprehension
    # also works when there are no hits, where np.concatenate raised.
    limit = int(self.repeat_number)
    keep = [
        label
        for _, group in blast.groupby(0)
        for label in group.sort_values(by=11,
                                       ascending=False)[:limit].index
    ]
    blast = blast.loc[keep, [0, 1]]

    pairs = self.colinear_gene_pairs(bkinfo, gff1, gff2)

    # Placeholder rows (chromosome, position, 0); they reach karyotype_map
    # only when neither ancestor attribute is present.
    data = [[chrom, pos, 0]
            for chrom, length in lens.items()
            for pos in range(1, length + 1)]

    gff1['color'] = np.nan
    gff2['color'] = np.nan
    gff1['classification'] = np.nan
    gff2['classification'] = np.nan

    if hasattr(self, 'ancestor_top'):
        ancestor = base.read_calassfication(self.ancestor_top)
        data = self.karyotype_top(pairs, ancestor, gff1, gff2)
    if hasattr(self, 'ancestor_left'):
        ancestor = base.read_calassfication(self.ancestor_left)
        data = self.karyotype_left(pairs, ancestor, gff1, gff2)

    the_other_ancestor_file = self.karyotype_map(data, lens)
    the_other_ancestor_file = self.new_ancestor(the_other_ancestor_file,
                                                gff1, gff2, blast)
    the_other_ancestor_file.to_csv(self.the_other_ancestor_file, sep='\t',
                                   header=False, index=False)
def run(self):
    """Concatenate the ``.treefile`` outputs of the alignment groups.

    Reads the alignment table, treats '.' as missing, drops rows with
    fewer than three non-missing values, merges the chromosome and
    position of each gene and obtains the group names from
    ``self.grouping``.  The matching ``<name>.treefile`` files are
    appended to ``self.trees_file`` with ``cat`` in batches of 100; the
    file is then rewritten with the tree-file name in front of each tree.
    """
    table = pd.read_csv(self.alignment, header=None)
    table.replace('.', np.nan, inplace=True)

    gff = base.newgff(self.gff)
    lens = base.newlens(self.lens, self.position)
    gff = gff[gff['chr'].isin(lens.index)]

    table.dropna(thresh=3, inplace=True)
    table = pd.merge(table, gff[['chr', self.position]], left_on=0,
                     right_on=gff.index, how='left')

    data = [name + '.treefile' for name in self.grouping(table)]

    # Start from an empty output file; the batches below append to it.
    with open(self.trees_file, 'w'):
        pass

    batch = 100
    for start in range(0, len(data), batch):
        names = ' '.join(map(str, data[start:start + batch]))
        os.system(' '.join(map(str, ['cat', names, '>>', self.trees_file])))

    trees = pd.read_csv(self.trees_file, header=None, sep='\t')
    # Raises if the number of rows read does not match ``data``.
    trees[1] = data
    trees[[1, 0]].to_csv(self.trees_file, index=None, sep='\t',
                         header=False)
def run(self):
    """Draw the circular figure with optional ancestor and alignment data.

    Lays the chromosomes out on a circle and draws one coloured wedge per
    chromosome.  If an ``ancestor`` attribute exists, the ancestor data
    are drawn with ``self.plot_collinearity``; if an ``alignment``
    attribute exists, one named ring is drawn per alignment column.  The
    chromosomes are labelled, the figure is saved to ``self.savefig`` and
    shown, and the process exits with status 0.
    """
    plt.figure(figsize=tuple(self.figsize))
    root = plt.axes([0, 0, 1, 1])
    mpl.rcParams['agg.path.chunksize'] = 100000000

    lens = base.newlens(self.lens, self.position)
    radius = float(self.radius)
    angle_gap = float(self.angle_gap)
    # Angle per position unit: the circle minus (chromosomes + 1.5) gaps,
    # divided by the summed lengths.
    gaps = (int(len(lens)) + 1.5) * angle_gap
    angle = (2 * np.pi - gaps) / int(lens.sum())
    loc_chr = self.chr_loction(lens, angle_gap, angle)

    # self.colors alternates key and colour, separated by ',' or ':'.
    tokens = [str(item).strip() for item in re.split(',|:', self.colors)]
    chr_color = dict(zip(tokens[::2], tokens[1::2]))

    width = self.ring_width
    for name in loc_chr:
        start, end = loc_chr[name]
        self.Wedge(root, (0.0, 0.0), radius + width, start * 180 / np.pi,
                   end * 180 / np.pi, width * 0.3, chr_color[name], 0.9)

    gff = base.newgff(self.gff)

    if hasattr(self, 'ancestor'):
        ancestor = pd.read_csv(self.ancestor, sep='\t', header=None)
        al = pd.read_csv(self.ancestor_location, sep='\t', header=None)
        al.rename(columns={0: 'chr', 1: 'start', 2: 'end', 3: 'color'},
                  inplace=True)
        al['chr'] = al['chr'].astype(str)
        data = self.deal_ancestor(ancestor, gff, lens, loc_chr, angle, al)
        self.plot_collinearity(data, radius, lw=0.1, alpha=0.8)

    if hasattr(self, 'alignment'):
        alignment = pd.read_csv(self.alignment, sep='\t', header=None)
        newalignment = self.deal_alignment(alignment, gff, lens, loc_chr,
                                           angle)
        names = [str(item) for item in self.column_names.split(',')]
        align = dict(family='Arial', verticalalignment="center",
                     horizontalalignment="center")
        for k, column in enumerate(newalignment.columns[1:-2]):
            r = radius + width * (k + 1)
            self.plot_circle(loc_chr, r, lw=0.5, alpha=1, color='grey')
            self.plot_bar(newalignment[[column, 'rad']], r + width * 0.15,
                          width * 0.7, 0.15, chr_color, 1)
            # Ring names alternate between two angles (in radians).
            loc = 0.05 if k % 2 == 0 else -0.08
            mid = r + width * 0.5
            x, y = mid * np.cos(loc), mid * np.sin(loc)
            plt.text(x, y, names[k], rotation=loc * 180 / np.pi,
                     fontsize=10, **align)

    labels = dict(zip(lens.index, self.chr_label + lens.index))
    self.plot_labels(root, labels, loc_chr, radius - width * 0.3,
                     fontsize=self.label_size)

    root.set_xlim(-1, 1)
    root.set_ylim(-1.05, 0.95)
    root.set_axis_off()
    plt.savefig(self.savefig, dpi=500)
    plt.show()
    sys.exit(0)
def run(self):
    """Draw a BLAST-hit dot plot with optional ancestor bars.

    Builds the dot-plot frame from the chromosome-length tables; when
    ``self.ancestor_left`` and/or ``self.ancestor_top`` are set, the axis
    range is widened on that side and the ancestor table is drawn with
    ``self.ancestor_posion``.  The BLAST hits are converted with
    ``self.pair_positon`` and scattered, the figure is saved to
    ``self.savefig`` and shown, and the process exits with status 0.
    """

    def read_ancestor(path, lens):
        """Load an ancestor table limited to the chromosomes in ``lens``."""
        table = pd.read_csv(path, sep="\t", header=None)
        table[0] = table[0].astype(str)
        table[3] = table[3].astype(str)
        table[4] = table[4].astype(int)
        # Column 4 is scaled by its maximum before the chromosome filter.
        table[4] = table[4] / table[4].max()
        return table[table[0].isin(lens.index)]

    axis = [0, 1, 1, 0]
    left, right, top, bottom = 0.07, 0.97, 0.93, 0.03
    lens1 = base.newlens(self.lens1, self.position)
    lens2 = base.newlens(self.lens2, self.position)
    total1, total2 = float(lens1.sum()), float(lens2.sum())
    step1 = 1 / total1
    step2 = 1 / total2

    if self.ancestor_left is not None:
        axis[0] = -0.02
        lens_ancestor_left = read_ancestor(self.ancestor_left, lens1)
    if self.ancestor_top is not None:
        axis[3] = -0.02
        lens_ancestor_top = read_ancestor(self.ancestor_top, lens2)

    # A figsize string containing a digit is parsed as comma-separated
    # numbers; otherwise the size follows the ratio of the two totals.
    # (Raw string: '\d' in a plain literal is an invalid escape sequence.)
    if re.search(r'\d', self.figsize):
        self.figsize = [float(k) for k in self.figsize.split(',')]
    else:
        self.figsize = np.array([1, total1 / total2]) * 10

    plt.rcParams['ytick.major.pad'] = 0
    fig, ax = plt.subplots(figsize=self.figsize)
    ax.xaxis.set_ticks_position('top')
    base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                       self.genome1_name, self.genome2_name,
                       [axis[0], axis[3]])

    gff1 = base.newgff(self.gff1)
    gff2 = base.newgff(self.gff2)
    gff1 = base.gene_location(gff1, lens1, step1, self.position)
    gff2 = base.gene_location(gff2, lens2, step2, self.position)

    if self.ancestor_top is not None:
        self.ancestor_posion(ax, gff2, lens_ancestor_top, 'top')
    if self.ancestor_left is not None:
        self.ancestor_posion(ax, gff1, lens_ancestor_left, 'left')

    blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2, self.blast_reverse)
    df = self.pair_positon(blast, gff1, gff2, int(self.multiple),
                           int(self.repeat_number))
    ax.scatter(df['loc2'], df['loc1'], s=float(self.markersize),
               c=df['color'], alpha=0.5, edgecolors=None, linewidths=0,
               marker='o')
    ax.axis(axis)
    plt.subplots_adjust(left=left, right=right, top=top, bottom=bottom)
    plt.savefig(self.savefig, dpi=500)
    plt.show()
    sys.exit(0)
def run(self):
    """Draw the circular figure with legends and save it.

    Lays the chromosomes out on a circle.  If an ``ancestor`` attribute
    exists, the ancestor data are drawn with ``self.plot_collinearity``
    and a legend of the ancestor colours is added; if an ``alignment``
    attribute exists, one ring is drawn per alignment column and a legend
    of the chromosome colours is added.  The chromosomes are labelled,
    the figure height is rescaled to the final axis ranges, the figure is
    saved to ``self.savefig`` and shown, and the process exits with
    status 0.
    """
    fig, ax = plt.subplots(figsize=self.figsize)
    mpl.rcParams['agg.path.chunksize'] = 100000000

    lens = base.newlens(self.lens, self.position)
    radius, angle_gap = float(self.radius), float(self.angle_gap)
    # Angle per position unit: the circle minus (chromosomes + 1.5) gaps,
    # divided by the summed lengths.
    angle = (2 * np.pi - (int(len(lens)) + 1.5) * angle_gap) / (int(
        lens.sum()))
    loc_chr = self.chr_location(lens, angle_gap, angle)

    # self.colors alternates key and colour, separated by ',' or ':'.
    list_colors = [str(k).strip() for k in re.split(',|:', self.colors)]
    chr_color = dict(zip(list_colors[::2], list_colors[1::2]))
    gff = base.newgff(self.gff)

    if hasattr(self, 'ancestor'):
        ancestor = pd.read_csv(self.ancestor, header=None)
        al = pd.read_csv(self.ancestor_location, sep='\t', header=None)
        al.rename(columns={0: 'chr', 1: 'start', 2: 'end', 3: 'color'},
                  inplace=True)
        al['chr'] = al['chr'].astype(str)
        data = self.deal_ancestor(ancestor, gff, lens, loc_chr, angle, al)
        self.plot_collinearity(data, radius, lw=0.1, alpha=0.8)

    if hasattr(self, 'alignment'):
        alignment = pd.read_csv(self.alignment, header=None)
        newalignment = self.deal_alignment(alignment, gff, lens, loc_chr,
                                           angle)
        # Without a comma in column_names, no ring gets a name.
        if ',' in self.column_names:
            names = [str(k) for k in self.column_names.split(',')]
        else:
            names = [None] * len(newalignment.columns)
        align = dict(family='Arial', verticalalignment="center",
                     horizontalalignment="center")
        for k, v in enumerate(newalignment.columns[1:-2]):
            r = radius + self.ring_width * (k + 1)
            self.plot_circle(loc_chr, r, lw=0.5, alpha=1, color='grey')
            self.plot_bar(newalignment[[v, 'rad']],
                          r + self.ring_width * 0.15,
                          self.ring_width * 0.7, 0.15, chr_color, 1)
            # Ring names alternate between two angles (in radians).
            loc = 0.05 if k % 2 == 0 else -0.08
            mid = r + self.ring_width * 0.5
            x, y = mid * np.cos(loc), mid * np.sin(loc)
            plt.text(x, y, names[k], rotation=loc * 180 / np.pi,
                     fontsize=self.label_size, **align)

    if hasattr(self, 'ancestor'):
        colors = al['color'].drop_duplicates().values.tolist()
        ancestor_chr_color = dict(zip(range(1, len(colors) + 1), colors))
        self.plot_legend(ax, ancestor_chr_color, self.legend_square[0],
                         self.legend_square[1])
    if hasattr(self, 'alignment'):
        # Keep the 'nan' entry out of the legend; pop() tolerates colour
        # specs without that key, where ``del`` raised KeyError.
        chr_color.pop('nan', None)
        self.plot_legend(ax, chr_color, self.legend_square[0],
                         self.legend_square[1])

    labels = self.chr_label + lens.index
    labels = dict(zip(lens.index, labels))
    self.plot_labels(ax, labels, loc_chr, radius + self.ring_width * 0.3,
                     fontsize=self.label_size)
    plt.axis('off')

    # Rescale the height so the figure matches the ratio of the y range
    # to the x range of the final axes.
    y_low, y_high = ax.get_ylim()
    x_low, x_high = ax.get_xlim()
    a = (y_high - y_low) / (x_high - x_low)
    fig.set_size_inches(self.figsize[0], self.figsize[0] * a, forward=True)
    plt.savefig(self.savefig, dpi=500)
    plt.show()
    sys.exit(0)