Esempio n. 1
0
 def run(self):
     """Filter the block table, build the subgenome template and run the region search.

     Steps, in order:
       1. Load both chromosome-length tables (``base.newlens``) and the GFF
          table (``base.newgff``).
       2. Read ``self.blockinfo`` and keep blocks whose ``length`` exceeds
          ``self.block_length``, whose ``pvalue`` is below ``self.pvalue`` and
          whose chromosomes appear in the length tables.
       3. Build one template row per (subgenome, chr1, chr2) combination.
       4. Optionally drop tandem blocks, add an empty ``sub<k>`` column per
          subgenome to the GFF table and call ``self.colinearity_region``.
     """
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)
     gff = base.newgff(self.gff)

     bkinfo = pd.read_csv(self.blockinfo)
     # Cast chromosome names to str before the isin() checks below.
     bkinfo['chr1'] = bkinfo['chr1'].astype(str)
     bkinfo['chr2'] = bkinfo['chr2'].astype(str)
     keep = ((bkinfo['length'] > int(self.block_length))
             & bkinfo['chr1'].isin(lens1.index)
             & bkinfo['chr2'].isin(lens2.index)
             & (bkinfo['pvalue'] < float(self.pvalue)))
     bkinfo = bkinfo[keep]

     # NOTE(review): the text previously read ``self.h**o[...]``, which parses
     # as ``self.h ** o[...]`` with ``o`` undefined. The ``homo1``/``homo2``
     # columns indicate the attribute is ``self.homo`` -- confirm.
     cor = [[
         k, i, 0, lens1[i], j, 0, lens2[j],
         float(self.homo[0]),
         float(self.homo[1])
     ] for k in range(1, int(self.multiple) + 1)
            for i in lens1.index
            for j in lens2.index]
     cor = pd.DataFrame(cor,
                        columns=[
                            'sub', 'chr1', 'start1', 'end1', 'chr2',
                            'start2', 'end2', 'homo1', 'homo2'
                        ])
     cor['chr1'] = cor['chr1'].astype(str)
     cor['chr2'] = cor['chr2'].astype(str)

     # ``1 == True`` in Python, so the tuple covers the same values as the
     # former three-way ``==`` chain.
     if self.tandem in (True, 'true'):
         bkinfo = self.remove_tandem(bkinfo)

     # One empty string column per subgenome number present in the template.
     for k in cor['sub'].drop_duplicates().values:
         gff['sub' + str(k)] = ''
     arr = self.colinearity_region(gff, cor, bkinfo)
Esempio n. 2
0
 def run(self):
     """Write the alignment table and save a scatter plot of the aligned pairs.

     The figure frame is drawn by ``base.dotplot_frame``; gene positions come
     from ``base.gene_location``. The alignment produced by
     ``self.alignment`` is written tab-separated to ``self.savefile`` and the
     plotted figure is saved to ``self.savefig``. The method ends the process
     with ``sys.exit(0)``.
     """
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     if re.search(r'\d', self.figsize):
         self.figsize = [float(k) for k in self.figsize.split(',')]
     else:
         # No digits supplied: width 10, height 10 * sum(lens1) / sum(lens2).
         self.figsize = np.array(
             [1, float(lens1.sum()) / float(lens2.sum())]) * 10
     plt.rcParams['ytick.major.pad'] = 0
     fig, ax = plt.subplots(figsize=self.figsize)
     ax.xaxis.set_ticks_position('top')
     step1 = 1 / float(lens1.sum())
     step2 = 1 / float(lens2.sum())
     base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                        self.genome1_name, self.genome2_name)

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = base.gene_location(gff1, lens1, step1, self.position)
     gff2 = base.gene_location(gff2, lens2, step2, self.position)

     block_list = pd.read_csv(self.block_list, header=None)
     bkinfo = pd.read_csv(self.blockinfo, index_col=0)
     bkinfo['chr1'] = bkinfo['chr1'].astype(str)
     bkinfo['chr2'] = bkinfo['chr2'].astype(str)

     align = self.alignment(gff1, gff2, block_list, bkinfo)
     # Keep the trailing columns, one per distinct value in the first column
     # of the block list.
     n_columns = int(len(block_list[0].drop_duplicates()))
     alignment = align[gff1.columns[-n_columns:]]
     alignment.to_csv(self.savefile, sep='\t', header=None)

     df = self.pair_positon(
         alignment, gff1['loc'], gff2['loc'], self.colors)
     plt.scatter(df['loc2'], df['loc1'], s=float(self.markersize),
                 c=df['color'], alpha=0.5, edgecolors=None, linewidths=0,
                 marker='o')
     plt.subplots_adjust(left=0.07, right=0.97, top=0.93, bottom=0.03)
     plt.savefig(self.savefig, dpi=500)
     sys.exit(0)
Esempio n. 3
0
 def run(self):
     """Run ``blockscan`` for every chromosome pair and merge the results.

     For each (chr1, chr2) group returned by ``self.deal_blast`` a ``.pair``
     file is written under ``./<dir>/pair/`` and ``blockscan`` is invoked
     through the shell with its output redirected to ``./<dir>/block/``.
     All ``.blk`` files are then concatenated with ``cat`` into
     ``<dir>.block.old.txt`` and passed to ``self.rewriteblock``.
     """
     lens1 = base.newlens(self.lens1, 'order')
     lens2 = base.newlens(self.lens2, 'order')
     # Only entries with a value above 4 are kept in each length table.
     lens1, lens2 = lens1[lens1 > 4], lens2[lens2 > 4]

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = gff1[gff1['chr'].isin(lens1.index)]
     gff2 = gff2[gff2['chr'].isin(lens2.index)]

     blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                           gff1, gff2)
     pairs = self.deal_blast(blast, gff1, gff2, int(self.repnum))

     pair_columns = [0, 'stand1', 'loc1', 1, 'stand2', 'loc2']
     for (chr1, chr2), hits in pairs.groupby(['chr1', 'chr2']):
         hits = hits.sort_values(by=['loc1', 'loc2'])
         stem = str(chr1) + '.vs.' + str(chr2)
         pair_path = './' + self.dir + '/pair/' + stem + '.pair'
         block_path = './' + self.dir + '/block/' + stem + '.blk'
         hits[pair_columns].to_csv(pair_path,
                                   sep=' ',
                                   index=None,
                                   header=None)
         blockscan_args = [
             'blockscan',
             '-chr1len', lens1[str(chr1)],
             '-chr2len', lens2[str(chr2)],
             '-mg1', self.mg[0],
             '-mg2', self.mg[1],
             pair_path,
             '>' + block_path,
         ]
         os.system(' '.join(map(str, blockscan_args)))

     merged_path = self.dir + '.block.old.txt'
     merge_args = ['cat', self.dir + '/block/*.blk', '>', merged_path]
     os.system(' '.join(map(str, merge_args)))
     self.rewriteblock(blast, merged_path, self.dir + '.block.txt')
Esempio n. 4
0
 def run(self):
     """Scatter-plot the BLAST pairs on a dot-plot frame and save the figure.

     The frame is drawn by ``base.dotplot_frame``, gene positions come from
     ``base.gene_location`` and the point table (``loc1``, ``loc2``,
     ``color``) from ``self.pair_positon``. The figure is saved to
     ``self.savefile`` and the process exits with status 0.
     """
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     if re.search(r'\d', self.figsize):
         self.figsize = [float(k) for k in self.figsize.split(',')]
     else:
         # No digits supplied: width 10, height 10 * sum(lens1) / sum(lens2).
         self.figsize = np.array(
             [1, float(lens1.sum()) / float(lens2.sum())]) * 10
     plt.rcParams['ytick.major.pad'] = 0
     fig, ax = plt.subplots(figsize=self.figsize)
     ax.xaxis.set_ticks_position('top')
     step1 = 1 / float(lens1.sum())
     step2 = 1 / float(lens2.sum())
     base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                        self.genome1_name, self.genome2_name)

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = base.gene_location(gff1, lens1, step1, self.position)
     gff2 = base.gene_location(gff2, lens2, step2, self.position)
     blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                           gff1, gff2)
     df = self.pair_positon(blast, gff1, gff2, int(self.multiple),
                            int(self.repnum))

     plt.scatter(df['loc2'],
                 df['loc1'],
                 s=float(self.markersize),
                 c=df['color'],
                 alpha=0.5,
                 edgecolors=None,
                 linewidths=0,
                 marker='o')
     plt.subplots_adjust(left=0.07, right=0.97, top=0.93, bottom=0.03)
     plt.savefig(self.savefile, dpi=500)
     sys.exit(0)
Esempio n. 5
0
 def run(self):
     """Filter the block table and save the blocks selected by the region search.

     Reads ``self.blockinfo``, keeps blocks with ``length`` of at least
     ``self.block_length`` and ``pvalue`` of at most ``self.pvalue`` on known
     chromosomes, optionally removes tandem blocks, calls
     ``self.remove_ks_hit`` and ``self.collinearity_region`` and writes the
     rows whose index is in the returned collection to ``self.savefile``.
     """
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)

     bkinfo = pd.read_csv(self.blockinfo)
     # Cast chromosome names to str before the isin() checks below.
     bkinfo['chr1'] = bkinfo['chr1'].astype(str)
     bkinfo['chr2'] = bkinfo['chr2'].astype(str)
     keep = ((bkinfo['length'] >= int(self.block_length))
             & bkinfo['chr1'].isin(lens1.index)
             & bkinfo['chr2'].isin(lens2.index)
             & (bkinfo['pvalue'] <= float(self.pvalue)))
     bkinfo = bkinfo[keep]

     # NOTE(review): the text previously read ``self.h**o[...]``, which parses
     # as ``self.h ** o[...]`` with ``o`` undefined. The ``homo1``/``homo2``
     # columns indicate the attribute is ``self.homo`` -- confirm.
     cor = [[
         k, i, 0, lens1[i], j, 0, lens2[j],
         float(self.homo[0]),
         float(self.homo[1])
     ] for k in range(1, int(self.multiple) + 1)
            for i in lens1.index
            for j in lens2.index]
     cor = pd.DataFrame(cor,
                        columns=[
                            'sub', 'chr1', 'start1', 'end1', 'chr2',
                            'start2', 'end2', 'homo1', 'homo2'
                        ])
     cor['chr1'] = cor['chr1'].astype(str)
     cor['chr2'] = cor['chr2'].astype(str)

     # str() lets a real bool reach the text comparison; calling .upper()
     # directly on ``True`` raised AttributeError.
     if self.tandem == False or str(self.tandem).upper() == 'FALSE':
         bkinfo = self.remove_tandem(bkinfo)
     self.remove_ks_hit(bkinfo)
     arr = self.collinearity_region(cor, bkinfo, lens1)
     bkinfo.loc[bkinfo.index.isin(arr), :].to_csv(self.savefile,
                                                  index=False)
Esempio n. 6
0
 def run(self):
     """Run the collinearity search per chromosome pair and merge the block files.

     For every (chr1, chr2) group of BLAST hits a score matrix of shape
     ``(lens1[chr1], lens2[chr2])`` is filled from the ``grading`` column,
     all-zero rows and columns are dropped, and the matrix is passed to
     ``improvedcollinearity.collinearity``. Each result is written by
     ``self.write_block`` to ``<dir>/<chr1>.vs.<chr2>.blk``. The ``.blk``
     files are concatenated into ``self.savefile`` with ``cat``, ``self.dir``
     is removed and the process exits with status 0.
     """
     lens1 = base.newlens(self.lens1, 'order')
     lens2 = base.newlens(self.lens2, 'order')
     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = gff1[gff1['chr'].isin(lens1.index)]
     gff2 = gff2[gff2['chr'].isin(lens2.index)]
     blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                           gff1, gff2, self.blast_reverse)
     blast = self.deal_blast(blast, gff1, gff2, int(self.multiple),
                             int(self.repeat_number))
     # Look up position and chromosome for both genes of every hit.
     blast['loc1'] = blast[0].map(gff1.loc[:, self.position])
     blast['loc2'] = blast[1].map(gff2.loc[:, self.position])
     blast['chr1'] = blast[0].map(gff1.loc[:, 'chr'])
     blast['chr2'] = blast[1].map(gff2.loc[:, 'chr'])

     for (chr1, chr2), group in blast.groupby(['chr1', 'chr2']):
         df = pd.DataFrame(np.zeros((lens1[chr1], lens2[chr2])))
         for index, row in group.iterrows():
             df.loc[row['loc1'], row['loc2']] = row['grading']
         # Drop columns, then rows, whose sum is zero.
         df = df.loc[:, df.sum(axis=0) != 0]
         df = df.loc[df.sum(axis=1) != 0, :]
         collinearity = improvedcollinearity.collinearity(self.options, df)
         data = collinearity.run()
         fp = self.dir + '/' + str(chr1) + '.vs.' + str(chr2) + '.blk'
         # The per-chromosome GFF subsets formerly built here were never
         # used (and the second one was filtered with chr1), so they are gone.
         self.write_block(fp, data, chr1, chr2, gff1, gff2)

     args = ['cat', self.dir + '/*.blk', '>', self.savefile]
     command = ' '.join([str(k) for k in args])
     os.system(command)
     shutil.rmtree(self.dir)
     sys.exit(0)
Esempio n. 7
0
 def run(self):
     """Draw block gene pairs coloured by Ks on a dot-plot frame.

     Blocks from ``self.blockinfo`` are optionally mirrored (chr1/chr2 and
     block1/block2 swapped), filtered by length, p-value and chromosome,
     optionally stripped of tandem blocks, and converted by
     ``self.block_position`` into (loc1, loc2, ks) points. Points whose Ks
     lies within ``self.area`` are plotted with a colour bar. The figure is
     saved to ``self.savefig``, shown, and the process exits with status 0.
     """
     axis = [0, 1, 1, 0]
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     if re.search(r'\d', self.figsize):
         self.figsize = [float(k) for k in self.figsize.split(',')]
     else:
         # No digits supplied: width 10, height 10 * sum(lens1) / sum(lens2).
         self.figsize = np.array(
             [1, float(lens1.sum()) / float(lens2.sum())]) * 10
     step1 = 1 / float(lens1.sum())
     step2 = 1 / float(lens2.sum())
     fig, ax = plt.subplots(figsize=self.figsize)
     plt.rcParams['ytick.major.pad'] = 0
     ax.xaxis.set_ticks_position('top')
     base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                        self.genome1_name, self.genome2_name, [1, 1])

     bkinfo = pd.read_csv(self.blockinfo)
     # str() lets a real bool reach the text comparison; calling .upper()
     # directly on ``False`` raised AttributeError.
     if (self.blockinfo_reverse == True
             or str(self.blockinfo_reverse).upper() == 'TRUE'):
         bkinfo[['chr1', 'chr2']] = bkinfo[['chr2', 'chr1']]
         bkinfo[['block1', 'block2']] = bkinfo[['block2', 'block1']]
     bkinfo['chr1'] = bkinfo['chr1'].astype(str)
     bkinfo['chr2'] = bkinfo['chr2'].astype(str)
     keep = ((bkinfo['length'] > int(self.block_length))
             & bkinfo['chr1'].isin(lens1.index)
             & bkinfo['chr2'].isin(lens2.index)
             & (bkinfo['pvalue'] < float(self.pvalue)))
     bkinfo = bkinfo[keep]
     # ``1 == True`` in Python, so the tuple covers the same values as the
     # former three-way ``==`` chain.
     if self.tandem in (True, 'true'):
         bkinfo = self.remove_tandem(bkinfo)

     pos, pairs = self.block_position(bkinfo, lens1, lens2, step1, step2)
     # pyplot.get_cmap replaces matplotlib.cm.get_cmap, which newer
     # Matplotlib releases no longer provide.
     cm = plt.get_cmap('gist_rainbow')
     df = pd.DataFrame(pairs, columns=['loc1', 'loc2', 'ks'])
     in_range = (df['ks'] >= self.area[0]) & (df['ks'] <= self.area[1])
     # Reassign instead of mutating the filtered slice in place.
     df = df[in_range].drop_duplicates()

     sc = plt.scatter(df['loc1'],
                      df['loc2'],
                      s=float(self.markersize),
                      c=df['ks'],
                      alpha=0.9,
                      edgecolors=None,
                      linewidths=0,
                      marker='o',
                      vmin=self.area[0],
                      vmax=self.area[1],
                      cmap=cm)
     cbar = fig.colorbar(sc, shrink=0.5, pad=0.03, fraction=0.1)
     align = dict(family='Arial',
                  style='normal',
                  horizontalalignment="center",
                  verticalalignment="center")
     cbar.set_label('Ks', labelpad=12.5, fontsize=18, **align)
     ax.axis(axis)
     plt.subplots_adjust(left=0.09, right=0.96, top=0.93, bottom=0.03)
     plt.savefig(self.savefig, dpi=500)
     plt.show()
     sys.exit(0)
Esempio n. 8
0
 def run(self):
     """Load lengths, GFF, BLAST, collinearity and Ks data and build block positions.

     GFF rows are restricted to chromosomes present in the length tables;
     BLAST hits are reduced by ``self.blast_homo`` and re-indexed as
     ``"<gene1>,<gene2>"`` before being handed to ``self.block_position``.
     """
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     on_lens1 = gff1['chr'].isin(lens1.index)
     gff1 = gff1[on_lens1]
     on_lens2 = gff2['chr'].isin(lens2.index)
     gff2 = gff2[on_lens2]

     min_score, max_evalue = int(self.score), float(self.evalue)
     hits = base.newblast(self.blast, min_score, max_evalue, gff1, gff2)
     hits = self.blast_homo(hits, gff1, gff2, int(self.repnum))
     # Index every hit by its comma-joined gene pair.
     hits.index = hits[0] + ',' + hits[1]

     colinearity = base.read_colinearscan(self.colinearity)
     ks = base.read_ks(self.ks, self.ks_col)
     data = self.block_position(colinearity, hits, gff1, gff2, ks)
Esempio n. 9
0
 def run(self):
     """Write the alignment table for the classified blocks and plot the pairs.

     Blocks from ``self.blockinfo`` (indexed by ``id``) are optionally
     mirrored, restricted to known chromosomes and passed to
     ``self.alignment``. The trailing alignment columns, one per distinct
     ``self.classid`` value, are written to ``self.savefile`` and plotted.
     The figure is saved to ``self.savefig``, shown, and the process exits
     with status 0.
     """
     axis = [0, 1, 1, 0]
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)
     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     if re.search(r'\d', self.figsize):
         self.figsize = [float(k) for k in self.figsize.split(',')]
     else:
         # No digits supplied: width 10, height 10 * sum(lens1) / sum(lens2).
         self.figsize = np.array(
             [1, float(lens1.sum()) / float(lens2.sum())]) * 10
     plt.rcParams['ytick.major.pad'] = 0
     fig, ax = plt.subplots(figsize=self.figsize)
     ax.xaxis.set_ticks_position('top')
     step1 = 1 / float(lens1.sum())
     step2 = 1 / float(lens2.sum())
     base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                        self.genome1_name, self.genome2_name, [0, 1])

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = base.gene_location(gff1, lens1, step1, self.position)
     gff2 = base.gene_location(gff2, lens2, step2, self.position)

     bkinfo = pd.read_csv(self.blockinfo, index_col='id')
     # str() lets a real bool reach the text comparison; calling .upper()
     # directly on ``False`` raised AttributeError.
     if (self.blockinfo_reverse == True
             or str(self.blockinfo_reverse).upper() == 'TRUE'):
         bkinfo[['chr1', 'chr2']] = bkinfo[['chr2', 'chr1']]
         bkinfo[['block1', 'block2']] = bkinfo[['block2', 'block1']]
     bkinfo['chr1'] = bkinfo['chr1'].astype(str)
     bkinfo['chr2'] = bkinfo['chr2'].astype(str)
     bkinfo[self.classid] = bkinfo[self.classid].astype(str)
     bkinfo = bkinfo[bkinfo['chr1'].isin(lens1.index)
                     & (bkinfo['chr2'].isin(lens2.index))]

     align = self.alignment(gff1, gff2, bkinfo)
     # Keep the trailing columns, one per distinct class id.
     n_columns = int(len(bkinfo[self.classid].drop_duplicates()))
     alignment = align[gff1.columns[-n_columns:]]
     alignment.to_csv(self.savefile, header=None)

     df = self.pair_positon(alignment, gff1['loc'], gff2['loc'],
                            self.colors)
     plt.scatter(df['loc2'],
                 df['loc1'],
                 s=float(self.markersize),
                 c=df['color'],
                 alpha=0.5,
                 edgecolors=None,
                 linewidths=0,
                 marker='o')
     ax.axis(axis)
     plt.subplots_adjust(left=0.07, right=0.97, top=0.93, bottom=0.03)
     plt.savefig(self.savefig, dpi=500)
     plt.show()
     sys.exit(0)
Esempio n. 10
0
 def run(self):
     """Run ``self.single_pool`` over chromosome pairs in a process pool.

     BLAST hits are grouped by (chr1, chr2); the groups are split into
     chunks and each chunk is submitted to a ``Pool`` of ``self.process``
     workers. The text returned by the workers is concatenated, every
     ``# Alignment`` header is renumbered consecutively from 1, empty lines
     are dropped and the result is written to ``self.savefile``. The process
     then exits with status 0.

     Any error raised while submitting or waiting terminates the pool and is
     re-raised instead of being swallowed.
     """
     lens1 = base.newlens(self.lens1, 'order')
     lens2 = base.newlens(self.lens2, 'order')
     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = gff1[gff1['chr'].isin(lens1.index)]
     gff2 = gff2[gff2['chr'].isin(lens2.index)]
     blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                           gff1, gff2, self.blast_reverse)
     blast = self.deal_blast(blast, gff1, gff2, int(self.multiple),
                             int(self.repeat_number))
     blast['loc1'] = blast[0].map(gff1.loc[:, self.position])
     blast['loc2'] = blast[1].map(gff2.loc[:, self.position])
     blast['chr1'] = blast[0].map(gff1.loc[:, 'chr'])
     blast['chr2'] = blast[1].map(gff2.loc[:, 'chr'])

     # A comprehension keeps the loop variable out of the function scope, so
     # an empty BLAST table no longer fails on ``del group``.
     total = [[chr1, chr2, group]
              for (chr1, chr2), group in blast.groupby(['chr1', 'chr2'])]
     del blast
     gc.collect()

     processes = int(self.process)
     # At least one group per chunk: range() rejects a step of zero.
     n = max(1, int(np.ceil(len(total) / float(processes))))
     data = []
     # Create the pool outside the try block so the handler never sees an
     # unbound ``pool``.
     pool = Pool(processes)
     try:
         for i in range(0, len(total), n):
             data.append(
                 pool.apply_async(self.single_pool,
                                  args=(total[i:i + n], gff1, gff2, lens1,
                                        lens2)))
         pool.close()
         pool.join()
     except BaseException:
         # Includes KeyboardInterrupt: stop the workers, then propagate so
         # the result collection below is not attempted on dead tasks.
         pool.terminate()
         raise

     result = ''.join(k.get() for k in data)
     num = 1
     with open(self.savefile, 'w') as fout:
         for line in result.split('\n'):
             if line.startswith('# Alignment'):
                 # Replace the header number, keep the text between the
                 # first and second colon.
                 s = '# Alignment ' + str(num) + ':'
                 fout.write(s + line.split(':')[1] + '\n')
                 num += 1
             elif line:
                 fout.write(line + '\n')
     sys.exit(0)
Esempio n. 11
0
 def run(self):
     """Group the alignment rows and collect the matching tree files.

     The alignment table is read from ``self.alignment``; ``'.'`` cells
     become NaN and rows with fewer than ``self.minimum`` non-NaN cells are
     dropped. When both ``gff`` and ``lens`` attributes exist, rows are
     additionally restricted to genes located on chromosomes listed in the
     length table. ``self.grouping`` returns the items that are concatenated
     with ``cat`` (100 per command) into ``self.trees_file``; the first
     column of that file is then written back to it.
     """
     alignment = pd.read_csv(self.alignment, header=None)
     alignment.replace('.', np.nan, inplace=True)
     alignment.dropna(thresh=int(self.minimum), inplace=True)
     if hasattr(self, 'gff') and hasattr(self, 'lens'):
         gff = base.newgff(self.gff)
         lens = base.newlens(self.lens, self.position)
         alignment = pd.merge(alignment,
                              gff[['chr', self.position]],
                              left_on=0,
                              right_on=gff.index,
                              how='left')
         # The merged column is named after self.position; the former
         # hard-coded 'order' raised KeyError for any other setting.
         alignment.dropna(subset=['chr', self.position], inplace=True)
         alignment[self.position] = alignment[self.position].astype(int)
         alignment = alignment[alignment['chr'].isin(lens.index)]
         # Remove the two helper columns added by the merge.
         alignment.drop(alignment.columns[-2:], axis=1, inplace=True)
     data = self.grouping(alignment)

     # Create or truncate the output so that '>>' below starts from empty.
     with open(self.trees_file, 'w'):
         pass
     # 100 items per command keeps each shell command line short.
     for i in range(0, len(data), 100):
         trees = ' '.join([str(k) for k in data[i:i + 100]])
         args = ['cat', trees, '>>', self.trees_file]
         command = ' '.join([str(k) for k in args])
         os.system(command)

     df = pd.read_csv(self.trees_file, header=None, sep='\t')
     df[1] = data
     df[0].to_csv(self.trees_file, index=None, sep='\t', header=False)
     print("done")
Esempio n. 12
0
 def run(self):
     """Draw the chromosome wedges, alignment rings and labels, then save.

     Chromosome arcs come from ``self.chr_loction``; one ring is drawn per
     alignment column returned by ``self.deal_alignment`` (excluding the
     first and the last two columns). The figure is written to
     ``self.savefile`` and the process exits with status 0.
     """
     plt.figure(figsize=tuple(self.figsize))
     root = plt.axes([0, 0, 1, 1])
     mpl.rcParams['agg.path.chunksize'] = 100000000

     lens = base.newlens(self.lens1, self.position)
     radius = float(self.radius)
     angle_gap = float(self.angle_gap)
     # Angle per length unit once one gap per chromosome is subtracted from
     # the full circle.
     gap_total = int(len(lens)) * angle_gap
     angle = (2 * np.pi - gap_total) / int(lens.sum())
     loc_chr = self.chr_loction(lens, angle_gap, angle)

     # "name:colour,name:colour,..." -> {name: colour}
     tokens = [str(tok).strip() for tok in re.split(',|:', self.colors)]
     chr_color = dict(zip(tokens[::2], tokens[1::2]))

     for chrom in loc_chr:
         start, end = loc_chr[chrom]
         start_deg = start * 180 / np.pi
         end_deg = end * 180 / np.pi
         self.Wedge(root, (0.0, 0.0), radius + 0.03, start_deg, end_deg,
                    0.03, chr_color[chrom], 0.9)

     gff = pd.read_csv(self.gff, sep='\t', header=None, index_col=1)
     column_names = {0: 'chr', 1: 'id', 2: 'start', 3: 'end', 5: 'order'}
     gff.rename(columns=column_names, inplace=True)
     alignment = pd.read_csv(self.alignment, sep='\t', header=None)
     newalignment = self.deal_alignment(alignment, gff, lens, loc_chr,
                                        angle)

     ring_columns = newalignment.columns[1:-2]
     for ring_no, column in enumerate(ring_columns, start=1):
         r = radius + self.ring_width * ring_no
         self.plot_circle(loc_chr, r, lw=0.5, alpha=0.5, color='grey')
         self.plot_bar(newalignment[[column, 'rad']],
                       r + self.ring_width * 0.15,
                       self.ring_width * 0.7, 0.15, chr_color, 0.9)

     labels = dict(zip(lens.index, self.chr_label + lens.index))
     self.plot_labels(labels, loc_chr, radius - 0.03, fontsize=9)
     root.set_xlim(-1, 1)
     root.set_ylim(-1.05, 0.95)
     root.set_axis_off()
     plt.savefig(self.savefile, dpi=500)
     sys.exit(0)
Esempio n. 13
0
 def run(self):
     """Build the block table with Ks information and write it as CSV.

     GFF rows are restricted to chromosomes in the length tables, BLAST hits
     are reduced by ``self.blast_homo`` and indexed as ``"<gene1>,<gene2>"``.
     The table returned by ``self.block_position`` gets two zero-filled
     columns, ``class1`` and ``class2``, and is saved to ``self.savefile``.
     """
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     on_lens1 = gff1['chr'].isin(lens1.index)
     gff1 = gff1[on_lens1]
     on_lens2 = gff2['chr'].isin(lens2.index)
     gff2 = gff2[on_lens2]

     hits = base.newblast(self.blast, int(self.score), float(self.evalue),
                          gff1, gff2, self.blast_reverse)
     hits = self.blast_homo(hits, gff1, gff2, int(self.repeat_number))
     # Index every hit by its comma-joined gene pair.
     hits.index = hits[0] + ',' + hits[1]

     collinearity = self.auto_file(gff1, gff2)
     ks = base.read_ks(self.ks, self.ks_col)
     block_table = self.block_position(collinearity, hits, gff1, gff2, ks)
     for class_column in ('class1', 'class2'):
         block_table[class_column] = 0
     block_table.to_csv(self.savefile, index=None)
Esempio n. 14
0
 def run(self):
     """Compute retention values per chromosome, save them and plot one panel each.

     The alignment table is joined with the ``chr`` and ``self.position``
     columns of the GFF table and passed to ``self.align_chr``. The result is
     stored on ``self.retain``; all but its last two columns are written
     tab-separated to ``self.savefile``. One subplot per entry of the length
     table is drawn and labelled, the figure is saved to ``self.savefig``,
     shown, and the process exits with status 0.
     """
     gff = base.newgff(self.gff)
     lens = base.newlens(self.lens, self.position)
     gff = gff[gff['chr'].isin(lens.index)]
     alignment = pd.read_csv(self.alignment, header=None, index_col=0)
     alignment = alignment.join(gff[['chr', self.position]], how='left')
     self.retain = self.align_chr(alignment)
     value_columns = self.retain.columns[:-2]
     self.retain[value_columns].to_csv(self.savefile,
                                       sep='\t',
                                       header=None)

     # squeeze=False always returns a 2-D array of axes, so a single
     # chromosome no longer yields a bare Axes that cannot be indexed.
     fig, axs = plt.subplots(len(lens),
                             1,
                             sharex=True,
                             sharey=True,
                             figsize=tuple(self.figsize),
                             squeeze=False)
     axs = axs[:, 0]
     # Invisible full-figure axes that only carry the shared y label.
     fig.add_subplot(111, frameon=False)
     align = dict(family='Arial',
                  verticalalignment="center",
                  horizontalalignment="center")
     plt.ylabel(self.ylabel + '\n\n\n\n', fontsize=18, **align)
     for spine in plt.gca().spines.values():
         spine.set_visible(False)
     plt.tick_params(top=False,
                     bottom=False,
                     left=False,
                     right=False,
                     labelleft=False,
                     labelbottom=False)

     # Group by the scalar key so get_group() accepts plain chromosome names.
     groups = self.retain.groupby('chr')
     for i, chrom in enumerate(lens.index):
         group = groups.get_group(chrom)
         for j in value_columns:
             axs[i].plot(group['order'].values,
                         group[j].values,
                         linestyle='-',
                         color=self.colors[j - 1],
                         linewidth=1)
         axs[i].spines['right'].set_visible(False)
         axs[i].spines['top'].set_visible(False)
         axs[i].set_ylim(self.ylim)
         axs[i].tick_params(labelsize=12)

     align = dict(family='Arial',
                  verticalalignment="center",
                  horizontalalignment="left")
     # Separate pass: the axes share x, so the limits read here are only
     # final once every panel has been plotted.
     for i, chrom in enumerate(lens.index):
         x, y = axs[i].get_xlim()[1] * 0.90, axs[i].get_ylim()[1] * 0.5
         axs[i].text(x,
                     y,
                     self.refgenome + str(chrom),
                     fontsize=14,
                     **align)
     plt.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.05)
     plt.savefig(self.savefig, dpi=500)
     plt.show()
     sys.exit(0)
Esempio n. 15
0
 def run(self):
     """Convert the alignment to presence/absence and print the polyploidy index.

     Cells matching ``\\w+`` become 1, ``'.'`` cells and missing values become
     0. The table is joined with the ``chr`` and ``self.position`` columns of
     the GFF table (restricted to chromosomes in the length table), rows with
     any missing value are dropped, and the value returned by
     ``self.cal_pindex`` is printed. The process then exits with status 0.
     """
     alignment = pd.read_csv(self.alignment, header=None, index_col=0)
     # Raw string: '\w' in a plain literal is an invalid escape sequence.
     alignment.replace(r'\w+', 1, regex=True, inplace=True)
     alignment.replace('.', 0, inplace=True)
     alignment.fillna(0, inplace=True)
     gff = base.newgff(self.gff)
     lens = base.newlens(self.lens, self.position)
     gff = gff[gff['chr'].isin(lens.index)]
     alignment = alignment.join(gff[['chr', self.position]], how='left')
     alignment.dropna(axis=0, how='any', inplace=True)
     p = self.cal_pindex(alignment)
     print('Polyploidy-index: ', p)
     sys.exit(0)
Esempio n. 16
0
 def run(self):
     """Map an ancestor karyotype onto the other genome and write the result.

     Blocks longer than ``self.block_length`` are read from
     ``self.blockinfo`` (optionally mirrored). BLAST hits are reduced to the
     first ``self.repeat_number`` rows per query after sorting by column 11
     in descending order. Gene pairs from ``self.colinear_gene_pairs`` are
     classified with ``self.karyotype_top`` / ``self.karyotype_left`` when
     the matching ``ancestor_*`` attribute exists, then passed through
     ``self.karyotype_map`` and ``self.new_ancestor``. The final table is
     written tab-separated, without header or index, to
     ``self.the_other_ancestor_file``.
     """
     bkinfo = pd.read_csv(self.blockinfo, index_col='id')
     bkinfo['chr1'] = bkinfo['chr1'].astype(str)
     bkinfo['chr2'] = bkinfo['chr2'].astype(str)
     # str() lets a real bool reach the text comparison; calling .upper()
     # directly on ``False`` raised AttributeError.
     if (self.blockinfo_reverse == True
             or str(self.blockinfo_reverse).upper() == 'TRUE'):
         bkinfo[['chr1', 'chr2']] = bkinfo[['chr2', 'chr1']]
         bkinfo[['block1', 'block2']] = bkinfo[['block2', 'block1']]
     # copy(): new columns are added below, so work on an independent frame
     # rather than a filtered slice.
     bkinfo = bkinfo[bkinfo['length'] > int(self.block_length)].copy()
     for column in ('class1', 'col1', 'class2', 'col2'):
         bkinfo[column] = ''

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     lens = base.newlens(self.the_other_lens, self.position)
     blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                           gff1, gff2, self.blast_reverse)

     # Collect the index labels of the leading rows of every query group.
     # A plain list also works when the BLAST table is empty, where
     # np.concatenate raised ValueError.
     limit = int(self.repeat_number)
     top_hits = []
     for _, group in blast.groupby(0):
         ranked = group.sort_values(by=11, ascending=False)
         top_hits.extend(ranked.index[:limit])
     blast = blast.loc[top_hits, [0, 1]]

     pairs = self.colinear_gene_pairs(bkinfo, gff1, gff2)

     # Default input for karyotype_map when no ancestor attribute is set:
     # one [chromosome, position, 0] row per position 1..length.
     data = []
     for k, v in lens.items():
         for i in range(1, v + 1):
             data.append([k, i, 0])

     gff1['color'] = np.nan
     gff2['color'] = np.nan
     gff1['classification'] = np.nan
     gff2['classification'] = np.nan
     if hasattr(self, 'ancestor_top'):
         ancestor = base.read_calassfication(self.ancestor_top)
         data = self.karyotype_top(pairs, ancestor, gff1, gff2)
     if hasattr(self, 'ancestor_left'):
         ancestor = base.read_calassfication(self.ancestor_left)
         data = self.karyotype_left(pairs, ancestor, gff1, gff2)

     the_other_ancestor_file = self.karyotype_map(data, lens)
     the_other_ancestor_file = self.new_ancestor(the_other_ancestor_file,
                                                 gff1, gff2, blast)
     the_other_ancestor_file.to_csv(self.the_other_ancestor_file,
                                    sep='\t',
                                    header=False,
                                    index=False)
Esempio n. 17
0
 def run(self):
     """Group alignment rows and gather the corresponding ``.treefile`` files.

     ``'.'`` cells become NaN, rows with fewer than three non-NaN cells are
     dropped, and the table is merged with the ``chr`` and ``self.position``
     columns of the GFF table. ``self.grouping`` yields the name stems;
     their ``.treefile`` files are appended with ``cat`` (100 per command) to
     ``self.trees_file``, which is finally rewritten as two tab-separated
     columns: tree file name, then the first column read back.
     """
     alignment = pd.read_csv(self.alignment, header=None)
     alignment.replace('.', np.nan, inplace=True)

     gff = base.newgff(self.gff)
     lens = base.newlens(self.lens, self.position)
     known_chr = gff['chr'].isin(lens.index)
     gff = gff[known_chr]

     alignment.dropna(thresh=3, inplace=True)
     alignment = pd.merge(alignment,
                          gff[['chr', self.position]],
                          left_on=0,
                          right_on=gff.index,
                          how='left')

     tree_files = [stem + '.treefile' for stem in self.grouping(alignment)]

     # Create or truncate the output so that '>>' below starts from empty.
     open(self.trees_file, 'w').close()
     batch_size = 100
     for start in range(0, len(tree_files), batch_size):
         batch = tree_files[start:start + batch_size]
         trees = ' '.join(map(str, batch))
         command = ' '.join(map(str, ['cat', trees, '>>', self.trees_file]))
         os.system(command)

     df = pd.read_csv(self.trees_file, header=None, sep='\t')
     df[1] = tree_files
     df[[1, 0]].to_csv(self.trees_file, index=None, sep='\t', header=False)
Esempio n. 18
0
    def run(self):
        """Draw the circular figure: chromosome wedges, optional data, labels.

        When an ``ancestor`` attribute exists, ancestor data is prepared by
        ``self.deal_ancestor`` and drawn with ``self.plot_collinearity``.
        When an ``alignment`` attribute exists, one ring per alignment column
        is drawn and annotated with the matching entry of
        ``self.column_names``. The figure is saved to ``self.savefig``,
        shown, and the process exits with status 0.
        """
        plt.figure(figsize=tuple(self.figsize))
        root = plt.axes([0, 0, 1, 1])
        mpl.rcParams['agg.path.chunksize'] = 100000000

        lens = base.newlens(self.lens, self.position)
        radius = float(self.radius)
        angle_gap = float(self.angle_gap)
        # Angle per length unit after subtracting (n + 1.5) gaps from the
        # full circle.
        gap_total = (int(len(lens)) + 1.5) * angle_gap
        angle = (2 * np.pi - gap_total) / int(lens.sum())
        loc_chr = self.chr_loction(lens, angle_gap, angle)

        # "name:colour,name:colour,..." -> {name: colour}
        tokens = [str(tok).strip() for tok in re.split(',|:', self.colors)]
        chr_color = dict(zip(tokens[::2], tokens[1::2]))

        for chrom in loc_chr:
            start, end = loc_chr[chrom]
            self.Wedge(root, (0.0, 0.0), radius + self.ring_width,
                       start * 180 / np.pi, end * 180 / np.pi,
                       self.ring_width * 0.3, chr_color[chrom], 0.9)

        gff = base.newgff(self.gff)

        if hasattr(self, 'ancestor'):
            ancestor = pd.read_csv(self.ancestor, sep='\t', header=None)
            al = pd.read_csv(self.ancestor_location, sep='\t', header=None)
            location_names = {0: 'chr', 1: 'start', 2: 'end', 3: 'color'}
            al.rename(columns=location_names, inplace=True)
            al['chr'] = al['chr'].astype(str)
            data = self.deal_ancestor(ancestor, gff, lens, loc_chr, angle, al)
            self.plot_collinearity(data, radius, lw=0.1, alpha=0.8)

        if hasattr(self, 'alignment'):
            alignment = pd.read_csv(self.alignment, sep='\t', header=None)
            newalignment = self.deal_alignment(alignment, gff, lens, loc_chr,
                                               angle)
            names = [str(name) for name in self.column_names.split(',')]
            text_style = dict(family='Arial',
                              verticalalignment="center",
                              horizontalalignment="center")
            for ring, column in enumerate(newalignment.columns[1:-2]):
                r = radius + self.ring_width * (ring + 1)
                self.plot_circle(loc_chr, r, lw=0.5, alpha=1, color='grey')
                self.plot_bar(newalignment[[column, 'rad']],
                              r + self.ring_width * 0.15,
                              self.ring_width * 0.7, 0.15, chr_color, 1)
                # Ring names alternate between two angles (in radians) so
                # that neighbouring labels do not sit on top of each other.
                loc = 0.05 if ring % 2 == 0 else -0.08
                label_radius = r + self.ring_width * 0.5
                x = label_radius * np.cos(loc)
                y = label_radius * np.sin(loc)
                plt.text(x,
                         y,
                         names[ring],
                         rotation=loc * 180 / np.pi,
                         fontsize=10,
                         **text_style)

        labels = dict(zip(lens.index, self.chr_label + lens.index))
        self.plot_labels(root,
                         labels,
                         loc_chr,
                         radius - self.ring_width * 0.3,
                         fontsize=self.label_size)
        root.set_xlim(-1, 1)
        root.set_ylim(-1.05, 0.95)
        root.set_axis_off()
        plt.savefig(self.savefig, dpi=500)
        plt.show()
        sys.exit(0)
Esempio n. 19
0
 def run(self):
     """Draw a dot plot of BLAST pairs with optional ancestor bars.

     When ``self.ancestor_left`` / ``self.ancestor_top`` is not ``None`` the
     matching tab-separated file is loaded, the plot axis is extended by
     0.02 on that side and ``self.ancestor_posion`` draws the bar. BLAST
     pairs from ``self.pair_positon`` are scattered on the frame drawn by
     ``base.dotplot_frame``. The figure is saved to ``self.savefig``, shown,
     and the process exits with status 0.
     """

     def _read_ancestor(path, lens):
         """Load an ancestor table, scale column 4 by its maximum and keep
         rows whose column 0 is a chromosome listed in *lens*."""
         table = pd.read_csv(path, sep="\t", header=None)
         table[0] = table[0].astype(str)
         table[3] = table[3].astype(str)
         table[4] = table[4].astype(int)
         table[4] = table[4] / table[4].max()
         return table[table[0].isin(lens.index)]

     axis = [0, 1, 1, 0]
     left, right, top, bottom = 0.07, 0.97, 0.93, 0.03
     lens1 = base.newlens(self.lens1, self.position)
     lens2 = base.newlens(self.lens2, self.position)
     step1 = 1 / float(lens1.sum())
     step2 = 1 / float(lens2.sum())

     has_left = self.ancestor_left is not None
     has_top = self.ancestor_top is not None
     if has_left:
         # Leave room for the ancestor bar on this side of the plot.
         axis[0] = -0.02
         lens_ancestor_left = _read_ancestor(self.ancestor_left, lens1)
     if has_top:
         axis[3] = -0.02
         lens_ancestor_top = _read_ancestor(self.ancestor_top, lens2)

     # Raw string: '\d' in a plain literal is an invalid escape sequence.
     if re.search(r'\d', self.figsize):
         self.figsize = [float(k) for k in self.figsize.split(',')]
     else:
         # No digits supplied: width 10, height 10 * sum(lens1) / sum(lens2).
         self.figsize = np.array(
             [1, float(lens1.sum()) / float(lens2.sum())]) * 10
     plt.rcParams['ytick.major.pad'] = 0
     fig, ax = plt.subplots(figsize=self.figsize)
     ax.xaxis.set_ticks_position('top')
     base.dotplot_frame(fig, ax, lens1, lens2, step1, step2,
                        self.genome1_name, self.genome2_name,
                        [axis[0], axis[3]])

     gff1 = base.newgff(self.gff1)
     gff2 = base.newgff(self.gff2)
     gff1 = base.gene_location(gff1, lens1, step1, self.position)
     gff2 = base.gene_location(gff2, lens2, step2, self.position)
     if has_top:
         self.ancestor_posion(ax, gff2, lens_ancestor_top, 'top')
     if has_left:
         self.ancestor_posion(ax, gff1, lens_ancestor_left, 'left')

     blast = base.newblast(self.blast, int(self.score), float(self.evalue),
                           gff1, gff2, self.blast_reverse)
     df = self.pair_positon(blast, gff1, gff2, int(self.multiple),
                            int(self.repeat_number))
     ax.scatter(df['loc2'],
                df['loc1'],
                s=float(self.markersize),
                c=df['color'],
                alpha=0.5,
                edgecolors=None,
                linewidths=0,
                marker='o')
     ax.axis(axis)
     plt.subplots_adjust(left=left, right=right, top=top, bottom=bottom)
     plt.savefig(self.savefig, dpi=500)
     plt.show()
     sys.exit(0)
Esempio n. 20
0
    def run(self):
        """Draw the circular plot and save it to ``self.savefig``.

        Chromosome positions on the circle come from ``self.chr_location``.
        When the instance has an ``ancestor`` attribute, the ancestor data
        are prepared by ``self.deal_ancestor`` and drawn with
        ``self.plot_collinearity``.  When it has an ``alignment``
        attribute, one ring per alignment column is drawn with
        ``self.plot_circle`` / ``self.plot_bar`` and labelled.  Legends and
        chromosome labels are added, the figure height is adjusted to the
        data aspect ratio, and the figure is saved (dpi=500) and shown.
        The process then exits with status 0.
        """
        fig, ax = plt.subplots(figsize=self.figsize)
        mpl.rcParams['agg.path.chunksize'] = 100000000
        lens = base.newlens(self.lens, self.position)
        radius, angle_gap = float(self.radius), float(self.angle_gap)
        # Angle per unit of chromosome length once (len(lens) + 1.5) gaps
        # of `angle_gap` have been taken out of the full circle.
        angle = (2 * np.pi -
                 (len(lens) + 1.5) * angle_gap) / (int(lens.sum()))
        loc_chr = self.chr_location(lens, angle_gap, angle)
        # `self.colors` is a flat "key:colour,key:colour,..." string;
        # even items become keys, odd items their colours.
        list_colors = [str(k).strip() for k in re.split(',|:', self.colors)]
        chr_color = dict(zip(list_colors[::2], list_colors[1::2]))
        gff = base.newgff(self.gff)
        if hasattr(self, 'ancestor'):
            ancestor = pd.read_csv(self.ancestor, header=None)
            al = pd.read_csv(self.ancestor_location, sep='\t', header=None)
            al.rename(columns={
                0: 'chr',
                1: 'start',
                2: 'end',
                3: 'color'
            },
                      inplace=True)
            al['chr'] = al['chr'].astype(str)
            data = self.deal_ancestor(ancestor, gff, lens, loc_chr, angle, al)
            self.plot_collinearity(data, radius, lw=0.1, alpha=0.8)

        if hasattr(self, 'alignment'):
            alignment = pd.read_csv(self.alignment, header=None)
            newalignment = self.deal_alignment(alignment, gff, lens, loc_chr,
                                               angle)
            if ',' in self.column_names:
                names = [str(k) for k in self.column_names.split(',')]
            else:
                names = [None] * len(newalignment.columns)
            align = dict(family='Arial',
                         verticalalignment="center",
                         horizontalalignment="center")
            # The first and the last two columns are skipped; each of the
            # remaining columns gets its own ring outside `radius`.
            for k, v in enumerate(newalignment.columns[1:-2]):
                r = radius + self.ring_width * (k + 1)
                self.plot_circle(loc_chr, r, lw=0.5, alpha=1, color='grey')
                self.plot_bar(newalignment[[v, 'rad']],
                              r + self.ring_width * 0.15,
                              self.ring_width * 0.7, 0.15, chr_color, 1)
                # Alternate the label angle between consecutive rings.
                loc = 0.05 if k % 2 == 0 else -0.08
                x, y = (r+self.ring_width*0.5) * \
                    np.cos(loc), (r+self.ring_width*0.5) * np.sin(loc)
                plt.text(x,
                         y,
                         names[k],
                         rotation=loc * 180 / np.pi,
                         fontsize=self.label_size,
                         **align)
        if hasattr(self, 'ancestor'):
            colors = al['color'].drop_duplicates().values.tolist()
            ancestor_chr_color = dict(zip(range(1, len(colors) + 1), colors))
            self.plot_legend(ax, ancestor_chr_color, self.legend_square[0],
                             self.legend_square[1])
        if hasattr(self, 'alignment'):
            # Keep the 'nan' colour out of the legend.  A colour map
            # without that key is tolerated instead of raising KeyError
            # after all the plotting work has been done.
            chr_color.pop('nan', None)
            self.plot_legend(ax, chr_color, self.legend_square[0],
                             self.legend_square[1])
        labels = self.chr_label + lens.index
        labels = dict(zip(lens.index, labels))
        self.plot_labels(ax,
                         labels,
                         loc_chr,
                         radius + self.ring_width * 0.3,
                         fontsize=self.label_size)

        plt.axis('off')
        # Rescale the figure height so it matches the aspect ratio of the
        # final data limits.
        a = (ax.get_ylim()[1]-ax.get_ylim()[0]) / \
            (ax.get_xlim()[1]-ax.get_xlim()[0])
        fig.set_size_inches(self.figsize[0], self.figsize[0] * a, forward=True)
        plt.savefig(self.savefig, dpi=500)
        plt.show()
        sys.exit(0)