def masterd4nonSp(prefix):
    """
    Group the PCoA coordinates of every structure by a user-supplied grouping.

    The group of each entry returned by ``get_gmfile(prefix)`` is resolved in
    two steps: ``equi2dict(prefix)`` maps the entry to an intermediate key and
    ``list2dict()`` maps that key to its group label.

    Relies on the names ``dpc``, ``X`` and ``Y``, which are not defined in
    this function (presumably module-level settings). When ``dpc`` is true
    the first two coordinates are taken from columns ``X - 1`` and ``Y - 1``
    of ``allax``; otherwise ``ax1`` and ``ax2`` are used. The third
    coordinate is always ``ax3``.

    :param prefix: common prefix of the input files (``prefix + ".fasta"``
        is read here; the helpers receive the bare prefix).
    :returns: ``(dgroup, groupset)`` where ``dgroup`` maps a group label to a
        list of ``(F.chains entry, first coord, second coord, ax3 value)``
        tuples and ``groupset`` is the set of all labels in ``list2dict()``.
    :raises SystemExit: with status 1 when no entries were read or when the
        number of eigenvectors differs from the number of individuals.
    """
    F = fasta(prefix + ".fasta")
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    pdbs, ind = get_gmfile(prefix)
    equi = equi2dict(prefix)
    ld = list2dict()
    groupset = set(ld.values())

    if not (pdbs and nvec == ind):
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Something is wrong with the files. Please check them and try again")
        # Exit with a non-zero status so calling shells see the failure.
        sys.exit(1)

    dgroup = {}
    for e, pdb in enumerate(pdbs):
        group = ld[equi[pdb]]
        if dpc:
            point = (F.chains[pdb[:4]], allax[e][X - 1], allax[e][Y - 1], ax3[e])
        else:
            point = (F.chains[pdb[:4]], ax1[e], ax2[e], ax3[e])
        dgroup.setdefault(group, []).append(point)
    return dgroup, groupset
def masterd4nonSp(prefix):
    '''
    Group the PCoA coordinates of every structure by a user-supplied grouping.

    The group of each entry returned by ``get_gmfile(prefix)`` is resolved in
    two steps: ``equi2dict(prefix)`` maps the entry to an intermediate key and
    ``list2dict()`` maps that key to its group label.

    Relies on the names ``dpc``, ``X`` and ``Y``, which are not defined in
    this function (presumably module-level settings). When ``dpc`` is true
    the first two coordinates are taken from columns ``X - 1`` and ``Y - 1``
    of ``allax``; otherwise ``ax1`` and ``ax2`` are used. The third
    coordinate is always ``ax3``.

    :param prefix: common prefix of the input files (``prefix + '.fasta'``
        is read here; the helpers receive the bare prefix).
    :returns: ``(dgroup, groupset)`` where ``dgroup`` maps a group label to a
        list of ``(F.chains entry, first coord, second coord, ax3 value)``
        tuples and ``groupset`` is the set of all labels in ``list2dict()``.
    :raises SystemExit: with status 1 when no entries were read or when the
        number of eigenvectors differs from the number of individuals.
    '''
    F = fasta(prefix + '.fasta')
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    pdbs, ind = get_gmfile(prefix)
    equi = equi2dict(prefix)
    ld = list2dict()
    groupset = set(ld.values())

    if not (pdbs and nvec == ind):
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('Something is wrong with the files. Please check them and try again')
        # Exit with a non-zero status so calling shells see the failure.
        sys.exit(1)

    dgroup = {}
    for e, pdb in enumerate(pdbs):
        group = ld[equi[pdb]]
        if dpc:
            point = (F.chains[pdb[:4]], allax[e][X - 1], allax[e][Y - 1], ax3[e])
        else:
            point = (F.chains[pdb[:4]], ax1[e], ax2[e], ax3[e])
        dgroup.setdefault(group, []).append(point)
    return dgroup, groupset
def number_sps(prefix, db, level, remote):
    """
    Group the PCoA coordinates of every structure by taxonomic group.

    Reads ``prefix + ".fasta"``, the eigenvectors and the gm file for
    ``prefix``, looks up the group of each gm entry at the requested
    taxonomic ``level`` and collects the coordinates per group.

    Relies on the names ``dpc``, ``X`` and ``Y``, which are not defined in
    this function (presumably module-level settings). When ``dpc`` is true
    the first two coordinates come from columns ``X - 1`` and ``Y - 1`` of
    ``allax``; otherwise ``ax1`` and ``ax2`` are used. The third coordinate
    is always ``ax3``.

    :param prefix: common prefix of the input files.
    :param db: passed through to ``entrez_query`` (remote lookups only).
    :param level: integer taxonomic level; forwarded to the lookup helper.
    :param remote: if true, query through ``entrez_query`` and retry every
        10 seconds until it succeeds; otherwise load ``TaxDB.bin`` from the
        directory named by ``$BLASTDB`` and use ``get_sp_locally``.
    :returns: ``(dgroup, s)`` where ``dgroup`` maps a group name to a list of
        ``(F.chains entry, first coord, second coord, ax3 value)`` tuples and
        ``s`` is the set of group names that were found.
    :raises SystemExit: with status 1 when no entries were read or when the
        number of eigenvectors differs from the number of individuals.
    """
    # Single parenthesised arguments print identically under the Python 2
    # print statement and the Python 3 print function.
    print("You choose taxonomic level %d:" % (level))
    print("\t1 normally is superkingdom or domain")
    print("\t>1 depends on the lineage reported in NCBI")
    if not remote:
        print("\nGoing locally:")
        # Ask the shell for $BLASTDB, the directory expected to hold TaxDB.bin.
        P = Popen("echo $BLASTDB", shell=True, stderr=PIPE, stdout=PIPE)
        PATH = P.communicate()[0].strip()
        print("\tImporting dictionary with species and lineage from %s." % (PATH))
        print("\tIf the path above is blank, set the $BLASTDB environment."
              " Make sure that the TaxDB.bin is in such path")
        current = time.time()
        # NOTE(security): unpickling runs arbitrary code if TaxDB.bin comes
        # from an untrusted source; only point $BLASTDB at trusted data.
        # The open mode is left unchanged so existing files keep loading.
        with open("%s/TaxDB.bin" % (PATH)) as handle:
            d = pickle.load(handle)
        print("\tTime elapsed to load the dictionary: %f" % (time.time() - current))

    F = fasta(prefix + ".fasta")
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    print("Looking for the final gm file.")
    pdbs, ind = get_gmfile(prefix)

    if not (pdbs and nvec == ind):
        print("Something is wrong with the files. Please check them and try again")
        # Exit with a non-zero status so calling shells see the failure.
        sys.exit(1)

    dgroup = {}
    s = set()
    for e, pdb in enumerate(pdbs):
        if remote:
            # Resolve the key outside the retry loop: a missing key can never
            # succeed on retry and previously caused an endless sleep cycle.
            # NOTE(review): this branch slices [:-1] while the coordinates
            # below use [:4]; identical for 5-character ids - confirm.
            query = F.chains[pdb[:-1]]
            while True:
                try:
                    group = entrez_query(db, query, level)
                    break
                except Exception:
                    # Keep retrying failed queries, but let KeyboardInterrupt
                    # and SystemExit propagate so the run can be aborted.
                    time.sleep(10)
        else:
            group = get_sp_locally(pdb, d, level)
        # NOTE(review): the original indentation was lost; sentence_case is
        # assumed to apply to both the remote and the local result.
        group = sentence_case(group)
        if dpc:
            point = (F.chains[pdb[:4]], allax[e][X - 1], allax[e][Y - 1], ax3[e])
        else:
            point = (F.chains[pdb[:4]], ax1[e], ax2[e], ax3[e])
        dgroup.setdefault(group, []).append(point)
        s.add(group)
    return dgroup, s
def plot(prefix, masterd, fontsize, symlog, threeD):
    """
    Plot two axes of the PCoA, one marker style per group, and save the result.

    Every point is annotated with a running number. The mapping from that
    number to the point's name is written, tab separated, to
    ``prefix + "_PCoA_axes%d_%d.equivalences" % (X, Y)`` as
    ``name<TAB>find_key(F.chains, name)<TAB>number``. ``X`` and ``Y`` are not
    defined in this function (presumably module-level settings).

    The figures are shown with ``plt.show()`` and then saved as
    ``prefix + "_Axis%d_%dPCoA.png" % (X, Y)`` and, when ``threeD`` is true,
    ``prefix + "_3AxesPCoA.png"``.

    :param prefix: common prefix of the input and output files.
    :param masterd: mapping of group label to an indexable whose items 0-3
        are the point names and the x, y and z coordinate sequences.
    :param fontsize: font size for annotations and axis labels.
    :param symlog: if true, use a symmetrical log scale on both 2D axes.
    :param threeD: if true, also draw and save a 3D scatter of x, y and z.
    """
    F = fasta(prefix + ".fasta")
    markers = [
        "k.", "b+", "g*", "r.", "c+", "m*", "y.",
        "k+", "b*", "g.", "r+", "c*", "m.", "y+",
        "k*", "b.", "g+", "r*", "c.", "m+", "y*",
    ]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    if threeD:
        fig3D = plt.figure()
        # Figure.gca() no longer accepts keyword arguments in recent
        # matplotlib releases; add_subplot works on old and new versions.
        ax3D = fig3D.add_subplot(111, projection="3d")
    ax.spines["top"].set_color("none")
    ax.xaxis.tick_bottom()
    ax.spines["right"].set_color("none")
    ax.yaxis.tick_left()

    count = 0
    # The context manager closes the equivalences file even if plotting fails.
    with open(prefix + "_PCoA_axes%d_%d.equivalences" % (X, Y), "w") as fil:
        for c, k in enumerate(masterd):
            entry = masterd[k]
            x = entry[1]
            y = entry[2]
            z = entry[3]
            # Cycle through the styles so more groups than markers do not
            # raise an IndexError.
            marker = markers[c % len(markers)]
            ax.plot(x, y, marker, label=k)
            if threeD:
                ax3D.plot(x, y, z, marker, label=k)
            for i in range(len(x)):
                tag = str(count)
                ax.annotate(tag, (x[i] + 0.1, y[i] + 0.1), fontsize=fontsize)
                if threeD:
                    ax3D.text(x[i] + 0.1, y[i] + 0.1, z[i] + 0.1, tag,
                              fontsize=fontsize)
                name = entry[0][i]
                fil.write(name + "\t" + find_key(F.chains, name) + "\t" + tag + "\n")
                count += 1

    if symlog:
        ax.set_xscale("symlog")
        ax.set_yscale("symlog")
        ax.set_xlabel("Axis %d (symmetrical log)" % (X), fontsize=fontsize)
        ax.set_ylabel("Axis %d (symmetrical log)" % (Y), fontsize=fontsize)
    else:
        ax.set_xlabel("Axis %d" % (X), fontsize=fontsize)
        ax.set_ylabel("Axis %d" % (Y), fontsize=fontsize)
    ax.legend(loc=0, fancybox=True, shadow=True)
    fig.tight_layout()

    if threeD:
        # NOTE(review): the 3D labels are fixed to axes 1-3 even though the
        # 2D labels follow X and Y - confirm this is intended.
        ax3D.set_xlabel("Axis 1", fontsize=fontsize)
        ax3D.set_ylabel("Axis 2", fontsize=fontsize)
        ax3D.set_zlabel("Axis 3", fontsize=fontsize)
        ax3D.view_init(30, 45)
        fig3D.tight_layout()
        ax3D.legend(loc=0, fancybox=True, shadow=True)

    plt.show()
    fig.savefig(prefix + "_Axis%d_%dPCoA.png" % (X, Y), dpi=300)
    if threeD:
        fig3D.savefig(prefix + "_3AxesPCoA.png", dpi=300)
def number_sps(prefix, db, level, remote):
    '''
    Group the PCoA coordinates of every structure by taxonomic group.

    Reads ``prefix + '.fasta'``, the eigenvectors and the gm file for
    ``prefix``, looks up the group of each gm entry at the requested
    taxonomic ``level`` and collects the coordinates per group.

    Relies on the names ``dpc``, ``X`` and ``Y``, which are not defined in
    this function (presumably module-level settings). When ``dpc`` is true
    the first two coordinates come from columns ``X - 1`` and ``Y - 1`` of
    ``allax``; otherwise ``ax1`` and ``ax2`` are used. The third coordinate
    is always ``ax3``.

    :param prefix: common prefix of the input files.
    :param db: passed through to ``entrez_query`` (remote lookups only).
    :param level: integer taxonomic level; forwarded to the lookup helper.
    :param remote: if true, query through ``entrez_query`` and retry every
        10 seconds until it succeeds; otherwise load ``TaxDB.bin`` from the
        directory named by ``$BLASTDB`` and use ``get_sp_locally``.
    :returns: ``(dgroup, s)`` where ``dgroup`` maps a group name to a list of
        ``(F.chains entry, first coord, second coord, ax3 value)`` tuples and
        ``s`` is the set of group names that were found.
    :raises SystemExit: with status 1 when no entries were read or when the
        number of eigenvectors differs from the number of individuals.
    '''
    # Single parenthesised arguments print identically under the Python 2
    # print statement and the Python 3 print function.
    print('You choose taxonomic level %d:' % (level))
    print('\t1 normally is superkingdom or domain')
    print('\t>1 depends on the lineage reported in NCBI')
    if not remote:
        print("\nGoing locally:")
        # Ask the shell for $BLASTDB, the directory expected to hold TaxDB.bin.
        P = Popen('echo $BLASTDB', shell=True, stderr=PIPE, stdout=PIPE)
        PATH = P.communicate()[0].strip()
        print('\tImporting dictionary with species and lineage from %s.' % (PATH))
        print('\tIf the path above is blank, set the $BLASTDB environment.'
              ' Make sure that the TaxDB.bin is in such path')
        current = time.time()
        # NOTE(security): unpickling runs arbitrary code if TaxDB.bin comes
        # from an untrusted source; only point $BLASTDB at trusted data.
        # The open mode is left unchanged so existing files keep loading.
        with open('%s/TaxDB.bin' % (PATH)) as handle:
            d = pickle.load(handle)
        print('\tTime elapsed to load the dictionary: %f' % (time.time() - current))

    F = fasta(prefix + '.fasta')
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    print('Looking for the final gm file.')
    pdbs, ind = get_gmfile(prefix)

    if not (pdbs and nvec == ind):
        print('Something is wrong with the files. Please check them and try again')
        # Exit with a non-zero status so calling shells see the failure.
        sys.exit(1)

    dgroup = {}
    s = set()
    for e, pdb in enumerate(pdbs):
        if remote:
            # Resolve the key outside the retry loop: a missing key can never
            # succeed on retry and previously caused an endless sleep cycle.
            # NOTE(review): this branch slices [:-1] while the coordinates
            # below use [:4]; identical for 5-character ids - confirm.
            query = F.chains[pdb[:-1]]
            while True:
                try:
                    group = entrez_query(db, query, level)
                    break
                except Exception:
                    # Keep retrying failed queries, but let KeyboardInterrupt
                    # and SystemExit propagate so the run can be aborted.
                    time.sleep(10)
        else:
            group = get_sp_locally(pdb, d, level)
        # NOTE(review): the original indentation was lost; sentence_case is
        # assumed to apply to both the remote and the local result.
        group = sentence_case(group)
        if dpc:
            point = (F.chains[pdb[:4]], allax[e][X - 1], allax[e][Y - 1], ax3[e])
        else:
            point = (F.chains[pdb[:4]], ax1[e], ax2[e], ax3[e])
        dgroup.setdefault(group, []).append(point)
        s.add(group)
    return dgroup, s
def plot(prefix, masterd, fontsize, symlog, threeD):
    '''
    Plot two axes of the PCoA, one marker style per group, and save the result.

    Every point is annotated with a running number. The mapping from that
    number to the point's name is written, tab separated, to
    ``prefix + '_PCoA_axes%d_%d.equivalences' % (X, Y)`` as
    ``name<TAB>find_key(F.chains, name)<TAB>number``. ``X`` and ``Y`` are not
    defined in this function (presumably module-level settings).

    The figures are shown with ``plt.show()`` and then saved as
    ``prefix + '_Axis%d_%dPCoA.png' % (X, Y)`` and, when ``threeD`` is true,
    ``prefix + '_3AxesPCoA.png'``.

    :param prefix: common prefix of the input and output files.
    :param masterd: mapping of group label to an indexable whose items 0-3
        are the point names and the x, y and z coordinate sequences.
    :param fontsize: font size for annotations and axis labels.
    :param symlog: if true, use a symmetrical log scale on both 2D axes.
    :param threeD: if true, also draw and save a 3D scatter of x, y and z.
    '''
    F = fasta(prefix + '.fasta')
    markers = [
        'k.', 'b+', 'g*', 'r.', 'c+', 'm*', 'y.',
        'k+', 'b*', 'g.', 'r+', 'c*', 'm.', 'y+',
        'k*', 'b.', 'g+', 'r*', 'c.', 'm+', 'y*',
    ]
    fig = plt.figure()
    ax = fig.add_subplot(111)
    if threeD:
        fig3D = plt.figure()
        # Figure.gca() no longer accepts keyword arguments in recent
        # matplotlib releases; add_subplot works on old and new versions.
        ax3D = fig3D.add_subplot(111, projection='3d')
    ax.spines['top'].set_color('none')
    ax.xaxis.tick_bottom()
    ax.spines['right'].set_color('none')
    ax.yaxis.tick_left()

    count = 0
    # The context manager closes the equivalences file even if plotting fails.
    with open(prefix + '_PCoA_axes%d_%d.equivalences' % (X, Y), 'w') as fil:
        for c, k in enumerate(masterd):
            entry = masterd[k]
            x = entry[1]
            y = entry[2]
            z = entry[3]
            # Cycle through the styles so more groups than markers do not
            # raise an IndexError.
            marker = markers[c % len(markers)]
            ax.plot(x, y, marker, label=k)
            if threeD:
                ax3D.plot(x, y, z, marker, label=k)
            for i in range(len(x)):
                tag = str(count)
                ax.annotate(tag, (x[i] + 0.1, y[i] + 0.1), fontsize=fontsize)
                if threeD:
                    ax3D.text(x[i] + 0.1, y[i] + 0.1, z[i] + 0.1, tag,
                              fontsize=fontsize)
                name = entry[0][i]
                fil.write(name + '\t' + find_key(F.chains, name) + '\t' + tag + '\n')
                count += 1

    if symlog:
        ax.set_xscale("symlog")
        ax.set_yscale("symlog")
        ax.set_xlabel('Axis %d (symmetrical log)' % (X), fontsize=fontsize)
        ax.set_ylabel('Axis %d (symmetrical log)' % (Y), fontsize=fontsize)
    else:
        ax.set_xlabel('Axis %d' % (X), fontsize=fontsize)
        ax.set_ylabel('Axis %d' % (Y), fontsize=fontsize)
    ax.legend(loc=0, fancybox=True, shadow=True)
    fig.tight_layout()

    if threeD:
        # NOTE(review): the 3D labels are fixed to axes 1-3 even though the
        # 2D labels follow X and Y - confirm this is intended.
        ax3D.set_xlabel('Axis 1', fontsize=fontsize)
        ax3D.set_ylabel('Axis 2', fontsize=fontsize)
        ax3D.set_zlabel('Axis 3', fontsize=fontsize)
        ax3D.view_init(30, 45)
        fig3D.tight_layout()
        ax3D.legend(loc=0, fancybox=True, shadow=True)

    plt.show()
    fig.savefig(prefix + '_Axis%d_%dPCoA.png' % (X, Y), dpi=300)
    if threeD:
        fig3D.savefig(prefix + '_3AxesPCoA.png', dpi=300)