Ejemplo n.º 1
0
def masterd4nonSp(prefix):
    """Bucket the coordinates of every gm-file entry by its predefined group.

    Loads ``<prefix>.fasta`` and the eigenvector, gm-file and equivalence
    data for ``prefix`` through the module helpers, then files one tuple per
    gm-file entry under the group that ``list2dict()`` assigns to that
    entry's equivalence.

    Depends on the module-level names ``dpc``, ``X`` and ``Y``: when ``dpc``
    is truthy the first two coordinates are taken from columns ``X - 1`` and
    ``Y - 1`` of ``allax``; otherwise ``ax1`` and ``ax2`` are used.

    Args:
        prefix: Common prefix of the input files.

    Returns:
        A ``(dgroup, groupset)`` pair. ``dgroup`` maps each group to a list
        of ``(F.chains[entry[:4]], first, second, ax3[i])`` tuples, and
        ``groupset`` is the set of every value in ``list2dict()``.

    Raises:
        SystemExit: with a non-zero status when the gm file yields no
            entries or its count disagrees with the eigenvector count.
    """
    F = fasta(prefix + ".fasta")
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    pdbs, ind = get_gmfile(prefix)
    equi = equi2dict(prefix)
    ld = list2dict()
    groupset = set(ld.values())

    if not pdbs or nvec != ind:
        # A string argument makes sys.exit write the message to stderr and
        # exit with status 1, so callers and shells can detect the failure.
        sys.exit("Something is wrong with the files. Please check them and try again")

    dgroup = {}
    for e, pdb in enumerate(pdbs):
        group = ld[equi[pdb]]
        if dpc:
            first, second = allax[e][X - 1], allax[e][Y - 1]
        else:
            first, second = ax1[e], ax2[e]
        # Only the first four characters of the gm entry key into F.chains.
        dgroup.setdefault(group, []).append((F.chains[pdb[:4]], first, second, ax3[e]))
    return dgroup, groupset
Ejemplo n.º 2
0
def masterd4nonSp(prefix):
    """Collect per-entry coordinates grouped by their predefined group.

    The fasta file ``<prefix>.fasta``, the eigenvectors, the gm file and the
    equivalence table for ``prefix`` are read through the module helpers.
    Each gm-file entry is translated to its equivalence, the equivalence is
    looked up in ``list2dict()`` to obtain a group, and a coordinate tuple
    is appended to that group's list.

    The module-level flag ``dpc`` selects the coordinates: when truthy,
    columns ``X - 1`` and ``Y - 1`` of ``allax`` are used (``X`` and ``Y``
    are module-level too); otherwise ``ax1`` and ``ax2``.

    Args:
        prefix: Common prefix of the input files.

    Returns:
        Tuple ``(dgroup, groupset)``: ``dgroup`` maps group -> list of
        ``(F.chains[entry[:4]], first, second, ax3[i])``; ``groupset`` is
        the set of all values of ``list2dict()``.

    Raises:
        SystemExit: non-zero exit when the gm file is empty or its count
            does not match the number of eigenvectors.
    """
    F = fasta(prefix + '.fasta')
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    pdbs, ind = get_gmfile(prefix)
    equi = equi2dict(prefix)
    ld = list2dict()
    groupset = set(ld.values())

    files_agree = pdbs and nvec == ind
    if not files_agree:
        # Passing the message to sys.exit sends it to stderr and sets exit
        # status 1 instead of reporting success to the calling shell.
        sys.exit('Something is wrong with the files. Please check them and try again')

    dgroup = {}
    for e, pdb in enumerate(pdbs):
        group = ld[equi[pdb]]
        chain = F.chains[pdb[:4]]  # keyed by the entry's first four characters
        if dpc:
            record = (chain, allax[e][X - 1], allax[e][Y - 1], ax3[e])
        else:
            record = (chain, ax1[e], ax2[e], ax3[e])
        dgroup.setdefault(group, []).append(record)
    return dgroup, groupset
Ejemplo n.º 3
0
def number_sps(prefix, db, level, remote):
    """Group the PCoA coordinates of every gm-file entry by taxonomic group.

    Reads ``<prefix>.fasta``, the eigenvectors and the gm file for
    ``prefix``. For each gm-file entry a group name is resolved (remotely or
    locally, see ``remote``), normalised with ``sentence_case`` and used as
    the key under which the entry's coordinates are stored.

    Depends on the module-level names ``dpc``, ``X`` and ``Y``: when ``dpc``
    is truthy the first two coordinates come from columns ``X - 1`` and
    ``Y - 1`` of ``allax``; otherwise ``ax1`` and ``ax2`` are used.

    Args:
        prefix: Common prefix of the input files.
        db: Passed through to ``entrez_query`` (remote mode only).
        level: Integer taxonomic level; per the printed help, 1 is normally
            superkingdom/domain and larger values depend on the NCBI lineage.
        remote: Truthy to resolve groups with ``entrez_query``; falsy to use
            the pickled dictionary ``$BLASTDB/TaxDB.bin`` through
            ``get_sp_locally``.

    Returns:
        A ``(dgroup, groups)`` pair: ``dgroup`` maps group name -> list of
        ``(F.chains[entry[:4]], first, second, ax3[i])`` tuples and
        ``groups`` is the set of group names found.

    Raises:
        SystemExit: with a non-zero status when the gm file yields no
            entries or its count disagrees with the eigenvector count.

    Note:
        A comment left in the original code warns that remote mode is very
        slow and should be run at off-peak times to avoid being blacklisted
        by NCBI.
    """
    import os  # local import: only needed to read $BLASTDB

    print("You choose taxonomic level %d:" % (level))
    print("\t1 normally is superkingdom or domain")
    print("\t>1 depends on the lineage reported in NCBI")
    if not remote:
        print("\nGoing locally:")
        # Read the variable directly instead of spawning a shell to echo it.
        blastdb_dir = os.environ.get("BLASTDB", "").strip()
        print("\tImporting dictionary with species and lineage from %s." % (blastdb_dir))
        print("\tIf the path above is blank, set the $BLASTDB environment." " Make sure that the TaxDB.bin is in such path")
        current = time.time()
        # SECURITY: pickle can execute arbitrary code while loading; only
        # point $BLASTDB at a TaxDB.bin you created or trust.
        with open("%s/TaxDB.bin" % (blastdb_dir), "rb") as handle:
            d = pickle.load(handle)
        print("\tTime elapsed to load the dictionary: %f" % (time.time() - current))

    F = fasta(prefix + ".fasta")
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    print("Looking for the final gm file.")
    pdbs, ind = get_gmfile(prefix)
    if not pdbs or nvec != ind:
        # A string argument makes sys.exit write the message to stderr and
        # exit with status 1, so the failure is visible to the caller.
        sys.exit("Something is wrong with the files. Please check them and try again")

    dgroup = {}
    for e, pdb in enumerate(pdbs):
        if remote:
            # Looked up outside the retry loop so a missing key fails at once
            # instead of being retried forever.
            # NOTE(review): this key drops only the last character, whereas
            # the coordinate tuple below uses the first four -- confirm both
            # slices are intended.
            query = F.chains[pdb[:-1]]
            while True:
                try:
                    group = entrez_query(db, query, level)
                    break
                except Exception:
                    # Retry after a pause; KeyboardInterrupt and SystemExit
                    # are not Exception subclasses, so Ctrl-C still works.
                    time.sleep(10)
        else:
            group = get_sp_locally(pdb, d, level)

        group = sentence_case(group)

        if dpc:
            first, second = allax[e][X - 1], allax[e][Y - 1]
        else:
            first, second = ax1[e], ax2[e]
        dgroup.setdefault(group, []).append((F.chains[pdb[:4]], first, second, ax3[e]))

    # Every group seen became a key of dgroup, so its keys are the group set.
    return dgroup, set(dgroup)
Ejemplo n.º 4
0
def plot(prefix, masterd, fontsize, symlog, threeD):
    """Plot two PCoA axes (and optionally three) with one marker per group.

    Every point is annotated with a running number, and the mapping from
    that number back to the entry is written to
    ``<prefix>_PCoA_axes<X>_<Y>.equivalences`` as tab-separated lines of
    ``name``, ``find_key(F.chains, name)`` and the number. ``X`` and ``Y``
    are module-level names used for file names and axis labels.

    The figures are shown with ``plt.show()`` and then saved at 300 dpi as
    ``<prefix>_Axis<X>_<Y>PCoA.png`` and, when ``threeD`` is truthy,
    ``<prefix>_3AxesPCoA.png``.

    Args:
        prefix: Common prefix of the input fasta and of all output files.
        masterd: Mapping from group label to a sequence whose items 0-3 are
            the entry names and the x, y and z coordinates of that group.
        fontsize: Font size for annotations and axis labels.
        symlog: Truthy to put both 2D axes on a symmetrical log scale.
        threeD: Truthy to also draw and save a 3D plot.

    Returns:
        None.
    """
    F = fasta(prefix + ".fasta")
    markers = [
        "k.", "b+", "g*", "r.", "c+", "m*", "y.",
        "k+", "b*", "g.", "r+", "c*", "m.", "y+",
        "k*", "b.", "g+", "r*", "c.", "m+", "y*",
    ]
    fig = plt.figure()  # figsize=(6.83 , 9.19), dpi=300)
    ax = fig.add_subplot(111)
    if threeD:
        fig3D = plt.figure()
        # add_subplot accepts the projection on old and new matplotlib alike;
        # Figure.gca(projection=...) was removed in recent releases.
        ax3D = fig3D.add_subplot(111, projection="3d")
    ax.spines["top"].set_color("none")
    ax.xaxis.tick_bottom()
    ax.spines["right"].set_color("none")
    ax.yaxis.tick_left()

    count = 0
    # The context manager closes the equivalences file even if plotting fails.
    with open(prefix + "_PCoA_axes%d_%d.equivalences" % (X, Y), "w") as fil:
        for c, k in enumerate(masterd):
            names = masterd[k][0]
            x = masterd[k][1]
            y = masterd[k][2]
            z = masterd[k][3]
            # Reuse marker styles cyclically so more groups than styles no
            # longer raises IndexError.
            marker = markers[c % len(markers)]
            ax.plot(x, y, marker, label=k)
            if threeD:
                ax3D.plot(x, y, z, marker, label=k)
            for i in range(len(x)):
                ax.annotate(str(count), (x[i] + 0.1, y[i] + 0.1), fontsize=fontsize)
                if threeD:
                    ax3D.text(x[i] + 0.1, y[i] + 0.1, z[i] + 0.1, str(count), fontsize=fontsize)
                fil.write(names[i] + "\t" + find_key(F.chains, names[i]) + "\t" + str(count) + "\n")
                count += 1

    if symlog:
        ax.set_xscale("symlog")
        ax.set_yscale("symlog")
        ax.set_xlabel("Axis %d (symmetrical log)" % (X), fontsize=fontsize)
        ax.set_ylabel("Axis %d (symmetrical log)" % (Y), fontsize=fontsize)
    else:
        ax.set_xlabel("Axis %d" % (X), fontsize=fontsize)
        ax.set_ylabel("Axis %d" % (Y), fontsize=fontsize)
    ax.legend(loc=0, fancybox=True, shadow=True)  # bbox_to_anchor=(0.5, 1.1), ncol=4,
    fig.tight_layout()
    if threeD:
        # NOTE(review): the 3D labels are fixed to axes 1-3 while the 2D
        # labels follow X and Y -- confirm this is intended.
        ax3D.set_xlabel("Axis 1", fontsize=fontsize)
        ax3D.set_ylabel("Axis 2", fontsize=fontsize)
        ax3D.set_zlabel("Axis 3", fontsize=fontsize)
        ax3D.view_init(30, 45)
        fig3D.tight_layout()
        ax3D.legend(loc=0, fancybox=True, shadow=True)  # bbox_to_anchor=(0.5, -0.075),ncol=4,
    plt.show()
    fig.savefig(prefix + "_Axis%d_%dPCoA.png" % (X, Y), dpi=300)
    if threeD:
        fig3D.savefig(prefix + "_3AxesPCoA.png", dpi=300)
Ejemplo n.º 5
0
def number_sps(prefix, db, level, remote):
    """Bucket gm-file entries and their coordinates by taxonomic group.

    The fasta file ``<prefix>.fasta``, the eigenvectors and the gm file for
    ``prefix`` are read through the module helpers. A group name is obtained
    for each gm-file entry, passed through ``sentence_case`` and used as the
    dictionary key for that entry's coordinate tuple.

    The module-level flag ``dpc`` selects the coordinates: when truthy,
    columns ``X - 1`` and ``Y - 1`` of ``allax`` (``X`` and ``Y`` are
    module-level too); otherwise ``ax1`` and ``ax2``.

    Args:
        prefix: Common prefix of the input files.
        db: Forwarded to ``entrez_query``; unused when ``remote`` is falsy.
        level: Integer taxonomic level. The printed help states that 1 is
            normally superkingdom/domain and that higher levels depend on
            the lineage reported by NCBI.
        remote: When truthy, groups come from ``entrez_query`` (retried
            every 10 seconds until it succeeds). When falsy, the pickled
            dictionary ``$BLASTDB/TaxDB.bin`` is loaded and queried with
            ``get_sp_locally``.

    Returns:
        Tuple ``(dgroup, groups)``: ``dgroup`` maps group name -> list of
        ``(F.chains[entry[:4]], first, second, ax3[i])``; ``groups`` is the
        set of group names encountered.

    Raises:
        SystemExit: non-zero exit when the gm file is empty or its count
            does not match the number of eigenvectors.

    Note:
        The original code carries a commented-out warning that the remote
        path takes very long and is best run on a weekend to avoid being
        blacklisted by NCBI.
    """
    import os  # function-scope import; used only to read $BLASTDB

    print('You choose taxonomic level %d:' % (level))
    print('\t1 normally is superkingdom or domain')
    print('\t>1 depends on the lineage reported in NCBI')

    d = None
    if not remote:
        print("\nGoing locally:")
        # The environment is read in-process; no shell is needed for this.
        taxdb_dir = os.environ.get('BLASTDB', '').strip()
        print('\tImporting dictionary with species and lineage from %s.' %
              (taxdb_dir))
        print('\tIf the path above is blank, set the $BLASTDB environment.'
              ' Make sure that the TaxDB.bin is in such path')
        current = time.time()
        # SECURITY: unpickling runs arbitrary code from the file; TaxDB.bin
        # must come from a trusted source.
        with open('%s/TaxDB.bin' % (taxdb_dir), 'rb') as taxdb:
            d = pickle.load(taxdb)
        print('\tTime elapsed to load the dictionary: %f' %
              (time.time() - current))

    F = fasta(prefix + '.fasta')
    ax1, ax2, ax3, nvec, allax = parse_eigenvectors(prefix)
    print('Looking for the final gm file.')
    pdbs, ind = get_gmfile(prefix)

    files_agree = pdbs and nvec == ind
    if not files_agree:
        # sys.exit(message) prints to stderr and exits with status 1 rather
        # than signalling success to the calling shell.
        sys.exit('Something is wrong with the files. Please check them and try again')

    dgroup = {}
    for e, pdb in enumerate(pdbs):
        if remote:
            # Resolve the key once, before retrying: a KeyError here is
            # permanent and must not be swallowed by the retry loop.
            # NOTE(review): key is entry[:-1] here but entry[:4] for the
            # stored tuple below -- verify the difference is deliberate.
            chain_for_query = F.chains[pdb[:-1]]
            while True:
                try:
                    group = entrez_query(db, chain_for_query, level)
                except Exception:
                    # Back off and retry. Catching Exception (not a bare
                    # except) lets Ctrl-C and sys.exit() through.
                    time.sleep(10)
                else:
                    break
        else:
            group = get_sp_locally(pdb, d, level)

        group = sentence_case(group)

        chain = F.chains[pdb[:4]]
        if dpc:
            record = (chain, allax[e][X - 1], allax[e][Y - 1], ax3[e])
        else:
            record = (chain, ax1[e], ax2[e], ax3[e])
        dgroup.setdefault(group, []).append(record)

    # The keys of dgroup are exactly the groups that were seen.
    return dgroup, set(dgroup)
Ejemplo n.º 6
0
def plot(prefix, masterd, fontsize, symlog, threeD):
    """Draw the PCoA scatter plot(s), one marker style per group.

    Points are numbered consecutively across all groups. For every point a
    tab-separated line ``name``, ``find_key(F.chains, name)``, ``number`` is
    written to ``<prefix>_PCoA_axes<X>_<Y>.equivalences`` (``X`` and ``Y``
    are module-level names, also used in the axis labels).

    After ``plt.show()`` returns, the 2D figure is saved at 300 dpi as
    ``<prefix>_Axis<X>_<Y>PCoA.png``; with ``threeD`` truthy a 3D figure is
    saved as ``<prefix>_3AxesPCoA.png`` as well.

    Args:
        prefix: Common prefix of the input fasta and of all output files.
        masterd: Mapping group label -> sequence whose items 0-3 hold the
            entry names and the x, y and z coordinates for that group.
        fontsize: Font size for annotations and axis labels.
        symlog: Truthy to use a symmetrical log scale on both 2D axes.
        threeD: Truthy to also produce the 3D plot.

    Returns:
        None.
    """
    F = fasta(prefix + '.fasta')
    markers = [
        'k.', 'b+', 'g*', 'r.', 'c+', 'm*', 'y.', 'k+', 'b*', 'g.', 'r+', 'c*',
        'm.', 'y+', 'k*', 'b.', 'g+', 'r*', 'c.', 'm+', 'y*'
    ]
    n_markers = len(markers)

    fig = plt.figure()  #figsize=(6.83 , 9.19), dpi=300)
    ax = fig.add_subplot(111)
    if threeD:
        fig3D = plt.figure()
        # Figure.gca(projection=...) no longer exists in current matplotlib;
        # add_subplot with a projection works across versions.
        ax3D = fig3D.add_subplot(111, projection='3d')
    ax.spines['top'].set_color('none')
    ax.xaxis.tick_bottom()
    ax.spines['right'].set_color('none')
    ax.yaxis.tick_left()

    equivalences_path = prefix + '_PCoA_axes%d_%d.equivalences' % (X, Y)
    count = 0
    # "with" guarantees the file is closed even when a plotting call raises.
    with open(equivalences_path, 'w') as fil:
        for c, k in enumerate(masterd):
            series = masterd[k]
            names, x, y, z = series[0], series[1], series[2], series[3]
            # Wrap around the style list so any number of groups can be drawn.
            style = markers[c % n_markers]
            ax.plot(x, y, style, label=k)
            if threeD:
                ax3D.plot(x, y, z, style, label=k)
            for i in range(len(x)):
                label = str(count)
                ax.annotate(label, (x[i] + 0.1, y[i] + 0.1), fontsize=fontsize)
                if threeD:
                    ax3D.text(x[i] + 0.1,
                              y[i] + 0.1,
                              z[i] + 0.1,
                              label,
                              fontsize=fontsize)
                fil.write(names[i] + '\t' + find_key(F.chains, names[i]) +
                          '\t' + label + '\n')
                count += 1

    if symlog:
        ax.set_xscale("symlog")
        ax.set_yscale("symlog")
        ax.set_xlabel('Axis %d (symmetrical log)' % (X), fontsize=fontsize)
        ax.set_ylabel('Axis %d (symmetrical log)' % (Y), fontsize=fontsize)
    else:
        ax.set_xlabel('Axis %d' % (X), fontsize=fontsize)
        ax.set_ylabel('Axis %d' % (Y), fontsize=fontsize)
    ax.legend(
        loc=0,  #bbox_to_anchor=(0.5, 1.1), ncol=4,
        fancybox=True,
        shadow=True)
    fig.tight_layout()
    if threeD:
        # NOTE(review): 3D labels are hard-coded to axes 1-3 although the 2D
        # labels use X and Y -- confirm against the data passed in.
        ax3D.set_xlabel('Axis 1', fontsize=fontsize)
        ax3D.set_ylabel('Axis 2', fontsize=fontsize)
        ax3D.set_zlabel('Axis 3', fontsize=fontsize)
        ax3D.view_init(30, 45)
        fig3D.tight_layout()
        ax3D.legend(
            loc=0,  #bbox_to_anchor=(0.5, -0.075),ncol=4,
            fancybox=True,
            shadow=True)
    plt.show()
    fig.savefig(prefix + '_Axis%d_%dPCoA.png' % (X, Y), dpi=300)
    if threeD:
        fig3D.savefig(prefix + '_3AxesPCoA.png', dpi=300)