コード例 #1
0
def main():
    """Print the name and the number of genes of each large-enough chromosome.

    Reads the genome given by the ``genesFiles`` argument and writes one
    ``name count`` line on standard output for every entry of
    ``genome.lstGenes`` that holds at least ``minChrSize`` genes.
    """
    arguments = myTools.checkArgs([("genesFiles", str)],
                                  [("minChrSize", int, 1)],
                                  __doc__)
    genome = myGenomes.Genome(arguments["genesFiles"])

    for chrom, genes in genome.lstGenes.items():
        # Guard clause: chromosomes below the size threshold are not reported.
        if len(genes) < arguments["minChrSize"]:
            continue
        print(chrom, len(genes), file=sys.stdout)
コード例 #2
0
#!/usr/bin/env python3
"""
	Parcourt un fichier de genome et enleve les genes inclus dans un autre
"""

import sys
import collections
import itertools
import operator

from LibsDyogen import myFile, myTools, myGenomes

# Arguments
# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
arguments = myTools.checkArgs([("genome", myTools.File)], [], __doc__)

genome = myGenomes.Genome(arguments["genome"])

for c in genome.lstGenes:

    # The genes of chromosome `c`, ordered once by start and once by end.
    lref = sorted(genome.lstGenes[c], key=operator.attrgetter("beginning"))
    lnew = sorted(genome.lstGenes[c], key=operator.attrgetter("end"))

    # Whenever the two orderings disagree at a given rank, the two genes found
    # at that rank are linked together in the combinator.
    comb = myTools.myCombinator()
    for (g1, g2) in zip(lref, lnew):
        if g1 != g2:
            comb.addLink([g1, g2])

    # Initialised empty once per chromosome; nothing visible in this excerpt
    # fills it.
    removed = set()
コード例 #3
0
#!/usr/bin/env python3

import sys
import collections

from LibsDyogen import myFile, myTools, myGenomes

# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
arguments = myTools.checkArgs(
    [("genesFile", myTools.File), ("transcriptsCoords", myTools.File)],
    [
        ("useShortestTranscript", bool, True), ("sortOn5", bool, True),
        ("authorizedBiotypes", str, "protein_coding")
    ],
    "Cree une liste ordonnee des genes en tenant compte du plus petit transcrit"
)

genome = myGenomes.Genome(arguments["genesFile"])
# Comma-separated option turned into a set for fast membership tests.
biotypes = set(arguments["authorizedBiotypes"].split(","))

# Load the list of transcripts, grouped by the value of the first column.
# Only rows whose last column is an authorized biotype are kept; columns 2 and
# 3 are stored as integers (presumably the transcript coordinates -- confirm
# against the input format) together with column 1.
lstTrans = collections.defaultdict(list)
f = myFile.myTSV.reader(arguments["transcriptsCoords"])
try:
    for l in f.csvobject:
        if l[-1] in biotypes:
            lstTrans[l[0]].append((int(l[2]), int(l[3]), l[1]))
finally:
    # Close the input even when a malformed row makes int() raise.
    f.file.close()

for chrom in genome.lstGenes:

    # Creation de la liste a trier
    tmp = []
    for gene in genome.lstGenes[chrom]:
コード例 #4
0
def getGenome(e):
    """Load the genome of species or ancestor ``e``.

    The file name template comes from the ``genesFile`` option when ``e`` is
    listed in ``phylTree.listSpecies`` and from ``ancGenesFile`` otherwise;
    the template is filled with ``phylTree.fileName[e]``.
    """
    option = "genesFile" if e in phylTree.listSpecies else "ancGenesFile"
    path = arguments[option] % phylTree.fileName[e]
    return myGenomes.Genome(path)
コード例 #5
0
	Blocs de syntenie entre deux especes
"""

import sys

from LibsDyogen import myPhylTree, myGenomes, myFile, myTools, myMaths, myDiags

# Arguments
modesOrthos = list(myDiags.OrthosFilterType._keys)
# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
# No backslash continuation is needed inside the parentheses.
arguments = myTools.checkArgs(
    [("genome1", myTools.File), ("genome2", myTools.File),
     ("ancGenes", myTools.File)],
    [("fusionThreshold", int, -1), ("sameStrand", bool, True),
     ("orthosFilter", str, modesOrthos), ("minimalLength", int, 2)],
    __doc__
)

genome1 = myGenomes.Genome(arguments["genome1"])
genome2 = myGenomes.Genome(arguments["genome2"])
ancGenes = myGenomes.Genome(arguments["ancGenes"])
# Map the textual option back to the matching OrthosFilterType member, using
# its position in the list of key names.
orthosFilter = myDiags.OrthosFilterType[modesOrthos.index(
    arguments["orthosFilter"])]

# Lengths of the blocks that are kept.
statsDiags = []
for ((c1, d1), (c2, d2), daa) in myDiags.calcDiags(
        genome1, genome2, ancGenes,
        fusionThreshold=arguments["fusionThreshold"],
        sameStrand=arguments["sameStrand"],
        orthosFilter=orthosFilter,
        minChromLength=arguments["minimalLength"]):

    # Blocks shorter than the requested minimum are ignored.
    l = len(daa)
    if l < arguments["minimalLength"]:
        continue
    statsDiags.append(l)

    dic1 = genome1.lstGenes[c1]
コード例 #6
0
#!/usr/bin/env python3
"""
	Renvoie les listes des devenirs de chaque gene le long des branches de l'arbre phylogenetique
"""

from LibsDyogen import myMaths, myTools, myGenomes, myPhylTree

# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
arguments = myTools.checkArgs(
    [("phylTree.conf", myTools.File), ("rootSpecies", str)],
    [("genesFile", str, ""), ("ancGenesFile", str, "")],
    __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
# Load all the files: one genome per entry of listSpecies and of listAncestr,
# each file name being built from the matching template option.
genes = {}
for e in phylTree.listSpecies:
    genes[e] = myGenomes.Genome(arguments["genesFile"] % phylTree.fileName[e])
for a in phylTree.listAncestr:
    genes[a] = myGenomes.Genome(arguments["ancGenesFile"] %
                                phylTree.fileName[a])


def transformName(esp, xxx_todo_changeme):
    """Return the first name of the gene at a given position in genome ``esp``.

    ``xxx_todo_changeme`` is a ``(chromosome, index)`` pair used to look the
    gene up in ``genes[esp].lstGenes``.
    """
    chrom, index = xxx_todo_changeme
    gene = genes[esp].lstGenes[chrom][index]
    return gene.names[0]


def do(node):
    for (e, _) in phylTree.items.get(node, []):
        res = {}
        seen = set([
            transformName(e, (c, i)) for (c, l) in genes[e].lstGenes.items()
コード例 #7
0
# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
arguments = myTools.checkArgs(
    [("phylTree.conf", myTools.File), ("ancGenome", myTools.File),
     ("target", str)],
    [("in:genesFiles", str, ""), ("withPos", bool, False)],
    __doc__
)

# loading species tree
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
# Extant species list to load
listSpecies = phylTree.getTargetsSpec(arguments["target"])
newlistSpecies = sorted(listSpecies)

# Header line: the fixed columns followed by one column per species, in
# sorted order.
print(myFile.myTSV.printLine(
    ["Anc_chr", "Begin", "End", "Strand", "AncGene", '\t'.join(newlistSpecies)]), file=sys.stdout)

ancGenome = myGenomes.Genome(arguments["ancGenome"])

genome = {}
for esp in listSpecies:
    # loading extant genome
    if phylTree.isChildOf(esp, arguments["target"]):
        genome[esp] = myGenomes.Genome(arguments["in:genesFiles"] % phylTree.fileName[esp])

# For every ancestral gene, map its first name to the remaining names.
desc = {}
for genes in ancGenome:
    strModern = collections.defaultdict(list)
    ancGene1 = genes.names
    desc[ancGene1[0]] = ancGene1[1:]
コード例 #8
0
#!/usr/bin/env python3
"""
	Convertit un genome (suite de diagonales) en genome (suite de genes)
"""

import sys

import itertools
from LibsDyogen import myTools, myGenomes

# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
arguments = myTools.checkArgs(
    [("contigsFile", myTools.File), ("ancGenesFile", myTools.File)],
    [], __doc__)

ancGenes = myGenomes.Genome(arguments["ancGenesFile"])

# The contigs file is loaded with the ancestral genes passed as `ancGenes`.
genome = myGenomes.Genome(arguments["contigsFile"], ancGenes=ancGenes)

# Write the resulting genome on standard output.
genome.printEnsembl(sys.stdout)
コード例 #9
0
"""
	Compute the conservation of each adjacency between extant and ancestral genome
	Usage:
		./cmpIntervals.py ../data/ancGenomes/genome.Boreoeutheria.list.bz2 ../data/genes/genesST.Homo.sapiens.list.bz2
"""

import sys
import itertools

from LibsDyogen import myTools, myGenomes

# Arguments:
# `file` was a Python 2 builtin and does not exist in Python 3 (NameError at
# start-up); myTools.File is the argument type used for input files.
arguments = myTools.checkArgs(
    [("ancGenome", myTools.File), ("modernGenome", myTools.File)],
    [("minimalLength", int, 0)], __doc__)

ancGenome = myGenomes.Genome(arguments["ancGenome"])
genome = myGenomes.Genome(arguments["modernGenome"])


# Genome rewriting
def rewriteGenome(genome):
    """Return ``{chromosome: [(first gene name, strand), ...]}`` for ``genome``.

    Only the contigs listed as Chromosome or Scaffold in ``genome.chrList``
    are considered, and only those holding at least
    ``abs(arguments["minimalLength"])`` genes are kept.
    """
    contigs = (genome.chrList[myGenomes.ContigType.Chromosome] +
               genome.chrList[myGenomes.ContigType.Scaffold])
    return {
        chrom: [(gene.names[0], gene.strand)
                for gene in genome.lstGenes[chrom]]
        for chrom in contigs
        if len(genome.lstGenes[chrom]) >= abs(arguments["minimalLength"])
    }

コード例 #10
0
    def drawMatrix():
        """Draw the matrix of orthologues between the two genomes.

        The drawing is emitted through ``myPsOutput`` and ``print``; progress
        marks are written on standard error. The function relies on names of
        the enclosing scope: ``arguments``, ``genome1``, ``genome2``,
        ``genesAnc``, ``chr1``, ``chr2``, ``table12`` and ``table21``.
        """
        # Matrix

        print("Display ", end=' ', file=sys.stderr)

        # Optionally order both chromosome lists by decreasing number of genes.
        if arguments["sortBySize"]:
            chr1.sort(key=lambda c: len(genome1.lstGenes[c]), reverse=True)
            chr2.sort(key=lambda c: len(genome2.lstGenes[c]), reverse=True)

        myPsOutput.printPsHeader()
        # Fill the background only when a colour was requested.
        if arguments["ps:backgroundColor"] != "":
            myPsOutput.drawBox(0, 0, 21, 29.7, arguments["ps:backgroundColor"],
                               arguments["ps:backgroundColor"])
        sys.stderr.write('.')
        # Optional colour file, loaded as a genome; None when not provided.
        colors = myGenomes.Genome(
            arguments["matrix:colorFile"]
        ) if arguments["matrix:colorFile"] != "" else None

        # Initialisation
        # nb is the total number of entries of table12; it raises
        # ZeroDivisionError below when table12 holds no entry at all.
        nb = sum([len(table12[c]) for c in table12])
        scaleX = 19. / float(nb)
        # The second axis has its own scale only with matrix:scaleY, otherwise
        # both axes share scaleX.
        scaleY = 19. / float(sum([len(table21[c]) for c in table21
                                  ])) if arguments["matrix:scaleY"] else scaleX
        # A negative matrix:pointSize means "use one X unit" as point size.
        dp = scaleX if arguments["matrix:pointSize"] < 0 else arguments[
            "matrix:pointSize"]
        sys.stderr.write('.')

        def prepareGenome(dicOrthos, lst, func):
            # Walks the chromosomes of `lst` in order. For each one, `func`
            # receives the chromosome, the cumulated number of entries before
            # it and its own number of entries. Returns a dict mapping
            # (chromosome, gene) to a running index over all chromosomes.
            i = 0
            y = 0
            lstNum = {}
            for c in lst:
                func(c, y, len(dicOrthos[c]))
                y += len(dicOrthos[c])
                for (gene, _) in dicOrthos[c]:
                    lstNum[(c, gene)] = i
                    i += 1
            # Final call without a chromosome: draws the closing separator.
            func(None, y, None)
            return lstNum

        # Total number of entries of table21 scaled by scaleY, used as the
        # fourth drawLine argument in line1.
        dl1 = float(sum([len(table21[c]) for c in table21])) * scaleY

        def line1(c, x, l):
            # Separator at offset x on the first axis, plus the chromosome
            # name centred on its span when a chromosome is given.
            myPsOutput.drawLine(1 + x * scaleX, 1, 0, dl1,
                                arguments["matrix:penColor"])
            if c:
                myPsOutput.drawText(1 + (x + l / 2) * scaleX, 0.7, c,
                                    arguments["matrix:penColor"])

        def line2(c, x, l):
            # Same as line1 for the second axis; the label is written between
            # a "90 rotate" / "-90 rotate" pair.
            myPsOutput.drawLine(1, 1 + x * scaleY, 19, 0,
                                arguments["matrix:penColor"])
            if c:
                print("90 rotate")
                myPsOutput.drawText(1 + (x + l / 2) * scaleY, -0.9, c,
                                    arguments["matrix:penColor"])
                print("-90 rotate")

        lstNum1 = prepareGenome(table12, chr1, line1)
        sys.stderr.write('.')
        lstNum2 = prepareGenome(table21, chr2, line2)
        sys.stderr.write('.')

        print("0 setlinewidth")

        # One box per (gene of genome1, orthologue in genome2) pair.
        for c1 in table12:
            for (i1, t) in table12[c1]:
                xx = 1 + float(lstNum1[(c1, i1)]) * scaleX
                for (c2, i2) in t:

                    coul = arguments["matrix:defaultColor"]
                    if colors is not None:
                        # Positions, in the colour file, of the names of both
                        # genes ...
                        tmp = set(
                            colors.getPosition(genome1.lstGenes[c1][i1].names +
                                               genome2.lstGenes[c2][i2].names))
                        # ... and of the names of the genesAnc entries that
                        # those names map to.
                        for (c, i) in genesAnc.getPosition(
                                genome1.lstGenes[c1][i1].names +
                                genome2.lstGenes[c2][i2].names):
                            tmp.update(
                                colors.getPosition(
                                    genesAnc.lstGenes[c][i].names))
                        # The first field of an arbitrary matching position is
                        # used as colour (set.pop gives no ordering guarantee).
                        if len(tmp) > 0:
                            coul = tmp.pop()[0]

                    yy = 1 + lstNum2[(c2, i2)] * scaleY
                    myPsOutput.drawBox(xx, yy, dp, dp, coul, coul)

        # Axis titles: the two genome arguments, swapped with `reverse`.
        myPsOutput.drawText(
            4, 0.3, arguments["referenceGenome"] if arguments["reverse"] else
            arguments["studiedGenome"], arguments["matrix:penColor"])
        print("90 rotate")
        myPsOutput.drawText(
            4, -0.5, arguments["studiedGenome"] if arguments["reverse"] else
            arguments["referenceGenome"], arguments["matrix:penColor"])
        print("-90 rotate")
        myPsOutput.printPsFooter()
        print(" OK", file=sys.stderr)
コード例 #11
0
def main():
    """Compare two genomes through their orthologues and run the chosen mode.

    Reads the command-line options with ``myTools.checkArgs``, loads the
    orthologues list and the two genomes, selects the chromosomes to compare
    (``include*`` and ``minChrSize`` options), builds the orthologues tables
    in both directions and finally calls the nested function whose name is
    given by the ``mode`` option.
    """
    arguments = myTools.checkArgs([("studiedGenome", myTools.File),
                                   ("referenceGenome", myTools.File),
                                   ("orthologuesList", myTools.File)],
                                  [("includeGaps", bool, False),
                                   ("includeScaffolds", bool, True),
                                   ("includeRandoms", bool, False),
                                   ("includeNones", bool, False),
                                   ("reverse", bool, False),
                                   ("mode", modes, "drawMatrix"),
                                   ("orthoslist:fullgenenames", bool, False),
                                   ("orthoschr:minHomology", int, 90),
                                   ("minChrSize", int, 0),
                                   ("matrix:scaleY", bool, False),
                                   ("matrix:pointSize", float, -1),
                                   ("sortBySize", bool, False),
                                   ("matrix:colorFile", str, ""),
                                   ("matrix:defaultColor", str, "black"),
                                   ("matrix:penColor", str, "black"),
                                   ("karyo:landscape", bool, False),
                                   ("ps:backgroundColor", str, "")], __doc__)

    # Load the files
    genesAnc = myGenomes.Genome(arguments["orthologuesList"])
    genome1 = myGenomes.Genome(arguments["studiedGenome"], ancGenes=genesAnc)
    genome2 = myGenomes.Genome(arguments["referenceGenome"], ancGenes=genesAnc)
    if arguments["reverse"]:
        (genome1, genome2) = (genome2, genome1)

    # Chromosomes are always compared; the other contig types are opt-in.
    chr1 = []
    chr2 = []
    chr1.extend(genome1.chrList[myGenomes.ContigType.Chromosome])
    chr2.extend(genome2.chrList[myGenomes.ContigType.Chromosome])
    if arguments["includeScaffolds"]:
        chr1.extend(genome1.chrList[myGenomes.ContigType.Scaffold])
        chr2.extend(genome2.chrList[myGenomes.ContigType.Scaffold])
    if arguments["includeRandoms"]:
        chr1.extend(genome1.chrList[myGenomes.ContigType.Random])
        chr2.extend(genome2.chrList[myGenomes.ContigType.Random])
    if arguments["includeNones"]:
        chr1.extend(genome1.chrList[myGenomes.ContigType.none])
        chr2.extend(genome2.chrList[myGenomes.ContigType.none])
    print(len(chr1), len(chr2), file=sys.stderr)

    # Drop the contigs holding fewer genes than minChrSize.
    chr1 = [
        c for c in chr1 if len(genome1.lstGenes[c]) >= arguments["minChrSize"]
    ]
    chr2 = [
        c for c in chr2 if len(genome2.lstGenes[c]) >= arguments["minChrSize"]
    ]

    # Orthologues tables in both directions. As used below, each table maps a
    # chromosome to a list of (gene index, [(other chromosome, other index)]).
    table12 = genome1.buildOrthosTable(chr1, genome2, chr2,
                                       arguments["includeGaps"], genesAnc)
    table21 = genome2.buildOrthosTable(chr2, genome1, chr1,
                                       arguments["includeGaps"], genesAnc)

    #
    # Matrix of orthologs
    ######################################
    def drawMatrix():
        """Draw the matrix of orthologues between the two genomes."""

        print("Display ", end=' ', file=sys.stderr)

        # Optionally order both chromosome lists by decreasing number of genes.
        if arguments["sortBySize"]:
            chr1.sort(key=lambda c: len(genome1.lstGenes[c]), reverse=True)
            chr2.sort(key=lambda c: len(genome2.lstGenes[c]), reverse=True)

        myPsOutput.printPsHeader()
        if arguments["ps:backgroundColor"] != "":
            myPsOutput.drawBox(0, 0, 21, 29.7, arguments["ps:backgroundColor"],
                               arguments["ps:backgroundColor"])
        sys.stderr.write('.')
        # Optional colour file, loaded as a genome; None when not provided.
        colors = myGenomes.Genome(
            arguments["matrix:colorFile"]
        ) if arguments["matrix:colorFile"] != "" else None

        # Initialisation: scales derived from the total number of entries of
        # each table. The second axis has its own scale only with
        # matrix:scaleY.
        nb = sum([len(table12[c]) for c in table12])
        scaleX = 19. / float(nb)
        scaleY = 19. / float(sum([len(table21[c]) for c in table21
                                  ])) if arguments["matrix:scaleY"] else scaleX
        # A negative matrix:pointSize means "use one X unit" as point size.
        dp = scaleX if arguments["matrix:pointSize"] < 0 else arguments[
            "matrix:pointSize"]
        sys.stderr.write('.')

        def prepareGenome(dicOrthos, lst, func):
            # Walks the chromosomes of `lst` in order. For each one, `func`
            # receives the chromosome, the cumulated number of entries before
            # it and its own number of entries. Returns a dict mapping
            # (chromosome, gene) to a running index over all chromosomes.
            i = 0
            y = 0
            lstNum = {}
            for c in lst:
                func(c, y, len(dicOrthos[c]))
                y += len(dicOrthos[c])
                for (gene, _) in dicOrthos[c]:
                    lstNum[(c, gene)] = i
                    i += 1
            # Final call without a chromosome: draws the closing separator.
            func(None, y, None)
            return lstNum

        dl1 = float(sum([len(table21[c]) for c in table21])) * scaleY

        def line1(c, x, l):
            # Separator at offset x on the first axis, plus the chromosome
            # name centred on its span when a chromosome is given.
            myPsOutput.drawLine(1 + x * scaleX, 1, 0, dl1,
                                arguments["matrix:penColor"])
            if c:
                myPsOutput.drawText(1 + (x + l / 2) * scaleX, 0.7, c,
                                    arguments["matrix:penColor"])

        def line2(c, x, l):
            # Same as line1 for the second axis; the label is written between
            # a "90 rotate" / "-90 rotate" pair.
            myPsOutput.drawLine(1, 1 + x * scaleY, 19, 0,
                                arguments["matrix:penColor"])
            if c:
                print("90 rotate")
                myPsOutput.drawText(1 + (x + l / 2) * scaleY, -0.9, c,
                                    arguments["matrix:penColor"])
                print("-90 rotate")

        lstNum1 = prepareGenome(table12, chr1, line1)
        sys.stderr.write('.')
        lstNum2 = prepareGenome(table21, chr2, line2)
        sys.stderr.write('.')

        print("0 setlinewidth")

        # One box per (gene of genome1, orthologue in genome2) pair.
        for c1 in table12:
            for (i1, t) in table12[c1]:
                xx = 1 + float(lstNum1[(c1, i1)]) * scaleX
                for (c2, i2) in t:

                    coul = arguments["matrix:defaultColor"]
                    if colors is not None:
                        # Positions, in the colour file, of the names of both
                        # genes and of the genesAnc entries they map to.
                        tmp = set(
                            colors.getPosition(genome1.lstGenes[c1][i1].names +
                                               genome2.lstGenes[c2][i2].names))
                        for (c, i) in genesAnc.getPosition(
                                genome1.lstGenes[c1][i1].names +
                                genome2.lstGenes[c2][i2].names):
                            tmp.update(
                                colors.getPosition(
                                    genesAnc.lstGenes[c][i].names))
                        # The first field of an arbitrary matching position is
                        # used as colour.
                        if len(tmp) > 0:
                            coul = tmp.pop()[0]

                    yy = 1 + lstNum2[(c2, i2)] * scaleY
                    myPsOutput.drawBox(xx, yy, dp, dp, coul, coul)

        # Axis titles: the two genome arguments, swapped with `reverse`.
        myPsOutput.drawText(
            4, 0.3, arguments["referenceGenome"] if arguments["reverse"] else
            arguments["studiedGenome"], arguments["matrix:penColor"])
        print("90 rotate")
        myPsOutput.drawText(
            4, -0.5, arguments["studiedGenome"] if arguments["reverse"] else
            arguments["referenceGenome"], arguments["matrix:penColor"])
        print("-90 rotate")
        myPsOutput.printPsFooter()
        print(" OK", file=sys.stderr)

    #
    # Draw the karyotype of the first species according to the Chr colors of the second one
    ##############################################################################################
    def drawKaryotype():
        """Draw genome1's karyotype from the chromosome of each first orthologue."""
        (lx, ly) = myPsOutput.printPsHeader(arguments["karyo:landscape"])
        if arguments["ps:backgroundColor"] != "":
            myPsOutput.drawBox(0, 0, lx, ly, arguments["ps:backgroundColor"],
                               arguments["ps:backgroundColor"])

        # For each chromosome of genome1: one value per gene, the chromosome
        # of its first orthologue in genome2, or None without orthologue.
        data = []
        for c in chr1:
            newl = []
            for (_, val) in table12.get(c, []):
                if len(val) == 0:
                    newl.append(None)
                else:
                    newl.append(val[0][0])
            data.append((c, newl))

        print("Display ...", end=' ', file=sys.stderr)
        myKaryoDrawer.drawKaryo(data,
                                arguments,
                                x0=1,
                                y0=1,
                                lx=lx - 2,
                                ly=ly - 2,
                                bysize=arguments["sortBySize"])
        myPsOutput.printPsFooter()
        print("OK", file=sys.stderr)

    #
    # displays a tabular text with the number of orthologs for each pair of chromosomes
    ###################################################################################
    def printOrthologuesCount():
        """Print a table counting the orthologues for each chromosome pair."""
        print(myFile.myTSV.printLine([""] + chr2))
        for c1 in chr1:
            count = collections.defaultdict(int)
            for (_, t) in table12[c1]:
                for (c2, _) in t:
                    count[c2] += 1
            print(myFile.myTSV.printLine([c1] + [count[c2] for c2 in chr2]))

    #
    # For each gene of the first species, returns a list of orthologues in the second one
    ######################################################################################
    def printOrthologuesList():
        """Print one line per (gene of genome1, orthologue in genome2) pair."""
        def printGene(g):
            # Last field of a gene is its list of names: keep them all joined
            # by "/" or only the first one, depending on the option.
            s = list(g)
            s[-1] = "/".join(
                s[-1]) if arguments["orthoslist:fullgenenames"] else s[-1][0]
            return s

        for c1 in chr1:
            for (i1, t) in sorted(table12[c1]):
                g1 = genome1.lstGenes[c1][i1]
                for (c2, i2) in sorted(t):
                    print(
                        myFile.myTSV.printLine(
                            printGene(g1) +
                            printGene(genome2.lstGenes[c2][i2])))

    #
    # Displays the difference in gene contents
    ####################################################
    def printGeneDiff():
        """Print the groups of linked genes with a symbol for their arity."""
        def getGeneTxt(g):
            return "/".join(g.names) + ":%s:%d-%d:%d" % g[:4]

        # Link every gene with its orthologues, in both directions, so that
        # the combinator yields the groups of connected genes.
        combin = myTools.myCombinator()
        for c1 in table12:
            for (i1, t) in table12[c1]:
                combin.addLink([(1, c1, i1)] + [(2, c2, i2) for (c2, i2) in t])
        for c2 in table21:
            for (i2, t) in table21[c2]:
                combin.addLink([(2, c2, i2)] + [(1, c1, i1) for (c1, i1) in t])
        for g in combin:
            e1 = [
                getGeneTxt(genome1.lstGenes[c][i]) for (x, c, i) in g if x == 1
            ]
            e2 = [
                getGeneTxt(genome2.lstGenes[c][i]) for (x, c, i) in g if x == 2
            ]
            # Symbol describing how many genes of each genome are in the group.
            if len(e1) == 0:
                assert len(e2) == 1
                print("+", end=' ')
            elif len(e2) == 0:
                assert len(e1) == 1
                print("-", end=' ')
            elif (len(e1) == 1) and (len(e2) == 1):
                print("=", end=' ')
            elif (len(e1) > 1) and (len(e2) == 1):
                print("--", end=' ')
            elif (len(e1) == 1) and (len(e2) > 1):
                print("++", end=' ')
            else:
                print("**", end=' ')
            print(" ".join(e1 + e2))

    #
    # Displays rearrangements
    #########################
    def printOrthologousChrom():
        """Print, per chromosome of genome1, its main orthologous chromosomes.

        Chromosomes of genome2 are listed by decreasing number of orthologues
        until ``orthoschr:minHomology`` percent of the orthologues of the
        chromosome are covered.
        """
        for c1 in chr1:
            count = collections.defaultdict(int)
            for (_, t) in table12[c1]:
                for (c2, _) in t:
                    count[c2] += 1
            res = [c1]
            # Ascending order, so that pop() yields the largest count first.
            ranked = sorted(count.items(), key=operator.itemgetter(1))
            n = (sum(count.values()) *
                 arguments["orthoschr:minHomology"]) / 100
            # Stop when the target is reached or when no chromosome is left:
            # a threshold above 100 would otherwise pop from an empty list.
            while n > 0 and ranked:
                x = ranked.pop()
                res.append("%s (%d)" % x)
                n -= x[1]
            print(myFile.myTSV.printLine(res))

    # Explicit dispatch table instead of a lookup in locals(): the accepted
    # modes are exactly the functions defined above.
    dispatch = {
        "drawMatrix": drawMatrix,
        "drawKaryotype": drawKaryotype,
        "printOrthologuesCount": printOrthologuesCount,
        "printOrthologuesList": printOrthologuesList,
        "printGeneDiff": printGeneDiff,
        "printOrthologousChrom": printOrthologousChrom,
    }
    dispatch[arguments["mode"]]()