Beispiel #1
0
def main():
    """Command-line entry point: load protein trees, process them, output them.

    Configures logging, parses the command line, loads the species tree and
    the forest of protein trees, and runs ``process`` on the forest. With the
    ``fam`` option the processed forest is handed to ``extractGeneFamilies``;
    otherwise every processed tree is printed on standard output.
    """
    logging.basicConfig()
    logger.setLevel(logging.INFO)

    positional = [
        ("phylTree.conf", myTools.File),
        ("ensemblTree", myTools.File),
    ]
    optional = [
        ("flatten", bool, False),
        ("rebuild", bool, False),
        ("fam", bool, False),
        ("cutoff", str, "-1"),
        ("defaultFamName", str, "FAM%08d"),
        ("scoreMethod", int, [1, 2, 3]),
        ("newNodeID", float, 1e8),
        ("recurs", bool, False),
        ("indicator", bool, False),
        ("debug", bool, False),
    ]
    opts = myTools.checkArgs(positional, optional, __doc__)
    if opts['debug']:
        logger.setLevel(logging.DEBUG)

    # For the rebuild step.
    myProteinTree.nextNodeID = int(opts["newNodeID"])
    speciesTree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    lowScoreTest = setupScoring(speciesTree, opts["scoreMethod"], opts["cutoff"])

    forest = process(
        myProteinTree.loadTree(opts["ensemblTree"]),
        speciesTree,
        lowScoreTest,
        opts["defaultFamName"],
        opts["flatten"],
        opts["rebuild"],
        opts["recurs"],
        opts["indicator"],
    )

    if not opts["fam"]:
        for processedTree in forest:
            processedTree.printTree(sys.stdout)
        return

    # Will not work on previous versions of ToolsDyogen.
    from treeTools.ALL.extractGeneFamilies import extractGeneFamilies
    count, dupCount, geneFamilies = extractGeneFamilies(speciesTree, forest)
Beispiel #2
0
def main():
    """Print, for every chromosome with enough genes, its name and gene count.

    Chromosomes holding fewer genes than the ``minChrSize`` option are
    skipped; the others are written to standard output, one per line.
    """
    opts = myTools.checkArgs([("genesFiles", str)],
                             [("minChrSize", int, 1)],
                             __doc__)

    genesByChrom = myGenomes.Genome(opts["genesFiles"]).lstGenes

    for chromName, chromGenes in genesByChrom.items():
        nbGenes = len(chromGenes)
        if nbGenes < opts["minChrSize"]:
            continue
        print(chromName, nbGenes, file=sys.stdout)
def main():
    """Print every pair of species separated by each ancestor of the tree.

    For each ancestor, every unordered pair of its child branches is taken,
    and each combination of one species from the first branch with one species
    from the second is written as a tab-separated line:
    ``species1 <TAB> species2 <TAB> ancestor``.
    """
    opts = myTools.checkArgs([("phylTree.conf", myTools.File)], [], __doc__)

    speciesTree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    for anc in speciesTree.listAncestr:
        branches = [child for (child, _) in speciesTree.items[anc]]
        for (left, right) in itertools.combinations(branches, 2):
            leftSpecies = list(speciesTree.species[left])
            rightSpecies = list(speciesTree.species[right])
            for (sp1, sp2) in itertools.product(leftSpecies, rightSpecies):
                print(sp1, sp2, anc, sep="\t", file=sys.stdout)
def main():
    """Process the protein trees and extract the gene families from them.

    Loads the species tree, raises the interpreter recursion limit, seeds the
    node-ID counter of ``myProteinTree`` from the ``newNodeID`` option, then
    feeds the output of ``processTrees`` to ``extractGeneFamilies``.
    """
    positional = [("phylTree.conf", myTools.File),
                  ("ensemblTree", myTools.File)]
    optional = [("newNodeID", int, 10 ** 9),
                ("reuseNames", bool, False)]
    opts = myTools.checkArgs(positional, optional, __doc__)

    speciesTree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])
    setrecursionlimit(20000)
    # Important: the counter must be set before any tree is processed.
    myProteinTree.nextNodeID = opts["newNodeID"]

    processedTrees = processTrees(opts["ensemblTree"], speciesTree)
    count, dupCount, geneFamilies = extractGeneFamilies(
        speciesTree, processedTrees, opts["reuseNames"])
Beispiel #5
0
def main():
    """Print the lineage of one gene inside the forest of protein trees.

    The trees are scanned in order until one contains a leaf whose
    ``gene_name`` equals the requested name. The leaf is printed first, then
    each of its ancestors from the closest one up to the root of that tree,
    one tab-separated line per node. Scanning stops at the first match.
    """
    opts = myTools.checkArgs([("proteinTree", myTools.File),
                              ("gene_name", str)], [], __doc__)

    def printAncNode(node):
        """Print one line describing an internal node (fields are popped)."""
        info = tree.info[node]
        dupFlag = info.pop('Duplication', None)
        if info.pop("dubious_duplication", None):
            eventType = "DUBIOUS_DUPLICATION"
        elif (dupFlag == 1) and ("duplication_confidence_score" in info):
            eventType = "ROOT_DUPLICATION"
        elif dupFlag == 2:
            eventType = "DUPLICATION"
        elif dupFlag == 3:
            eventType = "EDITED_DUPLICATION"
        else:
            eventType = "SPECIATION"
        row = [node, eventType]
        # The fields are removed from the node info in this exact order.
        for field in ("taxon_name", "family_name", "Bootstrap",
                      "duplication_confidence_score"):
            row.append(info.pop(field, None))
        print(myFile.myTSV.printLine(row))

    def printGeneNode(node):
        """Print one line describing a leaf (gene) node."""
        info = tree.info[node]
        row = [node, "GENE"]
        row.append(info.pop("taxon_name", None))
        row.append(info.pop("gene_name", None))
        print(myFile.myTSV.printLine(row))

    def do(node):
        """Depth-first search; return True once the target gene is found."""
        if node not in tree.data:
            # Leaf: compare its gene name with the requested one.
            if tree.info[node]["gene_name"] == opts["gene_name"]:
                printGeneNode(node)
                return True
            return False
        for (child, _) in tree.data[node]:
            if do(child):
                printAncNode(node)
                return True
        return False

    # The closures above read the current tree through this loop variable.
    for tree in myProteinTree.loadTree(opts["proteinTree"]):
        if do(tree.root):
            break
Beispiel #6
0
def main():
    """Convert the mRNA features of a GFF3 file into a tab-separated gene list.

    Each feature line of type ``mRNA`` is printed on standard output as
    ``seqid, start, end, strand, ID`` where the strand is rewritten to ``1``
    for ``+`` and ``-1`` for anything else.
    """
    # `file` was a Python 2 builtin and no longer exists; the file-path
    # argument type used by the other scripts is myTools.File.
    arguments = myTools.checkArgs([("gffFile", myTools.File)], [], __doc__)

    gff = Gff3(arguments["gffFile"])
    genes = [
        line for line in gff.lines
        if line['line_type'] == 'feature' and line['type'] == 'mRNA'
    ]

    for gene in genes:
        # The strand is normalised in place on the parsed line.
        if gene['strand'] == "+":
            gene['strand'] = '1'
        else:
            gene['strand'] = '-1'
        print(myFile.myTSV.printLine([
            gene['seqid'], gene['start'], gene['end'], gene['strand'],
            gene['attributes']['ID']
        ]),
              file=sys.stdout)
Beispiel #7
0
def main():
    """Print the species tree either as an indented list or as a flat string.

    With ``fromNewick`` set (the default) the tree is written one node per
    line, indented with tabs according to depth, each node followed by its
    common names joined with ``|``. Otherwise a parenthesised, Newick-like
    string is printed, followed by ``;``.
    """
    opts = myTools.checkArgs([("phylTree.conf", myTools.File)],
                             [("fromNewick", bool, True)], __doc__)

    speciesTree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    def printIndented(rawName, depth):
        """Print a node and, recursively, its children one level deeper."""
        name = rawName.replace("*", "")
        aliases = [
            alias for alias in speciesTree.commonNames.get(name, "")
            if isinstance(alias, str) and (alias != name)
        ]
        label = myFile.myTSV.printLine([name] + aliases, delim="|")
        print(("\t" * depth) + "%s" % label)
        if name in speciesTree.items:
            for (child, _) in speciesTree.items[name]:
                printIndented(child, depth + 1)

    def toFlatString(anc):
        """Return the parenthesised representation of the subtree at anc."""
        label = speciesTree.fileName[anc]
        if anc in speciesTree.listSpecies:
            return label
        subtrees = ",".join(
            toFlatString(child) + ":" + str(length)
            for (child, length) in speciesTree.items[anc])
        return "(" + subtrees + ")%s|%d" % (label, speciesTree.ages[anc])

    if opts["fromNewick"]:
        printIndented(speciesTree.root, 0)
    else:
        print(toFlatString(speciesTree.root), ";")
Beispiel #8
0
def main():
    """Extract the gene families of every ancestor and optionally save them.

    The species tree and the protein trees are loaded and handed to
    ``extractGeneFamilies``. When the ``out:ancGenesFiles`` template is not
    empty, one file per ancestor is written (the template is filled with the
    ancestor's file name), holding one space-separated family per line.
    Progress messages go to standard error.
    """
    # Arguments
    arguments = myTools.checkArgs([("phylTree.conf", myTools.File),
                                   ("proteinTree", myTools.File)],
                                  [("out:ancGenesFiles", str, ""),
                                   ("reuseNames", bool, False)],
                                  __doc__)

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
    proteinTrees = myProteinTree.loadTree(arguments["proteinTree"])

    count, dupCount, geneFamilies = extractGeneFamilies(phylTree,
                                                        proteinTrees,
                                                        arguments["reuseNames"])

    outTemplate = arguments["out:ancGenesFiles"]
    if not outTemplate:
        return

    for (anc, lst) in geneFamilies.items():
        print("Ecriture des familles de %s ..." % anc, end=' ', file=sys.stderr)
        # The context manager closes the file even if a write fails.
        with myFile.openFile(outTemplate % phylTree.fileName[anc], "w") as f:
            for gg in lst:
                print(" ".join(gg), file=f)
        print(len(lst), "OK", file=sys.stderr)
Beispiel #9
0
def main():
    """Cut each protein tree down to the subtrees rooted under a species.

    For every tree, the topmost nodes whose taxon satisfies
    ``phylTree.isChildOf(taxon, rootSpecies)`` are collected and each one is
    printed as a tree of its own. A single hit inherits the original tree
    name; several hits get the original name plus a duplication suffix. The
    total number of extracted trees is reported on standard error.
    """
    opts = myTools.checkArgs([("phylTree.conf", myTools.File),
                              ("iniTree", myTools.File),
                              ("rootSpecies", str)], [], __doc__)

    speciesTree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    def search(node):
        """Return the topmost matching nodes found at or below node."""
        taxon = tree.info[node]['taxon_name']
        if speciesTree.isChildOf(taxon, opts["rootSpecies"]):
            return [node]
        if node not in tree.data:
            return []
        found = []
        for (child, _) in tree.data[node]:
            found += search(child)
        return found

    nbExtracted = 0
    # `search` reads the current tree through this loop variable.
    for tree in myProteinTree.loadTree(opts["iniTree"]):
        newRoots = search(tree.root)
        nbExtracted += len(newRoots)
        if len(newRoots) == 1:
            (onlyRoot,) = newRoots
            tree.info[onlyRoot]["tree_name"] = tree.info[tree.root]["tree_name"]
            myProteinTree.printTree(sys.stdout, tree.data, tree.info, onlyRoot)
            continue
        for (rank, newRoot) in enumerate(newRoots, start=1):
            suffix = myProteinTree.getDupSuffix(rank, True)
            tree.info[newRoot]["tree_name"] = (
                tree.info[tree.root]["tree_name"] + suffix)
            myProteinTree.printTree(sys.stdout, tree.data, tree.info, newRoot)

    print(nbExtracted, "extracted trees", file=sys.stderr)
#!/usr/bin/env python3
"""
	Renvoie un arbre phylogenetique des especes avec les valeurs medianes issues des arbres de proteines
"""

import collections
import sys

from LibsDyogen import myTools, myPhylTree, myProteinTree

# Both positional arguments are file paths. `file` was a Python 2 builtin
# that no longer exists, so myTools.File is used as the argument type.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File),
                               ("proteinTree", myTools.File)],
                              [], __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

# Maps a (parent taxon, child taxon) pair to the list of collected lengths.
lengths = collections.defaultdict(list)


# Inspect one node of the gene family
def do(node):
    """Record the branch lengths between node and its speciation children.

    Reads the module-level ``tree``, ``phylTree`` and ``lengths``. As written
    here, only the direct children of ``node`` are examined.
    """
    print("NEW TREE", file=sys.stderr)
    if node not in tree.data:
        return
    parentTaxon = tree.info[node]['taxon_name']
    for (child, dist) in tree.data[node]:
        # A distance can only be taken between two speciation nodes
        bothSpeciations = (tree.info[node]['Duplication'] == 0
                           and tree.info[child]['Duplication'] == 0)
        if not bothSpeciations:
            continue
        childTaxon = tree.info[child]['taxon_name']
        # The two taxa must be strictly consecutive in the species tree
        if (phylTree.parent[childTaxon].name == parentTaxon) and (dist != 0):
            lengths[(parentTaxon, childTaxon)].append(dist)
#!/usr/bin/env python3
"""
	Renvoie les listes des devenirs de chaque gene le long des branches de l'arbre phylogenetique
"""

import sys

from LibsDyogen import myFile, myMaths, myTools, myGenomes, myPhylTree

# Arguments
# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File),
                               ("genesFile", str),
                               ("ancGenesFile", str)], [], __doc__)

# Load all the files: one genome per extant species and one per ancestor.
# Both file arguments are templates filled with the species' file name.
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
genes = {}
for e in phylTree.listSpecies:
    genes[e] = myGenomes.Genome(arguments["genesFile"] % phylTree.fileName[e])
for a in phylTree.listAncestr:
    genes[a] = myGenomes.Genome(arguments["ancGenesFile"] %
                                phylTree.fileName[a])


def transformName(esp, xxx_todo_changeme):
    """Return the printable name of a gene given as a (chromosome, index) pair.

    When ``esp`` is a key of ``phylTree.items`` the index alone is returned;
    otherwise the result is the string ``"<chromosome>|<index>"``.
    """
    chrom, index = xxx_todo_changeme
    if esp not in phylTree.items:
        return "|".join((str(chrom), str(index)))
    return index

Beispiel #12
0
def main():
    """Read every tree of the input file and discard it.

    Nothing is printed: the loop only drives ``myProteinTree.loadTree`` to
    the end of the file (presumably as a format sanity check; confirm with
    the script's docstring).
    """
    arguments = myTools.checkArgs([("iniTree", myTools.File)], [], __doc__)

    for tree in myProteinTree.loadTree(arguments["iniTree"]):
        # The original body was a bare `next`, which merely names the builtin
        # and does nothing; `pass` states the intent explicitly.
        pass
#!/usr/bin/env python3
"""
	Renvoie un tableau de statistiques de rearrangement le long des branches et un arbre de especes tenant compte de ces rearrangements.
"""

import sys

from LibsDyogen import myFile, myMaths, myTools, myGenomes, myPhylTree

# Arguments: one mandatory species-tree file, then optional input templates,
# output file names and flags.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File)],
                              [("onlyOrthos", bool, False),
                               ("in:genesFiles", str, ""),
                               ("in:ancGenesFiles", str, ""),
                               ("in:diagsFiles", str, ""),
                               ("out:treeFile", str, "out.nwk"),
                               ("out:statFile", str, "out.txt"),
                               ("colNames", bool, True)], __doc__)

# Load all the files
###################################
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

genes = {}
diags = {}
dicDiags = {}

for e in phylTree.listSpecies:
    # Genes of the modern (extant) species; the option is a template filled
    # with the species' file name.
    genes[e] = myGenomes.Genome(arguments["in:genesFiles"] %
                                phylTree.fileName[e])
#!/usr/bin/env python3
"""
	Renvoie les listes des devenirs de chaque gene le long des branches de l'arbre phylogenetique
"""

import sys
import collections

from LibsDyogen import myDiags, myMaths, myTools, myGenomes, myPhylTree

# Arguments
# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type. Backslash continuations are not needed inside
# parentheses.
arguments = myTools.checkArgs(
    [("phylTree.conf", myTools.File)],
    [("IN.genesFile", str, ""), ("IN.ancGenesFile", str, ""), ("IN.diagsFile", str, "")],
    __doc__
)

# Load all the files
###################################
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

genes = {}
diags = {}
dicDiags = {}

for e in phylTree.listSpecies:
    # Genes of the modern (extant) species
    genes[e] = myGenomes.Genome(arguments["IN.genesFile"] %
                                phylTree.fileName[e])
    # One single-element block per gene index of that genome.
    diags[e] = [[g] for g in range(len(list(genes[e])))]
#! /usr/bin/env python
"""
        From a species tree, print the number of extant species and ancestors.
	Optional: print the list of species
	
	Usage:	getInfoOnSpeciesTree.py PhylTree.conf
		getInfoOnSpeciesTree.py PhylTree.conf +speciesList +ancList
"""

import sys

from LibsDyogen import myFile, myTools, myPhylTree

arguments = myTools.checkArgs([("phylTree.conf", myTools.File)],
                              [("speciesList", bool, False),
                               ("ancList", bool, False)], __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

# Optional listings: the names on one comma-separated line each.
if arguments["speciesList"]:
    print("Extant Species:",
          ",".join(phylTree.listSpecies),
          file=sys.stdout)
if arguments["ancList"]:
    print(file=sys.stdout)
    print("Ancestral Species:",
          ",".join(phylTree.listAncestr),
          file=sys.stdout)

# The counts are always printed. The second label was misspelled
# ("Ancetral"); it now matches the label used for the listing above.
print(file=sys.stdout)
print("Extant Species:", len(phylTree.listSpecies), file=sys.stdout)
print("Ancestral Species:", len(phylTree.listAncestr), file=sys.stdout)
Beispiel #16
0
#!/usr/bin/env python3

import sys
import collections

from LibsDyogen import myFile, myTools, myGenomes

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([
    ("genesFile", myTools.File), ("transcriptsCoords", myTools.File)
], [
    ("useShortestTranscript", bool, True), ("sortOn5", bool, True),
    ("authorizedBiotypes", str, "protein_coding")
], "Cree une liste ordonnee des genes en tenant compte du plus petit transcrit"
                              )

genome = myGenomes.Genome(arguments["genesFile"])
biotypes = set(arguments["authorizedBiotypes"].split(","))

# Load the list of transcripts: rows whose last column is an authorized
# biotype are grouped by their first column, keeping
# (int(column 2), int(column 3), column 1) for each.
lstTrans = collections.defaultdict(list)
f = myFile.myTSV.reader(arguments["transcriptsCoords"])
try:
    for l in f.csvobject:
        if l[-1] in biotypes:
            lstTrans[l[0]].append((int(l[2]), int(l[3]), l[1]))
finally:
    # Close the underlying file even when a row cannot be parsed.
    f.file.close()

for chrom in genome.lstGenes:

    # Creation de la liste a trier
    tmp = []
    for gene in genome.lstGenes[chrom]:
	Run the XMLfile BIOMART Query
	Usage:
		./ENSEMBL.biomartQuery.py XMLfiles/BIOMART.HumanProteinCodingGene.xml   -> will generate output.txt
		./ENSEMBL.biomartQuery.py XMLfiles/BIOMART.HumanProteinCodingGene.xml -outputFileName=HumanProteinCodingGene.txt
"""

from __future__ import print_function

import sys
import urllib.request, urllib.parse, urllib.error

from LibsDyogen import myFile, myTools

# Arguments: the XML query file, then the server URL and the output file name.
arguments = myTools.checkArgs(
    [("xmlRequest", myTools.File)],
    [("biomartServer", str, "http://www.ensembl.org/biomart/martservice"),
     ("outputFileName", str, "output.txt")], __doc__)

# The request: the whole XML file is read into memory.
with myFile.openFile(arguments["xmlRequest"], "r") as f:
    request = f.read()

print("Downloading XML Query", end=' ', file=sys.stderr)
# The XML is sent url-encoded as the "query" field (passing `data` makes
# urlretrieve issue a POST) and the response is saved to the output file.
urllib.request.urlretrieve(arguments["biomartServer"],
                           filename=arguments["outputFileName"],
                           data=urllib.parse.urlencode({
                               "query": request
                           }).encode())
print("OK", file=sys.stderr)
#!/usr/bin/env python3
"""
	Read file of numbers and print statistics:
	Min  [Q25/Q50/Q75]  [N75/N50/N25]   Max   [Mean/Stddev-Length]
	
	
	Usage: ./printStats.py filename 
		./printStats.py filename +long +colNames
	
"""

from LibsDyogen import myFile, myMaths, myTools

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("file", myTools.File)],
                              [("long", bool, False),
                               ("colNames", bool, False)],
                              __doc__)

# Every whitespace-separated token of the file becomes one number: an int
# when it parses as one, a float otherwise (a token that is neither still
# raises ValueError).
lst = []
# The context manager closes the file even when a token cannot be parsed.
with myFile.openFile(arguments["file"], 'r') as f:
    for l in f:
        c = l.split()
        for x in c:
            try:
                x = int(x)
            except ValueError:
                x = float(x)
            lst.append(x)

# returns results
Beispiel #19
0
#! /usr/bin/env python3

"""
	Extract Newick or NHX trees from Phyltree protein trees

	usage:
		./ALL.extractNewickTrees.py GeneTrees.bz2 +withDist +withNHXTags +withAncSpeciesNames +withAncGenesNames
"""

# Librairies
import sys

import LibsDyogen.myTools       as myTools
import LibsDyogen.myProteinTree as myProteinTree

# Arguments: the protein-tree file plus four output-format switches.
arguments = myTools.checkArgs(
    [("proteinTree", myTools.File)],
    [
        ("withDist", bool, False),
        ("withNHXTags", bool, False),
        ("withAncSpeciesNames", bool, False),
        ("withAncGenesNames", bool, False),
    ],
    __doc__)

print("Mise en forme des arbres ...", end=' ', file=sys.stderr)

# Keyword arguments forwarded unchanged to every printNewick call.
newickOptions = {
    "withDist": arguments["withDist"],
    "withTags": arguments["withNHXTags"],
    "withAncSpeciesNames": arguments["withAncSpeciesNames"],
    "withAncGenesNames": arguments["withAncGenesNames"],
}

# Each tree is written to standard output; the count is reported at the end.
nb = 0
for tree in myProteinTree.loadTree(arguments["proteinTree"]):
    tree.printNewick(sys.stdout, **newickOptions)
    nb += 1
print("%d arbres OK" % nb, file=sys.stderr)

Beispiel #20
0
        Pere    Fils    Valeur(nbDup)
"""

# Librairies
import sys
import math

from LibsDyogen import myFile, myMaths, myTools, myPhylTree, myPsOutput

# Arguments
# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File)],
                              [("landscape", bool, False),
                               ("printSpecies", bool, True),
                               ("printAncestors", bool, True),
                               ("printAges", bool, False),
                               ("lengthFile", str, ""), ("colorFile", str, ""),
                               ("funcLength", str, ""), ("funcColor", str, ""),
                               ("root", str, ""), ("min", float, None),
                               ("max", float, None)], __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

# The header call returns the page width and height.
(largeur, hauteur) = myPsOutput.printPsHeader(landscape=arguments["landscape"])

# Use the requested root when it is an internal node of the tree, otherwise
# fall back to the root of the whole tree.
root = arguments["root"] if arguments[
    "root"] in phylTree.items else phylTree.root

# NOTE(security): the funcLength option is evaluated as Python source. This
# executes arbitrary code taken from the command line, so the script must
# never be fed arguments from an untrusted origin.
funcLength = (lambda x, a: x) if arguments["funcLength"] == "" else eval(
    arguments["funcLength"])
#funcColor = (lambda x, a: x) if arguments["funcColor"] == "" else eval(arguments["funcColor"])
# This is free software, you may copy, modify and/or distribute this work under the terms of the GNU General Public License, version 3 (GPL v3) or later and the CeCiLL v2 license in France
"""
        extract the ancGenes from the forest of gene trees
"""

import sys
import collections

from LibsDyogen import myFile
from LibsDyogen import myTools
from LibsDyogen import myPhylTree
from LibsDyogen import myProteinTree

# arguments: two mandatory files (species tree, forest of gene trees) and
# two options (output template, empty by default, and a name-reuse flag).
arguments = myTools.checkArgs([("speciesTree", myTools.File),
                               ("geneTreeForest", myTools.File)],
                              [("out:ancGenes", str, ""),
                               ("reuseNames", bool, False)], __doc__)

speciesTree = myPhylTree.PhylogeneticTree(arguments["speciesTree"])
# duplication counter: maps a name to the number of duplications counted so
# far for it; unseen names start at 0.
dupCount = collections.defaultdict(int)


def futureName(name, dup):
    """Return the name a gene takes after the event described by ``dup``.

    When ``dup`` is at least 2, the module-level counter ``dupCount[name]``
    is incremented and the suffix given by
    ``myProteinTree.getDupSuffix(dupCount[name], False)`` is appended to
    ``name``. For any other value ``name`` is returned unchanged and the
    counter is left alone.
    """
    if not dup >= 2:
        return name
    # A duplication: the name needs a suffix built from its updated count.
    dupCount[name] += 1
    suffix = myProteinTree.getDupSuffix(dupCount[name], False)
    return name + suffix
#!/usr/bin/env python3
"""
	Blocs de syntenie entre deux especes
"""

import sys

from LibsDyogen import myPhylTree, myGenomes, myFile, myTools, myMaths, myDiags

# Arguments
# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type. Backslash continuations are not needed inside
# parentheses.
modesOrthos = list(myDiags.OrthosFilterType._keys)
arguments = myTools.checkArgs(
    [("genome1", myTools.File), ("genome2", myTools.File), ("ancGenes", myTools.File)],
    [("fusionThreshold", int, -1), ("sameStrand", bool, True),
     ("orthosFilter", str, modesOrthos), ("minimalLength", int, 2)],
    __doc__
)

genome1 = myGenomes.Genome(arguments["genome1"])
genome2 = myGenomes.Genome(arguments["genome2"])
ancGenes = myGenomes.Genome(arguments["ancGenes"])
# Translate the chosen mode name back into the matching filter value.
orthosFilter = myDiags.OrthosFilterType[modesOrthos.index(
    arguments["orthosFilter"])]

# Lengths of the blocks that reach the minimal length.
statsDiags = []
for ((c1, d1), (c2, d2), daa) in myDiags.calcDiags(
        genome1, genome2, ancGenes,
        fusionThreshold=arguments["fusionThreshold"],
        sameStrand=arguments["sameStrand"],
        orthosFilter=orthosFilter,
        minChromLength=arguments["minimalLength"]):

    l = len(daa)
    if l < arguments["minimalLength"]:
        continue
    statsDiags.append(l)
#!/usr/bin/env python3
"""
Extrait (des genomes reels) la liste des evenements de
duplications/pertes/gains sur chaque branche de l'arbre
"""

from LibsDyogen import myMaths, myTools, myGenomes, myPhylTree

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File)],
                              [("rootSpecies", str, ""),
                               ("genesFile", str, ""),
                               ("ancGenesFile", str, "")], __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])


@myTools.memoize
def getGenome(e):
    """Load the genome of species or ancestor ``e`` (memoized).

    Names listed in ``phylTree.listSpecies`` use the ``genesFile`` template,
    all others the ``ancGenesFile`` template; the template is filled with
    ``phylTree.fileName[e]``.
    """
    option = "genesFile" if e in phylTree.listSpecies else "ancGenesFile"
    path = arguments[option] % phylTree.fileName[e]
    return myGenomes.Genome(path)


def transformName(esp, xxx_todo_changeme):
    """Return the first name of the gene at (chromosome, index) in ``esp``.

    The pair is unpacked first, then the gene is looked up in the genome
    returned by ``getGenome(esp)``.
    """
    chrom, index = xxx_todo_changeme
    gene = getGenome(esp).lstGenes[chrom][index]
    return gene.names[0]


def do(node):
#!/usr/bin/env python3
"""
	Convertit un genome (suite de diagonales) en genome (suite de genes)
"""

import sys

import itertools
from LibsDyogen import myTools, myGenomes

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("contigsFile", myTools.File),
                               ("ancGenesFile", myTools.File)],
                              [], __doc__)

ancGenes = myGenomes.Genome(arguments["ancGenesFile"])

# The contigs file is loaded with the ancestral genes passed as reference.
genome = myGenomes.Genome(arguments["contigsFile"], ancGenes=ancGenes)

genome.printEnsembl(sys.stdout)
#!/usr/bin/env python3
"""
	Renvoie pour chaque gene ancestral le decompte des evenements qu'il subit sur chaque branche
"""

import sys

from LibsDyogen import myTools, myGenomes, myPhylTree

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File),
                               ("rootSpecies", str)],
                              [("genesFiles", str, ""),
                               ("ancGenesFiles", str, ""),
                               ("countDup", bool, True),
                               ("countLoss", bool, True)], __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

# Load all the files: one genome per extant species and one per ancestor.
# For ancestors, the first name of every gene is also kept in `todo`.
genes = {}
todo = {}
for e in phylTree.listSpecies:
    genes[e] = myGenomes.Genome(arguments["genesFiles"] % phylTree.fileName[e])
for a in phylTree.listAncestr:
    genes[a] = myGenomes.Genome(arguments["ancGenesFiles"] %
                                phylTree.fileName[a])
    todo[a] = set(g.names[0] for g in genes[a])

# Union of the ancestral gene names; its size is reported on standard error.
allnames = set()
for a in phylTree.listAncestr:
    allnames.update(todo[a])
print(len(allnames), file=sys.stderr)
Beispiel #26
0
#!/usr/bin/env python3
"""
	Convertit un genome (scaffolds = suite de contigs) en genome (uniquement des contigs)
"""

import sys

from LibsDyogen import myDiags, myFile, myTools, myGenomes

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("scaffoldsFile", myTools.File),
                               ("contigsFile", myTools.File)],
                              [], __doc__)

(diags, singletons) = myDiags.loadIntegr(arguments["scaffoldsFile"])

# Index the raw lines of the contigs file by their 1-based line number.
ref = {}
# The context manager closes the file even if reading fails.
with myFile.openFile(arguments["contigsFile"], "r") as f:
    for (i, l) in enumerate(f):
        ref[i + 1] = l

for (chrom, weights) in diags:
    li = []
    ls = []
    lw = []
    n = 0
    for (i, (c, s)) in enumerate(chrom):
        t = ref.pop(c)[:-1].split("\t")
        if i >= 1:
            lw.append(weights[i - 1])
        n += len(t[2].split())
        if s > 0:
Beispiel #27
0
#!/usr/bin/env python3
"""
	Parcourt un fichier de genome et enleve les genes inclus dans un autre
"""

import sys
import collections
import itertools
import operator

from LibsDyogen import myFile, myTools, myGenomes

# Arguments
# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("genome", myTools.File)], [], __doc__)

genome = myGenomes.Genome(arguments["genome"])

for c in genome.lstGenes:

    # The same genes ordered by their start coordinate ...
    lref = list(genome.lstGenes[c])
    lref.sort(key=operator.attrgetter("beginning"))

    # ... and by their end coordinate.
    lnew = list(genome.lstGenes[c])
    lnew.sort(key=operator.attrgetter("end"))

    # Genes that sit at the same rank in both orders but differ are linked.
    comb = myTools.myCombinator()
    for (g1, g2) in zip(lref, lnew):
        if g1 != g2:
            comb.addLink([g1, g2])

    removed = set()
    Transform an ancestral genome in tabular format with descendant species genes (one column by species), with modern position or not

    usage:
        ./formatTabularAncGenome.py PhylTree.conf genome.Boreoeutheria.list.bz2 Boreoeutheria  -in:genesFiles=genes/genesST.%s.list.bz2 +withPos > genome.Boreoeutheria.WithDescendant.list
"""


import sys
import collections

from LibsDyogen import myFile, myPhylTree, myGenomes, myTools


# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs(
    [("phylTree.conf", myTools.File), ("ancGenome", myTools.File), ("target", str)],
    [("in:genesFiles", str, ""), ("withPos", bool, False)],
    __doc__
)

# loading species tree
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
# Extant species list to load
listSpecies = phylTree.getTargetsSpec(arguments["target"])
newlistSpecies = sorted(listSpecies)

# Header line: the fixed columns, then one column per species (sorted).
print(myFile.myTSV.printLine(
    ["Anc_chr", "Begin", "End", "Strand", "AncGene", '\t'.join(newlistSpecies)]), file=sys.stdout)

ancGenome = myGenomes.Genome(arguments["ancGenome"])

genome = {}
Beispiel #29
0
            elif unit == 'Mb':
                factor = 0.000001
            else:
                assert unit is None
                factor = 1
            res = '%1.0f%s' % (x * factor, unit if unit is not None else '')
        return res
    return f


# arguments
arguments = myTools.checkArgs(
    [
        ("genome", myTools.File),
    ],
    [
        ("removeUnofficialChrNames", bool, False),
        # Both options below carry a keyword (their defaults are strings and
        # the values are checked against sets of strings just after), so
        # they are declared as str rather than bool.
        ("orderChromosomesBy", str, 'names'),
        ('mode', str, 'distribOnChr')
    ],
    __doc__)
assert arguments['orderChromosomesBy'] in {'decreasingNbOfGenes', 'names'}
assert arguments['mode'] in {'geneLengths', 'distribOnChr', 'distribOnChrs', 'overlap',
                             'correlationChromNbGenes', 'longestIntergene', 'minGeneLength'}
# longestIntergene computes the longest intergene, i.e. the space between two genes.
# This gives the length of the longest rearrangement that may go unseen, telomeres excepted.

def readerDependingOnFileWithDebAndEnd(fileName):
        """Inspect the first line of ``fileName`` to recognise its column layout.

        The file is opened through ``myFile.firstLineBuffer`` and its first
        line is split on tabs. A six-column line is reported on standard
        error as the ``(c, beg, end, s, gName, transcriptName)`` layout.

        NOTE(review): only the six-column case is visible here and nothing is
        returned; the function looks truncated -- confirm against the full
        source.
        """
        flb = myFile.firstLineBuffer(myFile.openFile(fileName, 'r'))
        c = flb.firstLine.split("\t")
        if len(c) == 6:
            print("(c, beg, end, s, gName, transcriptName) -> (c, s, gName)", file=sys.stderr)
#!/usr/bin/python3
"""
Find Strong ancGenes Families 1-1 (no duplication, no loss in descendants).
"""

import sys

from LibsDyogen import myFile, myMaths, myTools, myPhylTree

# `file` was a Python 2 builtin that no longer exists; myTools.File is the
# file-path argument type.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File),
                               ("target", str),
                               ("IN.ancGenesFiles", str),
                               ("OUT.ancGenesFiles", str)],
                              [("except2XSpecies", bool, True)], __doc__)

phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
# Resolve the name given on the command line to its official name.
target = phylTree.officialName[arguments["target"]]

if arguments["except2XSpecies"] == "True":

    lstAncGenomes = [
        x for x in phylTree.listAncestr if
        phylTree.dicParents[x][target] == target and x not in phylTree.lstEsp2X
    ]
    lstModernGenomes = [
        x for x in phylTree.listSpecies if
        phylTree.dicParents[x][target] == target and x not in phylTree.lstEsp2X
    ]

else:
    lstAncGenomes = [
        x for x in phylTree.listAncestr