Python PhylogeneticTree Exemples, LibsDyogen.myPhylTree.PhylogeneticTree Python Exemples

Exemple #1

0

Afficher le fichier

def main():
    """Command-line entry point.

    Parses the arguments, loads the species tree, builds the low-score
    predicate with `setupScoring`, runs `process` on the loaded protein
    trees and finally either extracts gene families (option ``fam``) or
    prints every resulting tree on standard output.
    """
    logging.basicConfig()
    logger.setLevel(logging.INFO)

    required = [("phylTree.conf", myTools.File),
                ("ensemblTree", myTools.File)]
    optional = [
        ("flatten", bool, False),
        ("rebuild", bool, False),
        ("fam", bool, False),
        ("cutoff", str, "-1"),
        ("defaultFamName", str, "FAM%08d"),
        ("scoreMethod", int, [1, 2, 3]),
        ("newNodeID", float, 1e8),
        ("recurs", bool, False),
        ("indicator", bool, False),
        ("debug", bool, False),
    ]
    opts = myTools.checkArgs(required, optional, __doc__)
    if opts['debug']:
        logger.setLevel(logging.DEBUG)

    # For the rebuild step.
    myProteinTree.nextNodeID = int(opts["newNodeID"])
    species_tree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    low_score_test = setupScoring(species_tree,
                                  opts["scoreMethod"],
                                  opts["cutoff"])

    forest = myProteinTree.loadTree(opts["ensemblTree"])
    forest = process(forest, species_tree, low_score_test,
                     opts["defaultFamName"], opts["flatten"],
                     opts["rebuild"], opts["recurs"],
                     opts["indicator"])

    if not opts["fam"]:
        for tree in forest:
            tree.printTree(sys.stdout)
        return

    # Will not work on previous versions of ToolsDyogen.
    from treeTools.ALL.extractGeneFamilies import extractGeneFamilies
    count, dupCount, geneFamilies = extractGeneFamilies(species_tree, forest)

Exemple #2

0

Afficher le fichier

Fichier : reconciledtree2paralogytree.py Projet : DyogenIBENS/Phylorgs

def main(inputnwk,
         outputnwk,
         ensembl_version=ENSEMBL_VERSION,
         phyltreefile=PHYLTREEFILE,
         treebest=False,
         include_singleton_branches=False,
         include_singleton_root=False):
    """Write the paralogy trees built from one reconciled gene tree.

    Args:
        inputnwk: newick input handed to `ete3.Tree`; when falsy the newick
            text is read from standard input.
        outputnwk: output file name; when falsy, standard output is used.
        ensembl_version: value substituted into `phyltreefile` and passed to
            `buildparalogies`.
        phyltreefile: format string giving the species tree file.
        treebest: use `get_taxon_treebest` instead of `get_taxon`.
        include_singleton_branches: forwarded to `buildparalogies`.
        include_singleton_root: accepted for interface compatibility; not
            used in this function body.
    """
    phyltree = myPhylTree.PhylogeneticTree(
        phyltreefile.format(ensembl_version))

    # Ancestor names are tried longest first (presumably so that a name is
    # not shadowed by a shorter one it starts with). Spaces in taxon names
    # are matched as a literal dot.
    taxon_names = list(phyltree.listSpecies) + sorted(
        phyltree.listAncestr, key=len, reverse=True)
    ancgene2sp = re.compile(
        r'(root|' + '|'.join(taxon_names).replace(' ', r'\.') + r')(.*)$')

    genetree = ete3.Tree(inputnwk or stdin.read(), format=1)

    # Use a distinct local name: assigning to `get_taxon` here would make it
    # local to this function, and reading it in the `else` arm would raise
    # UnboundLocalError whenever `treebest` is false.
    taxon_getter = get_taxon_treebest if treebest else get_taxon

    out = open(outputnwk, 'w') if outputnwk else stdout
    try:
        for paralogy in buildparalogies(genetree, taxon_getter, ancgene2sp,
                                        ensembl_version,
                                        include_singleton_branches):
            outtext = paralogy.write(format=1,
                                     format_root_node=True,
                                     features=['S', 'D', 'P', 'A'])
            out.write(outtext + '\n')
    finally:
        # Only close what was opened here; standard output stays usable.
        if out is not stdout:
            out.close()

Exemple #3

0

Afficher le fichier

Fichier : pruneSpeciesFromForest.py Projet : DyogenIBENS/ToolsDyogen_py3

def main(forestfile, phyltreefile, speciesfile, invert=False):
    phyltree = myPhylTree.PhylogeneticTree(phyltreefile)
    with open(speciesfile) as f:
        badspecies = [line.rstrip() for line in f if not line.startswith('#')]

    subroot, subtree = phyltree.getSubTree(badspecies)

    for tree in myProteinTree.loadTree(forestfile):

Exemple #4

0

Afficher le fichier

def extractMultipleGeneTrees(proteinTree, family_name, field='family_name',
         toNewick=False, withAncSpeciesNames=False, withAncGenesNames=False,
         withTags=False, phyltree=None, output=None, force=False,
         mkdirs=False, firstmatch=False):
    """Extract the trees of the requested families from a protein-tree forest.

    A tree is selected when the part before the first '.' of its root's
    `field` value is one of `family_name`.

    Args:
        proteinTree: forest source passed to `myProteinTree.loadTree`.
        family_name: iterable of family names to extract.
        field: key of the root node info holding the family name.
        toNewick: print with `printNewick` instead of `printTree`.
        withAncSpeciesNames, withAncGenesNames: forwarded to `printNewick`.
        withTags: forwarded to `printNewick` as both `withTags` and `withID`.
        phyltree: species tree file; when given, each selected tree is
            rebuilt against it before printing.
        output: format string with a `{genetree}` field giving the output
            file of each family; standard output when not given.
        force: overwrite an output file that already exists.
        mkdirs: create the output directory when opening the file fails.
        firstmatch: keep only the first tree of each family and stop as soon
            as every family has been found.

    Further trees of an already written family are appended to its file.
    A warning listing the families never found is printed on stderr.
    """
    # Normalise to None so that a falsy value (e.g. '') can never be handed
    # to `rebuildTree` below.
    phyltree = myPhylTree.PhylogeneticTree(phyltree) if phyltree else None

    # family name -> number of trees already written for it.
    family_names = dict.fromkeys(family_name, 0)

    for tree in myProteinTree.loadTree(proteinTree):
        family = tree.info[tree.root][field].split('.')[0]
        if family in family_names:
            print("Found", family, end=' ', file=sys.stderr)
            wasfound = family_names[family]
            outfile = output.format(genetree=family) if output else '<stdout>'
            if os.path.isfile(outfile) and not wasfound and not firstmatch and not force:
                #FIXME so that you can omit the --force option but append to file
                print("%s exists. Skipping. (use --force)" % outfile, file=sys.stderr)
                family_names.pop(family)
            else:
                if phyltree is not None:
                    tree.rebuildTree(phyltree)
                #TODO: start in new thread.
                filemode = 'a' if wasfound else 'w'
                try:
                    out = open(outfile, filemode) if output else sys.stdout
                except IOError:
                    outdir = os.path.dirname(outfile)
                    if not (mkdirs and outdir):
                        raise
                    # exist_ok: if the directory is already there, the retry
                    # below re-raises the real cause of the failure.
                    os.makedirs(outdir, exist_ok=True)
                    out = open(outfile, filemode)

                try:
                    if toNewick:
                        print("Output to newick format", file=sys.stderr)
                        tree.printNewick(out, withDist=True, withTags=withTags,
                                         withAncSpeciesNames=withAncSpeciesNames,
                                         withAncGenesNames=withAncGenesNames,
                                         withID=withTags)
                    else:
                        tree.printTree(out)
                finally:
                    # Release the file even when printing fails; sys.stdout
                    # is never closed.
                    if output:
                        out.close()

                if firstmatch:
                    family_names.pop(family)
                else:
                    family_names[family] += 1
        if firstmatch and not family_names:
            break

    # Sorted so that the warning is reproducible from run to run.
    notfound = sorted(fam for fam, wasfound in family_names.items()
                      if not wasfound)
    if notfound:
        print('WARNING: %d names were not found in field %r: %s' % (
              len(notfound), field, ' '.join(notfound)), file=sys.stderr)

Exemple #5

0

Afficher le fichier

 def load_phyltree(self, phyltreefile=None, ensembl_version=None):
     """Load the species tree and order the taxa from oldest to youngest.

     Falsy arguments fall back to `self.phyltreefile` and
     `self.ensembl_version`. The tree is stored in `self.phyltree`;
     `self.taxa` and `self.taxa_evt` (keyed on their first element) are
     then sorted in place by decreasing `phyltree.ages`.
     """
     template = phyltreefile or self.phyltreefile
     version = ensembl_version or self.ensembl_version
     treepath = os.path.expanduser(template.format(version))
     self.phyltree = PhylTree.PhylogeneticTree(treepath)

     def neg_age(taxon):
         # Negated so that an ascending sort puts the largest age first.
         return -self.phyltree.ages[taxon]

     self.taxa.sort(key=neg_age)
     self.taxa_evt.sort(key=lambda taxon_evt: neg_age(taxon_evt[0]))

Exemple #6

0

Afficher le fichier

def time_fromspeciestreeIO(treefile, phyltreefile, outfile=None):
    """Apply `time_fromspeciestree` to every tree of a multi-newick file.

    Args:
        treefile: file whose lines are split into trees by
            `read_multinewick`.
        phyltreefile: species tree file given to
            `myPhylTree.PhylogeneticTree`.
        outfile: output file name; standard output when falsy.

    Each processed tree is written as one newick line (format 1, without
    root node formatting).
    """
    phyltree = myPhylTree.PhylogeneticTree(phyltreefile)
    with open(treefile) as f:
        lines = f.readlines()

    out = open(outfile, 'w') if outfile else stdout
    try:
        for treetxt in read_multinewick(lines):
            tree = ete3.Tree(treetxt, format=1)
            time_fromspeciestree(tree, phyltree)
            newick = tree.write(format=1, format_root_node=False)
            out.write(newick + '\n')
    finally:
        # Using `stdout` as a context manager would close it for the caller;
        # only close a file that was opened here.
        if out is not stdout:
            out.close()

Exemple #7

0

Afficher le fichier

def main(phyltreefile, genomequal, refphyltreefile=None, column_qual='Qual'):
    """Annotate a species tree with genome-quality groups and print it.

    The tab-separated `genomequal` table gives an integer quality
    (`column_qual`) per 'Species'. Species are dispatched into the
    `lstEspFull` (quality <= 3), `lstEsp6X` (quality 4) and `lstEsp2X`
    (anything above) attributes of the tree. When `refphyltreefile` is
    given, the string common names found in that reference tree replace
    the tree's `commonNames`. The result is printed as newick.
    """
    phyltree = myPhylTree.PhylogeneticTree(phyltreefile)
    if refphyltreefile:
        refphyltree = myPhylTree.PhylogeneticTree(refphyltreefile)

    species_qual = {}
    with open(genomequal, newline='') as gf:
        # Warning multiple subspecies per species with different qualities
        for row in csv.DictReader(gf, dialect='excel-tab'):
            species = row['Species'].rstrip().replace('.', ' ')
            species_qual[species] = int(row[column_qual])

    group_2x, group_6x, group_full = set(), set(), set()
    common = {}

    for sp in phyltree.listSpecies:
        q = species_qual.get(sp)
        if q is None:
            logger.error('No quality found for %s', sp)
            q = 1

        if q > 4:
            group_2x.add(sp)
        elif q > 3:
            group_6x.add(sp)
        else:
            group_full.add(sp)

        if not refphyltreefile:
            continue
        try:
            names = refphyltree.commonNames[sp]
            common[sp] = [n for n in names if isinstance(n, str)]
        except KeyError:
            logger.error('No %r common names found in reference tree', sp)

    phyltree.lstEsp2X = group_2x
    phyltree.lstEsp6X = group_6x
    phyltree.lstEspFull = group_full
    if refphyltreefile:
        phyltree.commonNames = common

    phyltree.printNewick(commonnames=True, symbols=True)

Exemple #8

0

Afficher le fichier

Fichier : printLastCommonAncestors.py Projet : DyogenIBENS/ToolsDyogen_py3

def main():
    """Print species pairs together with the ancestor that separates them.

    For every ancestor, each pair of its child branches is considered, and
    every combination of one species from the first branch with one species
    from the second is printed as ``species1<TAB>species2<TAB>ancestor``.
    """
    opts = myTools.checkArgs([("phylTree.conf", myTools.File)], [], __doc__)

    species_tree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    for anc in species_tree.listAncestr:
        children = [child for (child, _) in species_tree.items[anc]]
        for (left, right) in itertools.combinations(children, 2):
            left_species = list(species_tree.species[left])
            right_species = list(species_tree.species[right])
            for (sp1, sp2) in itertools.product(left_species, right_species):
                print("%s\t%s\t%s" % (sp1, sp2, anc), file=sys.stdout)

Exemple #9

0

Afficher le fichier

Fichier : Ensembl2GenomicusForest.py Projet : DyogenIBENS/ToolsDyogen_py3

def main():
    """Run `extractGeneFamilies` on the processed Ensembl forest.

    The species tree is loaded first, the recursion limit is raised to
    20000 and `myProteinTree.nextNodeID` is set from ``newNodeID`` before
    the trees are processed.
    """
    required = [("phylTree.conf", myTools.File),
                ("ensemblTree", myTools.File)]
    optional = [("newNodeID", int, int(1e9)),
                ("reuseNames", bool, False)]
    opts = myTools.checkArgs(required, optional, __doc__)

    species_tree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])
    setrecursionlimit(20000)
    # !important
    myProteinTree.nextNodeID = opts["newNodeID"]

    processed = processTrees(opts["ensemblTree"], species_tree)
    count, dupCount, geneFamilies = extractGeneFamilies(
        species_tree, processed, opts["reuseNames"])

Exemple #10

0

Afficher le fichier

Fichier : phyltree_prune_badqualspecies.py Projet : DyogenIBENS/Phylorgs

def main(phyltreefile, specieslistfile=None, reverse=False):
    """Prune a set of species from the species tree and print it as newick.

    The species to prune are read from `specieslistfile` (one per line),
    or default to the union of the tree's `lstEsp2X` and `lstEsp6X`.
    Listed names missing from the tree trigger a warning on stderr.
    With `reverse`, the complement of that set is pruned instead.
    """
    phyltree = myPhylTree.PhylogeneticTree(phyltreefile)

    if specieslistfile is not None:
        with open(specieslistfile) as f:
            badspeciesset = {line.rstrip() for line in f}
        unknown = badspeciesset - phyltree.listSpecies
        if unknown:
            print('WARNING: unknown species:', ', '.join(unknown),
                  file=stderr)
    else:
        badspeciesset = phyltree.lstEsp2X | phyltree.lstEsp6X

    if reverse:
        badspeciesset = phyltree.listSpecies - badspeciesset

    phyltree.pruneSpecies(badspeciesset, inplace=True)
    phyltree.printNewick(commonnames=True, symbols=True)

Exemple #11

0

Afficher le fichier

Fichier : phyltree.py Projet : DyogenIBENS/ToolsDyogen_py3

def main(phyltreefile, attr=None, keys=None):
    """Print an attribute of a species tree, optionally restricted to keys.

    Without `attr`, the non-callable public attributes of the tree are
    listed with their types. Scalar attributes (str, int, float) are
    printed as is. With `keys`, each key is looked up in the attribute
    (missing keys give an empty string); when the attribute cannot be
    indexed that way, membership of each key is reported instead. Several
    keys are printed as ``key<TAB>value`` lines, a single key prints its
    value like a whole attribute: sorted items, else sorted elements, else
    its type followed by its value.
    """
    phyltree = myPhylTree.PhylogeneticTree(phyltreefile)
    if not attr:
        listing = [
            '%-25s %s' % (name, type(getattr(phyltree, name)))
            for name in sorted(phyltree.__dict__)
            if not (name.startswith('_')
                    or callable(getattr(phyltree, name)))
        ]
        print('Available attributes:\n' + '\n'.join(listing))
        return

    value = getattr(phyltree, attr)
    if isinstance(value, (str, int, float)):
        print(value)
        return

    if keys:
        container = value

        def lookup(key):
            try:
                return container[key]
            except KeyError:
                return ''

        try:
            values = [(k, lookup(k)) for k in keys]
        except TypeError:
            # Not indexable with these keys: fall back to membership tests.
            values = [(k, k in container) for k in keys]

        if len(keys) > 1:
            print('\n'.join('%s\t%s' % item for item in values))
            return
        value = values[0][1]

    try:
        rows = ['%s\t%s' % (k, v) for k, v in sorted(value.items())]
        output = '\n'.join(rows)
    except (AttributeError, TypeError):
        try:
            output = '\n'.join(str(v) for v in sorted(value))
        except TypeError:
            output = '%s\n%s' % (type(value), value)

    print(output)

Exemple #12

0

Afficher le fichier

def detachAfter(phyltree, nodes):
    """Transform the given nodes into leaves (remove their subtrees).

    Works on deep copies of `phyltree.items` and `phyltree.officialName`
    and returns a new `PhylogeneticTree` built from them, after calling its
    `reinitTree()`.

    `nodes` may contain a node lying inside the subtree of another
    requested node: entries already removed are simply skipped.
    """
    items = deepcopy(phyltree.items)
    officialname = deepcopy(phyltree.officialName)

    for node in nodes:
        for descendant in phyltree.allDescendants[node]:
            if descendant != node:
                # The default keeps this idempotent: a descendant may have
                # been dropped already while handling another node.
                officialname.pop(descendant, None)
            # Also applied to `node` itself, which thereby loses its
            # children. Needed here because of an assertion test in
            # `reinitTree` that doesn't like disconnected nodes.
            items.pop(descendant, None)

    newtree = myPhylTree.PhylogeneticTree((items, phyltree.root, officialname))
    newtree.reinitTree()
    return newtree

Exemple #13

0

Afficher le fichier

Fichier : genetree_prune_species.py Projet : DyogenIBENS/Phylorgs

def main(treefile,
         speciesfile,
         ensembl_version,
         from_phyltree=False,
         outfile=None,
         keep_single_node_trees=False):
    """Thin every gene tree of a multi-newick file to a selection of species.

    Args:
        treefile: multi-newick file read with `read_multinewick`.
        speciesfile: with `from_phyltree`, a species tree file whose species
            are the ones to KEEP; otherwise a text file listing the species
            to REMOVE (lines starting with '#' are ignored).
        ensembl_version: forwarded to `ultimate_seq2sp` to map a leaf name
            to its species.
        from_phyltree: selects the meaning of `speciesfile` (see above).
        outfile: output file name; standard output when None.
        keep_single_node_trees: also output trees of length 1.

    Nothing is written (and `outfile` is not created) when no tree is kept.
    """
    if from_phyltree:
        phyltree = myPhylTree.PhylogeneticTree(speciesfile)
        specieslist = phyltree.listSpecies
    else:
        with open(speciesfile) as f:
            specieslist = [
                line.rstrip() for line in f if not line.startswith('#')
            ]

    # Listed species are kept with `from_phyltree`, dropped otherwise.
    keep_listed = bool(from_phyltree)
    outtrees = []

    with open(treefile) as newick:
        for treetxt in read_multinewick(newick):
            tree = ete3.Tree(treetxt, format=1)
            keptleaves = [
                leaf for leaf in tree.iter_leaves()
                if (ultimate_seq2sp(leaf.name, ensembl_version)
                    in specieslist) == keep_listed
            ]
            newtree = thin(tree, keptleaves)
            if newtree and (len(newtree) > 1 or keep_single_node_trees):
                # NOTE(review): this serialises `tree`, not `newtree`; that
                # is only correct if `thin` prunes `tree` in place -- confirm.
                outtrees.append(tree.write(outfile=None,
                                           format=1,
                                           format_root_node=True))

    if not outtrees:
        # Previously `outfile.close()` was attempted on the file *name* here.
        return

    text = '\n'.join(outtrees)
    if outfile is None:
        print(text)
    else:
        with open(outfile, 'w') as out:
            print(text, file=out)

Exemple #14

0

Afficher le fichier

def main(conversionfile, treefile=None, parser='ete3'):
    """Apply `rename` to a tree with a conversion table and print it as newick.

    Args:
        conversionfile: passed to `load_conversion`.
        treefile: tree input; when None, standard input is used (its text
            for the 'ete3' parser, the stream itself for 'myPhylTree').
        parser: 'ete3' or 'myPhylTree'.

    Raises:
        ValueError: if `parser` is not one of the supported names.
    """
    # Validate first: an unknown parser used to fall through both branches
    # and fail later on the unbound `tree` variable.
    if parser not in ('ete3', 'myPhylTree'):
        raise ValueError("Unknown parser %r (expected 'ete3' or 'myPhylTree')"
                         % (parser,))

    conversion = load_conversion(conversionfile)

    if parser == 'ete3':
        if treefile is None:
            treefile = stdin.read()
        tree = ete3.Tree(treefile, format=1)
    else:
        # Imported lazily so that the 'ete3' path does not need LibsDyogen.
        from LibsDyogen import myPhylTree
        if treefile is None:
            treefile = stdin
        tree = PhylTree_to_ete3(myPhylTree.PhylogeneticTree(treefile),
                                nosinglechild=False)

    rename(tree, conversion)

    print(tree.write(format=1, format_root_node=True))

Exemple #15

0

Afficher le fichier

Fichier : ProtTree_prune_species.py Projet : DyogenIBENS/Phylorgs

def main(phyltreefile, forestfile=None):
    """Thin each protein tree down to the taxa named in the species tree.

    The forest is read from `forestfile`, or from standard input when it is
    None. For every tree, the nodes present in `tree.info` but absent from
    `tree.data` whose 'taxon_name' belongs to `phyltree.allNames` are kept,
    and the tree is thinned with `thin_prottree`. A tree with a new root is
    fixed with `fix_thinned_dups` and printed from that root; otherwise it
    is discarded with a warning.
    """
    phyltree = myPhylTree.PhylogeneticTree(phyltreefile)

    source = stdin if forestfile is None else forestfile
    for tree in myProteinTree.loadTree(source):
        candidates = set(tree.info).difference(tree.data)
        keptleaves = {
            leaf for leaf in candidates
            if tree.info[leaf]['taxon_name'] in phyltree.allNames
        }
        newroot, _ = thin_prottree(tree, tree.root, 0, keptleaves)
        if newroot is None:
            logger.warning('Discard tree %d', tree.root)
            continue
        fix_thinned_dups(phyltree, tree, newroot)
        tree.printTree(stdout, newroot)

Exemple #16

0

Afficher le fichier

def main():
    """Print the species tree either indented or as a newick string.

    With ``fromNewick`` (the default) the tree is printed one node per
    line, indented with tabs, each node followed by its other string
    common names separated by '|'. Otherwise a newick string is printed,
    built from `fileName`, branch lengths and ages.
    """
    opts = myTools.checkArgs([("phylTree.conf", myTools.File)],
                             [("fromNewick", bool, True)], __doc__)

    phylTree = myPhylTree.PhylogeneticTree(opts["phylTree.conf"])

    if opts["fromNewick"]:

        # Returns the phyltree format (with indentation)
        def print_indented(node, depth):
            node = node.replace("*", "")
            aliases = [
                x for x in phylTree.commonNames.get(node, "")
                if isinstance(x, str) and (x != node)
            ]
            names = myFile.myTSV.printLine([node] + aliases, delim="|")
            print(("\t" * depth) + "%s" % names)
            if node in phylTree.items:
                for (child, _) in phylTree.items[node]:
                    print_indented(child, depth + 1)

        print_indented(phylTree.root, 0)

    else:
        # Returns the newick tree
        def to_newick(anc):
            label = phylTree.fileName[anc]
            if anc in phylTree.listSpecies:
                return label
            branches = [
                to_newick(child) + ":" + str(length)
                for (child, length) in phylTree.items[anc]
            ]
            suffix = ")%s|%d" % (label, phylTree.ages[anc])
            return "(" + ",".join(branches) + suffix

        print(to_newick(phylTree.root), ";")

Exemple #17

0

Afficher le fichier

def main():
    """Extract gene families and optionally write them per ancestor.

    `extractGeneFamilies` is run on the loaded protein trees. When the
    ``out:ancGenesFiles`` template is set, one file per key of the
    returned families is written (template filled with
    `phylTree.fileName`), one space-joined family per line.
    """
    # Arguments
    required = [("phylTree.conf", myTools.File),
                ("proteinTree", myTools.File)]
    optional = [("out:ancGenesFiles", str, ""),
                ("reuseNames", bool, False)]
    arguments = myTools.checkArgs(required, optional, __doc__)

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])
    proteinTrees = myProteinTree.loadTree(arguments["proteinTree"])

    result = extractGeneFamilies(phylTree, proteinTrees,
                                 arguments["reuseNames"])
    count, dupCount, geneFamilies = result

    outTemplate = arguments["out:ancGenesFiles"]
    if not outTemplate:
        return

    for (anc, families) in geneFamilies.items():
        # Progress message on stderr, completed once the file is written.
        print("Ecriture des familles de %s ..." % anc, end=' ', file=sys.stderr)
        handle = myFile.openFile(outTemplate % phylTree.fileName[anc], "w")
        for family in families:
            print(" ".join(family), file=handle)
        handle.close()
        print(len(families), "OK", file=sys.stderr)

Exemple #18

0

Afficher le fichier

def main():
    """Cut protein trees at the topmost nodes under ``rootSpecies``.

    Every tree of ``iniTree`` is searched from its root for the first
    nodes whose taxon satisfies `phylTree.isChildOf(taxon, rootSpecies)`.
    Each such node is printed as a tree of its own: a single hit inherits
    the original tree name, otherwise each hit gets the original name plus
    a duplicate suffix. The number of extracted trees is reported on
    stderr.
    """
    arguments = myTools.checkArgs([("phylTree.conf", myTools.File),
                                   ("iniTree", myTools.File),
                                   ("rootSpecies", str)], [], __doc__)

    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    # Returns a list of nodes under the new root species
    #########################################################
    def search(node):
        # `tree` is the loop variable of the forest loop below.
        taxon = tree.info[node]['taxon_name']
        if phylTree.isChildOf(taxon, arguments["rootSpecies"]):
            return [node]
        if node not in tree.data:
            return []
        found = []
        for (child, _) in tree.data[node]:
            found.extend(search(child))
        return found

    nb = 0
    for tree in myProteinTree.loadTree(arguments["iniTree"]):
        subroots = search(tree.root)
        nb += len(subroots)
        if len(subroots) == 1:
            only = subroots[0]
            tree.info[only]["tree_name"] = tree.info[tree.root]["tree_name"]
            myProteinTree.printTree(sys.stdout, tree.data, tree.info, only)
            continue
        for (i, subroot) in enumerate(subroots):
            name = tree.info[tree.root]["tree_name"]
            name = name + myProteinTree.getDupSuffix(i + 1, True)
            tree.info[subroot]["tree_name"] = name
            myProteinTree.printTree(sys.stdout, tree.data, tree.info, subroot)

    print(nb, "extracted trees", file=sys.stderr)

Exemple #19

0

Afficher le fichier

Fichier : generax_speciesrates_plot.py Projet : DyogenIBENS/Phylorgs

def main():
    """Plot the species tree with branches coloured by a rate column.

    Command line: ``speciestreefile ratefile [ratecolumn] [-o OUTFILE]``.
    The rate file is a headerless tab-separated table with columns taxon,
    length, D, L and T, indexed by taxon. The figure is saved to OUTFILE
    when given, otherwise it is shown interactively.
    """
    default_figsize = (15, 10)

    cli = ap.ArgumentParser(description=__doc__)
    for positional in ('speciestreefile', 'ratefile'):
        cli.add_argument(positional)
    cli.add_argument('ratecolumn', nargs='?', default='D')
    cli.add_argument('-o', '--outfile', help='Plot output file')
    opts = cli.parse_args()

    phyltree = myPhylTree.PhylogeneticTree(opts.speciestreefile)
    rate_columns = ['taxon', 'length', 'D', 'L', 'T']
    rates = pd.read_csv(opts.ratefile, sep='\t', names=rate_columns,
                        index_col=0)

    # Without an output file, the 'TkAgg' backend is selected for display.
    if opts.outfile is None:
        plt.switch_backend('TkAgg')

    _lines, _anc_coords, _ = plottree(phyltree,
                                      get_logitems,
                                      phyltree_methods.get_label,
                                      edge_colors=rates[opts.ratecolumn],
                                      edge_cmap='viridis',
                                      label_nodes=True,
                                      label_params={'alpha': 0.7},
                                      age_from_root=True)
    plt.gcf().set_size_inches(default_figsize)

    if not opts.outfile:
        plt.show()
    else:
        plt.savefig(opts.outfile, bbox_inches=False)

Exemple #20

0

Afficher le fichier

Fichier : extractGeneEvents.py Projet : DyogenIBENS/ToolsDyogen_py3

#!/usr/bin/env python3
"""
Extrait (des genomes reels) la liste des evenements de
duplications/pertes/gains sur chaque branche de l'arbre
"""

from LibsDyogen import myMaths, myTools, myGenomes, myPhylTree

# Command-line arguments; the module docstring is passed as the usage text.
# `myTools.File` replaces the Python 2 builtin `file`, which no longer exists
# under the python3 interpreter named in the shebang.
arguments = myTools.checkArgs([("phylTree.conf", myTools.File)],
                              [("rootSpecies", str, ""),
                               ("genesFile", str, ""),
                               ("ancGenesFile", str, "")], __doc__)

# Species tree shared by the functions below.
phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])


@myTools.memoize
def getGenome(e):
    """Load the genome of `e`.

    The file name comes from the ``genesFile`` template when `e` is in
    `phylTree.listSpecies`, and from ``ancGenesFile`` otherwise, filled
    with `phylTree.fileName[e]`.
    """
    option = "genesFile" if e in phylTree.listSpecies else "ancGenesFile"
    return myGenomes.Genome(arguments[option] % phylTree.fileName[e])


def transformName(esp, xxx_todo_changeme):
    """Return the first name of the gene at position `(c, i)` in `esp`.

    `xxx_todo_changeme` is the `(c, i)` pair used as
    ``getGenome(esp).lstGenes[c][i]``.
    """
    outer_key, inner_index = xxx_todo_changeme
    genome = getGenome(esp)
    gene = genome.lstGenes[outer_key][inner_index]
    return gene.names[0]


def do(node):

Exemple #21

0

Afficher le fichier

def iter_from_phyltree(treefile, *args, **kwargs):
    """Yield a single `PhylogeneticTree` built from `treefile`.

    Extra positional and keyword arguments are forwarded to the
    constructor. `LibsDyogen` is imported only when the generator is first
    advanced.
    """
    from LibsDyogen import myPhylTree
    phyltree = myPhylTree.PhylogeneticTree(treefile, *args, **kwargs)
    yield phyltree

Exemple #22

0

Afficher le fichier

    )
# Several statistic names share the parameter set of an existing entry.
stat_params['alfsahmmc'] = stat_params['al']
stat_params['alfsa'] = stat_params['al']
stat_params['codemlfsahmmc'] = stat_params['cl']
stat_params['codemlfsa'] = stat_params['cl']
stat_params['codeml'] = stat_params['cl']
stat_params['cleaningfsa'] = stat_params['cleaning']

# Names of the per-dataset parameters.
dataset_params = [
    "freq_null_dist", "freq_null_t", "freq_null_dS", "freq_null_dN",
    "null_dist_before", "null_t_before", "null_dS_before", "null_dN_before",
    "null_dist_after", "null_t_after", "null_dS_after", "null_dN_after",
]
# One '<measure>_rate' name per measure, then one '<measure>_rate_std'.
rate_params = [
    template % measure
    for template in ('%s_rate', '%s_rate_std')
    for measure in MEASURES
]
# dS-only counterparts of the two lists above.
dataset_params_dS = [
    'freq_null_dS',
    'null_dS_before',
    'null_dS_after',
]
rate_params_dS = [
    'dS_rate_local',
    'dS_rate_std_local',
    'dS_rate_nonlocal',
    'dS_rate_std_nonlocal',
]


# Data directory under the user's home; every path below is relative to it.
workspace = Path.home() / 'ws7'
phyltree = myPhylTree.PhylogeneticTree(str(workspace / 'DUPLI_data93/PhylTree.TimeTree201901.Ensembl93-like.goodQual.nwk'))
# Headerless, whitespace-separated table indexed by taxon. The separator is
# a raw string so that `\s` is not read as an (invalid) string escape.
timetree_ages_CI = pd.read_csv(str(workspace / 'databases/timetree/Primates_conf-int_201909.txt'),
                       sep=r'\s+', header=None, index_col=0,
                       names=['taxon', 'timetree_CI95_inf', 'timetree_CI95_sup'])
# Tab-separated table with a header row; empty cells are kept as strings
# instead of being converted to NaN.
dosreis_ages_CI = pd.read_csv(
        str(workspace / 'databases/DosReis2018_Primates-timescale/Primates_dates.tsv'),
        sep='\t', header=0, index_col=0, keep_default_na=False)

ordered_simii_anc = ['Platyrrhini',
                     'Cebidae',
                     'Catarrhini',
                     'Cercopithecidae',
                     'Cercopithecinae',
                     'Papionini',
                     'Macaca',
                     'Hominoidea',

Exemple #23

0

Afficher le fichier

Fichier : mkODS.py Projet : DyogenIBENS/ToolsDyogen_py3

def main():
    """Summarise block-length statistics per ancestor for several runs.

    For every directory of ``dirList`` and every ancestor of the species
    tree, the second tab-separated column of the ``diagsFile`` file is read
    as a block length. A row of statistics is built per ancestor, and the
    rows are either printed as TSV on standard output (no ``outputODS``) or
    saved as an OpenDocument spreadsheet with one sheet per directory, one
    sheet per ancestor and a final "Summary" sheet.
    """
    # Arguments
    arguments = myTools.checkArgs( \
        [("phylTree.conf", myTools.File), ("dirList", myTools.FileList(1))], \
        [("diagsFile", str, "diags/integr/diags.%s.list.bz2"), ("outputODS", str, "")], \
        __doc__ \
        )

    # The phylogenetic tree
    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    # Ancestors only, in alphabetical order.
    # except KeyError:
    lstEspeces = sorted(set(phylTree.listAncestr))

    allCutoff = arguments["dirList"]

    # Column titles of a data row, after the "Ancestor" and "Age (My)" columns.
    titles = [
        "AncGenes", "Blocks", "Genes in blocks", "%Cov", "NbInt", "%CovInt",
        "Min", "25%", "50%", "75%", "N75", "N50", "N25", "Max", "Mean",
        "LongBlocks"
    ]

    alldata = {}  # directory -> {ancestor -> data row}
    alldiff = {}  # NOTE(review): never filled in this function.
    allEvents = []

    # `allEvents` ends up as a plain copy of the directory list.
    for cutoff in allCutoff:
        # allEvents.append(cutoff.replace(".refine32-all.fuseSingletons-all.halfInsert-all.groups","").replace("denovo-",""))
        allEvents.append(cutoff)
    for events in allEvents:

        print(events, "...", end=' ', file=sys.stderr)

        # Collect the block-length data
        alldata[events] = data = {}
        for e in lstEspeces:
            # print >> sys.stderr, e, "...",
            f = myFile.openFile(
                events + "/" + (arguments["diagsFile"] % phylTree.fileName[e]),
                "r")
            lst = []  # lengths of the blocks of at least 2

            sing = 0  # number of entries of length below 2
            tot = 0  # sum of all lengths
            interv = 0  # sum of (length - 1) over the blocks of at least 2
            for l in f:
                x = int(l.split("\t")[1])
                tot += x
                if x >= 2:
                    lst.append(x)
                    interv += (x - 1)
                else:
                    sing += 1
            f.close()

            # Row layout: name, age, total, number of blocks, total minus
            # singletons, percentage of the total that is not singleton,
            # intervals, percentage of intervals.
            # NOTE(review): the constant 20 in the last percentage is not
            # explained here, and `tot` equal to 0 or 20 divides by zero --
            # confirm the intent.
            data[e] = [
                e, phylTree.ages[e], tot,
                len(lst), tot - sing, (100. * (tot - sing)) / tot, interv,
                (100. * interv) / (tot - 20.)
            ]
            # Append the summary statistics, minus their last two values.
            data[e].extend(myMaths.myStats.valSummary(lst)[:-2])

            # Sort the list of blocks by block size.
            lstSort = list(lst)
            lstSort.sort()
            # print  >> sys.stderr, lst
            # Count how many of the largest blocks are needed for their
            # cumulated length to reach 75% of (tot - sing).
            nbBlock = 0
            ValKaryo75 = (tot - sing) * 75 / 100
            Karyo75 = 0
            while Karyo75 < ValKaryo75:
                tmp = lstSort.pop()  # largest remaining block
                Karyo75 += tmp
                nbBlock += 1

            data[e].append(nbBlock)
            print(e, "...", nbBlock, "...", end=' ', file=sys.stderr)
        if events == allEvents[0]:
            # NOTE(review): `ref` is assigned but not read in this function.
            ref = data

        print("OK", file=sys.stderr)

    if arguments["outputODS"] == "":
        # Plain-text output: one TSV table per directory.
        for events in allEvents:
            print(events, file=sys.stdout)
            print(myFile.myTSV.printLine(["Ancestor", "Age (My)"] + titles))
            for e in lstEspeces:
                print(myFile.myTSV.printLine(alldata[events][e]))
        # NOTE(review): this test sits outside the loop above (so `events` is
        # the last directory) and `alldiff` is always empty here, so this
        # branch does not run as written.
        if events in alldiff:
            print(
                myFile.myTSV.printLine(
                    ["Ancestor", "Age (My)", "%Useful Gene Loss"] + titles))
            for e in lstEspeces:
                print(myFile.myTSV.printLine(alldiff[events][e]))

    else:
        # Spreadsheet output; the ODF libraries are only imported when needed.
        import odf.opendocument
        from odfpy_datatable import DataTable

        textdoc = odf.opendocument.OpenDocumentSpreadsheet()

        for events in allEvents:
            # valevents = events.split("/")[-1]
            valevents = events
            # First table with the raw statistics
            val = [["Ancestor", "Age (My)"] + titles]
            for e in lstEspeces:
                val.append(alldata[events][e])

            table = DataTable(val)
            table.datasourcehaslabels = "both"
            t = table()
            t.setAttribute('name', valevents)
            textdoc.spreadsheet.addElement(t)

        # Table specific to one ancestor
        for esp in lstEspeces:
            # continue
            val = [["events"] + titles]
            for events in allEvents:
                # valevents = events.split("/")[-1]
                valevents = events
                # Drop the name and age columns of the data row.
                val.append([valevents] + alldata[events][esp][2:])

            table = DataTable(val)
            table.datasourcehaslabels = "both"
            t = table()
            t.setAttribute('name', esp)
            textdoc.spreadsheet.addElement(t)

        # Final summary
        # Each section below extracts one column of the data rows by index;
        # presumably the index is the column's position in the header
        # ["Ancestor", "Age (My)"] + titles -- verify against the length of
        # `valSummary(...)[:-2]`.

        val = [["N50"] + ["events"] + [esp for esp in lstEspeces]]
        for events in allEvents:
            # valevents = events.split("/")[-1]
            valevents = events
            val.append([""] + [valevents] +
                       [alldata[events][e][13] for e in lstEspeces])

        val.append(["Mean"] + ["events"] + [esp for esp in lstEspeces])
        for events in allEvents:
            # valevents = events.split("/")[-1]
            valevents = events
            val.append([""] + [valevents] +
                       [alldata[events][e][16] for e in lstEspeces])

        val.append(["NbBlocks"] + ["events"] + [esp for esp in lstEspeces])
        for events in allEvents:
            # valevents = events.split("/")[-1]
            valevents = events
            val.append([""] + [valevents] +
                       [alldata[events][e][3] for e in lstEspeces])
        val.append(["MaxLength"] + ["events"] + [esp for esp in lstEspeces])
        for events in allEvents:
            # valevents = events.split("/")[-1]
            valevents = events
            val.append([""] + [valevents] +
                       [alldata[events][e][15] for e in lstEspeces])
        val.append(["LongBlocks"] + ["events"] + [esp for esp in lstEspeces])
        for events in allEvents:
            # valevents = events.split("/")[-1]
            valevents = events
            val.append([""] + [valevents] +
                       [alldata[events][e][17] for e in lstEspeces])

        table = DataTable(val)
        table.datasourcehaslabels = "both"
        t = table()
        t.setAttribute('name', "Summary")
        textdoc.spreadsheet.addElement(t)

        textdoc.save(arguments["outputODS"])

Exemple #24

0

Afficher le fichier

def main():
    """Collect block-length statistics per ancestor for several result directories.

    For each directory of ``dirList`` and each ancestor of the species tree,
    the block file ``<directory>/<diagsFile % ancestor file name>`` is read
    (second tab-separated column = block length) and summarised.  The first
    directory is the reference against which differences are computed.

    Results are printed as TSV on stdout, or written as an OpenDocument
    spreadsheet when ``outputODS`` is given.
    """
    # Arguments
    arguments = myTools.checkArgs(
        [("phylTree.conf", myTools.File), ("dirList", myTools.FileList(1))],
        [("diagsFile", str, "diags/integr/final/anc/diags.%s.list.bz2"), ("outputODS", str, "")],
        __doc__
    )

    # The phylogenetic tree
    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    def byAgeDesc(names):
        # Oldest ancestors first.
        return sorted(names, key=lambda e: phylTree.ages[e], reverse=True)

    # List of ancestors in a curated display order; falls back to the
    # alphabetical order when the tree lacks one of the hard-coded taxa.
    todo = set(phylTree.listAncestr)
    try:
        # The species name was previously spelled "H**o sapiens" (censored
        # text), which always raised KeyError and forced the fallback.
        l1 = phylTree.dicLinks["Euteleostomi"]["Homo sapiens"][:-1]
        todo.difference_update(l1)
        l2 = phylTree.dicLinks["Glires"]["Murinae"]
        todo.difference_update(l2)
        l3 = byAgeDesc([e for e in todo if phylTree.isChildOf(e, "Mammalia")])
        todo.difference_update(l3)
        l4 = byAgeDesc([e for e in todo if phylTree.isChildOf(e, "Clupeocephala")])
        todo.difference_update(l4)
        l5 = byAgeDesc([e for e in todo if phylTree.isChildOf(e, "Amniota")])
        todo.difference_update(l5)
        l6 = byAgeDesc(todo)
        lstEspeces = l6 + l5 + l4 + l1 + l3 + l2
    except KeyError:
        lstEspeces = sorted(phylTree.listAncestr)

    allCutoff = arguments["dirList"]

    titles = ["AncGenes", "Blocks", "Genes in blocks", "%Cov", "NbInt", "%CovInt", "Min", "25%", "50%", "75%", "N75", "N50",
              "N25", "Max", "Mean"]

    alldata = {}
    alldiff = {}

    for cutoff in allCutoff:

        print(cutoff, "...", end=' ', file=sys.stderr)

        # Load the block lengths of every ancestor
        alldata[cutoff] = data = {}
        for e in lstEspeces:

            f = myFile.openFile(cutoff + "/" + (arguments["diagsFile"] % phylTree.fileName[e]), "r")
            lst = []     # lengths of the blocks of at least 2 genes
            sing = 0     # number of entries shorter than 2 (singletons)
            tot = 0      # sum of all lengths
            interv = 0   # number of intervals inside blocks
            try:
                for line in f:
                    x = int(line.split("\t")[1])
                    tot += x
                    if x >= 2:
                        lst.append(x)
                        interv += (x - 1)
                    else:
                        sing += 1
            finally:
                # Release the handle even when a line cannot be parsed.
                f.close()

            # NOTE(review): the constant 20 in the interval coverage is not
            # explained in this file - confirm its meaning.
            data[e] = [e, phylTree.ages[e], tot, len(lst), tot - sing, (100. * (tot - sing)) / tot, interv,
                       (100. * interv) / (tot - 20.)]
            data[e].extend(myMaths.myStats.valSummary(lst)[:-2])

        if cutoff == allCutoff[0]:
            ref = data

        # Differences with the reference (the first two columns, name and
        # age, are kept unchanged).
        alldiff[cutoff] = diff = {}
        for e in lstEspeces:
            newdata = [(x - ref[e][i] if i >= 2 else x) for (i, x) in enumerate(data[e])]
            newdata.insert(2, 100 * (1. - float(newdata[4]) / newdata[2]) if newdata[2] != 0 else None)
            diff[e] = newdata
        print("OK", file=sys.stderr)

    if arguments["outputODS"] == "":
        for cutoff in allCutoff:
            print(myFile.myTSV.printLine(["Ancestor", "Age (My)"] + titles))
            for e in lstEspeces:
                print(myFile.myTSV.printLine(alldata[cutoff][e]))
            # Printed for every directory (as in the ODS output) rather than
            # only for the last value left in the loop variable.
            if cutoff in alldiff:
                print(myFile.myTSV.printLine(["Ancestor", "Age (My)", "%Useful Gene Loss"] + titles))
                for e in lstEspeces:
                    print(myFile.myTSV.printLine(alldiff[cutoff][e]))

    else:
        import odf.opendocument
        from odfpy_datatable import DataTable

        textdoc = odf.opendocument.OpenDocumentSpreadsheet()

        def addSheet(name, rows):
            # Append `rows` as a new sheet called `name`.
            table = DataTable(rows)
            table.datasourcehaslabels = "both"
            sheet = table()
            sheet.setAttribute('name', name)
            textdoc.spreadsheet.addElement(sheet)

        for cutoff in allCutoff:
            valCutoff = cutoff.split("/")[-1]

            # First table: raw statistics
            val = [["Ancestor", "Age (My)"] + titles]
            for e in lstEspeces:
                val.append(alldata[cutoff][e])
            addSheet(valCutoff, val)

            if cutoff in alldiff:
                # Second table: differences with the reference
                val = [["Ancestor", "Age (My)", "%Useful Gene Loss"] + titles]
                for e in lstEspeces:
                    val.append(alldiff[cutoff][e])
                addSheet("d" + valCutoff, val)

        # One table per ancestor
        for esp in lstEspeces:
            val = [["cutoff"] + titles]
            for cutoff in allCutoff:
                valCutoff = cutoff.split("/")[-1]
                val.append([valCutoff] + alldata[cutoff][esp][2:])
            addSheet(esp, val)

        # Final summary
        val = [["cutoff", "Mean gain", "Median gain", "N50 gain", "%Cov gain", "%CovInt gain", "BlockLength %gain (mean)",
                "BlockLength %gain (Median)", "BlockLength %gain (N50)", "Cov %gain", "CovInt %gain"]]
        summaryCols = [17, 12, 14, 6, 8]
        for cutoff in allCutoff:
            valCutoff = cutoff.split("/")[-1]
            # Absolute gains: mean over the ancestors of the diff columns.
            gains = [myMaths.myStats.mean([alldiff[cutoff][e][i] for e in lstEspeces])
                     for i in summaryCols]
            # Relative gains; the raw rows lack the inserted column, hence i - 1.
            pctGains = [
                myMaths.myStats.mean([
                    100 * float(alldata[cutoff][e][i - 1] - alldata[allCutoff[0]][e][i - 1])
                    / alldata[allCutoff[0]][e][i - 1]
                    for e in lstEspeces])
                for i in summaryCols]
            val.append([valCutoff] + gains + pctGains)
        addSheet("cutoff", val)

        textdoc.save(arguments["outputODS"])

Exemple #25

0

Afficher le fichier

def main():
    """Collect block-length statistics per ancestor for several result directories.

    For each directory of ``dirList`` and each ancestor of the species tree,
    the block file ``<directory>/<diagsFile % ancestor file name>`` is read
    (second tab-separated column = block length) and summarised, together
    with the number of largest blocks needed to hold 75% of the genes that
    are in blocks ("LongBlocks").

    Results are printed as TSV on stdout, or written as an OpenDocument
    spreadsheet when ``outputODS`` is given.
    """
    # Arguments (`myTools.File` replaces the Python 2 builtin `file`, which
    # no longer exists in Python 3).
    arguments = myTools.checkArgs(
        [("phylTree.conf", myTools.File), ("dirList", myTools.FileList(1))],
        [("diagsFile", str, "diags/integr/final/anc/diags.%s.list.bz2"), ("outputODS", str, "")],
        __doc__
    )

    # The phylogenetic tree
    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    def byAgeDesc(names):
        # Oldest ancestors first.
        return sorted(names, key=lambda e: phylTree.ages[e], reverse=True)

    # List of ancestors in a curated display order; falls back to the
    # alphabetical order when the tree lacks one of the hard-coded taxa.
    todo = set(phylTree.listAncestr)
    try:
        # The species name was previously spelled "H**o sapiens" (censored
        # text), which always raised KeyError and forced the fallback.
        l1 = phylTree.dicLinks["Euteleostomi"]["Homo sapiens"][:-1]
        todo.difference_update(l1)
        l2 = phylTree.dicLinks["Glires"]["Murinae"]
        todo.difference_update(l2)
        l3 = byAgeDesc([e for e in todo if phylTree.isChildOf(e, "Mammalia")])
        todo.difference_update(l3)
        l4 = byAgeDesc([e for e in todo if phylTree.isChildOf(e, "Clupeocephala")])
        todo.difference_update(l4)
        l5 = byAgeDesc([e for e in todo if phylTree.isChildOf(e, "Amniota")])
        todo.difference_update(l5)
        l6 = byAgeDesc(todo)
        lstEspeces = l6 + l5 + l4 + l1 + l3 + l2
    except KeyError:
        lstEspeces = sorted(phylTree.listAncestr)

    allCutoff = arguments["dirList"]

    titles = [
        "AncGenes", "Blocks", "Genes in blocks", "%Cov", "NbInt", "%CovInt",
        "Min", "25%", "50%", "75%", "N75", "N50", "N25", "WeigthedAverage",
        "Max", "Mean", "LongBlocks"
    ]

    alldata = {}
    # Differences with the reference are currently not computed, so this
    # stays empty and the corresponding outputs below are inert.
    alldiff = {}
    allEvents = list(allCutoff)

    for events in allEvents:

        print(events, "...", end=' ', file=sys.stderr)

        # Load the block lengths of every ancestor
        alldata[events] = data = {}
        for e in lstEspeces:
            f = myFile.openFile(
                events + "/" + (arguments["diagsFile"] % phylTree.fileName[e]),
                "r")
            lst = []     # lengths of the blocks of at least 2 genes
            sing = 0     # number of entries shorter than 2 (singletons)
            tot = 0      # sum of all lengths
            interv = 0   # number of intervals inside blocks
            try:
                for line in f:
                    x = int(line.split("\t")[1])
                    tot += x
                    if x >= 2:
                        lst.append(x)
                        interv += (x - 1)
                    else:
                        sing += 1
            finally:
                # Release the handle even when a line cannot be parsed.
                f.close()

            # NOTE(review): the constant 20 in the interval coverage is not
            # explained in this file - confirm its meaning.
            data[e] = [
                e, phylTree.ages[e], tot,
                len(lst), tot - sing, (100. * (tot - sing)) / tot, interv,
                (100. * interv) / (tot - 20.)
            ]
            data[e].extend(myMaths.myStats.valSummary2(lst)[:-2])

            # Count how many of the largest blocks are needed to reach 75%
            # of (tot - sing).
            lstSort = sorted(lst)
            nbBlock = 0
            ValKaryo75 = (tot - sing) * 75 / 100
            Karyo75 = 0
            # The emptiness guard prevents pop() from raising on an
            # exhausted list.
            while Karyo75 < ValKaryo75 and lstSort:
                Karyo75 += lstSort.pop()
                nbBlock += 1

            data[e].append(nbBlock)
            print(e, "...", nbBlock, "...", end=' ', file=sys.stderr)
        print("OK", file=sys.stderr)

    if arguments["outputODS"] == "":
        for events in allEvents:
            print(events, file=sys.stdout)
            print(myFile.myTSV.printLine(["Ancestor", "Age (My)"] + titles))
            for e in lstEspeces:
                print(myFile.myTSV.printLine(alldata[events][e]))
            # Checked per directory instead of once after the loop with the
            # leftover loop variable.
            if events in alldiff:
                print(
                    myFile.myTSV.printLine(
                        ["Ancestor", "Age (My)", "%Useful Gene Loss"] + titles))
                for e in lstEspeces:
                    print(myFile.myTSV.printLine(alldiff[events][e]))

    else:
        import odf.opendocument
        from odfpy_datatable import DataTable

        textdoc = odf.opendocument.OpenDocumentSpreadsheet()

        def addSheet(name, rows):
            # Append `rows` as a new sheet called `name`.
            table = DataTable(rows)
            table.datasourcehaslabels = "both"
            sheet = table()
            sheet.setAttribute('name', name)
            textdoc.spreadsheet.addElement(sheet)

        # First tables: raw statistics, one sheet per directory
        for events in allEvents:
            val = [["Ancestor", "Age (My)"] + titles]
            for e in lstEspeces:
                val.append(alldata[events][e])
            addSheet(events, val)

        # One table per ancestor
        for esp in lstEspeces:
            val = [["events"] + titles]
            for events in allEvents:
                val.append([events] + alldata[events][esp][2:])
            addSheet(esp, val)

        # Data for the curves: one section per statistic, each made of a
        # header row followed by one row per directory.
        # (section label, column in the raw rows, optional conversion)
        sections = [
            ("AncGenes", 2, None),
            ("WeigthedAverage", 15, int),
            ("N50", 13, None),
            ("Mean", 17, None),
            ("NbBlocks", 3, None),
            ("MaxLength", 16, None),
            ("LongBlocks", 18, None),
        ]
        val = []
        for (label, col, convert) in sections:
            val.append([label] + ["events"] + list(lstEspeces))
            for events in allEvents:
                row = [alldata[events][e][col] for e in lstEspeces]
                if convert is not None:
                    row = [convert(x) for x in row]
                val.append([""] + [events] + row)
        addSheet("Summary", val)

        textdoc.save(arguments["outputODS"])

Exemple #26

0

Afficher le fichier

Fichier : ancGenesFromGeneTrees.py Projet : DyogenIBENS/LibsDyogen_py3

import sys
import collections

from LibsDyogen import myFile
from LibsDyogen import myTools
from LibsDyogen import myPhylTree
from LibsDyogen import myProteinTree

# Command-line arguments: two mandatory files (the species tree and the
# forest of gene trees) and two options ("out:ancGenes" defaults to "",
# "reuseNames" defaults to False).
arguments = myTools.checkArgs([("speciesTree", myTools.File),
                               ("geneTreeForest", myTools.File)],
                              [("out:ancGenes", str, ""),
                               ("reuseNames", bool, False)], __doc__)

# Species tree built from the "speciesTree" argument.
speciesTree = myPhylTree.PhylogeneticTree(arguments["speciesTree"])
# Duplication counter: per-name count, starting at 0 for unseen names.
dupCount = collections.defaultdict(int)


def futureName(name, dup):
    """Return `name`, suffixed when `dup` is at least 2.

    Each suffixed call increments the module-level `dupCount` entry of
    `name`; the suffix comes from `myProteinTree.getDupSuffix` called with
    the updated count.
    """
    if not dup >= 2:
        # Nothing to disambiguate: keep the name as is.
        return name

    dupCount[name] += 1
    suffix = myProteinTree.getDupSuffix(dupCount[name], False)
    return name + suffix


def getRoots(node, previousAnc, lastWrittenAnc):
    """finds out the roots in gene families"""

Exemple #27

0

Afficher le fichier

Fichier : getNbComparisons.py Projet : DyogenIBENS/ToolsDyogen_py3

def main():
    """Print, for each ancestor, the number of pairwise species comparisons.

    For every ancestor of the species tree one TSV line is printed with: the
    ancestor name, the total number of comparisons, the ingroup/outgroup and
    ingroup/ingroup comparisons, the age, three block statistics and the
    number of comparisons divided by the age.

    The block statistics are read from the file ``diags % ancestor file
    name`` (second tab-separated column, values > 1 only); when the
    ``diags`` option is empty they are reported as "NONE".
    """
    # Arguments
    arguments = myTools.checkArgs(
        [("phylTree.conf", myTools.File)],
        [("diags", str, ""), ("colNames", bool, False)],
        __doc__
    )

    # The phylogenetic tree
    phylTree = myPhylTree.PhylogeneticTree(arguments["phylTree.conf"])

    if arguments["colNames"]:
        print(myFile.myTSV.printLine([
            "Ancestor", "NbComp", "Nb(In/Out)Comp", "Nb(In/In)Comp", "Age",
            "MeanSize_OfBlocks", "N50Size_OfBlocks", "WASize_OfBlocks",
            "NbComp/Age"
        ]),
              file=sys.stdout)

    for anc in phylTree.listAncestr:
        # Number of outgroup species
        nb_outgroup = len(phylTree.outgroupSpecies[anc])

        # Number of species below each child branch of the ancestor
        nbInSpec = [len(phylTree.species[x]) for (x, _) in phylTree.items[anc]]

        # All groups compared with each other: the child branches plus the
        # outgroup.
        groupSizes = nbInSpec + [nb_outgroup]

        # Comparisons ingroup species / outgroup species
        compInOut = sum(nb_outgroup * n1 for n1 in nbInSpec)

        # Comparisons ingroup species / ingroup species
        compInIn = sum(n1 * n2
                       for (n1, n2) in itertools.combinations(nbInSpec, 2))

        nbc = sum(n1 * n2
                  for (n1, n2) in itertools.combinations(groupSizes, 2))

        # Block statistics
        if arguments["diags"] != "":
            r = []
            f = myFile.openFile(arguments["diags"] % phylTree.fileName[anc],
                                "r")
            try:
                for line in f:
                    x = int(line.split("\t")[1])
                    if x > 1:
                        r.append(x)
            finally:
                # Release the handle even when a line cannot be parsed.
                f.close()
            totalStat = myMaths.myStats.valSummary2(r)
            meanSize = totalStat[9]
            n50Size = totalStat[6]
            waSize = int(totalStat[7])
        else:
            # No block file requested: emit placeholders instead of indexing
            # an empty statistics list (which raised IndexError).
            meanSize = n50Size = waSize = "NONE"

        print(
            myFile.myTSV.printLine([
                anc, nbc, compInOut, compInIn, phylTree.ages[anc],
                meanSize, n50Size, waSize,
                float(nbc) / phylTree.ages[anc]
            ]))

Exemple #28

0

Afficher le fichier

def main():
    """Print every taxon of the tree with its age, one per line.

    Lines are ordered by increasing age, ties being broken by taxon name.
    """
    args = myTools.checkArgs([('phyltree', myTools.File)], [], __doc__)
    phyltree = myPhylTree.PhylogeneticTree(args['phyltree'])

    entries = list(phyltree.ages.items())
    entries.sort(key=lambda pair: (pair[1], pair[0]))

    for (taxon, age) in entries:
        formattedAge = '%7g' % age
        print(taxon + '\t' + formattedAge)

Exemple #29

0

Afficher le fichier

Fichier : test_markLowScoreDup.py Projet : DyogenIBENS/ToolsDyogen_py3

# Context
# Species tree used by the tests: each key is a parent taxon mapped to its
# list of (child, value) pairs.
# NOTE(review): the second element of each pair is presumably a branch
# length - confirm against myPhylTree.
# NOTE(review): 'H**o sapiens' looks like a censored spelling of
# 'Homo sapiens'; left untouched here because it must match the taxon names
# used in the test data below.
phyl_items = {
    'Hominoidea': [('Hominidae', 1), ('Nomascus leucogenys', 1)],
    'Hominidae': [('Homininae', 1), ('Pongo abelii', 1)],
    'Homininae': [('HomoPan', 1), ('Gorilla gorilla', 1)],
    'HomoPan': [('Pan', 1), ('H**o sapiens', 1)],
    'Pan': [('Pan troglodytes', 1), ('Pan paniscus', 1)]
}
# Identity mapping covering every taxon of the tree (parents and children).
phyl_officialNames = {
    name: name
    for name in (set(phyl_items)
                 | set(t for v in phyl_items.values() for t, _ in v))
}

# Tree built directly from the in-memory items, with 'Hominoidea' given as
# the second element of the tuple, then reinitialised.
phyltree = myPhylTree.PhylogeneticTree(
    (phyl_items, 'Hominoidea', phyl_officialNames),
    skipInit=False,  # No effect if giving items
    stream=stderr)
phyltree.reinitTree(stream=stderr)

data = [
    myProteinTree.ProteinTree(data={1: [(2, 0.1), (3, 0.1)]},
                              info={
                                  1: {
                                      'Duplication': 2,
                                      'taxon_name': 'H**o sapiens'
                                  },
                                  2: {
                                      'Duplication': 0,
                                      'taxon_name': 'H**o sapiens'
                                  },
                                  3: {

Exemple #30

0

Afficher le fichier

def processData(data):
    """Parse one tree from the text `data` and print it node by node.

    The tree is loaded with `myPhylTree.PhylogeneticTree` and walked from its
    root.  For each node an ``id`` line and an ``info`` line are printed,
    and each child is preceded by a ``len`` line; children are indented with
    one more tab than their parent.

    The module-level counters `nodeid` and `ntree` are incremented for each
    printed node and each printed tree respectively.
    """

    tree = myPhylTree.PhylogeneticTree(io.StringIO(data))

    def printTree(indent, node):
        global nodeid, ntree
        print("%sid\t%d" % (indent, nodeid))
        nodeid += 1

        tags = tree.info[node]
        # Keys are inserted in the same order as before, since the dict is
        # printed verbatim below.
        info = {}

        if "B" in tags:
            info["Bootstrap"] = int(tags["B"])

        # `str.capitalize()` replaces `string.capitalize()`, which was
        # removed from the `string` module in Python 3.
        if "D" in tags and tags["D"] == "N":
            # Not a duplication
            info["Duplication"] = 0
            if "E" in tags:
                # NOTE(review): this branch splits on "=-$" whereas all the
                # others split on "=$-" - confirm which one is intended.
                info["taxon_lost"] = tags["E"].split("=-$")[1].split("-")
            if "S" in tags:
                info["taxon_name"] = tags["S"].capitalize()

        elif "D" in tags and tags["D"] == "Y":
            # Duplication, flagged as dubious when DD is "Y"
            if "DD" in tags and tags["DD"] == "Y":
                info["Duplication"] = 1
                info["dubious_duplication"] = 1
            else:
                info["Duplication"] = 2

            if "E" in tags:
                info["taxon_lost"] = tags["E"].split("=$-")[1].split("-")

            if "S" in tags:
                info["taxon_name"] = tags["S"].capitalize()
                # The score is only recorded when the taxon is known.
                if "SIS" in tags:
                    info["duplication_confidence_score"] = float(
                        tags["SIS"]) / 100

        elif "E" in tags:
            info["taxon_lost"] = tags["E"].split("=$-")[1].split("-")

            if "S" in tags:
                info["Duplication"] = 0
                info["taxon_name"] = tags["S"].capitalize()

        elif "S" in tags:
            info["Duplication"] = 0
            info["taxon_name"] = tags["S"].capitalize()

        if indent == "":
            # Root of the tree
            info["tree_name"] = "TreeBeST%06d" % ntree
            ntree += 1

        if node not in tree.items:
            # Leaf: the node name is used unchanged as the gene name (the
            # "_" and what follows are deliberately kept).
            info["gene_name"] = node

        print("%sinfo\t%s" % (indent, info))

        if node in tree.items:
            indent = indent + "\t"
            for (e, l) in tree.items[node]:
                print("%slen\t%g" % (indent, l))
                printTree(indent, e)

    printTree("", tree.root)