Python DataSet.read 예제들, dendropy.DataSet.read Python 예제들

예제 #1

0

파일 보기

 def phylogeneticTreeFromFile(self, treefile, file_format):
     """Read ``treefile`` and return its first tree as a ``PhylogeneticTree``.

     The file is parsed into a fresh ``Dataset`` using ``file_format`` as the
     schema. The first tree of the first tree list is wrapped in a
     ``PhylogeneticTree``, ``calc_splits()`` is called on the wrapper, and
     ``delete_outdegree_one`` is applied to its underlying ``_tree``.

     Args:
         treefile: Path of the tree file to open.
         file_format: Schema name passed to ``Dataset.read``.

     Returns:
         The ``PhylogeneticTree`` built from the first parsed tree.

     Note:
         ``tree_lists[0][0]`` is indexed unconditionally, so the file is
         assumed to contain at least one tree.
     """
     dataset = Dataset()
     # Use a context manager so the handle is closed as soon as parsing is
     # done (or fails), instead of leaking it until garbage collection.
     # NOTE: the 'U' mode flag is kept for behavioral parity; it was removed
     # in Python 3.11.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.tree_lists[0][0]
     tree = PhylogeneticTree(dendropy_tree)
     tree.calc_splits()
     delete_outdegree_one(tree._tree)
     return tree

예제 #2

0

파일 보기

파일: test_phylogenetic_tree.py 프로젝트: faircloth-lab/sate-core

 def phylogeneticTreeFromFile(self, treefile, file_format):
     """Parse ``treefile`` and wrap its first tree in a ``PhylogeneticTree``.

     A new ``Dataset`` reads the file with ``file_format`` as the schema.
     The first tree of the first tree list is handed to ``PhylogeneticTree``;
     the wrapper then has ``calc_splits()`` called on it and its ``_tree`` is
     passed through ``delete_outdegree_one`` before being returned.

     Args:
         treefile: Path of the tree file to open.
         file_format: Schema name passed to ``Dataset.read``.

     Returns:
         The resulting ``PhylogeneticTree``.

     Note:
         The lookup ``tree_lists[0][0]`` is unguarded, so at least one tree
         is assumed to be present in the file.
     """
     dataset = Dataset()
     # The original left the file object open; the ``with`` block guarantees
     # it is closed even when parsing raises.
     # NOTE: the 'U' mode flag is kept for behavioral parity; it was removed
     # in Python 3.11.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.tree_lists[0][0]
     tree = PhylogeneticTree(dendropy_tree)
     tree.calc_splits()
     delete_outdegree_one(tree._tree)
     return tree

예제 #3

0

파일 보기

파일: tree.py 프로젝트: sfeng1030/Sepp

 def read_tree_from_file(self, treefile, file_format):
     """Load the first tree from ``treefile`` into this object.

     The file is parsed into a fresh ``Dataset`` using ``file_format`` as the
     schema. The first tree of the first trees block is stored on
     ``self._tree`` and ``self.n_leaves`` is refreshed from
     ``self.count_leaves()``.

     Args:
         treefile: Path of the tree file to open.
         file_format: Schema name passed to ``Dataset.read``.

     Note:
         ``trees_blocks[0][0]`` is indexed unconditionally, so the file is
         assumed to contain at least one tree.
     """
     dataset = Dataset()
     # Use a context manager so the handle is closed as soon as parsing is
     # done (or fails), instead of leaking it until garbage collection.
     # NOTE: the 'U' mode flag is kept for behavioral parity; it was removed
     # in Python 3.11.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.trees_blocks[0][0]
     self._tree = dendropy_tree
     self.n_leaves = self.count_leaves()

예제 #4

0

파일 보기

파일: treedepth.py 프로젝트: nuin/DendroPy

    parser.add_option(
        "--generate-test-tree",
        action="store_true",
        dest="gen_test_tree",
        help="Generate ultrametric tree to test function.",
    )
    (options, args) = parser.parse_args()

    if options.gen_test_tree:
        simple_test_tree()
        exit(0)

    if len(args) > 1:
        sys.exit("At most one argument (a newick tree string with branch lengths) can be specified")
    if len(args) == 1:
        s = args[0]
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    prec = options.prec
    d = DataSet()

    d.read(s, schema=options.schema, rooted=True)
    if len(d.trees_blocks) == 0:
        sys.exit("No trees found in file.")
    tree = d.trees_blocks[0][0]
    tree.calc_node_ages(attr_name="age", check_prec=options.prec)

    sys.stdout.write("%f\n" % tree.seed_node.age)

예제 #5

0

파일 보기

from cStringIO import StringIO
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token

if __name__ == '__main__':
    '''
    This method converts Newick to Nexon by splitting up the mod_name made in
    taxonomyToNewick.py to 'name' and 'ottolid'. Nexon is later used for TAG's input
    trees. 
    '''
    output = sys.stdout
    fo = open(sys.argv[1], "rU")
    dataset = DataSet()
    try:
        dataset.read(stream=fo, schema="Newick")
    except DataParseError as dfe:
        raise ValueError(str(dfe))
    if len(dataset.taxon_sets) != 1:
        raise ValueError("Expecting one set of taxa in %s" % f)
    if len(dataset.tree_lists) != 1:
        raise ValueError("Expecting one tree in %s" % f)
    taxon_set = dataset.taxon_sets[0]
    tree_list = dataset.tree_lists[0]
    number_of_taxon = len(taxon_set)
    branch_counter = 0
    code_list = [StringIO() for i in taxon_set]
    otu_counter = 0
    node_counter = 0
    edge_counter = 0
    for n, tree in enumerate(tree_list):

예제 #6

0

파일 보기

                      '--schema',
                      dest='schema',
                      type='str',
                      default="newick",
                      help='The format/schema of the input data')
    parser.add_option('-g',
                      '--gordon',
                      dest='gordons',
                      action="store_true",
                      default=False,
                      help="Specify to use the Gordon's strict consensus")
    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")
    schema = options.schema.upper()

    trees = []
    taxon_set = TaxonSet()
    dataset = DataSet(taxon_set=taxon_set)
    if schema == "PHYLIP":
        schema = "NEWICK"
    for f in args:
        fo = open(f, "rU")
        dataset.read(stream=fo, schema=schema)
    for tl in dataset.tree_lists:
        trees.extend(tl)

    o = inplace_strict_consensus_merge(trees,
                                       gordons_supertree=options.gordons)
    sys.stdout.write("%s;\n" % str(o))

예제 #7

0

파일 보기

파일: long_branch_symmdiff.py 프로젝트: BioinformaticsArchive/DendroPy

    try:
        cutoff = int(options.cutoff)
    except ValueError:
        try:
            cutoff = float(options.cutoff)
        except ValueError:
            sys.exit('Expecting the cutoff to be a number found "%s"' % options.cutoff)

    trees = []
    taxon_set = TaxonSet()
    dataset = DataSet(taxon_set=taxon_set)
    if schema == "PHYLIP":
        schema = "NEWICK"
    for f in args:
        fo = open(f, "rU")
        dataset.read(stream=fo, schema=schema)
    for tl in dataset.tree_lists:
        trees.extend(tl)

    sd_mat = long_branch_symmdiff(trees, cutoff)
    o = sys.stdout
    if options.paup:
        o.write("%s\n" % "\t".join(["tree"] + [str(1+i) for i in xrange(len(sd_mat))]))
        for n, row in enumerate(sd_mat):
            o.write("%d\t%s\n" % ((n + 1), "\t".join([str(i) for i in row[:1 + n]])))
    else:
        for row in sd_mat:
            o.write("%s\n" % "\t".join([str(i) for i in row]))

예제 #8

0

파일 보기

파일: pars.py 프로젝트: mza0150/eebprogramming

                      help="Verbose execution mode")

    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")

    if options.verbose:
        _LOG.setLevel(logging.DEBUG)

    tree_index = 0

    try:
        for f in args:
            fo = open(f, "rU")
            dataset = DataSet()
            dataset.read(stream=fo, schema="NEXUS")

            if len(dataset.taxon_sets) != 1:
                raise ValueError("Expecting one set of taxa in %s" % f)
            taxon_set = dataset.taxon_sets[0]

            if len(dataset.tree_lists) != 1:
                raise ValueError("Expecting one tree block in %s" % f)
            tree_list = dataset.tree_lists[0]

            if len(dataset.char_matrices) != 1:
                raise ValueError("Expecting one character matrix in %s" % f)
            char_mat = dataset.char_matrices[0]

            num_char = len(char_mat[0])
            taxon_to_state_set = char_mat.create_taxon_to_state_set_map()

예제 #9

0

파일 보기

파일: newick_to_nexon.py 프로젝트: mtholder/supertree-study

from cStringIO import StringIO
from dendropy import DataSet
from dendropy.utility.error import DataParseError
from dendropy.utility.textutils import escape_nexus_token

if __name__ == '__main__':
    '''
    This method converts Newick to Nexon by splitting up the mod_name made in
    taxonomyToNewick.py to 'name' and 'ottolid'. Nexon is later used for TAG's input
    trees. 
    '''
    output = sys.stdout
    fo = open(sys.argv[1], "rU")
    dataset = DataSet()
    try:
        dataset.read(stream=fo, schema="Newick")
    except DataParseError as dfe:
        raise ValueError(str(dfe))
    if len(dataset.taxon_sets) != 1:
        raise ValueError("Expecting one set of taxa in %s" % f)
    if len(dataset.tree_lists) != 1:
        raise ValueError("Expecting one tree in %s" % f)
    taxon_set = dataset.taxon_sets[0]
    tree_list = dataset.tree_lists[0]
    number_of_taxon = len(taxon_set)
    branch_counter = 0
    code_list = [StringIO() for i in taxon_set]
    otu_counter = 0
    node_counter= 0
    edge_counter=0
    for n,tree in enumerate(tree_list):

예제 #10

0

파일 보기

파일: treedepth.py 프로젝트: wrightaprilm/DendroPy

    parser.add_option("--generate-test-tree",
                      action="store_true",
                      dest="gen_test_tree",
                      help="Generate ultrametric tree to test function.")
    (options, args) = parser.parse_args()

    if options.gen_test_tree:
        simple_test_tree()
        exit(0)

    if len(args) > 1:
        sys.exit(
            "At most one argument (a newick tree string with branch lengths) can be specified"
        )
    if len(args) == 1:
        s = args[0]
    else:
        newick = sys.stdin.read()
        s = StringIO.StringIO(newick)

    prec = options.prec
    d = DataSet()

    d.read(s, schema=options.schema, rooted=True)
    if len(d.trees_blocks) == 0:
        sys.exit("No trees found in file.")
    tree = d.trees_blocks[0][0]
    tree.calc_node_ages(attr_name='age', check_prec=options.prec)

    sys.stdout.write("%f\n" % tree.seed_node.age)

예제 #11

0

파일 보기

파일: tree.py 프로젝트: BioinformaticsArchive/sepp

 def read_tree_from_file(self, treefile, file_format):
     """Parse ``treefile`` and adopt its first tree as this object's tree.

     A new ``Dataset`` reads the file with ``file_format`` as the schema.
     The first tree of the first trees block is assigned to ``self._tree``,
     after which ``self.n_leaves`` is set from ``self.count_leaves()``.

     Args:
         treefile: Path of the tree file to open.
         file_format: Schema name passed to ``Dataset.read``.

     Note:
         The lookup ``trees_blocks[0][0]`` is unguarded, so at least one tree
         is assumed to be present in the file.
     """
     dataset = Dataset()
     # The original left the file object open; the ``with`` block guarantees
     # it is closed even when parsing raises.
     # NOTE: the 'U' mode flag is kept for behavioral parity; it was removed
     # in Python 3.11.
     with open(treefile, 'rU') as tree_stream:
         dataset.read(tree_stream, schema=file_format)
     dendropy_tree = dataset.trees_blocks[0][0]
     self._tree = dendropy_tree
     self.n_leaves = self.count_leaves()

예제 #12

0

파일 보기

파일: sankoff.py 프로젝트: kmiddleton/eebprogramming

    return sankoff(node_list, step_matrix=step_matrix, taxa_to_state_set_map=taxa_to_states)

if __name__ == '__main__':
    from optparse import OptionParser
    parser = OptionParser()
    (options, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit("Expecting a filename as an argument")

    tree_index = 0

    try:
        for f in args:
            fo = open(f, "rU")
            dataset = DataSet()
            dataset.read(stream=fo, schema="NEXUS")

            if len(dataset.taxon_sets) != 1:
                raise ValueError("Expecting one set of taxa in %s" % f)
            taxon_set = dataset.taxon_sets[0]

            if len(dataset.tree_lists) != 1:
                raise ValueError("Expecting one tree block in %s" % f)
            tree_list = dataset.tree_lists[0]

            if len(dataset.char_matrices) != 1:
                raise ValueError("Expecting one character matrix in %s" % f)
            char_mat = dataset.char_matrices[0]

            num_char = len(char_mat[0])
            taxon_to_state_set = char_mat.create_taxon_to_state_set_map()

예제 #13

0

파일 보기

파일: prob_synapo.py 프로젝트: BioinformaticsArchive/DendroPy

        sys.exit('Tree file not found: "%s"' % opts.tree)

    tree_file_objs = [open(f, "rU") for f in tree_filepaths]

    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(os.path.expandvars(opts.output_filepath))
        if not confirm_overwrite(output_fpath, opts.replace):
            sys.exit(1)
        output_dest = open(output_fpath, "w")


    dataset = DataSet()
    ts = dendropy.TaxonSet()
    dataset.read(stream=open(opts.data, 'rU'), schema='NEXUS', taxon_set=ts)
    if len(dataset.char_matrices) != 1:
        sys.exit("Currently the script only supports data files with a single character matrix")
    if len(dataset.tree_lists) > 0:
        sys.exit("Currently the script does not support trees in the data file")
    dataset.read(stream=open(opts.tree, 'rU'), schema='NEXUS', taxon_set=ts)
    if (len(dataset.tree_lists) != 1) or len(dataset.tree_lists[0]) != 1:
        sys.exit("Currently the script only not tree files with a single tree")

    if ts.get_taxon(label=BOGUS_TAXON_LABELS[0]) or ts.get_taxon(label=BOGUS_TAXON_LABELS[1]):
        sys.exit('Give me a break. You really have a taxon named "%s" or "%s" in your data!?\nI refuse to deal with this file.\n' % (BOGUS_TAXON_LABELS[0], BOGUS_TAXON_LABELS[1]))
    tree = dataset.tree_lists[0][0]

    matrix = dataset.char_matrices[0]
    if len(matrix.state_alphabets) != 1:
        sys.exit('Expecting the character matrix to have exactly one "state alphabet". Found %d' % len(matrix.state_alphabets))

예제 #14

0

파일 보기

        sys.exit('Tree file not found: "%s"' % opts.tree)

    tree_file_objs = [open(f, "rU") for f in tree_filepaths]

    if opts.output_filepath is None:
        output_dest = sys.stdout
    else:
        output_fpath = os.path.expanduser(
            os.path.expandvars(opts.output_filepath))
        if not confirm_overwrite(output_fpath, opts.replace):
            sys.exit(1)
        output_dest = open(output_fpath, "w")

    dataset = DataSet()
    ts = dendropy.TaxonSet()
    dataset.read(stream=open(opts.data, 'rU'), schema='NEXUS', taxon_set=ts)
    if len(dataset.char_matrices) != 1:
        sys.exit(
            "Currently the script only supports data files with a single character matrix"
        )
    if len(dataset.tree_lists) > 0:
        sys.exit(
            "Currently the script does not support trees in the data file")
    dataset.read(stream=open(opts.tree, 'rU'), schema='NEXUS', taxon_set=ts)
    if (len(dataset.tree_lists) != 1) or len(dataset.tree_lists[0]) != 1:
        sys.exit("Currently the script only not tree files with a single tree")

    if ts.get_taxon(label=BOGUS_TAXON_LABELS[0]) or ts.get_taxon(
            label=BOGUS_TAXON_LABELS[1]):
        sys.exit(
            'Give me a break. You really have a taxon named "%s" or "%s" in your data!?\nI refuse to deal with this file.\n'